[llvm] [DAG] Lower frem of power-2 using div/trunc/mul+sub (PR #91148)
David Green via llvm-commits
llvm-commits at lists.llvm.org
Tue May 7 05:51:57 PDT 2024
https://github.com/davemgreen updated https://github.com/llvm/llvm-project/pull/91148
>From 196e1fdb818863f73fdcb368c584dbcb0c2b64b2 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Sun, 5 May 2024 21:47:07 +0100
Subject: [PATCH 1/2] [AArch64] Add tests for frem power2 lowering. NFC
---
llvm/test/CodeGen/AArch64/frem-power2.ll | 469 +++++++++++++++++++++++
1 file changed, 469 insertions(+)
create mode 100644 llvm/test/CodeGen/AArch64/frem-power2.ll
diff --git a/llvm/test/CodeGen/AArch64/frem-power2.ll b/llvm/test/CodeGen/AArch64/frem-power2.ll
new file mode 100644
index 0000000000000..8efcca3e5f132
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/frem-power2.ll
@@ -0,0 +1,469 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+
+define float @frem2(float %x) {
+; CHECK-LABEL: frem2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmov s1, #2.00000000
+; CHECK-NEXT: b fmodf
+entry:
+ %fmod = frem float %x, 2.0
+ ret float %fmod
+}
+
+define float @frem2_nsz(float %x) {
+; CHECK-LABEL: frem2_nsz:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmov s1, #2.00000000
+; CHECK-NEXT: b fmodf
+entry:
+ %fmod = frem nsz float %x, 2.0
+ ret float %fmod
+}
+
+define float @frem2_abs(float %x) {
+; CHECK-LABEL: frem2_abs:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fabs s0, s0
+; CHECK-NEXT: fmov s1, #2.00000000
+; CHECK-NEXT: b fmodf
+entry:
+ %a = tail call float @llvm.fabs.f32(float %x)
+ %fmod = frem float %a, 2.0
+ ret float %fmod
+}
+
+define half @hrem2_nsz(half %x) {
+; CHECK-SD-LABEL: hrem2_nsz:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
+; CHECK-SD-NEXT: .cfi_offset w30, -16
+; CHECK-SD-NEXT: fcvt s0, h0
+; CHECK-SD-NEXT: fmov s1, #2.00000000
+; CHECK-SD-NEXT: bl fmodf
+; CHECK-SD-NEXT: fcvt h0, s0
+; CHECK-SD-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: hrem2_nsz:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: fmov h1, #2.00000000
+; CHECK-GI-NEXT: fcvt s0, h0
+; CHECK-GI-NEXT: fcvt s1, h1
+; CHECK-GI-NEXT: bl fmodf
+; CHECK-GI-NEXT: fcvt h0, s0
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+entry:
+ %fmod = frem nsz half %x, 2.0
+ ret half %fmod
+}
+
+define double @drem2_nsz(double %x) {
+; CHECK-LABEL: drem2_nsz:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmov d1, #2.00000000
+; CHECK-NEXT: b fmod
+entry:
+ %fmod = frem nsz double %x, 2.0
+ ret double %fmod
+}
+
+define float @frem3_nsz(float %x) {
+; CHECK-LABEL: frem3_nsz:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmov s1, #3.00000000
+; CHECK-NEXT: b fmodf
+entry:
+ %fmod = frem nsz float %x, 3.0
+ ret float %fmod
+}
+
+define float @frem05_nsz(float %x) {
+; CHECK-LABEL: frem05_nsz:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmov s1, #0.50000000
+; CHECK-NEXT: b fmodf
+entry:
+ %fmod = frem nsz float %x, 0.5
+ ret float %fmod
+}
+
+define float @frem1_nsz(float %x) {
+; CHECK-LABEL: frem1_nsz:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmov s1, #1.00000000
+; CHECK-NEXT: b fmodf
+entry:
+ %fmod = frem nsz float %x, 1.0
+ ret float %fmod
+}
+
+define float @frem0_nsz(float %x) {
+; CHECK-LABEL: frem0_nsz:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movi d1, #0000000000000000
+; CHECK-NEXT: b fmodf
+entry:
+ %fmod = frem nsz float %x, 0.0
+ ret float %fmod
+}
+
+define float @fremm2_nsz(float %x) {
+; CHECK-LABEL: fremm2_nsz:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmov s1, #-2.00000000
+; CHECK-NEXT: b fmodf
+entry:
+ %fmod = frem nsz float %x, -2.0
+ ret float %fmod
+}
+
+define float @frem4_abs(float %x) {
+; CHECK-LABEL: frem4_abs:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fabs s0, s0
+; CHECK-NEXT: fmov s1, #4.00000000
+; CHECK-NEXT: b fmodf
+entry:
+ %a = tail call float @llvm.fabs.f32(float %x)
+ %fmod = frem float %a, 4.0
+ ret float %fmod
+}
+
+define float @frem16_abs(float %x) {
+; CHECK-LABEL: frem16_abs:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fabs s0, s0
+; CHECK-NEXT: fmov s1, #16.00000000
+; CHECK-NEXT: b fmodf
+entry:
+ %a = tail call float @llvm.fabs.f32(float %x)
+ %fmod = frem float %a, 16.0
+ ret float %fmod
+}
+
+define float @frem4294967296_abs(float %x) {
+; CHECK-LABEL: frem4294967296_abs:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fabs s0, s0
+; CHECK-NEXT: mov w8, #1333788672 // =0x4f800000
+; CHECK-NEXT: fmov s1, w8
+; CHECK-NEXT: b fmodf
+entry:
+ %a = tail call float @llvm.fabs.f32(float %x)
+ %fmod = frem float %a, 4294967296.0
+ ret float %fmod
+}
+
+define float @frem1152921504606846976_abs(float %x) {
+; CHECK-LABEL: frem1152921504606846976_abs:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fabs s0, s0
+; CHECK-NEXT: mov w8, #1568669696 // =0x5d800000
+; CHECK-NEXT: fmov s1, w8
+; CHECK-NEXT: b fmodf
+entry:
+ %a = tail call float @llvm.fabs.f32(float %x)
+ %fmod = frem float %a, 1152921504606846976.0
+ ret float %fmod
+}
+
+define float @frem4611686018427387904_abs(float %x) {
+; CHECK-LABEL: frem4611686018427387904_abs:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fabs s0, s0
+; CHECK-NEXT: mov w8, #1585446912 // =0x5e800000
+; CHECK-NEXT: fmov s1, w8
+; CHECK-NEXT: b fmodf
+entry:
+ %a = tail call float @llvm.fabs.f32(float %x)
+ %fmod = frem float %a, 4611686018427387904.0
+ ret float %fmod
+}
+
+define float @frem9223372036854775808_abs(float %x) {
+; CHECK-LABEL: frem9223372036854775808_abs:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fabs s0, s0
+; CHECK-NEXT: movi v1.2s, #95, lsl #24
+; CHECK-NEXT: b fmodf
+entry:
+ %a = tail call float @llvm.fabs.f32(float %x)
+ %fmod = frem float %a, 9223372036854775808.0
+ ret float %fmod
+}
+
+define <4 x float> @frem2_nsz_vec(<4 x float> %x) {
+; CHECK-SD-LABEL: frem2_nsz_vec:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: sub sp, sp, #48
+; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 48
+; CHECK-SD-NEXT: .cfi_offset w30, -16
+; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-SD-NEXT: mov s0, v0.s[1]
+; CHECK-SD-NEXT: fmov s1, #2.00000000
+; CHECK-SD-NEXT: bl fmodf
+; CHECK-SD-NEXT: fmov s1, #2.00000000
+; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0
+; CHECK-SD-NEXT: bl fmodf
+; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT: mov v0.s[1], v1.s[0]
+; CHECK-SD-NEXT: fmov s1, #2.00000000
+; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT: mov s0, v0.s[2]
+; CHECK-SD-NEXT: bl fmodf
+; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT: mov v1.s[2], v0.s[0]
+; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT: mov s0, v0.s[3]
+; CHECK-SD-NEXT: str q1, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT: fmov s1, #2.00000000
+; CHECK-SD-NEXT: bl fmodf
+; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-SD-NEXT: mov v1.s[3], v0.s[0]
+; CHECK-SD-NEXT: mov v0.16b, v1.16b
+; CHECK-SD-NEXT: add sp, sp, #48
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: frem2_nsz_vec:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: sub sp, sp, #80
+; CHECK-GI-NEXT: str d10, [sp, #48] // 8-byte Folded Spill
+; CHECK-GI-NEXT: stp d9, d8, [sp, #56] // 16-byte Folded Spill
+; CHECK-GI-NEXT: str x30, [sp, #72] // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 80
+; CHECK-GI-NEXT: .cfi_offset w30, -8
+; CHECK-GI-NEXT: .cfi_offset b8, -16
+; CHECK-GI-NEXT: .cfi_offset b9, -24
+; CHECK-GI-NEXT: .cfi_offset b10, -32
+; CHECK-GI-NEXT: fmov s1, #2.00000000
+; CHECK-GI-NEXT: mov s8, v0.s[1]
+; CHECK-GI-NEXT: mov s9, v0.s[2]
+; CHECK-GI-NEXT: mov s10, v0.s[3]
+; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0
+; CHECK-GI-NEXT: bl fmodf
+; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
+; CHECK-GI-NEXT: fmov s1, #2.00000000
+; CHECK-GI-NEXT: fmov s0, s8
+; CHECK-GI-NEXT: bl fmodf
+; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT: fmov s1, #2.00000000
+; CHECK-GI-NEXT: fmov s0, s9
+; CHECK-GI-NEXT: bl fmodf
+; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT: fmov s1, #2.00000000
+; CHECK-GI-NEXT: fmov s0, s10
+; CHECK-GI-NEXT: bl fmodf
+; CHECK-GI-NEXT: ldp q2, q1, [sp, #16] // 32-byte Folded Reload
+; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT: ldr x30, [sp, #72] // 8-byte Folded Reload
+; CHECK-GI-NEXT: ldp d9, d8, [sp, #56] // 16-byte Folded Reload
+; CHECK-GI-NEXT: ldr d10, [sp, #48] // 8-byte Folded Reload
+; CHECK-GI-NEXT: mov v1.s[1], v2.s[0]
+; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT: mov v1.s[2], v2.s[0]
+; CHECK-GI-NEXT: mov v1.s[3], v0.s[0]
+; CHECK-GI-NEXT: mov v0.16b, v1.16b
+; CHECK-GI-NEXT: add sp, sp, #80
+; CHECK-GI-NEXT: ret
+entry:
+ %fmod = frem nsz <4 x float> %x, <float 2.0, float 2.0, float 2.0, float 2.0>
+ ret <4 x float> %fmod
+}
+
+define <4 x float> @frem1152921504606846976_absv(<4 x float> %x) {
+; CHECK-SD-LABEL: frem1152921504606846976_absv:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: sub sp, sp, #48
+; CHECK-SD-NEXT: str d8, [sp, #32] // 8-byte Folded Spill
+; CHECK-SD-NEXT: str x30, [sp, #40] // 8-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 48
+; CHECK-SD-NEXT: .cfi_offset w30, -8
+; CHECK-SD-NEXT: .cfi_offset b8, -16
+; CHECK-SD-NEXT: fabs v0.4s, v0.4s
+; CHECK-SD-NEXT: mov w8, #1568669696 // =0x5d800000
+; CHECK-SD-NEXT: fmov s8, w8
+; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-SD-NEXT: mov s0, v0.s[1]
+; CHECK-SD-NEXT: fmov s1, s8
+; CHECK-SD-NEXT: bl fmodf
+; CHECK-SD-NEXT: fmov s1, s8
+; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0
+; CHECK-SD-NEXT: bl fmodf
+; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT: mov v0.s[1], v1.s[0]
+; CHECK-SD-NEXT: fmov s1, s8
+; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT: mov s0, v0.s[2]
+; CHECK-SD-NEXT: bl fmodf
+; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT: mov v1.s[2], v0.s[0]
+; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT: mov s0, v0.s[3]
+; CHECK-SD-NEXT: str q1, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT: fmov s1, s8
+; CHECK-SD-NEXT: bl fmodf
+; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload
+; CHECK-SD-NEXT: ldr d8, [sp, #32] // 8-byte Folded Reload
+; CHECK-SD-NEXT: mov v1.s[3], v0.s[0]
+; CHECK-SD-NEXT: mov v0.16b, v1.16b
+; CHECK-SD-NEXT: add sp, sp, #48
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: frem1152921504606846976_absv:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: sub sp, sp, #96
+; CHECK-GI-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
+; CHECK-GI-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
+; CHECK-GI-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 96
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: .cfi_offset b8, -24
+; CHECK-GI-NEXT: .cfi_offset b9, -32
+; CHECK-GI-NEXT: .cfi_offset b10, -40
+; CHECK-GI-NEXT: .cfi_offset b11, -48
+; CHECK-GI-NEXT: mov w8, #1568669696 // =0x5d800000
+; CHECK-GI-NEXT: fabs v0.4s, v0.4s
+; CHECK-GI-NEXT: fmov s11, w8
+; CHECK-GI-NEXT: fmov s1, s11
+; CHECK-GI-NEXT: mov s8, v0.s[1]
+; CHECK-GI-NEXT: mov s9, v0.s[2]
+; CHECK-GI-NEXT: mov s10, v0.s[3]
+; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0
+; CHECK-GI-NEXT: bl fmodf
+; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
+; CHECK-GI-NEXT: fmov s1, s11
+; CHECK-GI-NEXT: fmov s0, s8
+; CHECK-GI-NEXT: bl fmodf
+; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT: fmov s1, s11
+; CHECK-GI-NEXT: fmov s0, s9
+; CHECK-GI-NEXT: bl fmodf
+; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT: fmov s1, s11
+; CHECK-GI-NEXT: fmov s0, s10
+; CHECK-GI-NEXT: bl fmodf
+; CHECK-GI-NEXT: ldp q2, q1, [sp, #16] // 32-byte Folded Reload
+; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
+; CHECK-GI-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
+; CHECK-GI-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
+; CHECK-GI-NEXT: mov v1.s[1], v2.s[0]
+; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT: mov v1.s[2], v2.s[0]
+; CHECK-GI-NEXT: mov v1.s[3], v0.s[0]
+; CHECK-GI-NEXT: mov v0.16b, v1.16b
+; CHECK-GI-NEXT: add sp, sp, #96
+; CHECK-GI-NEXT: ret
+entry:
+ %a = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> %x)
+ %fmod = frem <4 x float> %a, <float 1152921504606846976.0, float 1152921504606846976.0, float 1152921504606846976.0, float 1152921504606846976.0>
+ ret <4 x float> %fmod
+}
+
+define float @frem2_nsz_sitofp(float %x, i32 %sa) {
+; CHECK-LABEL: frem2_nsz_sitofp:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, #1 // =0x1
+; CHECK-NEXT: lsl w8, w8, w0
+; CHECK-NEXT: scvtf s1, w8
+; CHECK-NEXT: b fmodf
+entry:
+ %s = shl i32 1, %sa
+ %y = sitofp i32 %s to float
+ %fmod = frem nsz float %x, %y
+ ret float %fmod
+}
+
+define float @frem2_nsz_uitofp(float %x, i32 %sa) {
+; CHECK-LABEL: frem2_nsz_uitofp:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, #1 // =0x1
+; CHECK-NEXT: lsl w8, w8, w0
+; CHECK-NEXT: ucvtf s1, w8
+; CHECK-NEXT: b fmodf
+entry:
+ %s = shl i32 1, %sa
+ %y = uitofp i32 %s to float
+ %fmod = frem nsz float %x, %y
+ ret float %fmod
+}
+
+define float @frem2_const_sitofp(float %x, i32 %sa) {
+; CHECK-SD-LABEL: frem2_const_sitofp:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: mov w8, #1 // =0x1
+; CHECK-SD-NEXT: fmov s0, #12.50000000
+; CHECK-SD-NEXT: lsl w8, w8, w0
+; CHECK-SD-NEXT: scvtf s1, w8
+; CHECK-SD-NEXT: b fmodf
+;
+; CHECK-GI-LABEL: frem2_const_sitofp:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov w8, #1 // =0x1
+; CHECK-GI-NEXT: and w9, w0, #0x1f
+; CHECK-GI-NEXT: fmov s0, #12.50000000
+; CHECK-GI-NEXT: lsl w8, w8, w9
+; CHECK-GI-NEXT: scvtf s1, w8
+; CHECK-GI-NEXT: b fmodf
+entry:
+ %sa2 = and i32 %sa, 31
+ %s = shl i32 1, %sa2
+ %y = sitofp i32 %s to float
+ %fmod = frem float 12.50, %y
+ ret float %fmod
+}
+
+define float @frem2_constneg_sitofp(float %x, i32 %sa) {
+; CHECK-SD-LABEL: frem2_constneg_sitofp:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: mov w8, #1 // =0x1
+; CHECK-SD-NEXT: fmov s0, #-12.50000000
+; CHECK-SD-NEXT: lsl w8, w8, w0
+; CHECK-SD-NEXT: scvtf s1, w8
+; CHECK-SD-NEXT: b fmodf
+;
+; CHECK-GI-LABEL: frem2_constneg_sitofp:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov w8, #1 // =0x1
+; CHECK-GI-NEXT: and w9, w0, #0x1f
+; CHECK-GI-NEXT: fmov s0, #-12.50000000
+; CHECK-GI-NEXT: lsl w8, w8, w9
+; CHECK-GI-NEXT: scvtf s1, w8
+; CHECK-GI-NEXT: b fmodf
+entry:
+ %sa2 = and i32 %sa, 31
+ %s = shl i32 1, %sa2
+ %y = sitofp i32 %s to float
+ %fmod = frem float -12.50, %y
+ ret float %fmod
+}
>From affe0e510ee4e9c92b620ce65c5e0fc4b1a5e273 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Tue, 7 May 2024 13:50:50 +0100
Subject: [PATCH 2/2] [DAG] Lower frem of power-2 using div/trunc/mul+sub.
If we are lowering a frem and the divisor is known to be an integer power-2, we
can use the formula 'frem = x - trunc(x / d) * d'. This avoids the more
expensive call to fmod. The results are identical to fmod so long as d is a
power-2 (so the mul does not round incorrectly), and the sign of the result is
either always positive, or the sign of zero is not important (nsz).
Unfortunately Alive2 does not handle this well at the moment. I was using
exhaustive checking to test this, hopefully I didn't make a mistake in it
(https://gist.github.com/davemgreen/6078015f30d3bacd1e9572f8db5d4b64).
I found this in CPython's implementation of float_pow. I have currently added it
as a DAG combine for frem with power-2 fp constants, with some extra utility
functions for checking if a floating-point value is known non-negative or an
integer power-2.
---
llvm/include/llvm/CodeGen/SelectionDAG.h | 8 +
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 17 +-
.../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 17 +
llvm/test/CodeGen/AArch64/frem-power2.ll | 371 ++++++++++--------
4 files changed, 258 insertions(+), 155 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h
index 4b1b58d4af0bb..bc4200a68f2d8 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -1996,6 +1996,10 @@ class SelectionDAG {
/// is set.
bool isKnownToBeAPowerOfTwo(SDValue Val, unsigned Depth = 0) const;
+ /// Test if the given _fp_ value is known to be an integer power-of-2, either
+ /// positive or negative.
+ bool isKnownToBeAPowerOfTwoFP(SDValue Val, unsigned Depth = 0) const;
+
/// Return the number of times the sign bit of the register is replicated into
/// the other bits. We know that at least 1 bit is always equal to the sign
/// bit (itself), but other cases can give us information. For example,
@@ -2111,6 +2115,10 @@ class SelectionDAG {
/// Test whether the given SDValue is known to contain non-zero value(s).
bool isKnownNeverZero(SDValue Op, unsigned Depth = 0) const;
+ /// Test whether the given float value is known to not be negative. 0.0 is
+ /// considered non-negative, -0.0 is considered negative.
+ bool isKnownNonNegativeFP(SDValue Op) const;
+
/// Test whether two SDValues are known to compare equal. This
/// is true if they are the same value, or if one is negative zero and the
/// other positive zero.
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 05ab6e2e48206..38a3eb3abb2e2 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -17264,17 +17264,32 @@ SDValue DAGCombiner::visitFREM(SDNode *N) {
EVT VT = N->getValueType(0);
SDNodeFlags Flags = N->getFlags();
SelectionDAG::FlagInserter FlagsInserter(DAG, N);
+ SDLoc DL(N);
if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
return R;
// fold (frem c1, c2) -> fmod(c1,c2)
- if (SDValue C = DAG.FoldConstantArithmetic(ISD::FREM, SDLoc(N), VT, {N0, N1}))
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::FREM, DL, VT, {N0, N1}))
return C;
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
+ // Lower frem N0, N1 => N0 - trunc(N0 / N1) * N1, providing N1 is an integer
+ // power of 2.
+ if (DAG.isKnownToBeAPowerOfTwoFP(N1) &&
+ (Flags.hasNoSignedZeros() || DAG.isKnownNonNegativeFP(N0)) &&
+ !TLI.isOperationLegal(ISD::FREM, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::FMUL, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::FDIV, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::FTRUNC, VT)) {
+ SDValue Div = DAG.getNode(ISD::FDIV, DL, VT, N0, N1);
+ SDValue Rnd = DAG.getNode(ISD::FTRUNC, DL, VT, Div);
+ SDValue Mul = DAG.getNode(ISD::FMUL, DL, VT, Rnd, N1);
+ return DAG.getNode(ISD::FSUB, DL, VT, N0, Mul);
+ }
+
return SDValue();
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index eef5acd032345..af5a3c8fde3a7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -4373,6 +4373,16 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val, unsigned Depth) const {
return false;
}
+bool SelectionDAG::isKnownToBeAPowerOfTwoFP(SDValue Val, unsigned Depth) const {
+ if (ConstantFPSDNode *C1 = isConstOrConstSplatFP(Val, true))
+ return C1->getValueAPF().getExactLog2Abs() >= 0;
+
+ if (Val.getOpcode() == ISD::UINT_TO_FP || Val.getOpcode() == ISD::SINT_TO_FP)
+ return isKnownToBeAPowerOfTwo(Val.getOperand(0), Depth + 1);
+
+ return false;
+}
+
unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
EVT VT = Op.getValueType();
@@ -5555,6 +5565,13 @@ bool SelectionDAG::isKnownNeverZero(SDValue Op, unsigned Depth) const {
return computeKnownBits(Op, Depth).isNonZero();
}
+bool SelectionDAG::isKnownNonNegativeFP(SDValue Op) const {
+ if (ConstantFPSDNode *C1 = isConstOrConstSplatFP(Op, true))
+ return !C1->isNegative();
+
+ return Op.getOpcode() == ISD::FABS;
+}
+
bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const {
// Check the obvious case.
if (A == B) return true;
diff --git a/llvm/test/CodeGen/AArch64/frem-power2.ll b/llvm/test/CodeGen/AArch64/frem-power2.ll
index 8efcca3e5f132..e885adf0c2bf9 100644
--- a/llvm/test/CodeGen/AArch64/frem-power2.ll
+++ b/llvm/test/CodeGen/AArch64/frem-power2.ll
@@ -13,21 +13,40 @@ entry:
}
define float @frem2_nsz(float %x) {
-; CHECK-LABEL: frem2_nsz:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fmov s1, #2.00000000
-; CHECK-NEXT: b fmodf
+; CHECK-SD-LABEL: frem2_nsz:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fmov s1, #2.00000000
+; CHECK-SD-NEXT: fdiv s1, s0, s1
+; CHECK-SD-NEXT: frintz s1, s1
+; CHECK-SD-NEXT: fadd s1, s1, s1
+; CHECK-SD-NEXT: fsub s0, s0, s1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: frem2_nsz:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fmov s1, #2.00000000
+; CHECK-GI-NEXT: b fmodf
entry:
%fmod = frem nsz float %x, 2.0
ret float %fmod
}
define float @frem2_abs(float %x) {
-; CHECK-LABEL: frem2_abs:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fabs s0, s0
-; CHECK-NEXT: fmov s1, #2.00000000
-; CHECK-NEXT: b fmodf
+; CHECK-SD-LABEL: frem2_abs:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fabs s0, s0
+; CHECK-SD-NEXT: fmov s1, #2.00000000
+; CHECK-SD-NEXT: fdiv s1, s0, s1
+; CHECK-SD-NEXT: frintz s1, s1
+; CHECK-SD-NEXT: fadd s1, s1, s1
+; CHECK-SD-NEXT: fsub s0, s0, s1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: frem2_abs:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fabs s0, s0
+; CHECK-GI-NEXT: fmov s1, #2.00000000
+; CHECK-GI-NEXT: b fmodf
entry:
%a = tail call float @llvm.fabs.f32(float %x)
%fmod = frem float %a, 2.0
@@ -37,14 +56,11 @@ entry:
define half @hrem2_nsz(half %x) {
; CHECK-SD-LABEL: hrem2_nsz:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
-; CHECK-SD-NEXT: .cfi_offset w30, -16
-; CHECK-SD-NEXT: fcvt s0, h0
-; CHECK-SD-NEXT: fmov s1, #2.00000000
-; CHECK-SD-NEXT: bl fmodf
-; CHECK-SD-NEXT: fcvt h0, s0
-; CHECK-SD-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-SD-NEXT: fmov h1, #2.00000000
+; CHECK-SD-NEXT: fdiv h1, h0, h1
+; CHECK-SD-NEXT: frintz h1, h1
+; CHECK-SD-NEXT: fadd h1, h1, h1
+; CHECK-SD-NEXT: fsub h0, h0, h1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: hrem2_nsz:
@@ -65,10 +81,19 @@ entry:
}
define double @drem2_nsz(double %x) {
-; CHECK-LABEL: drem2_nsz:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fmov d1, #2.00000000
-; CHECK-NEXT: b fmod
+; CHECK-SD-LABEL: drem2_nsz:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fmov d1, #2.00000000
+; CHECK-SD-NEXT: fdiv d1, d0, d1
+; CHECK-SD-NEXT: frintz d1, d1
+; CHECK-SD-NEXT: fadd d1, d1, d1
+; CHECK-SD-NEXT: fsub d0, d0, d1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: drem2_nsz:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fmov d1, #2.00000000
+; CHECK-GI-NEXT: b fmod
entry:
%fmod = frem nsz double %x, 2.0
ret double %fmod
@@ -95,10 +120,16 @@ entry:
}
define float @frem1_nsz(float %x) {
-; CHECK-LABEL: frem1_nsz:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fmov s1, #1.00000000
-; CHECK-NEXT: b fmodf
+; CHECK-SD-LABEL: frem1_nsz:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: frintz s1, s0
+; CHECK-SD-NEXT: fsub s0, s0, s1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: frem1_nsz:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fmov s1, #1.00000000
+; CHECK-GI-NEXT: b fmodf
entry:
%fmod = frem nsz float %x, 1.0
ret float %fmod
@@ -115,21 +146,40 @@ entry:
}
define float @fremm2_nsz(float %x) {
-; CHECK-LABEL: fremm2_nsz:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fmov s1, #-2.00000000
-; CHECK-NEXT: b fmodf
+; CHECK-SD-LABEL: fremm2_nsz:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fmov s1, #-2.00000000
+; CHECK-SD-NEXT: fdiv s2, s0, s1
+; CHECK-SD-NEXT: frintz s2, s2
+; CHECK-SD-NEXT: fmul s1, s2, s1
+; CHECK-SD-NEXT: fsub s0, s0, s1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fremm2_nsz:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fmov s1, #-2.00000000
+; CHECK-GI-NEXT: b fmodf
entry:
%fmod = frem nsz float %x, -2.0
ret float %fmod
}
define float @frem4_abs(float %x) {
-; CHECK-LABEL: frem4_abs:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fabs s0, s0
-; CHECK-NEXT: fmov s1, #4.00000000
-; CHECK-NEXT: b fmodf
+; CHECK-SD-LABEL: frem4_abs:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fabs s0, s0
+; CHECK-SD-NEXT: fmov s1, #4.00000000
+; CHECK-SD-NEXT: fdiv s2, s0, s1
+; CHECK-SD-NEXT: frintz s2, s2
+; CHECK-SD-NEXT: fmul s1, s2, s1
+; CHECK-SD-NEXT: fsub s0, s0, s1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: frem4_abs:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fabs s0, s0
+; CHECK-GI-NEXT: fmov s1, #4.00000000
+; CHECK-GI-NEXT: b fmodf
entry:
%a = tail call float @llvm.fabs.f32(float %x)
%fmod = frem float %a, 4.0
@@ -137,11 +187,21 @@ entry:
}
define float @frem16_abs(float %x) {
-; CHECK-LABEL: frem16_abs:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fabs s0, s0
-; CHECK-NEXT: fmov s1, #16.00000000
-; CHECK-NEXT: b fmodf
+; CHECK-SD-LABEL: frem16_abs:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fabs s0, s0
+; CHECK-SD-NEXT: fmov s1, #16.00000000
+; CHECK-SD-NEXT: fdiv s2, s0, s1
+; CHECK-SD-NEXT: frintz s2, s2
+; CHECK-SD-NEXT: fmul s1, s2, s1
+; CHECK-SD-NEXT: fsub s0, s0, s1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: frem16_abs:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fabs s0, s0
+; CHECK-GI-NEXT: fmov s1, #16.00000000
+; CHECK-GI-NEXT: b fmodf
entry:
%a = tail call float @llvm.fabs.f32(float %x)
%fmod = frem float %a, 16.0
@@ -149,12 +209,23 @@ entry:
}
define float @frem4294967296_abs(float %x) {
-; CHECK-LABEL: frem4294967296_abs:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fabs s0, s0
-; CHECK-NEXT: mov w8, #1333788672 // =0x4f800000
-; CHECK-NEXT: fmov s1, w8
-; CHECK-NEXT: b fmodf
+; CHECK-SD-LABEL: frem4294967296_abs:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fabs s0, s0
+; CHECK-SD-NEXT: mov w8, #1333788672 // =0x4f800000
+; CHECK-SD-NEXT: fmov s1, w8
+; CHECK-SD-NEXT: fdiv s2, s0, s1
+; CHECK-SD-NEXT: frintz s2, s2
+; CHECK-SD-NEXT: fmul s1, s2, s1
+; CHECK-SD-NEXT: fsub s0, s0, s1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: frem4294967296_abs:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fabs s0, s0
+; CHECK-GI-NEXT: mov w8, #1333788672 // =0x4f800000
+; CHECK-GI-NEXT: fmov s1, w8
+; CHECK-GI-NEXT: b fmodf
entry:
%a = tail call float @llvm.fabs.f32(float %x)
%fmod = frem float %a, 4294967296.0
@@ -162,12 +233,23 @@ entry:
}
define float @frem1152921504606846976_abs(float %x) {
-; CHECK-LABEL: frem1152921504606846976_abs:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fabs s0, s0
-; CHECK-NEXT: mov w8, #1568669696 // =0x5d800000
-; CHECK-NEXT: fmov s1, w8
-; CHECK-NEXT: b fmodf
+; CHECK-SD-LABEL: frem1152921504606846976_abs:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fabs s0, s0
+; CHECK-SD-NEXT: mov w8, #1568669696 // =0x5d800000
+; CHECK-SD-NEXT: fmov s1, w8
+; CHECK-SD-NEXT: fdiv s2, s0, s1
+; CHECK-SD-NEXT: frintz s2, s2
+; CHECK-SD-NEXT: fmul s1, s2, s1
+; CHECK-SD-NEXT: fsub s0, s0, s1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: frem1152921504606846976_abs:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fabs s0, s0
+; CHECK-GI-NEXT: mov w8, #1568669696 // =0x5d800000
+; CHECK-GI-NEXT: fmov s1, w8
+; CHECK-GI-NEXT: b fmodf
entry:
%a = tail call float @llvm.fabs.f32(float %x)
%fmod = frem float %a, 1152921504606846976.0
@@ -175,12 +257,23 @@ entry:
}
define float @frem4611686018427387904_abs(float %x) {
-; CHECK-LABEL: frem4611686018427387904_abs:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fabs s0, s0
-; CHECK-NEXT: mov w8, #1585446912 // =0x5e800000
-; CHECK-NEXT: fmov s1, w8
-; CHECK-NEXT: b fmodf
+; CHECK-SD-LABEL: frem4611686018427387904_abs:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fabs s0, s0
+; CHECK-SD-NEXT: mov w8, #1585446912 // =0x5e800000
+; CHECK-SD-NEXT: fmov s1, w8
+; CHECK-SD-NEXT: fdiv s2, s0, s1
+; CHECK-SD-NEXT: frintz s2, s2
+; CHECK-SD-NEXT: fmul s1, s2, s1
+; CHECK-SD-NEXT: fsub s0, s0, s1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: frem4611686018427387904_abs:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fabs s0, s0
+; CHECK-GI-NEXT: mov w8, #1585446912 // =0x5e800000
+; CHECK-GI-NEXT: fmov s1, w8
+; CHECK-GI-NEXT: b fmodf
entry:
%a = tail call float @llvm.fabs.f32(float %x)
%fmod = frem float %a, 4611686018427387904.0
@@ -188,11 +281,21 @@ entry:
}
define float @frem9223372036854775808_abs(float %x) {
-; CHECK-LABEL: frem9223372036854775808_abs:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fabs s0, s0
-; CHECK-NEXT: movi v1.2s, #95, lsl #24
-; CHECK-NEXT: b fmodf
+; CHECK-SD-LABEL: frem9223372036854775808_abs:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: movi v1.2s, #95, lsl #24
+; CHECK-SD-NEXT: fabs s0, s0
+; CHECK-SD-NEXT: fdiv s2, s0, s1
+; CHECK-SD-NEXT: frintz s2, s2
+; CHECK-SD-NEXT: fmul s1, s2, s1
+; CHECK-SD-NEXT: fsub s0, s0, s1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: frem9223372036854775808_abs:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fabs s0, s0
+; CHECK-GI-NEXT: movi v1.2s, #95, lsl #24
+; CHECK-GI-NEXT: b fmodf
entry:
%a = tail call float @llvm.fabs.f32(float %x)
%fmod = frem float %a, 9223372036854775808.0
@@ -202,42 +305,11 @@ entry:
define <4 x float> @frem2_nsz_vec(<4 x float> %x) {
; CHECK-SD-LABEL: frem2_nsz_vec:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: sub sp, sp, #48
-; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
-; CHECK-SD-NEXT: .cfi_def_cfa_offset 48
-; CHECK-SD-NEXT: .cfi_offset w30, -16
-; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
-; CHECK-SD-NEXT: mov s0, v0.s[1]
-; CHECK-SD-NEXT: fmov s1, #2.00000000
-; CHECK-SD-NEXT: bl fmodf
-; CHECK-SD-NEXT: fmov s1, #2.00000000
-; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
-; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0
-; CHECK-SD-NEXT: bl fmodf
-; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
-; CHECK-SD-NEXT: mov v0.s[1], v1.s[0]
-; CHECK-SD-NEXT: fmov s1, #2.00000000
-; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT: mov s0, v0.s[2]
-; CHECK-SD-NEXT: bl fmodf
-; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
-; CHECK-SD-NEXT: mov v1.s[2], v0.s[0]
-; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT: mov s0, v0.s[3]
-; CHECK-SD-NEXT: str q1, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT: fmov s1, #2.00000000
-; CHECK-SD-NEXT: bl fmodf
-; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
-; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
-; CHECK-SD-NEXT: mov v1.s[3], v0.s[0]
-; CHECK-SD-NEXT: mov v0.16b, v1.16b
-; CHECK-SD-NEXT: add sp, sp, #48
+; CHECK-SD-NEXT: movi v1.4s, #64, lsl #24
+; CHECK-SD-NEXT: fdiv v1.4s, v0.4s, v1.4s
+; CHECK-SD-NEXT: frintz v1.4s, v1.4s
+; CHECK-SD-NEXT: fadd v1.4s, v1.4s, v1.4s
+; CHECK-SD-NEXT: fsub v0.4s, v0.4s, v1.4s
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: frem2_nsz_vec:
@@ -292,48 +364,13 @@ entry:
define <4 x float> @frem1152921504606846976_absv(<4 x float> %x) {
; CHECK-SD-LABEL: frem1152921504606846976_absv:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: sub sp, sp, #48
-; CHECK-SD-NEXT: str d8, [sp, #32] // 8-byte Folded Spill
-; CHECK-SD-NEXT: str x30, [sp, #40] // 8-byte Folded Spill
-; CHECK-SD-NEXT: .cfi_def_cfa_offset 48
-; CHECK-SD-NEXT: .cfi_offset w30, -8
-; CHECK-SD-NEXT: .cfi_offset b8, -16
-; CHECK-SD-NEXT: fabs v0.4s, v0.4s
; CHECK-SD-NEXT: mov w8, #1568669696 // =0x5d800000
-; CHECK-SD-NEXT: fmov s8, w8
-; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
-; CHECK-SD-NEXT: mov s0, v0.s[1]
-; CHECK-SD-NEXT: fmov s1, s8
-; CHECK-SD-NEXT: bl fmodf
-; CHECK-SD-NEXT: fmov s1, s8
-; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
-; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0
-; CHECK-SD-NEXT: bl fmodf
-; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
-; CHECK-SD-NEXT: mov v0.s[1], v1.s[0]
-; CHECK-SD-NEXT: fmov s1, s8
-; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT: mov s0, v0.s[2]
-; CHECK-SD-NEXT: bl fmodf
-; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
-; CHECK-SD-NEXT: mov v1.s[2], v0.s[0]
-; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT: mov s0, v0.s[3]
-; CHECK-SD-NEXT: str q1, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT: fmov s1, s8
-; CHECK-SD-NEXT: bl fmodf
-; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
-; CHECK-SD-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload
-; CHECK-SD-NEXT: ldr d8, [sp, #32] // 8-byte Folded Reload
-; CHECK-SD-NEXT: mov v1.s[3], v0.s[0]
-; CHECK-SD-NEXT: mov v0.16b, v1.16b
-; CHECK-SD-NEXT: add sp, sp, #48
+; CHECK-SD-NEXT: fabs v0.4s, v0.4s
+; CHECK-SD-NEXT: dup v1.4s, w8
+; CHECK-SD-NEXT: fdiv v2.4s, v0.4s, v1.4s
+; CHECK-SD-NEXT: frintz v2.4s, v2.4s
+; CHECK-SD-NEXT: fmul v1.4s, v2.4s, v1.4s
+; CHECK-SD-NEXT: fsub v0.4s, v0.4s, v1.4s
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: frem1152921504606846976_absv:
@@ -391,12 +428,23 @@ entry:
}
define float @frem2_nsz_sitofp(float %x, i32 %sa) {
-; CHECK-LABEL: frem2_nsz_sitofp:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov w8, #1 // =0x1
-; CHECK-NEXT: lsl w8, w8, w0
-; CHECK-NEXT: scvtf s1, w8
-; CHECK-NEXT: b fmodf
+; CHECK-SD-LABEL: frem2_nsz_sitofp:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: mov w8, #1 // =0x1
+; CHECK-SD-NEXT: lsl w8, w8, w0
+; CHECK-SD-NEXT: scvtf s1, w8
+; CHECK-SD-NEXT: fdiv s2, s0, s1
+; CHECK-SD-NEXT: frintz s2, s2
+; CHECK-SD-NEXT: fmul s1, s2, s1
+; CHECK-SD-NEXT: fsub s0, s0, s1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: frem2_nsz_sitofp:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov w8, #1 // =0x1
+; CHECK-GI-NEXT: lsl w8, w8, w0
+; CHECK-GI-NEXT: scvtf s1, w8
+; CHECK-GI-NEXT: b fmodf
entry:
%s = shl i32 1, %sa
%y = sitofp i32 %s to float
@@ -405,12 +453,23 @@ entry:
}
define float @frem2_nsz_uitofp(float %x, i32 %sa) {
-; CHECK-LABEL: frem2_nsz_uitofp:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov w8, #1 // =0x1
-; CHECK-NEXT: lsl w8, w8, w0
-; CHECK-NEXT: ucvtf s1, w8
-; CHECK-NEXT: b fmodf
+; CHECK-SD-LABEL: frem2_nsz_uitofp:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: mov w8, #1 // =0x1
+; CHECK-SD-NEXT: lsl w8, w8, w0
+; CHECK-SD-NEXT: ucvtf s1, w8
+; CHECK-SD-NEXT: fdiv s2, s0, s1
+; CHECK-SD-NEXT: frintz s2, s2
+; CHECK-SD-NEXT: fmul s1, s2, s1
+; CHECK-SD-NEXT: fsub s0, s0, s1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: frem2_nsz_uitofp:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov w8, #1 // =0x1
+; CHECK-GI-NEXT: lsl w8, w8, w0
+; CHECK-GI-NEXT: ucvtf s1, w8
+; CHECK-GI-NEXT: b fmodf
entry:
%s = shl i32 1, %sa
%y = uitofp i32 %s to float
@@ -422,10 +481,14 @@ define float @frem2_const_sitofp(float %x, i32 %sa) {
; CHECK-SD-LABEL: frem2_const_sitofp:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: mov w8, #1 // =0x1
-; CHECK-SD-NEXT: fmov s0, #12.50000000
+; CHECK-SD-NEXT: fmov s1, #12.50000000
; CHECK-SD-NEXT: lsl w8, w8, w0
-; CHECK-SD-NEXT: scvtf s1, w8
-; CHECK-SD-NEXT: b fmodf
+; CHECK-SD-NEXT: scvtf s0, w8
+; CHECK-SD-NEXT: fdiv s2, s1, s0
+; CHECK-SD-NEXT: frintz s2, s2
+; CHECK-SD-NEXT: fmul s0, s2, s0
+; CHECK-SD-NEXT: fsub s0, s1, s0
+; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: frem2_const_sitofp:
; CHECK-GI: // %bb.0: // %entry
More information about the llvm-commits
mailing list