[llvm] [DAG] Lower frem of power-2 using div/trunc/mul+sub (PR #91148)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon May 6 01:10:23 PDT 2024
================
@@ -0,0 +1,383 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+
+define float @frem2(float %x) {
+; CHECK-LABEL: frem2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmov s1, #2.00000000
+; CHECK-NEXT: b fmodf
+entry:
+ %fmod = frem float %x, 2.0
+ ret float %fmod
+}
+
+define float @frem2_nsz(float %x) {
+; CHECK-SD-LABEL: frem2_nsz:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fmov s1, #2.00000000
+; CHECK-SD-NEXT: fdiv s1, s0, s1
+; CHECK-SD-NEXT: frintz s1, s1
+; CHECK-SD-NEXT: fadd s1, s1, s1
+; CHECK-SD-NEXT: fsub s0, s0, s1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: frem2_nsz:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fmov s1, #2.00000000
+; CHECK-GI-NEXT: b fmodf
+entry:
+ %fmod = frem nsz float %x, 2.0
+ ret float %fmod
+}
+
+define float @frem2_abs(float %x) {
+; CHECK-SD-LABEL: frem2_abs:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fabs s0, s0
+; CHECK-SD-NEXT: fmov s1, #2.00000000
+; CHECK-SD-NEXT: fdiv s1, s0, s1
+; CHECK-SD-NEXT: frintz s1, s1
+; CHECK-SD-NEXT: fadd s1, s1, s1
+; CHECK-SD-NEXT: fsub s0, s0, s1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: frem2_abs:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fabs s0, s0
+; CHECK-GI-NEXT: fmov s1, #2.00000000
+; CHECK-GI-NEXT: b fmodf
+entry:
+ %a = tail call float @llvm.fabs.f32(float %x)
+ %fmod = frem float %a, 2.0
+ ret float %fmod
+}
+
+define half @hrem2_nsz(half %x) {
+; CHECK-SD-LABEL: hrem2_nsz:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fmov h1, #2.00000000
+; CHECK-SD-NEXT: fdiv h1, h0, h1
+; CHECK-SD-NEXT: frintz h1, h1
+; CHECK-SD-NEXT: fadd h1, h1, h1
+; CHECK-SD-NEXT: fsub h0, h0, h1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: hrem2_nsz:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: fmov h1, #2.00000000
+; CHECK-GI-NEXT: fcvt s0, h0
+; CHECK-GI-NEXT: fcvt s1, h1
+; CHECK-GI-NEXT: bl fmodf
+; CHECK-GI-NEXT: fcvt h0, s0
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+entry:
+ %fmod = frem nsz half %x, 2.0
+ ret half %fmod
+}
+
+define double @drem2_nsz(double %x) {
+; CHECK-SD-LABEL: drem2_nsz:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fmov d1, #2.00000000
+; CHECK-SD-NEXT: fdiv d1, d0, d1
+; CHECK-SD-NEXT: frintz d1, d1
+; CHECK-SD-NEXT: fadd d1, d1, d1
+; CHECK-SD-NEXT: fsub d0, d0, d1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: drem2_nsz:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fmov d1, #2.00000000
+; CHECK-GI-NEXT: b fmod
+entry:
+ %fmod = frem nsz double %x, 2.0
+ ret double %fmod
+}
+
+define float @frem3_nsz(float %x) {
+; CHECK-LABEL: frem3_nsz:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmov s1, #3.00000000
+; CHECK-NEXT: b fmodf
+entry:
+ %fmod = frem nsz float %x, 3.0
+ ret float %fmod
+}
+
+define float @frem05_nsz(float %x) {
+; CHECK-LABEL: frem05_nsz:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmov s1, #0.50000000
+; CHECK-NEXT: b fmodf
+entry:
+ %fmod = frem nsz float %x, 0.5
+ ret float %fmod
+}
+
+define float @fremm2_nsz(float %x) {
+; CHECK-LABEL: fremm2_nsz:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmov s1, #-2.00000000
+; CHECK-NEXT: b fmodf
+entry:
+ %fmod = frem nsz float %x, -2.0
+ ret float %fmod
+}
+
+define float @frem4_abs(float %x) {
+; CHECK-SD-LABEL: frem4_abs:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fabs s0, s0
+; CHECK-SD-NEXT: fmov s1, #4.00000000
+; CHECK-SD-NEXT: fdiv s2, s0, s1
+; CHECK-SD-NEXT: frintz s2, s2
+; CHECK-SD-NEXT: fmul s1, s2, s1
+; CHECK-SD-NEXT: fsub s0, s0, s1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: frem4_abs:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fabs s0, s0
+; CHECK-GI-NEXT: fmov s1, #4.00000000
+; CHECK-GI-NEXT: b fmodf
+entry:
+ %a = tail call float @llvm.fabs.f32(float %x)
+ %fmod = frem float %a, 4.0
+ ret float %fmod
+}
+
+define float @frem16_abs(float %x) {
+; CHECK-SD-LABEL: frem16_abs:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fabs s0, s0
+; CHECK-SD-NEXT: fmov s1, #16.00000000
+; CHECK-SD-NEXT: fdiv s2, s0, s1
+; CHECK-SD-NEXT: frintz s2, s2
+; CHECK-SD-NEXT: fmul s1, s2, s1
+; CHECK-SD-NEXT: fsub s0, s0, s1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: frem16_abs:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fabs s0, s0
+; CHECK-GI-NEXT: fmov s1, #16.00000000
+; CHECK-GI-NEXT: b fmodf
+entry:
+ %a = tail call float @llvm.fabs.f32(float %x)
+ %fmod = frem float %a, 16.0
+ ret float %fmod
+}
+
+define float @frem4294967296_abs(float %x) {
+; CHECK-SD-LABEL: frem4294967296_abs:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fabs s0, s0
+; CHECK-SD-NEXT: mov w8, #1333788672 // =0x4f800000
+; CHECK-SD-NEXT: fmov s1, w8
+; CHECK-SD-NEXT: fdiv s2, s0, s1
+; CHECK-SD-NEXT: frintz s2, s2
+; CHECK-SD-NEXT: fmul s1, s2, s1
+; CHECK-SD-NEXT: fsub s0, s0, s1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: frem4294967296_abs:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fabs s0, s0
+; CHECK-GI-NEXT: mov w8, #1333788672 // =0x4f800000
+; CHECK-GI-NEXT: fmov s1, w8
+; CHECK-GI-NEXT: b fmodf
+entry:
+ %a = tail call float @llvm.fabs.f32(float %x)
+ %fmod = frem float %a, 4294967296.0
+ ret float %fmod
+}
+
+define float @frem1152921504606846976_abs(float %x) {
+; CHECK-SD-LABEL: frem1152921504606846976_abs:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fabs s0, s0
+; CHECK-SD-NEXT: mov w8, #1568669696 // =0x5d800000
+; CHECK-SD-NEXT: fmov s1, w8
+; CHECK-SD-NEXT: fdiv s2, s0, s1
+; CHECK-SD-NEXT: frintz s2, s2
+; CHECK-SD-NEXT: fmul s1, s2, s1
+; CHECK-SD-NEXT: fsub s0, s0, s1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: frem1152921504606846976_abs:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fabs s0, s0
+; CHECK-GI-NEXT: mov w8, #1568669696 // =0x5d800000
+; CHECK-GI-NEXT: fmov s1, w8
+; CHECK-GI-NEXT: b fmodf
+entry:
+ %a = tail call float @llvm.fabs.f32(float %x)
+ %fmod = frem float %a, 1152921504606846976.0
+ ret float %fmod
+}
+
+define float @frem4611686018427387904_abs(float %x) {
+; CHECK-SD-LABEL: frem4611686018427387904_abs:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fabs s0, s0
+; CHECK-SD-NEXT: mov w8, #1585446912 // =0x5e800000
+; CHECK-SD-NEXT: fmov s1, w8
+; CHECK-SD-NEXT: fdiv s2, s0, s1
+; CHECK-SD-NEXT: frintz s2, s2
+; CHECK-SD-NEXT: fmul s1, s2, s1
+; CHECK-SD-NEXT: fsub s0, s0, s1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: frem4611686018427387904_abs:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fabs s0, s0
+; CHECK-GI-NEXT: mov w8, #1585446912 // =0x5e800000
+; CHECK-GI-NEXT: fmov s1, w8
+; CHECK-GI-NEXT: b fmodf
+entry:
+ %a = tail call float @llvm.fabs.f32(float %x)
+ %fmod = frem float %a, 4611686018427387904.0
+ ret float %fmod
+}
+
+define float @frem9223372036854775808_abs(float %x) {
+; CHECK-LABEL: frem9223372036854775808_abs:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fabs s0, s0
+; CHECK-NEXT: movi v1.2s, #95, lsl #24
+; CHECK-NEXT: b fmodf
+entry:
+ %a = tail call float @llvm.fabs.f32(float %x)
+ %fmod = frem float %a, 9223372036854775808.0
+ ret float %fmod
+}
+
+define <4 x float> @frem2_nsz_vec(<4 x float> %x) {
+; CHECK-SD-LABEL: frem2_nsz_vec:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: movi v1.4s, #64, lsl #24
+; CHECK-SD-NEXT: fdiv v1.4s, v0.4s, v1.4s
+; CHECK-SD-NEXT: frintz v1.4s, v1.4s
+; CHECK-SD-NEXT: fadd v1.4s, v1.4s, v1.4s
+; CHECK-SD-NEXT: fsub v0.4s, v0.4s, v1.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: frem2_nsz_vec:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: sub sp, sp, #80
+; CHECK-GI-NEXT: str d10, [sp, #48] // 8-byte Folded Spill
+; CHECK-GI-NEXT: stp d9, d8, [sp, #56] // 16-byte Folded Spill
+; CHECK-GI-NEXT: str x30, [sp, #72] // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 80
+; CHECK-GI-NEXT: .cfi_offset w30, -8
+; CHECK-GI-NEXT: .cfi_offset b8, -16
+; CHECK-GI-NEXT: .cfi_offset b9, -24
+; CHECK-GI-NEXT: .cfi_offset b10, -32
+; CHECK-GI-NEXT: fmov s1, #2.00000000
+; CHECK-GI-NEXT: mov s8, v0.s[1]
+; CHECK-GI-NEXT: mov s9, v0.s[2]
+; CHECK-GI-NEXT: mov s10, v0.s[3]
+; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0
+; CHECK-GI-NEXT: bl fmodf
+; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
+; CHECK-GI-NEXT: fmov s1, #2.00000000
+; CHECK-GI-NEXT: fmov s0, s8
+; CHECK-GI-NEXT: bl fmodf
+; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT: fmov s1, #2.00000000
+; CHECK-GI-NEXT: fmov s0, s9
+; CHECK-GI-NEXT: bl fmodf
+; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT: fmov s1, #2.00000000
+; CHECK-GI-NEXT: fmov s0, s10
+; CHECK-GI-NEXT: bl fmodf
+; CHECK-GI-NEXT: ldp q2, q1, [sp, #16] // 32-byte Folded Reload
+; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT: ldr x30, [sp, #72] // 8-byte Folded Reload
+; CHECK-GI-NEXT: ldp d9, d8, [sp, #56] // 16-byte Folded Reload
+; CHECK-GI-NEXT: ldr d10, [sp, #48] // 8-byte Folded Reload
+; CHECK-GI-NEXT: mov v1.s[1], v2.s[0]
+; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT: mov v1.s[2], v2.s[0]
+; CHECK-GI-NEXT: mov v1.s[3], v0.s[0]
+; CHECK-GI-NEXT: mov v0.16b, v1.16b
+; CHECK-GI-NEXT: add sp, sp, #80
+; CHECK-GI-NEXT: ret
+entry:
+ %fmod = frem nsz <4 x float> %x, <float 2.0, float 2.0, float 2.0, float 2.0>
+ ret <4 x float> %fmod
+}
+
+define <4 x float> @frem1152921504606846976_absv(<4 x float> %x) {
+; CHECK-SD-LABEL: frem1152921504606846976_absv:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: mov w8, #1568669696 // =0x5d800000
+; CHECK-SD-NEXT: fabs v0.4s, v0.4s
+; CHECK-SD-NEXT: dup v1.4s, w8
+; CHECK-SD-NEXT: fdiv v2.4s, v0.4s, v1.4s
+; CHECK-SD-NEXT: frintz v2.4s, v2.4s
+; CHECK-SD-NEXT: fmul v1.4s, v2.4s, v1.4s
+; CHECK-SD-NEXT: fsub v0.4s, v0.4s, v1.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: frem1152921504606846976_absv:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: sub sp, sp, #96
+; CHECK-GI-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
+; CHECK-GI-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
+; CHECK-GI-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 96
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: .cfi_offset b8, -24
+; CHECK-GI-NEXT: .cfi_offset b9, -32
+; CHECK-GI-NEXT: .cfi_offset b10, -40
+; CHECK-GI-NEXT: .cfi_offset b11, -48
+; CHECK-GI-NEXT: mov w8, #1568669696 // =0x5d800000
+; CHECK-GI-NEXT: fabs v0.4s, v0.4s
+; CHECK-GI-NEXT: fmov s11, w8
+; CHECK-GI-NEXT: fmov s1, s11
+; CHECK-GI-NEXT: mov s8, v0.s[1]
+; CHECK-GI-NEXT: mov s9, v0.s[2]
+; CHECK-GI-NEXT: mov s10, v0.s[3]
+; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0
+; CHECK-GI-NEXT: bl fmodf
+; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
+; CHECK-GI-NEXT: fmov s1, s11
+; CHECK-GI-NEXT: fmov s0, s8
+; CHECK-GI-NEXT: bl fmodf
+; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT: fmov s1, s11
+; CHECK-GI-NEXT: fmov s0, s9
+; CHECK-GI-NEXT: bl fmodf
+; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT: fmov s1, s11
+; CHECK-GI-NEXT: fmov s0, s10
+; CHECK-GI-NEXT: bl fmodf
+; CHECK-GI-NEXT: ldp q2, q1, [sp, #16] // 32-byte Folded Reload
+; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
+; CHECK-GI-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
+; CHECK-GI-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
+; CHECK-GI-NEXT: mov v1.s[1], v2.s[0]
+; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT: mov v1.s[2], v2.s[0]
+; CHECK-GI-NEXT: mov v1.s[3], v0.s[0]
+; CHECK-GI-NEXT: mov v0.16b, v1.16b
+; CHECK-GI-NEXT: add sp, sp, #96
+; CHECK-GI-NEXT: ret
+entry:
+ %a = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> %x)
+ %fmod = frem <4 x float> %a, <float 1152921504606846976.0, float 1152921504606846976.0, float 1152921504606846976.0, float 1152921504606846976.0>
+ ret <4 x float> %fmod
+}
----------------
arsenm wrote:
Test the sitofp / uitofp cases
https://github.com/llvm/llvm-project/pull/91148
More information about the llvm-commits
mailing list