[llvm] [SDAG] Reverse the canonicalization of isInf/isNanOrInf (PR #81404)
Yingwei Zheng via llvm-commits
llvm-commits at lists.llvm.org
Sun Feb 11 18:21:41 PST 2024
https://github.com/dtcxzyw updated https://github.com/llvm/llvm-project/pull/81404
>From 644a3f31ccde5e05c0e30a65db5e3cfc78995c1f Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Mon, 12 Feb 2024 10:11:37 +0800
Subject: [PATCH 1/2] [SDAG] Add pre-commit tests. NFC.
---
llvm/test/CodeGen/AArch64/fpclass-test.ll | 234 ++++++++++++++++++
llvm/test/CodeGen/RISCV/fpclass-test.ll | 245 +++++++++++++++++++
llvm/test/CodeGen/X86/fpclass-test.ll | 274 ++++++++++++++++++++++
3 files changed, 753 insertions(+)
create mode 100644 llvm/test/CodeGen/AArch64/fpclass-test.ll
create mode 100644 llvm/test/CodeGen/RISCV/fpclass-test.ll
create mode 100644 llvm/test/CodeGen/X86/fpclass-test.ll
diff --git a/llvm/test/CodeGen/AArch64/fpclass-test.ll b/llvm/test/CodeGen/AArch64/fpclass-test.ll
new file mode 100644
index 00000000000000..0a7f11149494fb
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/fpclass-test.ll
@@ -0,0 +1,234 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=aarch64 -mattr=+sve < %s | FileCheck %s
+
+define i1 @test_is_inf_or_nan(double %arg) {
+; CHECK-LABEL: test_is_inf_or_nan:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fabs d0, d0
+; CHECK-NEXT: mov x8, #9218868437227405312 // =0x7ff0000000000000
+; CHECK-NEXT: fmov d1, x8
+; CHECK-NEXT: fcmp d0, d1
+; CHECK-NEXT: cset w8, eq
+; CHECK-NEXT: csinc w0, w8, wzr, vc
+; CHECK-NEXT: ret
+ %abs = tail call double @llvm.fabs.f64(double %arg)
+ %ret = fcmp ueq double %abs, 0x7FF0000000000000
+ ret i1 %ret
+}
+
+define i1 @test_is_not_inf_or_nan(double %arg) {
+; CHECK-LABEL: test_is_not_inf_or_nan:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fabs d0, d0
+; CHECK-NEXT: mov x8, #9218868437227405312 // =0x7ff0000000000000
+; CHECK-NEXT: fmov d1, x8
+; CHECK-NEXT: fcmp d0, d1
+; CHECK-NEXT: cset w8, mi
+; CHECK-NEXT: csinc w0, w8, wzr, le
+; CHECK-NEXT: ret
+ %abs = tail call double @llvm.fabs.f64(double %arg)
+ %ret = fcmp one double %abs, 0x7FF0000000000000
+ ret i1 %ret
+}
+
+define i1 @test_is_inf(double %arg) {
+; CHECK-LABEL: test_is_inf:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fabs d0, d0
+; CHECK-NEXT: mov x8, #9218868437227405312 // =0x7ff0000000000000
+; CHECK-NEXT: fmov d1, x8
+; CHECK-NEXT: fcmp d0, d1
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ret
+ %abs = tail call double @llvm.fabs.f64(double %arg)
+ %ret = fcmp oeq double %abs, 0x7FF0000000000000
+ ret i1 %ret
+}
+
+define i1 @test_is_not_inf(double %arg) {
+; CHECK-LABEL: test_is_not_inf:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fabs d0, d0
+; CHECK-NEXT: mov x8, #9218868437227405312 // =0x7ff0000000000000
+; CHECK-NEXT: fmov d1, x8
+; CHECK-NEXT: fcmp d0, d1
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+ %abs = tail call double @llvm.fabs.f64(double %arg)
+ %ret = fcmp une double %abs, 0x7FF0000000000000
+ ret i1 %ret
+}
+
+define <vscale x 2 x i1> @test_vec_is_inf_or_nan(<vscale x 2 x double> %arg) {
+; CHECK-LABEL: test_vec_is_inf_or_nan:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: mov x8, #9218868437227405312 // =0x7ff0000000000000
+; CHECK-NEXT: mov z1.d, x8
+; CHECK-NEXT: fabs z0.d, p0/m, z0.d
+; CHECK-NEXT: fcmuo p1.d, p0/z, z0.d, z1.d
+; CHECK-NEXT: fcmeq p0.d, p0/z, z0.d, z1.d
+; CHECK-NEXT: sel p0.b, p0, p0.b, p1.b
+; CHECK-NEXT: ret
+ %abs = tail call <vscale x 2 x double> @llvm.fabs.nxv2f64(<vscale x 2 x double> %arg)
+ %ret = fcmp ueq <vscale x 2 x double> %abs, splat (double 0x7FF0000000000000)
+ ret <vscale x 2 x i1> %ret
+}
+
+define <vscale x 2 x i1> @test_vec_is_not_inf_or_nan(<vscale x 2 x double> %arg) {
+; CHECK-LABEL: test_vec_is_not_inf_or_nan:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: mov x8, #9218868437227405312 // =0x7ff0000000000000
+; CHECK-NEXT: mov z1.d, x8
+; CHECK-NEXT: fabs z0.d, p0/m, z0.d
+; CHECK-NEXT: fcmgt p1.d, p0/z, z1.d, z0.d
+; CHECK-NEXT: fcmgt p0.d, p0/z, z0.d, z1.d
+; CHECK-NEXT: sel p0.b, p0, p0.b, p1.b
+; CHECK-NEXT: ret
+ %abs = tail call <vscale x 2 x double> @llvm.fabs.nxv2f64(<vscale x 2 x double> %arg)
+ %ret = fcmp one <vscale x 2 x double> %abs, splat (double 0x7FF0000000000000)
+ ret <vscale x 2 x i1> %ret
+}
+
+define <vscale x 2 x i1> @test_vec_is_inf(<vscale x 2 x double> %arg) {
+; CHECK-LABEL: test_vec_is_inf:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: mov x8, #9218868437227405312 // =0x7ff0000000000000
+; CHECK-NEXT: mov z1.d, x8
+; CHECK-NEXT: fabs z0.d, p0/m, z0.d
+; CHECK-NEXT: fcmeq p0.d, p0/z, z0.d, z1.d
+; CHECK-NEXT: ret
+ %abs = tail call <vscale x 2 x double> @llvm.fabs.nxv2f64(<vscale x 2 x double> %arg)
+ %ret = fcmp oeq <vscale x 2 x double> %abs, splat (double 0x7FF0000000000000)
+ ret <vscale x 2 x i1> %ret
+}
+
+define <vscale x 2 x i1> @test_vec_is_not_inf(<vscale x 2 x double> %arg) {
+; CHECK-LABEL: test_vec_is_not_inf:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: mov x8, #9218868437227405312 // =0x7ff0000000000000
+; CHECK-NEXT: mov z1.d, x8
+; CHECK-NEXT: fabs z0.d, p0/m, z0.d
+; CHECK-NEXT: fcmne p0.d, p0/z, z0.d, z1.d
+; CHECK-NEXT: ret
+ %abs = tail call <vscale x 2 x double> @llvm.fabs.nxv2f64(<vscale x 2 x double> %arg)
+ %ret = fcmp une <vscale x 2 x double> %abs, splat (double 0x7FF0000000000000)
+ ret <vscale x 2 x i1> %ret
+}
+
+define i1 @test_fp128_is_inf_or_nan(fp128 %arg) {
+; CHECK-LABEL: test_fp128_is_inf_or_nan:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #64
+; CHECK-NEXT: stp x30, x19, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: .cfi_offset w19, -8
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: str q0, [sp, #32]
+; CHECK-NEXT: ldrb w8, [sp, #47]
+; CHECK-NEXT: and w8, w8, #0x7f
+; CHECK-NEXT: strb w8, [sp, #47]
+; CHECK-NEXT: adrp x8, .LCPI8_0
+; CHECK-NEXT: ldr q0, [sp, #32]
+; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI8_0]
+; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: bl __eqtf2
+; CHECK-NEXT: ldp q1, q0, [sp] // 32-byte Folded Reload
+; CHECK-NEXT: mov w19, w0
+; CHECK-NEXT: bl __unordtf2
+; CHECK-NEXT: cmp w0, #0
+; CHECK-NEXT: ccmp w19, #0, #4, eq
+; CHECK-NEXT: ldp x30, x19, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: add sp, sp, #64
+; CHECK-NEXT: ret
+ %abs = tail call fp128 @llvm.fabs.f128(fp128 %arg)
+ %ret = fcmp ueq fp128 %abs, 0xL00000000000000007FFF000000000000
+ ret i1 %ret
+}
+
+define i1 @test_fp128_is_not_inf_or_nan(fp128 %arg) {
+; CHECK-LABEL: test_fp128_is_not_inf_or_nan:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #64
+; CHECK-NEXT: stp x30, x19, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: .cfi_offset w19, -8
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: str q0, [sp, #32]
+; CHECK-NEXT: ldrb w8, [sp, #47]
+; CHECK-NEXT: and w8, w8, #0x7f
+; CHECK-NEXT: strb w8, [sp, #47]
+; CHECK-NEXT: adrp x8, .LCPI9_0
+; CHECK-NEXT: ldr q0, [sp, #32]
+; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI9_0]
+; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: bl __eqtf2
+; CHECK-NEXT: ldp q1, q0, [sp] // 32-byte Folded Reload
+; CHECK-NEXT: mov w19, w0
+; CHECK-NEXT: bl __unordtf2
+; CHECK-NEXT: cmp w0, #0
+; CHECK-NEXT: ccmp w19, #0, #4, eq
+; CHECK-NEXT: ldp x30, x19, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: add sp, sp, #64
+; CHECK-NEXT: ret
+ %abs = tail call fp128 @llvm.fabs.f128(fp128 %arg)
+ %ret = fcmp one fp128 %abs, 0xL00000000000000007FFF000000000000
+ ret i1 %ret
+}
+
+define i1 @test_fp128_is_inf(fp128 %arg) {
+; CHECK-LABEL: test_fp128_is_inf:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #32
+; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: str q0, [sp]
+; CHECK-NEXT: ldrb w8, [sp, #15]
+; CHECK-NEXT: and w8, w8, #0x7f
+; CHECK-NEXT: strb w8, [sp, #15]
+; CHECK-NEXT: adrp x8, .LCPI10_0
+; CHECK-NEXT: ldr q0, [sp]
+; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI10_0]
+; CHECK-NEXT: bl __eqtf2
+; CHECK-NEXT: cmp w0, #0
+; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: add sp, sp, #32
+; CHECK-NEXT: ret
+ %abs = tail call fp128 @llvm.fabs.f128(fp128 %arg)
+ %ret = fcmp oeq fp128 %abs, 0xL00000000000000007FFF000000000000
+ ret i1 %ret
+}
+
+define i1 @test_fp128_is_not_inf(fp128 %arg) {
+; CHECK-LABEL: test_fp128_is_not_inf:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #32
+; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: str q0, [sp]
+; CHECK-NEXT: ldrb w8, [sp, #15]
+; CHECK-NEXT: and w8, w8, #0x7f
+; CHECK-NEXT: strb w8, [sp, #15]
+; CHECK-NEXT: adrp x8, .LCPI11_0
+; CHECK-NEXT: ldr q0, [sp]
+; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI11_0]
+; CHECK-NEXT: bl __netf2
+; CHECK-NEXT: cmp w0, #0
+; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: add sp, sp, #32
+; CHECK-NEXT: ret
+ %abs = tail call fp128 @llvm.fabs.f128(fp128 %arg)
+ %ret = fcmp une fp128 %abs, 0xL00000000000000007FFF000000000000
+ ret i1 %ret
+}
diff --git a/llvm/test/CodeGen/RISCV/fpclass-test.ll b/llvm/test/CodeGen/RISCV/fpclass-test.ll
new file mode 100644
index 00000000000000..90b9c0d8bfed8a
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/fpclass-test.ll
@@ -0,0 +1,245 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=riscv64 -mattr=+d,+v < %s | FileCheck %s
+
+define i1 @test_is_inf_or_nan(double %arg) {
+; CHECK-LABEL: test_is_inf_or_nan:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fclass.d a0, fa0
+; CHECK-NEXT: andi a0, a0, 897
+; CHECK-NEXT: snez a0, a0
+; CHECK-NEXT: ret
+ %abs = tail call double @llvm.fabs.f64(double %arg)
+ %ret = fcmp ueq double %abs, 0x7FF0000000000000
+ ret i1 %ret
+}
+
+define i1 @test_is_not_inf_or_nan(double %arg) {
+; CHECK-LABEL: test_is_not_inf_or_nan:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, %hi(.LCPI1_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI1_0)(a0)
+; CHECK-NEXT: fabs.d fa4, fa0
+; CHECK-NEXT: flt.d a0, fa4, fa5
+; CHECK-NEXT: ret
+ %abs = tail call double @llvm.fabs.f64(double %arg)
+ %ret = fcmp one double %abs, 0x7FF0000000000000
+ ret i1 %ret
+}
+
+define i1 @test_is_inf(double %arg) {
+; CHECK-LABEL: test_is_inf:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fclass.d a0, fa0
+; CHECK-NEXT: andi a0, a0, 129
+; CHECK-NEXT: snez a0, a0
+; CHECK-NEXT: ret
+ %abs = tail call double @llvm.fabs.f64(double %arg)
+ %ret = fcmp oeq double %abs, 0x7FF0000000000000
+ ret i1 %ret
+}
+
+define i1 @test_is_not_inf(double %arg) {
+; CHECK-LABEL: test_is_not_inf:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, %hi(.LCPI3_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI3_0)(a0)
+; CHECK-NEXT: fabs.d fa4, fa0
+; CHECK-NEXT: feq.d a0, fa4, fa5
+; CHECK-NEXT: xori a0, a0, 1
+; CHECK-NEXT: ret
+ %abs = tail call double @llvm.fabs.f64(double %arg)
+ %ret = fcmp une double %abs, 0x7FF0000000000000
+ ret i1 %ret
+}
+
+define <vscale x 4 x i1> @test_vec_is_inf_or_nan(<vscale x 4 x double> %arg) {
+; CHECK-LABEL: test_vec_is_inf_or_nan:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, %hi(.LCPI4_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI4_0)(a0)
+; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; CHECK-NEXT: vfabs.v v8, v8
+; CHECK-NEXT: vmflt.vf v12, v8, fa5
+; CHECK-NEXT: vmfgt.vf v13, v8, fa5
+; CHECK-NEXT: vmnor.mm v0, v13, v12
+; CHECK-NEXT: ret
+ %abs = tail call <vscale x 4 x double> @llvm.fabs.nxv4f64(<vscale x 4 x double> %arg)
+ %ret = fcmp ueq <vscale x 4 x double> %abs, splat (double 0x7FF0000000000000)
+ ret <vscale x 4 x i1> %ret
+}
+
+define <vscale x 4 x i1> @test_vec_is_not_inf_or_nan(<vscale x 4 x double> %arg) {
+; CHECK-LABEL: test_vec_is_not_inf_or_nan:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, %hi(.LCPI5_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI5_0)(a0)
+; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; CHECK-NEXT: vfabs.v v8, v8
+; CHECK-NEXT: vmflt.vf v12, v8, fa5
+; CHECK-NEXT: vmfgt.vf v13, v8, fa5
+; CHECK-NEXT: vmor.mm v0, v13, v12
+; CHECK-NEXT: ret
+ %abs = tail call <vscale x 4 x double> @llvm.fabs.nxv4f64(<vscale x 4 x double> %arg)
+ %ret = fcmp one <vscale x 4 x double> %abs, splat (double 0x7FF0000000000000)
+ ret <vscale x 4 x i1> %ret
+}
+
+define <vscale x 4 x i1> @test_vec_is_inf(<vscale x 4 x double> %arg) {
+; CHECK-LABEL: test_vec_is_inf:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, %hi(.LCPI6_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI6_0)(a0)
+; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; CHECK-NEXT: vfabs.v v8, v8
+; CHECK-NEXT: vmfeq.vf v0, v8, fa5
+; CHECK-NEXT: ret
+ %abs = tail call <vscale x 4 x double> @llvm.fabs.nxv4f64(<vscale x 4 x double> %arg)
+ %ret = fcmp oeq <vscale x 4 x double> %abs, splat (double 0x7FF0000000000000)
+ ret <vscale x 4 x i1> %ret
+}
+
+define <vscale x 4 x i1> @test_vec_is_not_inf(<vscale x 4 x double> %arg) {
+; CHECK-LABEL: test_vec_is_not_inf:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, %hi(.LCPI7_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI7_0)(a0)
+; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; CHECK-NEXT: vfabs.v v8, v8
+; CHECK-NEXT: vmfne.vf v0, v8, fa5
+; CHECK-NEXT: ret
+ %abs = tail call <vscale x 4 x double> @llvm.fabs.nxv4f64(<vscale x 4 x double> %arg)
+ %ret = fcmp une <vscale x 4 x double> %abs, splat (double 0x7FF0000000000000)
+ ret <vscale x 4 x i1> %ret
+}
+
+define i1 @test_fp128_is_inf_or_nan(fp128 %arg) {
+; CHECK-LABEL: test_fp128_is_inf_or_nan:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -48
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset ra, -8
+; CHECK-NEXT: .cfi_offset s0, -16
+; CHECK-NEXT: .cfi_offset s1, -24
+; CHECK-NEXT: .cfi_offset s2, -32
+; CHECK-NEXT: .cfi_offset s3, -40
+; CHECK-NEXT: mv s0, a0
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: srli s1, a1, 1
+; CHECK-NEXT: lui s2, 32767
+; CHECK-NEXT: slli s2, s2, 36
+; CHECK-NEXT: mv a1, s1
+; CHECK-NEXT: li a2, 0
+; CHECK-NEXT: mv a3, s2
+; CHECK-NEXT: call __eqtf2
+; CHECK-NEXT: seqz s3, a0
+; CHECK-NEXT: mv a0, s0
+; CHECK-NEXT: mv a1, s1
+; CHECK-NEXT: li a2, 0
+; CHECK-NEXT: mv a3, s2
+; CHECK-NEXT: call __unordtf2
+; CHECK-NEXT: snez a0, a0
+; CHECK-NEXT: or a0, a0, s3
+; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 48
+; CHECK-NEXT: ret
+ %abs = tail call fp128 @llvm.fabs.f128(fp128 %arg)
+ %ret = fcmp ueq fp128 %abs, 0xL00000000000000007FFF000000000000
+ ret i1 %ret
+}
+
+define i1 @test_fp128_is_not_inf_or_nan(fp128 %arg) {
+; CHECK-LABEL: test_fp128_is_not_inf_or_nan:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -48
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset ra, -8
+; CHECK-NEXT: .cfi_offset s0, -16
+; CHECK-NEXT: .cfi_offset s1, -24
+; CHECK-NEXT: .cfi_offset s2, -32
+; CHECK-NEXT: .cfi_offset s3, -40
+; CHECK-NEXT: mv s0, a0
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: srli s1, a1, 1
+; CHECK-NEXT: lui s2, 32767
+; CHECK-NEXT: slli s2, s2, 36
+; CHECK-NEXT: mv a1, s1
+; CHECK-NEXT: li a2, 0
+; CHECK-NEXT: mv a3, s2
+; CHECK-NEXT: call __eqtf2
+; CHECK-NEXT: snez s3, a0
+; CHECK-NEXT: mv a0, s0
+; CHECK-NEXT: mv a1, s1
+; CHECK-NEXT: li a2, 0
+; CHECK-NEXT: mv a3, s2
+; CHECK-NEXT: call __unordtf2
+; CHECK-NEXT: seqz a0, a0
+; CHECK-NEXT: and a0, a0, s3
+; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 48
+; CHECK-NEXT: ret
+ %abs = tail call fp128 @llvm.fabs.f128(fp128 %arg)
+ %ret = fcmp one fp128 %abs, 0xL00000000000000007FFF000000000000
+ ret i1 %ret
+}
+
+define i1 @test_fp128_is_inf(fp128 %arg) {
+; CHECK-LABEL: test_fp128_is_inf:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset ra, -8
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: srli a1, a1, 1
+; CHECK-NEXT: lui a3, 32767
+; CHECK-NEXT: slli a3, a3, 36
+; CHECK-NEXT: li a2, 0
+; CHECK-NEXT: call __eqtf2
+; CHECK-NEXT: seqz a0, a0
+; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %abs = tail call fp128 @llvm.fabs.f128(fp128 %arg)
+ %ret = fcmp oeq fp128 %abs, 0xL00000000000000007FFF000000000000
+ ret i1 %ret
+}
+
+define i1 @test_fp128_is_not_inf(fp128 %arg) {
+; CHECK-LABEL: test_fp128_is_not_inf:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset ra, -8
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: srli a1, a1, 1
+; CHECK-NEXT: lui a3, 32767
+; CHECK-NEXT: slli a3, a3, 36
+; CHECK-NEXT: li a2, 0
+; CHECK-NEXT: call __netf2
+; CHECK-NEXT: snez a0, a0
+; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %abs = tail call fp128 @llvm.fabs.f128(fp128 %arg)
+ %ret = fcmp une fp128 %abs, 0xL00000000000000007FFF000000000000
+ ret i1 %ret
+}
diff --git a/llvm/test/CodeGen/X86/fpclass-test.ll b/llvm/test/CodeGen/X86/fpclass-test.ll
new file mode 100644
index 00000000000000..7d8a3f37d1b3ee
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fpclass-test.ll
@@ -0,0 +1,274 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s
+
+define i1 @test_is_inf_or_nan(double %arg) {
+; CHECK-LABEL: test_is_inf_or_nan:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-NEXT: vucomisd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: retq
+ %abs = tail call double @llvm.fabs.f64(double %arg)
+ %ret = fcmp ueq double %abs, 0x7FF0000000000000
+ ret i1 %ret
+}
+
+define i1 @test_is_not_inf_or_nan(double %arg) {
+; CHECK-LABEL: test_is_not_inf_or_nan:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-NEXT: vucomisd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: setne %al
+; CHECK-NEXT: retq
+ %abs = tail call double @llvm.fabs.f64(double %arg)
+ %ret = fcmp one double %abs, 0x7FF0000000000000
+ ret i1 %ret
+}
+
+define i1 @test_is_inf(double %arg) {
+; CHECK-LABEL: test_is_inf:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-NEXT: vucomisd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: setae %al
+; CHECK-NEXT: retq
+ %abs = tail call double @llvm.fabs.f64(double %arg)
+ %ret = fcmp oeq double %abs, 0x7FF0000000000000
+ ret i1 %ret
+}
+
+define i1 @test_is_not_inf(double %arg) {
+; CHECK-LABEL: test_is_not_inf:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-NEXT: vucomisd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: setb %al
+; CHECK-NEXT: retq
+ %abs = tail call double @llvm.fabs.f64(double %arg)
+ %ret = fcmp une double %abs, 0x7FF0000000000000
+ ret i1 %ret
+}
+
+define <4 x i1> @test_vec_is_inf_or_nan(<4 x double> %arg) {
+; CHECK-LABEL: test_vec_is_inf_or_nan:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vbroadcastsd {{.*#+}} ymm1 = [NaN,NaN,NaN,NaN]
+; CHECK-NEXT: vandpd %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vbroadcastsd {{.*#+}} ymm1 = [+Inf,+Inf,+Inf,+Inf]
+; CHECK-NEXT: vcmpeq_uqpd %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
+; CHECK-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+ %abs = tail call <4 x double> @llvm.fabs.v4f64(<4 x double> %arg)
+ %ret = fcmp ueq <4 x double> %abs, splat (double 0x7FF0000000000000)
+ ret <4 x i1> %ret
+}
+
+define <4 x i1> @test_vec_is_not_inf_or_nan(<4 x double> %arg) {
+; CHECK-LABEL: test_vec_is_not_inf_or_nan:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vbroadcastsd {{.*#+}} ymm1 = [NaN,NaN,NaN,NaN]
+; CHECK-NEXT: vandpd %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vbroadcastsd {{.*#+}} ymm1 = [+Inf,+Inf,+Inf,+Inf]
+; CHECK-NEXT: vcmpneq_oqpd %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
+; CHECK-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+ %abs = tail call <4 x double> @llvm.fabs.v4f64(<4 x double> %arg)
+ %ret = fcmp one <4 x double> %abs, splat (double 0x7FF0000000000000)
+ ret <4 x i1> %ret
+}
+
+define <4 x i1> @test_vec_is_inf(<4 x double> %arg) {
+; CHECK-LABEL: test_vec_is_inf:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vbroadcastsd {{.*#+}} ymm1 = [NaN,NaN,NaN,NaN]
+; CHECK-NEXT: vandpd %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vbroadcastsd {{.*#+}} ymm1 = [+Inf,+Inf,+Inf,+Inf]
+; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
+; CHECK-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+ %abs = tail call <4 x double> @llvm.fabs.v4f64(<4 x double> %arg)
+ %ret = fcmp oeq <4 x double> %abs, splat (double 0x7FF0000000000000)
+ ret <4 x i1> %ret
+}
+
+define <4 x i1> @test_vec_is_not_inf(<4 x double> %arg) {
+; CHECK-LABEL: test_vec_is_not_inf:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vbroadcastsd {{.*#+}} ymm1 = [NaN,NaN,NaN,NaN]
+; CHECK-NEXT: vandpd %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vbroadcastsd {{.*#+}} ymm1 = [+Inf,+Inf,+Inf,+Inf]
+; CHECK-NEXT: vcmpneqpd %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
+; CHECK-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+ %abs = tail call <4 x double> @llvm.fabs.v4f64(<4 x double> %arg)
+ %ret = fcmp une <4 x double> %abs, splat (double 0x7FF0000000000000)
+ ret <4 x i1> %ret
+}
+
+define i1 @test_fp128_is_inf_or_nan(fp128 %arg) {
+; CHECK-LABEL: test_fp128_is_inf_or_nan:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: subq $16, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: .cfi_offset %rbx, -16
+; CHECK-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: vmovaps {{.*#+}} xmm1 = [+Inf]
+; CHECK-NEXT: callq __eqtf2@PLT
+; CHECK-NEXT: testl %eax, %eax
+; CHECK-NEXT: sete %bl
+; CHECK-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: vmovaps {{.*#+}} xmm1 = [+Inf]
+; CHECK-NEXT: callq __unordtf2@PLT
+; CHECK-NEXT: testl %eax, %eax
+; CHECK-NEXT: setne %al
+; CHECK-NEXT: orb %bl, %al
+; CHECK-NEXT: addq $16, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+ %abs = tail call fp128 @llvm.fabs.f128(fp128 %arg)
+ %ret = fcmp ueq fp128 %abs, 0xL00000000000000007FFF000000000000
+ ret i1 %ret
+}
+
+define i1 @test_fp128_is_not_inf_or_nan(fp128 %arg) {
+; CHECK-LABEL: test_fp128_is_not_inf_or_nan:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: subq $16, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: .cfi_offset %rbx, -16
+; CHECK-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: vmovaps {{.*#+}} xmm1 = [+Inf]
+; CHECK-NEXT: callq __eqtf2@PLT
+; CHECK-NEXT: testl %eax, %eax
+; CHECK-NEXT: setne %bl
+; CHECK-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: vmovaps {{.*#+}} xmm1 = [+Inf]
+; CHECK-NEXT: callq __unordtf2@PLT
+; CHECK-NEXT: testl %eax, %eax
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: andb %bl, %al
+; CHECK-NEXT: addq $16, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+ %abs = tail call fp128 @llvm.fabs.f128(fp128 %arg)
+ %ret = fcmp one fp128 %abs, 0xL00000000000000007FFF000000000000
+ ret i1 %ret
+}
+
+define i1 @test_fp128_is_inf(fp128 %arg) {
+; CHECK-LABEL: test_fp128_is_inf:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-NEXT: vmovaps {{.*#+}} xmm1 = [+Inf]
+; CHECK-NEXT: callq __eqtf2@PLT
+; CHECK-NEXT: testl %eax, %eax
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: popq %rcx
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+ %abs = tail call fp128 @llvm.fabs.f128(fp128 %arg)
+ %ret = fcmp oeq fp128 %abs, 0xL00000000000000007FFF000000000000
+ ret i1 %ret
+}
+
+define i1 @test_fp128_is_not_inf(fp128 %arg) {
+; CHECK-LABEL: test_fp128_is_not_inf:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-NEXT: vmovaps {{.*#+}} xmm1 = [+Inf]
+; CHECK-NEXT: callq __netf2@PLT
+; CHECK-NEXT: testl %eax, %eax
+; CHECK-NEXT: setne %al
+; CHECK-NEXT: popq %rcx
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+ %abs = tail call fp128 @llvm.fabs.f128(fp128 %arg)
+ %ret = fcmp une fp128 %abs, 0xL00000000000000007FFF000000000000
+ ret i1 %ret
+}
+
+define i1 @test_x86_fp80_is_inf_or_nan(x86_fp80 %arg) {
+; CHECK-LABEL: test_x86_fp80_is_inf_or_nan:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fabs
+; CHECK-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; CHECK-NEXT: fxch %st(1)
+; CHECK-NEXT: fucompi %st(1), %st
+; CHECK-NEXT: fstp %st(0)
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: retq
+ %abs = tail call x86_fp80 @llvm.fabs.f80(x86_fp80 %arg)
+ %ret = fcmp ueq x86_fp80 %abs, 0xK7FFF8000000000000000
+ ret i1 %ret
+}
+
+define i1 @test_x86_fp80_is_not_inf_or_nan(x86_fp80 %arg) {
+; CHECK-LABEL: test_x86_fp80_is_not_inf_or_nan:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fabs
+; CHECK-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; CHECK-NEXT: fxch %st(1)
+; CHECK-NEXT: fucompi %st(1), %st
+; CHECK-NEXT: fstp %st(0)
+; CHECK-NEXT: setne %al
+; CHECK-NEXT: retq
+ %abs = tail call x86_fp80 @llvm.fabs.f80(x86_fp80 %arg)
+ %ret = fcmp one x86_fp80 %abs, 0xK7FFF8000000000000000
+ ret i1 %ret
+}
+
+define i1 @test_x86_fp80_is_inf(x86_fp80 %arg) {
+; CHECK-LABEL: test_x86_fp80_is_inf:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fabs
+; CHECK-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; CHECK-NEXT: fxch %st(1)
+; CHECK-NEXT: fucompi %st(1), %st
+; CHECK-NEXT: fstp %st(0)
+; CHECK-NEXT: setae %al
+; CHECK-NEXT: retq
+ %abs = tail call x86_fp80 @llvm.fabs.f80(x86_fp80 %arg)
+ %ret = fcmp oeq x86_fp80 %abs, 0xK7FFF8000000000000000
+ ret i1 %ret
+}
+
+define i1 @test_x86_fp80_is_not_inf(x86_fp80 %arg) {
+; CHECK-LABEL: test_x86_fp80_is_not_inf:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fabs
+; CHECK-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; CHECK-NEXT: fxch %st(1)
+; CHECK-NEXT: fucompi %st(1), %st
+; CHECK-NEXT: fstp %st(0)
+; CHECK-NEXT: setb %al
+; CHECK-NEXT: retq
+ %abs = tail call x86_fp80 @llvm.fabs.f80(x86_fp80 %arg)
+ %ret = fcmp une x86_fp80 %abs, 0xK7FFF8000000000000000
+ ret i1 %ret
+}
>From 4558a9d9992b5a23222dbabaa3468d04198c04ee Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Mon, 12 Feb 2024 10:21:12 +0800
Subject: [PATCH 2/2] [SDAG] Reverse the canonicalization of isInf/isNanOrInf
---
.../SelectionDAG/SelectionDAGBuilder.cpp | 70 +++--
.../SelectionDAG/SelectionDAGBuilder.h | 1 +
llvm/test/CodeGen/AArch64/fpclass-test.ll | 144 +++-------
llvm/test/CodeGen/AArch64/isinf.ll | 22 +-
llvm/test/CodeGen/AMDGPU/fp-classify.ll | 196 +++++++------
llvm/test/CodeGen/AMDGPU/fract-match.ll | 259 +++++++++---------
llvm/test/CodeGen/RISCV/fpclass-test.ll | 156 +++--------
llvm/test/CodeGen/X86/compare-inf.ll | 112 ++++++--
llvm/test/CodeGen/X86/fpclass-test.ll | 198 +++++++------
9 files changed, 562 insertions(+), 596 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 5ce1013f30fd1b..22ceca33195cab 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3467,12 +3467,50 @@ void SelectionDAGBuilder::visitICmp(const User &I) {
setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Opcode));
}
+SDValue SelectionDAGBuilder::lowerIsFpClass(Value *ClassVal,
+ FPClassTest ClassTest) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ const DataLayout &DL = DAG.getDataLayout();
+ SDLoc sdl = getCurSDLoc();
+
+ EVT DestVT =
+ TLI.getValueType(DL, CmpInst::makeCmpResultType(ClassVal->getType()));
+ EVT ArgVT = TLI.getValueType(DL, ClassVal->getType());
+ MachineFunction &MF = DAG.getMachineFunction();
+ const Function &F = MF.getFunction();
+ SDValue Op = getValue(ClassVal);
+ SDNodeFlags Flags;
+ Flags.setNoFPExcept(!F.getAttributes().hasFnAttr(llvm::Attribute::StrictFP));
+ // If ISD::IS_FPCLASS should be expanded, do it right now, because the
+ // expansion can use illegal types. Making expansion early allows
+ // legalizing these types prior to selection.
+ if (!TLI.isOperationLegalOrCustom(ISD::IS_FPCLASS, ArgVT))
+ return TLI.expandIS_FPCLASS(DestVT, Op, ClassTest, Flags, sdl, DAG);
+
+ SDValue Check = DAG.getTargetConstant(ClassTest, sdl, MVT::i32);
+ return DAG.getNode(ISD::IS_FPCLASS, sdl, DestVT, {Op, Check}, Flags);
+}
+
void SelectionDAGBuilder::visitFCmp(const User &I) {
FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE;
- if (const FCmpInst *FC = dyn_cast<FCmpInst>(&I))
+ if (const FCmpInst *FC = dyn_cast<FCmpInst>(&I)) {
predicate = FC->getPredicate();
- else if (const ConstantExpr *FC = dyn_cast<ConstantExpr>(&I))
+
+ // Reverse the canonicalization if it is a FP class test
+ auto ShouldReverseTransform = [](FPClassTest ClassTest) {
+ return ClassTest == fcInf || ClassTest == (fcInf | fcNan);
+ };
+ auto [ClassVal, ClassTest] =
+ fcmpToClassTest(predicate, *FC->getParent()->getParent(),
+ FC->getOperand(0), FC->getOperand(1));
+ if (ClassVal && (ShouldReverseTransform(ClassTest) ||
+ ShouldReverseTransform(~ClassTest))) {
+ setValue(&I, lowerIsFpClass(ClassVal, ClassTest));
+ return;
+ }
+ } else if (const ConstantExpr *FC = dyn_cast<ConstantExpr>(&I))
predicate = FCmpInst::Predicate(FC->getPredicate());
+
SDValue Op1 = getValue(I.getOperand(0));
SDValue Op2 = getValue(I.getOperand(1));
@@ -6666,29 +6704,11 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
DAG.setRoot(Res.getValue(0));
return;
case Intrinsic::is_fpclass: {
- const DataLayout DLayout = DAG.getDataLayout();
- EVT DestVT = TLI.getValueType(DLayout, I.getType());
- EVT ArgVT = TLI.getValueType(DLayout, I.getArgOperand(0)->getType());
- FPClassTest Test = static_cast<FPClassTest>(
- cast<ConstantInt>(I.getArgOperand(1))->getZExtValue());
- MachineFunction &MF = DAG.getMachineFunction();
- const Function &F = MF.getFunction();
- SDValue Op = getValue(I.getArgOperand(0));
- SDNodeFlags Flags;
- Flags.setNoFPExcept(
- !F.getAttributes().hasFnAttr(llvm::Attribute::StrictFP));
- // If ISD::IS_FPCLASS should be expanded, do it right now, because the
- // expansion can use illegal types. Making expansion early allows
- // legalizing these types prior to selection.
- if (!TLI.isOperationLegalOrCustom(ISD::IS_FPCLASS, ArgVT)) {
- SDValue Result = TLI.expandIS_FPCLASS(DestVT, Op, Test, Flags, sdl, DAG);
- setValue(&I, Result);
- return;
- }
-
- SDValue Check = DAG.getTargetConstant(Test, sdl, MVT::i32);
- SDValue V = DAG.getNode(ISD::IS_FPCLASS, sdl, DestVT, {Op, Check}, Flags);
- setValue(&I, V);
+ setValue(&I,
+ lowerIsFpClass(
+ I.getArgOperand(0),
+ static_cast<FPClassTest>(
+ cast<ConstantInt>(I.getArgOperand(1))->getZExtValue())));
return;
}
case Intrinsic::get_fpenv: {
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 47657313cb6a3b..dfc9369117c79d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -700,6 +700,7 @@ class SelectionDAGBuilder {
MCSymbol *&BeginLabel);
SDValue lowerEndEH(SDValue Chain, const InvokeInst *II,
const BasicBlock *EHPadBB, MCSymbol *BeginLabel);
+ SDValue lowerIsFpClass(Value *ClassVal, FPClassTest ClassTest);
};
/// This struct represents the registers (physical or virtual)
diff --git a/llvm/test/CodeGen/AArch64/fpclass-test.ll b/llvm/test/CodeGen/AArch64/fpclass-test.ll
index 0a7f11149494fb..e058c724d9e42b 100644
--- a/llvm/test/CodeGen/AArch64/fpclass-test.ll
+++ b/llvm/test/CodeGen/AArch64/fpclass-test.ll
@@ -4,12 +4,11 @@
define i1 @test_is_inf_or_nan(double %arg) {
; CHECK-LABEL: test_is_inf_or_nan:
; CHECK: // %bb.0:
-; CHECK-NEXT: fabs d0, d0
-; CHECK-NEXT: mov x8, #9218868437227405312 // =0x7ff0000000000000
-; CHECK-NEXT: fmov d1, x8
-; CHECK-NEXT: fcmp d0, d1
-; CHECK-NEXT: cset w8, eq
-; CHECK-NEXT: csinc w0, w8, wzr, vc
+; CHECK-NEXT: fmov x9, d0
+; CHECK-NEXT: mov x8, #9218868437227405311 // =0x7fefffffffffffff
+; CHECK-NEXT: and x9, x9, #0x7fffffffffffffff
+; CHECK-NEXT: cmp x9, x8
+; CHECK-NEXT: cset w0, gt
; CHECK-NEXT: ret
%abs = tail call double @llvm.fabs.f64(double %arg)
%ret = fcmp ueq double %abs, 0x7FF0000000000000
@@ -19,12 +18,11 @@ define i1 @test_is_inf_or_nan(double %arg) {
define i1 @test_is_not_inf_or_nan(double %arg) {
; CHECK-LABEL: test_is_not_inf_or_nan:
; CHECK: // %bb.0:
-; CHECK-NEXT: fabs d0, d0
+; CHECK-NEXT: fmov x9, d0
; CHECK-NEXT: mov x8, #9218868437227405312 // =0x7ff0000000000000
-; CHECK-NEXT: fmov d1, x8
-; CHECK-NEXT: fcmp d0, d1
-; CHECK-NEXT: cset w8, mi
-; CHECK-NEXT: csinc w0, w8, wzr, le
+; CHECK-NEXT: and x9, x9, #0x7fffffffffffffff
+; CHECK-NEXT: cmp x9, x8
+; CHECK-NEXT: cset w0, lt
; CHECK-NEXT: ret
%abs = tail call double @llvm.fabs.f64(double %arg)
%ret = fcmp one double %abs, 0x7FF0000000000000
@@ -63,12 +61,9 @@ define <vscale x 2 x i1> @test_vec_is_inf_or_nan(<vscale x 2 x double> %arg) {
; CHECK-LABEL: test_vec_is_inf_or_nan:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: mov x8, #9218868437227405312 // =0x7ff0000000000000
-; CHECK-NEXT: mov z1.d, x8
-; CHECK-NEXT: fabs z0.d, p0/m, z0.d
-; CHECK-NEXT: fcmuo p1.d, p0/z, z0.d, z1.d
-; CHECK-NEXT: fcmeq p0.d, p0/z, z0.d, z1.d
-; CHECK-NEXT: sel p0.b, p0, p0.b, p1.b
+; CHECK-NEXT: mov z1.d, #0x7ff0000000000000
+; CHECK-NEXT: and z0.d, z0.d, #0x7fffffffffffffff
+; CHECK-NEXT: cmpge p0.d, p0/z, z0.d, z1.d
; CHECK-NEXT: ret
%abs = tail call <vscale x 2 x double> @llvm.fabs.nxv2f64(<vscale x 2 x double> %arg)
%ret = fcmp ueq <vscale x 2 x double> %abs, splat (double 0x7FF0000000000000)
@@ -79,12 +74,9 @@ define <vscale x 2 x i1> @test_vec_is_not_inf_or_nan(<vscale x 2 x double> %arg)
; CHECK-LABEL: test_vec_is_not_inf_or_nan:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: mov x8, #9218868437227405312 // =0x7ff0000000000000
-; CHECK-NEXT: mov z1.d, x8
-; CHECK-NEXT: fabs z0.d, p0/m, z0.d
-; CHECK-NEXT: fcmgt p1.d, p0/z, z1.d, z0.d
-; CHECK-NEXT: fcmgt p0.d, p0/z, z0.d, z1.d
-; CHECK-NEXT: sel p0.b, p0, p0.b, p1.b
+; CHECK-NEXT: mov z1.d, #0x7ff0000000000000
+; CHECK-NEXT: and z0.d, z0.d, #0x7fffffffffffffff
+; CHECK-NEXT: cmpgt p0.d, p0/z, z1.d, z0.d
; CHECK-NEXT: ret
%abs = tail call <vscale x 2 x double> @llvm.fabs.nxv2f64(<vscale x 2 x double> %arg)
%ret = fcmp one <vscale x 2 x double> %abs, splat (double 0x7FF0000000000000)
@@ -122,29 +114,14 @@ define <vscale x 2 x i1> @test_vec_is_not_inf(<vscale x 2 x double> %arg) {
define i1 @test_fp128_is_inf_or_nan(fp128 %arg) {
; CHECK-LABEL: test_fp128_is_inf_or_nan:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #64
-; CHECK-NEXT: stp x30, x19, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 64
-; CHECK-NEXT: .cfi_offset w19, -8
-; CHECK-NEXT: .cfi_offset w30, -16
-; CHECK-NEXT: str q0, [sp, #32]
-; CHECK-NEXT: ldrb w8, [sp, #47]
-; CHECK-NEXT: and w8, w8, #0x7f
-; CHECK-NEXT: strb w8, [sp, #47]
-; CHECK-NEXT: adrp x8, .LCPI8_0
-; CHECK-NEXT: ldr q0, [sp, #32]
-; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI8_0]
-; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill
-; CHECK-NEXT: bl __eqtf2
-; CHECK-NEXT: ldp q1, q0, [sp] // 32-byte Folded Reload
-; CHECK-NEXT: mov w19, w0
-; CHECK-NEXT: bl __unordtf2
-; CHECK-NEXT: cmp w0, #0
-; CHECK-NEXT: ccmp w19, #0, #4, eq
-; CHECK-NEXT: ldp x30, x19, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: add sp, sp, #64
+; CHECK-NEXT: mov x8, #9223090561878065151 // =0x7ffeffffffffffff
+; CHECK-NEXT: str q0, [sp, #-16]!
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: ldr x9, [sp, #8]
+; CHECK-NEXT: and x9, x9, #0x7fffffffffffffff
+; CHECK-NEXT: cmp x9, x8
+; CHECK-NEXT: cset w0, gt
+; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
%abs = tail call fp128 @llvm.fabs.f128(fp128 %arg)
%ret = fcmp ueq fp128 %abs, 0xL00000000000000007FFF000000000000
@@ -154,29 +131,14 @@ define i1 @test_fp128_is_inf_or_nan(fp128 %arg) {
define i1 @test_fp128_is_not_inf_or_nan(fp128 %arg) {
; CHECK-LABEL: test_fp128_is_not_inf_or_nan:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #64
-; CHECK-NEXT: stp x30, x19, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 64
-; CHECK-NEXT: .cfi_offset w19, -8
-; CHECK-NEXT: .cfi_offset w30, -16
-; CHECK-NEXT: str q0, [sp, #32]
-; CHECK-NEXT: ldrb w8, [sp, #47]
-; CHECK-NEXT: and w8, w8, #0x7f
-; CHECK-NEXT: strb w8, [sp, #47]
-; CHECK-NEXT: adrp x8, .LCPI9_0
-; CHECK-NEXT: ldr q0, [sp, #32]
-; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI9_0]
-; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill
-; CHECK-NEXT: bl __eqtf2
-; CHECK-NEXT: ldp q1, q0, [sp] // 32-byte Folded Reload
-; CHECK-NEXT: mov w19, w0
-; CHECK-NEXT: bl __unordtf2
-; CHECK-NEXT: cmp w0, #0
-; CHECK-NEXT: ccmp w19, #0, #4, eq
-; CHECK-NEXT: ldp x30, x19, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT: cset w0, ne
-; CHECK-NEXT: add sp, sp, #64
+; CHECK-NEXT: mov x8, #9223090561878065152 // =0x7fff000000000000
+; CHECK-NEXT: str q0, [sp, #-16]!
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: ldr x9, [sp, #8]
+; CHECK-NEXT: and x9, x9, #0x7fffffffffffffff
+; CHECK-NEXT: cmp x9, x8
+; CHECK-NEXT: cset w0, lt
+; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
%abs = tail call fp128 @llvm.fabs.f128(fp128 %arg)
%ret = fcmp one fp128 %abs, 0xL00000000000000007FFF000000000000
@@ -186,22 +148,14 @@ define i1 @test_fp128_is_not_inf_or_nan(fp128 %arg) {
define i1 @test_fp128_is_inf(fp128 %arg) {
; CHECK-LABEL: test_fp128_is_inf:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #32
-; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: .cfi_offset w30, -16
-; CHECK-NEXT: str q0, [sp]
-; CHECK-NEXT: ldrb w8, [sp, #15]
-; CHECK-NEXT: and w8, w8, #0x7f
-; CHECK-NEXT: strb w8, [sp, #15]
-; CHECK-NEXT: adrp x8, .LCPI10_0
-; CHECK-NEXT: ldr q0, [sp]
-; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI10_0]
-; CHECK-NEXT: bl __eqtf2
-; CHECK-NEXT: cmp w0, #0
-; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT: str q0, [sp, #-16]!
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: ldp x9, x8, [sp], #16
+; CHECK-NEXT: and x8, x8, #0x7fffffffffffffff
+; CHECK-NEXT: eor x8, x8, #0x7fff000000000000
+; CHECK-NEXT: orr x8, x9, x8
+; CHECK-NEXT: cmp x8, #0
; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: add sp, sp, #32
; CHECK-NEXT: ret
%abs = tail call fp128 @llvm.fabs.f128(fp128 %arg)
%ret = fcmp oeq fp128 %abs, 0xL00000000000000007FFF000000000000
@@ -211,22 +165,14 @@ define i1 @test_fp128_is_inf(fp128 %arg) {
define i1 @test_fp128_is_not_inf(fp128 %arg) {
; CHECK-LABEL: test_fp128_is_not_inf:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #32
-; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: .cfi_offset w30, -16
-; CHECK-NEXT: str q0, [sp]
-; CHECK-NEXT: ldrb w8, [sp, #15]
-; CHECK-NEXT: and w8, w8, #0x7f
-; CHECK-NEXT: strb w8, [sp, #15]
-; CHECK-NEXT: adrp x8, .LCPI11_0
-; CHECK-NEXT: ldr q0, [sp]
-; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI11_0]
-; CHECK-NEXT: bl __netf2
-; CHECK-NEXT: cmp w0, #0
-; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT: str q0, [sp, #-16]!
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: ldp x9, x8, [sp], #16
+; CHECK-NEXT: and x8, x8, #0x7fffffffffffffff
+; CHECK-NEXT: eor x8, x8, #0x7fff000000000000
+; CHECK-NEXT: orr x8, x9, x8
+; CHECK-NEXT: cmp x8, #0
; CHECK-NEXT: cset w0, ne
-; CHECK-NEXT: add sp, sp, #32
; CHECK-NEXT: ret
%abs = tail call fp128 @llvm.fabs.f128(fp128 %arg)
%ret = fcmp une fp128 %abs, 0xL00000000000000007FFF000000000000
diff --git a/llvm/test/CodeGen/AArch64/isinf.ll b/llvm/test/CodeGen/AArch64/isinf.ll
index 458bd7eeba16cf..834417b98743a8 100644
--- a/llvm/test/CodeGen/AArch64/isinf.ll
+++ b/llvm/test/CodeGen/AArch64/isinf.ll
@@ -58,22 +58,14 @@ define i32 @replace_isinf_call_f64(double %x) {
define i32 @replace_isinf_call_f128(fp128 %x) {
; CHECK-LABEL: replace_isinf_call_f128:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #32
-; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: .cfi_offset w30, -16
-; CHECK-NEXT: str q0, [sp]
-; CHECK-NEXT: ldrb w8, [sp, #15]
-; CHECK-NEXT: and w8, w8, #0x7f
-; CHECK-NEXT: strb w8, [sp, #15]
-; CHECK-NEXT: adrp x8, .LCPI3_0
-; CHECK-NEXT: ldr q0, [sp]
-; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI3_0]
-; CHECK-NEXT: bl __eqtf2
-; CHECK-NEXT: cmp w0, #0
-; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT: str q0, [sp, #-16]!
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: ldp x9, x8, [sp], #16
+; CHECK-NEXT: and x8, x8, #0x7fffffffffffffff
+; CHECK-NEXT: eor x8, x8, #0x7fff000000000000
+; CHECK-NEXT: orr x8, x9, x8
+; CHECK-NEXT: cmp x8, #0
; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: add sp, sp, #32
; CHECK-NEXT: ret
%abs = tail call fp128 @llvm.fabs.f128(fp128 %x)
%cmpinf = fcmp oeq fp128 %abs, 0xL00000000000000007FFF000000000000
diff --git a/llvm/test/CodeGen/AMDGPU/fp-classify.ll b/llvm/test/CodeGen/AMDGPU/fp-classify.ll
index 6fa7df913812a3..ed9ce4d62383b1 100644
--- a/llvm/test/CodeGen/AMDGPU/fp-classify.ll
+++ b/llvm/test/CodeGen/AMDGPU/fp-classify.ll
@@ -61,10 +61,10 @@ define amdgpu_kernel void @test_not_isinf_pattern_0(ptr addrspace(1) nocapture %
; SI-NEXT: s_load_dword s0, s[0:1], 0xb
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s6, -1
-; SI-NEXT: v_mov_b32_e32 v0, 0x7f800000
+; SI-NEXT: v_mov_b32_e32 v0, 0x207
; SI-NEXT: s_waitcnt lgkmcnt(0)
-; SI-NEXT: v_cmp_nlg_f32_e64 s[0:1], |s0|, v0
-; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
+; SI-NEXT: v_cmp_class_f32_e32 vcc, s0, v0
+; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT: s_endpgm
;
@@ -72,11 +72,11 @@ define amdgpu_kernel void @test_not_isinf_pattern_0(ptr addrspace(1) nocapture %
; VI: ; %bb.0:
; VI-NEXT: s_load_dword s2, s[0:1], 0x2c
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; VI-NEXT: v_mov_b32_e32 v0, 0x7f800000
+; VI-NEXT: v_mov_b32_e32 v0, 0x207
; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: v_cmp_nlg_f32_e64 s[2:3], |s2|, v0
+; VI-NEXT: v_cmp_class_f32_e32 vcc, s2, v0
; VI-NEXT: v_mov_b32_e32 v0, s0
-; VI-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[2:3]
+; VI-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; VI-NEXT: v_mov_b32_e32 v1, s1
; VI-NEXT: flat_store_dword v[0:1], v2
; VI-NEXT: s_endpgm
@@ -88,7 +88,7 @@ define amdgpu_kernel void @test_not_isinf_pattern_0(ptr addrspace(1) nocapture %
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: v_cmp_nlg_f32_e64 s2, 0x7f800000, |s2|
+; GFX11-NEXT: v_cmp_class_f32_e64 s2, s2, 0x207
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, s2
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -143,25 +143,29 @@ define amdgpu_kernel void @test_isfinite_pattern_0(ptr addrspace(1) nocapture %o
; SI-LABEL: test_isfinite_pattern_0:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
-; SI-NEXT: s_load_dword s0, s[0:1], 0xb
+; SI-NEXT: s_load_dword s2, s[0:1], 0xb
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s6, -1
-; SI-NEXT: v_mov_b32_e32 v0, 0x1f8
+; SI-NEXT: v_mov_b32_e32 v0, 0x1fb
; SI-NEXT: s_waitcnt lgkmcnt(0)
-; SI-NEXT: v_cmp_class_f32_e32 vcc, s0, v0
-; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; SI-NEXT: v_cmp_o_f32_e64 s[0:1], s2, s2
+; SI-NEXT: v_cmp_class_f32_e32 vcc, s2, v0
+; SI-NEXT: s_and_b64 s[0:1], s[0:1], vcc
+; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: test_isfinite_pattern_0:
; VI: ; %bb.0:
-; VI-NEXT: s_load_dword s2, s[0:1], 0x2c
+; VI-NEXT: s_load_dword s4, s[0:1], 0x2c
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; VI-NEXT: v_mov_b32_e32 v0, 0x1f8
+; VI-NEXT: v_mov_b32_e32 v0, 0x1fb
; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: v_cmp_class_f32_e32 vcc, s2, v0
+; VI-NEXT: v_cmp_o_f32_e64 s[2:3], s4, s4
+; VI-NEXT: v_cmp_class_f32_e32 vcc, s4, v0
+; VI-NEXT: s_and_b64 s[2:3], s[2:3], vcc
; VI-NEXT: v_mov_b32_e32 v0, s0
-; VI-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; VI-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[2:3]
; VI-NEXT: v_mov_b32_e32 v1, s1
; VI-NEXT: flat_store_dword v[0:1], v2
; VI-NEXT: s_endpgm
@@ -173,8 +177,10 @@ define amdgpu_kernel void @test_isfinite_pattern_0(ptr addrspace(1) nocapture %o
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: v_cmp_class_f32_e64 s2, s2, 0x1f8
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_cmp_o_f32_e64 s3, s2, s2
+; GFX11-NEXT: v_cmp_class_f32_e64 s2, s2, 0x1fb
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-NEXT: s_and_b32 s2, s3, s2
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, s2
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT: s_nop 0
@@ -349,13 +355,13 @@ define amdgpu_kernel void @test_isfinite_not_pattern_2(ptr addrspace(1) nocaptur
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s6, -1
-; SI-NEXT: v_mov_b32_e32 v0, 0x7f800000
+; SI-NEXT: v_mov_b32_e32 v0, 0x1fb
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_mov_b32 s4, s0
; SI-NEXT: s_mov_b32 s5, s1
; SI-NEXT: v_cmp_o_f32_e64 s[0:1], s2, s2
-; SI-NEXT: v_cmp_neq_f32_e64 s[2:3], |s3|, v0
-; SI-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3]
+; SI-NEXT: v_cmp_class_f32_e32 vcc, s3, v0
+; SI-NEXT: s_and_b64 s[0:1], s[0:1], vcc
; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT: s_endpgm
@@ -363,11 +369,11 @@ define amdgpu_kernel void @test_isfinite_not_pattern_2(ptr addrspace(1) nocaptur
; VI-LABEL: test_isfinite_not_pattern_2:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; VI-NEXT: v_mov_b32_e32 v0, 0x7f800000
+; VI-NEXT: v_mov_b32_e32 v0, 0x1fb
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_cmp_o_f32_e64 s[4:5], s2, s2
-; VI-NEXT: v_cmp_neq_f32_e64 s[2:3], |s3|, v0
-; VI-NEXT: s_and_b64 s[2:3], s[4:5], s[2:3]
+; VI-NEXT: v_cmp_class_f32_e32 vcc, s3, v0
+; VI-NEXT: s_and_b64 s[2:3], s[4:5], vcc
; VI-NEXT: v_mov_b32_e32 v0, s0
; VI-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[2:3]
; VI-NEXT: v_mov_b32_e32 v1, s1
@@ -380,7 +386,7 @@ define amdgpu_kernel void @test_isfinite_not_pattern_2(ptr addrspace(1) nocaptur
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_cmp_o_f32_e64 s2, s2, s2
-; GFX11-NEXT: v_cmp_neq_f32_e64 s3, 0x7f800000, |s3|
+; GFX11-NEXT: v_cmp_class_f32_e64 s3, s3, 0x1fb
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: s_and_b32 s2, s2, s3
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, s2
@@ -405,11 +411,11 @@ define amdgpu_kernel void @test_isfinite_not_pattern_3(ptr addrspace(1) nocaptur
; SI-NEXT: s_load_dword s2, s[0:1], 0xb
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s6, -1
-; SI-NEXT: v_mov_b32_e32 v0, 0x7f800000
+; SI-NEXT: v_mov_b32_e32 v0, 0x1fb
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: v_cmp_u_f32_e64 s[0:1], s2, s2
-; SI-NEXT: v_cmp_neq_f32_e64 s[2:3], |s2|, v0
-; SI-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3]
+; SI-NEXT: v_cmp_class_f32_e32 vcc, s2, v0
+; SI-NEXT: s_and_b64 s[0:1], s[0:1], vcc
; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT: s_endpgm
@@ -418,11 +424,11 @@ define amdgpu_kernel void @test_isfinite_not_pattern_3(ptr addrspace(1) nocaptur
; VI: ; %bb.0:
; VI-NEXT: s_load_dword s4, s[0:1], 0x2c
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; VI-NEXT: v_mov_b32_e32 v0, 0x7f800000
+; VI-NEXT: v_mov_b32_e32 v0, 0x1fb
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_cmp_u_f32_e64 s[2:3], s4, s4
-; VI-NEXT: v_cmp_neq_f32_e64 s[4:5], |s4|, v0
-; VI-NEXT: s_and_b64 s[2:3], s[2:3], s[4:5]
+; VI-NEXT: v_cmp_class_f32_e32 vcc, s4, v0
+; VI-NEXT: s_and_b64 s[2:3], s[2:3], vcc
; VI-NEXT: v_mov_b32_e32 v0, s0
; VI-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[2:3]
; VI-NEXT: v_mov_b32_e32 v1, s1
@@ -437,7 +443,7 @@ define amdgpu_kernel void @test_isfinite_not_pattern_3(ptr addrspace(1) nocaptur
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_cmp_u_f32_e64 s3, s2, s2
-; GFX11-NEXT: v_cmp_neq_f32_e64 s2, 0x7f800000, |s2|
+; GFX11-NEXT: v_cmp_class_f32_e64 s2, s2, 0x1fb
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: s_and_b32 s2, s3, s2
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, s2
@@ -458,25 +464,29 @@ define amdgpu_kernel void @test_isfinite_pattern_4(ptr addrspace(1) nocapture %o
; SI-LABEL: test_isfinite_pattern_4:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
-; SI-NEXT: s_load_dword s0, s[0:1], 0xb
+; SI-NEXT: s_load_dword s2, s[0:1], 0xb
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s6, -1
; SI-NEXT: v_mov_b32_e32 v0, 0x1f8
; SI-NEXT: s_waitcnt lgkmcnt(0)
-; SI-NEXT: v_cmp_class_f32_e32 vcc, s0, v0
-; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; SI-NEXT: v_cmp_o_f32_e64 s[0:1], s2, s2
+; SI-NEXT: v_cmp_class_f32_e32 vcc, s2, v0
+; SI-NEXT: s_and_b64 s[0:1], s[0:1], vcc
+; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: test_isfinite_pattern_4:
; VI: ; %bb.0:
-; VI-NEXT: s_load_dword s2, s[0:1], 0x2c
+; VI-NEXT: s_load_dword s4, s[0:1], 0x2c
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT: v_mov_b32_e32 v0, 0x1f8
; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: v_cmp_class_f32_e32 vcc, s2, v0
+; VI-NEXT: v_cmp_o_f32_e64 s[2:3], s4, s4
+; VI-NEXT: v_cmp_class_f32_e32 vcc, s4, v0
+; VI-NEXT: s_and_b64 s[2:3], s[2:3], vcc
; VI-NEXT: v_mov_b32_e32 v0, s0
-; VI-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; VI-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[2:3]
; VI-NEXT: v_mov_b32_e32 v1, s1
; VI-NEXT: flat_store_dword v[0:1], v2
; VI-NEXT: s_endpgm
@@ -488,8 +498,10 @@ define amdgpu_kernel void @test_isfinite_pattern_4(ptr addrspace(1) nocapture %o
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: v_cmp_o_f32_e64 s3, s2, s2
; GFX11-NEXT: v_cmp_class_f32_e64 s2, s2, 0x1f8
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-NEXT: s_and_b32 s2, s3, s2
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, s2
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT: s_nop 0
@@ -508,25 +520,29 @@ define amdgpu_kernel void @test_isfinite_pattern_4_commute_and(ptr addrspace(1)
; SI-LABEL: test_isfinite_pattern_4_commute_and:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
-; SI-NEXT: s_load_dword s0, s[0:1], 0xb
+; SI-NEXT: s_load_dword s2, s[0:1], 0xb
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s6, -1
; SI-NEXT: v_mov_b32_e32 v0, 0x1f8
; SI-NEXT: s_waitcnt lgkmcnt(0)
-; SI-NEXT: v_cmp_class_f32_e32 vcc, s0, v0
-; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; SI-NEXT: v_cmp_o_f32_e64 s[0:1], s2, s2
+; SI-NEXT: v_cmp_class_f32_e32 vcc, s2, v0
+; SI-NEXT: s_and_b64 s[0:1], vcc, s[0:1]
+; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: test_isfinite_pattern_4_commute_and:
; VI: ; %bb.0:
-; VI-NEXT: s_load_dword s2, s[0:1], 0x2c
+; VI-NEXT: s_load_dword s4, s[0:1], 0x2c
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT: v_mov_b32_e32 v0, 0x1f8
; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: v_cmp_class_f32_e32 vcc, s2, v0
+; VI-NEXT: v_cmp_o_f32_e64 s[2:3], s4, s4
+; VI-NEXT: v_cmp_class_f32_e32 vcc, s4, v0
+; VI-NEXT: s_and_b64 s[2:3], vcc, s[2:3]
; VI-NEXT: v_mov_b32_e32 v0, s0
-; VI-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; VI-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[2:3]
; VI-NEXT: v_mov_b32_e32 v1, s1
; VI-NEXT: flat_store_dword v[0:1], v2
; VI-NEXT: s_endpgm
@@ -538,8 +554,10 @@ define amdgpu_kernel void @test_isfinite_pattern_4_commute_and(ptr addrspace(1)
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: v_cmp_o_f32_e64 s3, s2, s2
; GFX11-NEXT: v_cmp_class_f32_e64 s2, s2, 0x1f8
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-NEXT: s_and_b32 s2, s2, s3
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, s2
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT: s_nop 0
@@ -618,16 +636,16 @@ define amdgpu_kernel void @test_not_isfinite_pattern_4_wrong_ord_test(ptr addrsp
define amdgpu_kernel void @test_isinf_pattern_f16(ptr addrspace(1) nocapture %out, half %x) #0 {
; SI-LABEL: test_isinf_pattern_f16:
; SI: ; %bb.0:
-; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
-; SI-NEXT: s_load_dword s0, s[0:1], 0xb
-; SI-NEXT: s_mov_b32 s7, 0xf000
-; SI-NEXT: s_mov_b32 s6, -1
-; SI-NEXT: s_mov_b32 s1, 0x7f800000
+; SI-NEXT: s_load_dword s4, s[0:1], 0xb
+; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
-; SI-NEXT: v_cvt_f32_f16_e64 v0, |s0|
-; SI-NEXT: v_cmp_eq_f32_e32 vcc, s1, v0
-; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; SI-NEXT: s_and_b32 s4, s4, 0x7fff
+; SI-NEXT: s_cmpk_eq_i32 s4, 0x7c00
+; SI-NEXT: s_cselect_b64 s[4:5], -1, 0
+; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: test_isinf_pattern_f16:
@@ -667,27 +685,32 @@ define amdgpu_kernel void @test_isinf_pattern_f16(ptr addrspace(1) nocapture %ou
define amdgpu_kernel void @test_isfinite_pattern_0_f16(ptr addrspace(1) nocapture %out, half %x) #0 {
; SI-LABEL: test_isfinite_pattern_0_f16:
; SI: ; %bb.0:
-; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
-; SI-NEXT: s_load_dword s0, s[0:1], 0xb
-; SI-NEXT: s_mov_b32 s7, 0xf000
-; SI-NEXT: s_mov_b32 s6, -1
-; SI-NEXT: s_movk_i32 s1, 0x1f8
+; SI-NEXT: s_load_dword s4, s[0:1], 0xb
+; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
-; SI-NEXT: v_cvt_f32_f16_e32 v0, s0
-; SI-NEXT: v_cmp_class_f32_e64 s[0:1], v0, s1
-; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
-; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; SI-NEXT: v_cvt_f32_f16_e32 v0, s4
+; SI-NEXT: s_and_b32 s4, s4, 0x7fff
+; SI-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
+; SI-NEXT: s_cmpk_lg_i32 s4, 0x7c00
+; SI-NEXT: s_cselect_b64 s[4:5], -1, 0
+; SI-NEXT: s_and_b64 s[4:5], vcc, s[4:5]
+; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: test_isfinite_pattern_0_f16:
; VI: ; %bb.0:
-; VI-NEXT: s_load_dword s2, s[0:1], 0x2c
+; VI-NEXT: s_load_dword s4, s[0:1], 0x2c
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; VI-NEXT: v_mov_b32_e32 v0, 0x1f8
+; VI-NEXT: v_mov_b32_e32 v0, 0x1fb
; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: v_cmp_class_f16_e32 vcc, s2, v0
+; VI-NEXT: v_cmp_o_f16_e64 s[2:3], s4, s4
+; VI-NEXT: v_cmp_class_f16_e32 vcc, s4, v0
+; VI-NEXT: s_and_b64 s[2:3], s[2:3], vcc
; VI-NEXT: v_mov_b32_e32 v0, s0
-; VI-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; VI-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[2:3]
; VI-NEXT: v_mov_b32_e32 v1, s1
; VI-NEXT: flat_store_dword v[0:1], v2
; VI-NEXT: s_endpgm
@@ -699,8 +722,10 @@ define amdgpu_kernel void @test_isfinite_pattern_0_f16(ptr addrspace(1) nocaptur
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: v_cmp_class_f16_e64 s2, s2, 0x1f8
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_cmp_o_f16_e64 s3, s2, s2
+; GFX11-NEXT: v_cmp_class_f16_e64 s2, s2, 0x1fb
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-NEXT: s_and_b32 s2, s3, s2
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, s2
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT: s_nop 0
@@ -718,27 +743,32 @@ define amdgpu_kernel void @test_isfinite_pattern_0_f16(ptr addrspace(1) nocaptur
define amdgpu_kernel void @test_isfinite_pattern_4_f16(ptr addrspace(1) nocapture %out, half %x) #0 {
; SI-LABEL: test_isfinite_pattern_4_f16:
; SI: ; %bb.0:
-; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
-; SI-NEXT: s_load_dword s0, s[0:1], 0xb
-; SI-NEXT: s_mov_b32 s7, 0xf000
-; SI-NEXT: s_mov_b32 s6, -1
-; SI-NEXT: s_movk_i32 s1, 0x1f8
+; SI-NEXT: s_load_dword s4, s[0:1], 0xb
+; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
-; SI-NEXT: v_cvt_f32_f16_e32 v0, s0
-; SI-NEXT: v_cmp_class_f32_e64 s[0:1], v0, s1
-; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
-; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; SI-NEXT: v_cvt_f32_f16_e32 v0, s4
+; SI-NEXT: s_and_b32 s4, s4, 0x7fff
+; SI-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
+; SI-NEXT: s_cmpk_lt_i32 s4, 0x7c00
+; SI-NEXT: s_cselect_b64 s[4:5], -1, 0
+; SI-NEXT: s_and_b64 s[4:5], vcc, s[4:5]
+; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: test_isfinite_pattern_4_f16:
; VI: ; %bb.0:
-; VI-NEXT: s_load_dword s2, s[0:1], 0x2c
+; VI-NEXT: s_load_dword s4, s[0:1], 0x2c
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT: v_mov_b32_e32 v0, 0x1f8
; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: v_cmp_class_f16_e32 vcc, s2, v0
+; VI-NEXT: v_cmp_o_f16_e64 s[2:3], s4, s4
+; VI-NEXT: v_cmp_class_f16_e32 vcc, s4, v0
+; VI-NEXT: s_and_b64 s[2:3], s[2:3], vcc
; VI-NEXT: v_mov_b32_e32 v0, s0
-; VI-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; VI-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[2:3]
; VI-NEXT: v_mov_b32_e32 v1, s1
; VI-NEXT: flat_store_dword v[0:1], v2
; VI-NEXT: s_endpgm
@@ -750,8 +780,10 @@ define amdgpu_kernel void @test_isfinite_pattern_4_f16(ptr addrspace(1) nocaptur
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: v_cmp_o_f16_e64 s3, s2, s2
; GFX11-NEXT: v_cmp_class_f16_e64 s2, s2, 0x1f8
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-NEXT: s_and_b32 s2, s3, s2
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, s2
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT: s_nop 0
diff --git a/llvm/test/CodeGen/AMDGPU/fract-match.ll b/llvm/test/CodeGen/AMDGPU/fract-match.ll
index 3a0b8259d08496..d65dff7bc44627 100644
--- a/llvm/test/CodeGen/AMDGPU/fract-match.ll
+++ b/llvm/test/CodeGen/AMDGPU/fract-match.ll
@@ -53,14 +53,14 @@ define float @safe_math_fract_f32(float %x, ptr addrspace(1) nocapture writeonly
; GFX6-NEXT: v_sub_f32_e32 v4, v0, v3
; GFX6-NEXT: v_min_f32_e32 v4, 0x3f7fffff, v4
; GFX6-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
-; GFX6-NEXT: s_mov_b32 s8, 0x7f800000
+; GFX6-NEXT: v_mov_b32_e32 v5, 0x204
; GFX6-NEXT: s_mov_b32 s6, 0
; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v0, vcc
-; GFX6-NEXT: v_cmp_neq_f32_e64 vcc, |v0|, s8
+; GFX6-NEXT: v_cmp_class_f32_e32 vcc, v0, v5
; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: s_mov_b32 s4, s6
; GFX6-NEXT: s_mov_b32 s5, s6
-; GFX6-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc
+; GFX6-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
; GFX6-NEXT: buffer_store_dword v3, v[1:2], s[4:7], 0 addr64
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT: s_setpc_b64 s[30:31]
@@ -68,15 +68,15 @@ define float @safe_math_fract_f32(float %x, ptr addrspace(1) nocapture writeonly
; GFX7-LABEL: safe_math_fract_f32:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: s_mov_b32 s8, 0x7f800000
+; GFX7-NEXT: v_mov_b32_e32 v5, 0x204
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: v_fract_f32_e32 v4, v0
-; GFX7-NEXT: v_cmp_neq_f32_e64 vcc, |v0|, s8
+; GFX7-NEXT: v_cmp_class_f32_e32 vcc, v0, v5
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b32 s4, s6
; GFX7-NEXT: s_mov_b32 s5, s6
; GFX7-NEXT: v_floor_f32_e32 v3, v0
-; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc
+; GFX7-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
; GFX7-NEXT: buffer_store_dword v3, v[1:2], s[4:7], 0 addr64
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
@@ -84,11 +84,11 @@ define float @safe_math_fract_f32(float %x, ptr addrspace(1) nocapture writeonly
; GFX8-LABEL: safe_math_fract_f32:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: s_mov_b32 s4, 0x7f800000
+; GFX8-NEXT: v_mov_b32_e32 v5, 0x204
; GFX8-NEXT: v_fract_f32_e32 v4, v0
-; GFX8-NEXT: v_cmp_neq_f32_e64 vcc, |v0|, s4
+; GFX8-NEXT: v_cmp_class_f32_e32 vcc, v0, v5
; GFX8-NEXT: v_floor_f32_e32 v3, v0
-; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
; GFX8-NEXT: global_store_dword v[1:2], v3, off
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
@@ -97,10 +97,10 @@ define float @safe_math_fract_f32(float %x, ptr addrspace(1) nocapture writeonly
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_fract_f32_e32 v3, v0
-; GFX11-NEXT: v_cmp_neq_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX11-NEXT: v_cmp_class_f32_e64 s0, v0, 0x204
; GFX11-NEXT: v_floor_f32_e32 v4, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NEXT: v_cndmask_b32_e64 v0, v3, 0, s0
; GFX11-NEXT: global_store_b32 v[1:2], v4, off
; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:
@@ -210,14 +210,14 @@ define float @no_nan_check_math_fract_f32(float %x, ptr addrspace(1) nocapture w
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_floor_f32_e32 v3, v0
; GFX6-NEXT: v_sub_f32_e32 v4, v0, v3
-; GFX6-NEXT: s_mov_b32 s8, 0x7f800000
+; GFX6-NEXT: v_mov_b32_e32 v5, 0x204
; GFX6-NEXT: s_mov_b32 s6, 0
; GFX6-NEXT: v_min_f32_e32 v4, 0x3f7fffff, v4
-; GFX6-NEXT: v_cmp_neq_f32_e64 vcc, |v0|, s8
+; GFX6-NEXT: v_cmp_class_f32_e32 vcc, v0, v5
; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: s_mov_b32 s4, s6
; GFX6-NEXT: s_mov_b32 s5, s6
-; GFX6-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc
+; GFX6-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
; GFX6-NEXT: buffer_store_dword v3, v[1:2], s[4:7], 0 addr64
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT: s_setpc_b64 s[30:31]
@@ -227,14 +227,14 @@ define float @no_nan_check_math_fract_f32(float %x, ptr addrspace(1) nocapture w
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_floor_f32_e32 v3, v0
; GFX7-NEXT: v_sub_f32_e32 v4, v0, v3
-; GFX7-NEXT: s_mov_b32 s8, 0x7f800000
+; GFX7-NEXT: v_mov_b32_e32 v5, 0x204
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: v_min_f32_e32 v4, 0x3f7fffff, v4
-; GFX7-NEXT: v_cmp_neq_f32_e64 vcc, |v0|, s8
+; GFX7-NEXT: v_cmp_class_f32_e32 vcc, v0, v5
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b32 s4, s6
; GFX7-NEXT: s_mov_b32 s5, s6
-; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc
+; GFX7-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
; GFX7-NEXT: buffer_store_dword v3, v[1:2], s[4:7], 0 addr64
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
@@ -244,10 +244,10 @@ define float @no_nan_check_math_fract_f32(float %x, ptr addrspace(1) nocapture w
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_floor_f32_e32 v3, v0
; GFX8-NEXT: v_sub_f32_e32 v4, v0, v3
-; GFX8-NEXT: s_mov_b32 s4, 0x7f800000
+; GFX8-NEXT: v_mov_b32_e32 v5, 0x204
; GFX8-NEXT: v_min_f32_e32 v4, 0x3f7fffff, v4
-; GFX8-NEXT: v_cmp_neq_f32_e64 vcc, |v0|, s4
-; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc
+; GFX8-NEXT: v_cmp_class_f32_e32 vcc, v0, v5
+; GFX8-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
; GFX8-NEXT: global_store_dword v[1:2], v3, off
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
@@ -256,12 +256,12 @@ define float @no_nan_check_math_fract_f32(float %x, ptr addrspace(1) nocapture w
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_floor_f32_e32 v3, v0
-; GFX11-NEXT: v_cmp_neq_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX11-NEXT: v_cmp_class_f32_e64 s0, v0, 0x204
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_sub_f32_e32 v4, v0, v3
; GFX11-NEXT: global_store_b32 v[1:2], v3, off
; GFX11-NEXT: v_min_f32_e32 v4, 0x3f7fffff, v4
-; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v0, v4, 0, s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:
%floor = tail call float @llvm.floor.f32(float %x)
@@ -1705,16 +1705,16 @@ define <2 x float> @safe_math_fract_v2f32(<2 x float> %x, ptr addrspace(1) nocap
; GFX6-NEXT: v_min_f32_e32 v7, 0x3f7fffff, v7
; GFX6-NEXT: v_cndmask_b32_e32 v6, v6, v1, vcc
; GFX6-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
-; GFX6-NEXT: s_movk_i32 s10, 0x204
+; GFX6-NEXT: v_mov_b32_e32 v8, 0x204
; GFX6-NEXT: v_cndmask_b32_e32 v7, v7, v0, vcc
-; GFX6-NEXT: v_cmp_class_f32_e64 s[8:9], v0, s10
+; GFX6-NEXT: v_cmp_class_f32_e32 vcc, v0, v8
; GFX6-NEXT: s_mov_b32 s6, 0
-; GFX6-NEXT: v_cndmask_b32_e64 v0, v7, 0, s[8:9]
-; GFX6-NEXT: v_cmp_class_f32_e64 s[8:9], v1, s10
+; GFX6-NEXT: v_cndmask_b32_e64 v0, v7, 0, vcc
+; GFX6-NEXT: v_cmp_class_f32_e32 vcc, v1, v8
; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: s_mov_b32 s4, s6
; GFX6-NEXT: s_mov_b32 s5, s6
-; GFX6-NEXT: v_cndmask_b32_e64 v1, v6, 0, s[8:9]
+; GFX6-NEXT: v_cndmask_b32_e64 v1, v6, 0, vcc
; GFX6-NEXT: buffer_store_dwordx2 v[4:5], v[2:3], s[4:7], 0 addr64
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT: s_setpc_b64 s[30:31]
@@ -1722,19 +1722,19 @@ define <2 x float> @safe_math_fract_v2f32(<2 x float> %x, ptr addrspace(1) nocap
; GFX7-LABEL: safe_math_fract_v2f32:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: s_mov_b32 s8, 0x7f800000
+; GFX7-NEXT: v_mov_b32_e32 v8, 0x204
; GFX7-NEXT: v_fract_f32_e32 v6, v0
-; GFX7-NEXT: v_cmp_neq_f32_e64 vcc, |v0|, s8
+; GFX7-NEXT: v_cmp_class_f32_e32 vcc, v0, v8
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: v_floor_f32_e32 v4, v0
; GFX7-NEXT: v_fract_f32_e32 v7, v1
-; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc
-; GFX7-NEXT: v_cmp_neq_f32_e64 vcc, |v1|, s8
+; GFX7-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
+; GFX7-NEXT: v_cmp_class_f32_e32 vcc, v1, v8
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b32 s4, s6
; GFX7-NEXT: s_mov_b32 s5, s6
; GFX7-NEXT: v_floor_f32_e32 v5, v1
-; GFX7-NEXT: v_cndmask_b32_e32 v1, 0, v7, vcc
+; GFX7-NEXT: v_cndmask_b32_e64 v1, v7, 0, vcc
; GFX7-NEXT: buffer_store_dwordx2 v[4:5], v[2:3], s[4:7], 0 addr64
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
@@ -1742,15 +1742,15 @@ define <2 x float> @safe_math_fract_v2f32(<2 x float> %x, ptr addrspace(1) nocap
; GFX8-LABEL: safe_math_fract_v2f32:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: s_mov_b32 s4, 0x7f800000
+; GFX8-NEXT: v_mov_b32_e32 v8, 0x204
; GFX8-NEXT: v_fract_f32_e32 v6, v0
-; GFX8-NEXT: v_cmp_neq_f32_e64 vcc, |v0|, s4
+; GFX8-NEXT: v_cmp_class_f32_e32 vcc, v0, v8
; GFX8-NEXT: v_floor_f32_e32 v4, v0
; GFX8-NEXT: v_fract_f32_e32 v7, v1
-; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc
-; GFX8-NEXT: v_cmp_neq_f32_e64 vcc, |v1|, s4
+; GFX8-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
+; GFX8-NEXT: v_cmp_class_f32_e32 vcc, v1, v8
; GFX8-NEXT: v_floor_f32_e32 v5, v1
-; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v7, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v1, v7, 0, vcc
; GFX8-NEXT: global_store_dwordx2 v[2:3], v[4:5], off
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
@@ -1759,14 +1759,15 @@ define <2 x float> @safe_math_fract_v2f32(<2 x float> %x, ptr addrspace(1) nocap
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_fract_f32_e32 v6, v0
-; GFX11-NEXT: v_cmp_neq_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX11-NEXT: v_cmp_class_f32_e64 s0, v0, 0x204
; GFX11-NEXT: v_fract_f32_e32 v7, v1
; GFX11-NEXT: v_floor_f32_e32 v4, v0
; GFX11-NEXT: v_floor_f32_e32 v5, v1
-; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc_lo
-; GFX11-NEXT: v_cmp_neq_f32_e64 vcc_lo, 0x7f800000, |v1|
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
+; GFX11-NEXT: v_cndmask_b32_e64 v0, v6, 0, s0
+; GFX11-NEXT: v_cmp_class_f32_e64 s0, v1, 0x204
; GFX11-NEXT: global_store_b64 v[2:3], v[4:5], off
-; GFX11-NEXT: v_cndmask_b32_e32 v1, 0, v7, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v1, v7, 0, s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:
%floor = tail call <2 x float> @llvm.floor.v2f32(<2 x float> %x)
@@ -1823,17 +1824,16 @@ define double @safe_math_fract_f64(double %x, ptr addrspace(1) nocapture writeon
; GFX6-NEXT: v_add_f64 v[6:7], v[0:1], -v[4:5]
; GFX6-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
; GFX6-NEXT: v_min_f64 v[6:7], v[6:7], s[8:9]
-; GFX6-NEXT: s_mov_b32 s8, 0
-; GFX6-NEXT: s_mov_b32 s9, 0x7ff00000
+; GFX6-NEXT: v_mov_b32_e32 v8, 0x204
; GFX6-NEXT: v_cndmask_b32_e32 v7, v7, v1, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v6, v6, v0, vcc
-; GFX6-NEXT: v_cmp_neq_f64_e64 vcc, |v[0:1]|, s[8:9]
+; GFX6-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v8
; GFX6-NEXT: s_mov_b32 s6, 0
; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: s_mov_b32 s4, s6
; GFX6-NEXT: s_mov_b32 s5, s6
-; GFX6-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc
-; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v7, vcc
+; GFX6-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
+; GFX6-NEXT: v_cndmask_b32_e64 v1, v7, 0, vcc
; GFX6-NEXT: buffer_store_dwordx2 v[4:5], v[2:3], s[4:7], 0 addr64
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT: s_setpc_b64 s[30:31]
@@ -1841,17 +1841,16 @@ define double @safe_math_fract_f64(double %x, ptr addrspace(1) nocapture writeon
; GFX7-LABEL: safe_math_fract_f64:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: s_mov_b32 s4, 0
-; GFX7-NEXT: s_mov_b32 s5, 0x7ff00000
+; GFX7-NEXT: v_mov_b32_e32 v6, 0x204
; GFX7-NEXT: v_fract_f64_e32 v[4:5], v[0:1]
-; GFX7-NEXT: v_cmp_neq_f64_e64 vcc, |v[0:1]|, s[4:5]
+; GFX7-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v6
; GFX7-NEXT: v_floor_f64_e32 v[6:7], v[0:1]
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b32 s4, s6
; GFX7-NEXT: s_mov_b32 s5, s6
-; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc
-; GFX7-NEXT: v_cndmask_b32_e32 v1, 0, v5, vcc
+; GFX7-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
+; GFX7-NEXT: v_cndmask_b32_e64 v1, v5, 0, vcc
; GFX7-NEXT: buffer_store_dwordx2 v[6:7], v[2:3], s[4:7], 0 addr64
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
@@ -1859,13 +1858,12 @@ define double @safe_math_fract_f64(double %x, ptr addrspace(1) nocapture writeon
; GFX8-LABEL: safe_math_fract_f64:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: s_mov_b32 s4, 0
-; GFX8-NEXT: s_mov_b32 s5, 0x7ff00000
+; GFX8-NEXT: v_mov_b32_e32 v6, 0x204
; GFX8-NEXT: v_fract_f64_e32 v[4:5], v[0:1]
-; GFX8-NEXT: v_cmp_neq_f64_e64 vcc, |v[0:1]|, s[4:5]
+; GFX8-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v6
; GFX8-NEXT: v_floor_f64_e32 v[6:7], v[0:1]
-; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc
-; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v5, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v1, v5, 0, vcc
; GFX8-NEXT: global_store_dwordx2 v[2:3], v[6:7], off
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
@@ -1874,10 +1872,11 @@ define double @safe_math_fract_f64(double %x, ptr addrspace(1) nocapture writeon
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_fract_f64_e32 v[4:5], v[0:1]
-; GFX11-NEXT: v_cmp_neq_f64_e64 vcc_lo, 0x7ff00000, |v[0:1]|
+; GFX11-NEXT: v_cmp_class_f64_e64 s0, v[0:1], 0x204
; GFX11-NEXT: v_floor_f64_e32 v[6:7], v[0:1]
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
-; GFX11-NEXT: v_dual_cndmask_b32 v0, 0, v4 :: v_dual_cndmask_b32 v1, 0, v5
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-NEXT: v_cndmask_b32_e64 v0, v4, 0, s0
+; GFX11-NEXT: v_cndmask_b32_e64 v1, v5, 0, s0
; GFX11-NEXT: global_store_b64 v[2:3], v[6:7], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:
@@ -1937,21 +1936,22 @@ define half @safe_math_fract_f16(half %x, ptr addrspace(1) nocapture writeonly %
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX6-NEXT: s_mov_b32 s8, 0x7f800000
+; GFX6-NEXT: s_movk_i32 s8, 0x7c00
; GFX6-NEXT: s_mov_b32 s6, 0
; GFX6-NEXT: s_mov_b32 s7, 0xf000
-; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v0
+; GFX6-NEXT: v_and_b32_e32 v0, 0x7fff, v0
; GFX6-NEXT: s_mov_b32 s4, s6
; GFX6-NEXT: s_mov_b32 s5, s6
-; GFX6-NEXT: v_floor_f32_e32 v3, v0
-; GFX6-NEXT: v_sub_f32_e32 v4, v0, v3
-; GFX6-NEXT: v_cvt_f16_f32_e32 v3, v3
-; GFX6-NEXT: v_min_f32_e32 v4, 0x3f7fe000, v4
-; GFX6-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
-; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v0, vcc
-; GFX6-NEXT: v_cmp_neq_f32_e64 vcc, |v0|, s8
-; GFX6-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc
-; GFX6-NEXT: buffer_store_short v3, v[1:2], s[4:7], 0 addr64
+; GFX6-NEXT: v_floor_f32_e32 v4, v3
+; GFX6-NEXT: v_sub_f32_e32 v5, v3, v4
+; GFX6-NEXT: v_cvt_f16_f32_e32 v4, v4
+; GFX6-NEXT: v_min_f32_e32 v5, 0x3f7fe000, v5
+; GFX6-NEXT: v_cmp_u_f32_e32 vcc, v3, v3
+; GFX6-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
+; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, s8, v0
+; GFX6-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
+; GFX6-NEXT: buffer_store_short v4, v[1:2], s[4:7], 0 addr64
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
@@ -1959,32 +1959,33 @@ define half @safe_math_fract_f16(half %x, ptr addrspace(1) nocapture writeonly %
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: s_mov_b32 s8, 0x7f800000
+; GFX7-NEXT: s_movk_i32 s8, 0x7c00
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: s_mov_b32 s7, 0xf000
-; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v0
+; GFX7-NEXT: v_and_b32_e32 v0, 0x7fff, v0
; GFX7-NEXT: s_mov_b32 s4, s6
; GFX7-NEXT: s_mov_b32 s5, s6
-; GFX7-NEXT: v_floor_f32_e32 v3, v0
-; GFX7-NEXT: v_sub_f32_e32 v4, v0, v3
-; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
-; GFX7-NEXT: v_min_f32_e32 v4, 0x3f7fe000, v4
-; GFX7-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
-; GFX7-NEXT: v_cndmask_b32_e32 v4, v4, v0, vcc
-; GFX7-NEXT: v_cmp_neq_f32_e64 vcc, |v0|, s8
-; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc
-; GFX7-NEXT: buffer_store_short v3, v[1:2], s[4:7], 0 addr64
+; GFX7-NEXT: v_floor_f32_e32 v4, v3
+; GFX7-NEXT: v_sub_f32_e32 v5, v3, v4
+; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
+; GFX7-NEXT: v_min_f32_e32 v5, 0x3f7fe000, v5
+; GFX7-NEXT: v_cmp_u_f32_e32 vcc, v3, v3
+; GFX7-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
+; GFX7-NEXT: v_cmp_ne_u32_e32 vcc, s8, v0
+; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc
+; GFX7-NEXT: buffer_store_short v4, v[1:2], s[4:7], 0 addr64
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: safe_math_fract_f16:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: s_movk_i32 s4, 0x7c00
+; GFX8-NEXT: v_mov_b32_e32 v5, 0x204
; GFX8-NEXT: v_fract_f16_e32 v4, v0
-; GFX8-NEXT: v_cmp_neq_f16_e64 vcc, |v0|, s4
+; GFX8-NEXT: v_cmp_class_f16_e32 vcc, v0, v5
; GFX8-NEXT: v_floor_f16_e32 v3, v0
-; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc
; GFX8-NEXT: global_store_short v[1:2], v3, off
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
@@ -1993,10 +1994,10 @@ define half @safe_math_fract_f16(half %x, ptr addrspace(1) nocapture writeonly %
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_fract_f16_e32 v3, v0
-; GFX11-NEXT: v_cmp_neq_f16_e64 vcc_lo, 0x7c00, |v0|
+; GFX11-NEXT: v_cmp_class_f16_e64 s0, v0, 0x204
; GFX11-NEXT: v_floor_f16_e32 v4, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
-; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NEXT: v_cndmask_b32_e64 v0, v3, 0, s0
; GFX11-NEXT: global_store_b16 v[1:2], v4, off
; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:
@@ -2062,12 +2063,12 @@ define <2 x half> @safe_math_fract_v2f16(<2 x half> %x, ptr addrspace(1) nocaptu
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX6-NEXT: s_mov_b32 s8, 0x7f800000
+; GFX6-NEXT: s_movk_i32 s8, 0x7c00
; GFX6-NEXT: s_mov_b32 s6, 0
; GFX6-NEXT: v_cvt_f32_f16_e32 v4, v1
; GFX6-NEXT: v_cvt_f32_f16_e32 v5, v0
-; GFX6-NEXT: v_cvt_f32_f16_e64 v0, |v0|
-; GFX6-NEXT: v_cvt_f32_f16_e64 v1, |v1|
+; GFX6-NEXT: v_and_b32_e32 v0, 0x7fff, v0
+; GFX6-NEXT: v_and_b32_e32 v1, 0x7fff, v1
; GFX6-NEXT: v_floor_f32_e32 v6, v4
; GFX6-NEXT: v_cvt_f16_f32_e32 v7, v6
; GFX6-NEXT: v_floor_f32_e32 v8, v5
@@ -2080,10 +2081,10 @@ define <2 x half> @safe_math_fract_v2f16(<2 x half> %x, ptr addrspace(1) nocaptu
; GFX6-NEXT: v_cndmask_b32_e32 v4, v6, v4, vcc
; GFX6-NEXT: v_cmp_u_f32_e32 vcc, v5, v5
; GFX6-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc
-; GFX6-NEXT: v_cmp_neq_f32_e32 vcc, s8, v0
+; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, s8, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v7, 16, v7
; GFX6-NEXT: v_cndmask_b32_e32 v0, 0, v5, vcc
-; GFX6-NEXT: v_cmp_neq_f32_e32 vcc, s8, v1
+; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, s8, v1
; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: s_mov_b32 s4, s6
; GFX6-NEXT: s_mov_b32 s5, s6
@@ -2098,12 +2099,12 @@ define <2 x half> @safe_math_fract_v2f16(<2 x half> %x, ptr addrspace(1) nocaptu
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: s_mov_b32 s8, 0x7f800000
+; GFX7-NEXT: s_movk_i32 s8, 0x7c00
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v1
; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v0
-; GFX7-NEXT: v_cvt_f32_f16_e64 v0, |v0|
-; GFX7-NEXT: v_cvt_f32_f16_e64 v1, |v1|
+; GFX7-NEXT: v_and_b32_e32 v0, 0x7fff, v0
+; GFX7-NEXT: v_and_b32_e32 v1, 0x7fff, v1
; GFX7-NEXT: v_floor_f32_e32 v6, v4
; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v6
; GFX7-NEXT: v_floor_f32_e32 v8, v5
@@ -2116,10 +2117,10 @@ define <2 x half> @safe_math_fract_v2f16(<2 x half> %x, ptr addrspace(1) nocaptu
; GFX7-NEXT: v_cndmask_b32_e32 v4, v6, v4, vcc
; GFX7-NEXT: v_cmp_u_f32_e32 vcc, v5, v5
; GFX7-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc
-; GFX7-NEXT: v_cmp_neq_f32_e32 vcc, s8, v0
+; GFX7-NEXT: v_cmp_ne_u32_e32 vcc, s8, v0
; GFX7-NEXT: v_lshlrev_b32_e32 v7, 16, v7
; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v5, vcc
-; GFX7-NEXT: v_cmp_neq_f32_e32 vcc, s8, v1
+; GFX7-NEXT: v_cmp_ne_u32_e32 vcc, s8, v1
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b32 s4, s6
; GFX7-NEXT: s_mov_b32 s5, s6
@@ -2133,16 +2134,16 @@ define <2 x half> @safe_math_fract_v2f16(<2 x half> %x, ptr addrspace(1) nocaptu
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v0
-; GFX8-NEXT: s_movk_i32 s6, 0x204
+; GFX8-NEXT: v_mov_b32_e32 v7, 0x204
; GFX8-NEXT: v_floor_f16_e32 v4, v3
; GFX8-NEXT: v_floor_f16_e32 v5, v0
; GFX8-NEXT: v_fract_f16_e32 v6, v3
-; GFX8-NEXT: v_cmp_class_f16_e64 s[4:5], v3, s6
+; GFX8-NEXT: v_cmp_class_f16_e32 vcc, v3, v7
; GFX8-NEXT: v_pack_b32_f16 v4, v5, v4
; GFX8-NEXT: v_fract_f16_e32 v5, v0
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v6, 0, s[4:5]
-; GFX8-NEXT: v_cmp_class_f16_e64 s[4:5], v0, s6
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v5, 0, s[4:5]
+; GFX8-NEXT: v_cndmask_b32_e64 v3, v6, 0, vcc
+; GFX8-NEXT: v_cmp_class_f16_e32 vcc, v0, v7
+; GFX8-NEXT: v_cndmask_b32_e64 v0, v5, 0, vcc
; GFX8-NEXT: v_pack_b32_f16 v0, v0, v3
; GFX8-NEXT: global_store_dword v[1:2], v4, off
; GFX8-NEXT: s_waitcnt vmcnt(0)
@@ -2237,19 +2238,19 @@ define <2 x double> @safe_math_fract_v2f64(<2 x double> %x, ptr addrspace(1) noc
; GFX6-NEXT: v_cndmask_b32_e32 v11, v11, v3, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v10, v10, v2, vcc
; GFX6-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
-; GFX6-NEXT: s_movk_i32 s10, 0x204
-; GFX6-NEXT: v_cmp_class_f64_e64 s[8:9], v[0:1], s10
+; GFX6-NEXT: v_mov_b32_e32 v14, 0x204
; GFX6-NEXT: v_cndmask_b32_e32 v13, v13, v1, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v12, v12, v0, vcc
-; GFX6-NEXT: v_cndmask_b32_e64 v0, v12, 0, s[8:9]
-; GFX6-NEXT: v_cndmask_b32_e64 v1, v13, 0, s[8:9]
-; GFX6-NEXT: v_cmp_class_f64_e64 s[8:9], v[2:3], s10
+; GFX6-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v14
; GFX6-NEXT: s_mov_b32 s6, 0
+; GFX6-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
+; GFX6-NEXT: v_cndmask_b32_e64 v1, v13, 0, vcc
+; GFX6-NEXT: v_cmp_class_f64_e32 vcc, v[2:3], v14
; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: s_mov_b32 s4, s6
; GFX6-NEXT: s_mov_b32 s5, s6
-; GFX6-NEXT: v_cndmask_b32_e64 v2, v10, 0, s[8:9]
-; GFX6-NEXT: v_cndmask_b32_e64 v3, v11, 0, s[8:9]
+; GFX6-NEXT: v_cndmask_b32_e64 v2, v10, 0, vcc
+; GFX6-NEXT: v_cndmask_b32_e64 v3, v11, 0, vcc
; GFX6-NEXT: buffer_store_dwordx4 v[6:9], v[4:5], s[4:7], 0 addr64
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT: s_setpc_b64 s[30:31]
@@ -2257,39 +2258,39 @@ define <2 x double> @safe_math_fract_v2f64(<2 x double> %x, ptr addrspace(1) noc
; GFX7-LABEL: safe_math_fract_v2f64:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: s_movk_i32 s4, 0x204
+; GFX7-NEXT: v_mov_b32_e32 v6, 0x204
; GFX7-NEXT: v_fract_f64_e32 v[10:11], v[0:1]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[8:9], v[0:1], s4
+; GFX7-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v6
; GFX7-NEXT: v_fract_f64_e32 v[12:13], v[2:3]
-; GFX7-NEXT: v_cmp_class_f64_e64 s[10:11], v[2:3], s4
+; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], v6
; GFX7-NEXT: v_floor_f64_e32 v[8:9], v[2:3]
; GFX7-NEXT: v_floor_f64_e32 v[6:7], v[0:1]
-; GFX7-NEXT: s_mov_b32 s6, 0
-; GFX7-NEXT: s_mov_b32 s7, 0xf000
-; GFX7-NEXT: s_mov_b32 s4, s6
-; GFX7-NEXT: s_mov_b32 s5, s6
-; GFX7-NEXT: v_cndmask_b32_e64 v0, v10, 0, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v1, v11, 0, s[8:9]
-; GFX7-NEXT: v_cndmask_b32_e64 v2, v12, 0, s[10:11]
-; GFX7-NEXT: v_cndmask_b32_e64 v3, v13, 0, s[10:11]
-; GFX7-NEXT: buffer_store_dwordx4 v[6:9], v[4:5], s[4:7], 0 addr64
+; GFX7-NEXT: s_mov_b32 s10, 0
+; GFX7-NEXT: s_mov_b32 s11, 0xf000
+; GFX7-NEXT: s_mov_b32 s8, s10
+; GFX7-NEXT: s_mov_b32 s9, s10
+; GFX7-NEXT: v_cndmask_b32_e64 v0, v10, 0, vcc
+; GFX7-NEXT: v_cndmask_b32_e64 v1, v11, 0, vcc
+; GFX7-NEXT: v_cndmask_b32_e64 v2, v12, 0, s[4:5]
+; GFX7-NEXT: v_cndmask_b32_e64 v3, v13, 0, s[4:5]
+; GFX7-NEXT: buffer_store_dwordx4 v[6:9], v[4:5], s[8:11], 0 addr64
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: safe_math_fract_v2f64:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: s_movk_i32 s6, 0x204
+; GFX8-NEXT: v_mov_b32_e32 v6, 0x204
; GFX8-NEXT: v_fract_f64_e32 v[10:11], v[0:1]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[0:1], s6
+; GFX8-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v6
; GFX8-NEXT: v_fract_f64_e32 v[12:13], v[2:3]
-; GFX8-NEXT: v_cmp_class_f64_e64 s[6:7], v[2:3], s6
+; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], v6
; GFX8-NEXT: v_floor_f64_e32 v[8:9], v[2:3]
; GFX8-NEXT: v_floor_f64_e32 v[6:7], v[0:1]
-; GFX8-NEXT: v_cndmask_b32_e64 v0, v10, 0, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v11, 0, s[4:5]
-; GFX8-NEXT: v_cndmask_b32_e64 v2, v12, 0, s[6:7]
-; GFX8-NEXT: v_cndmask_b32_e64 v3, v13, 0, s[6:7]
+; GFX8-NEXT: v_cndmask_b32_e64 v0, v10, 0, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v1, v11, 0, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v2, v12, 0, s[4:5]
+; GFX8-NEXT: v_cndmask_b32_e64 v3, v13, 0, s[4:5]
; GFX8-NEXT: global_store_dwordx4 v[4:5], v[6:9], off
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/RISCV/fpclass-test.ll b/llvm/test/CodeGen/RISCV/fpclass-test.ll
index 90b9c0d8bfed8a..b38e0d28941c90 100644
--- a/llvm/test/CodeGen/RISCV/fpclass-test.ll
+++ b/llvm/test/CodeGen/RISCV/fpclass-test.ll
@@ -16,10 +16,9 @@ define i1 @test_is_inf_or_nan(double %arg) {
define i1 @test_is_not_inf_or_nan(double %arg) {
; CHECK-LABEL: test_is_not_inf_or_nan:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI1_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI1_0)(a0)
-; CHECK-NEXT: fabs.d fa4, fa0
-; CHECK-NEXT: flt.d a0, fa4, fa5
+; CHECK-NEXT: fclass.d a0, fa0
+; CHECK-NEXT: andi a0, a0, 126
+; CHECK-NEXT: snez a0, a0
; CHECK-NEXT: ret
%abs = tail call double @llvm.fabs.f64(double %arg)
%ret = fcmp one double %abs, 0x7FF0000000000000
@@ -41,11 +40,9 @@ define i1 @test_is_inf(double %arg) {
define i1 @test_is_not_inf(double %arg) {
; CHECK-LABEL: test_is_not_inf:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI3_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI3_0)(a0)
-; CHECK-NEXT: fabs.d fa4, fa0
-; CHECK-NEXT: feq.d a0, fa4, fa5
-; CHECK-NEXT: xori a0, a0, 1
+; CHECK-NEXT: fclass.d a0, fa0
+; CHECK-NEXT: andi a0, a0, 894
+; CHECK-NEXT: snez a0, a0
; CHECK-NEXT: ret
%abs = tail call double @llvm.fabs.f64(double %arg)
%ret = fcmp une double %abs, 0x7FF0000000000000
@@ -55,13 +52,11 @@ define i1 @test_is_not_inf(double %arg) {
define <vscale x 4 x i1> @test_vec_is_inf_or_nan(<vscale x 4 x double> %arg) {
; CHECK-LABEL: test_vec_is_inf_or_nan:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI4_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI4_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT: vfabs.v v8, v8
-; CHECK-NEXT: vmflt.vf v12, v8, fa5
-; CHECK-NEXT: vmfgt.vf v13, v8, fa5
-; CHECK-NEXT: vmnor.mm v0, v13, v12
+; CHECK-NEXT: vfclass.v v8, v8
+; CHECK-NEXT: li a0, 897
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vmsne.vi v0, v8, 0
; CHECK-NEXT: ret
%abs = tail call <vscale x 4 x double> @llvm.fabs.nxv4f64(<vscale x 4 x double> %arg)
%ret = fcmp ueq <vscale x 4 x double> %abs, splat (double 0x7FF0000000000000)
@@ -71,13 +66,11 @@ define <vscale x 4 x i1> @test_vec_is_inf_or_nan(<vscale x 4 x double> %arg) {
define <vscale x 4 x i1> @test_vec_is_not_inf_or_nan(<vscale x 4 x double> %arg) {
; CHECK-LABEL: test_vec_is_not_inf_or_nan:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI5_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI5_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT: vfabs.v v8, v8
-; CHECK-NEXT: vmflt.vf v12, v8, fa5
-; CHECK-NEXT: vmfgt.vf v13, v8, fa5
-; CHECK-NEXT: vmor.mm v0, v13, v12
+; CHECK-NEXT: vfclass.v v8, v8
+; CHECK-NEXT: li a0, 126
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vmsne.vi v0, v8, 0
; CHECK-NEXT: ret
%abs = tail call <vscale x 4 x double> @llvm.fabs.nxv4f64(<vscale x 4 x double> %arg)
%ret = fcmp one <vscale x 4 x double> %abs, splat (double 0x7FF0000000000000)
@@ -87,11 +80,11 @@ define <vscale x 4 x i1> @test_vec_is_not_inf_or_nan(<vscale x 4 x double> %arg)
define <vscale x 4 x i1> @test_vec_is_inf(<vscale x 4 x double> %arg) {
; CHECK-LABEL: test_vec_is_inf:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI6_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI6_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT: vfabs.v v8, v8
-; CHECK-NEXT: vmfeq.vf v0, v8, fa5
+; CHECK-NEXT: vfclass.v v8, v8
+; CHECK-NEXT: li a0, 129
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vmsne.vi v0, v8, 0
; CHECK-NEXT: ret
%abs = tail call <vscale x 4 x double> @llvm.fabs.nxv4f64(<vscale x 4 x double> %arg)
%ret = fcmp oeq <vscale x 4 x double> %abs, splat (double 0x7FF0000000000000)
@@ -101,11 +94,11 @@ define <vscale x 4 x i1> @test_vec_is_inf(<vscale x 4 x double> %arg) {
define <vscale x 4 x i1> @test_vec_is_not_inf(<vscale x 4 x double> %arg) {
; CHECK-LABEL: test_vec_is_not_inf:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI7_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI7_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT: vfabs.v v8, v8
-; CHECK-NEXT: vmfne.vf v0, v8, fa5
+; CHECK-NEXT: vfclass.v v8, v8
+; CHECK-NEXT: li a0, 894
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vmsne.vi v0, v8, 0
; CHECK-NEXT: ret
%abs = tail call <vscale x 4 x double> @llvm.fabs.nxv4f64(<vscale x 4 x double> %arg)
%ret = fcmp une <vscale x 4 x double> %abs, splat (double 0x7FF0000000000000)
@@ -115,41 +108,12 @@ define <vscale x 4 x i1> @test_vec_is_not_inf(<vscale x 4 x double> %arg) {
define i1 @test_fp128_is_inf_or_nan(fp128 %arg) {
; CHECK-LABEL: test_fp128_is_inf_or_nan:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -48
-; CHECK-NEXT: .cfi_def_cfa_offset 48
-; CHECK-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; CHECK-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; CHECK-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; CHECK-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; CHECK-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; CHECK-NEXT: .cfi_offset ra, -8
-; CHECK-NEXT: .cfi_offset s0, -16
-; CHECK-NEXT: .cfi_offset s1, -24
-; CHECK-NEXT: .cfi_offset s2, -32
-; CHECK-NEXT: .cfi_offset s3, -40
-; CHECK-NEXT: mv s0, a0
; CHECK-NEXT: slli a1, a1, 1
-; CHECK-NEXT: srli s1, a1, 1
-; CHECK-NEXT: lui s2, 32767
-; CHECK-NEXT: slli s2, s2, 36
-; CHECK-NEXT: mv a1, s1
-; CHECK-NEXT: li a2, 0
-; CHECK-NEXT: mv a3, s2
-; CHECK-NEXT: call __eqtf2
-; CHECK-NEXT: seqz s3, a0
-; CHECK-NEXT: mv a0, s0
-; CHECK-NEXT: mv a1, s1
-; CHECK-NEXT: li a2, 0
-; CHECK-NEXT: mv a3, s2
-; CHECK-NEXT: call __unordtf2
-; CHECK-NEXT: snez a0, a0
-; CHECK-NEXT: or a0, a0, s3
-; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; CHECK-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; CHECK-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; CHECK-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; CHECK-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; CHECK-NEXT: addi sp, sp, 48
+; CHECK-NEXT: srli a1, a1, 1
+; CHECK-NEXT: lui a0, 32767
+; CHECK-NEXT: slli a0, a0, 36
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: slt a0, a0, a1
; CHECK-NEXT: ret
%abs = tail call fp128 @llvm.fabs.f128(fp128 %arg)
%ret = fcmp ueq fp128 %abs, 0xL00000000000000007FFF000000000000
@@ -159,41 +123,11 @@ define i1 @test_fp128_is_inf_or_nan(fp128 %arg) {
define i1 @test_fp128_is_not_inf_or_nan(fp128 %arg) {
; CHECK-LABEL: test_fp128_is_not_inf_or_nan:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -48
-; CHECK-NEXT: .cfi_def_cfa_offset 48
-; CHECK-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; CHECK-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; CHECK-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; CHECK-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; CHECK-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; CHECK-NEXT: .cfi_offset ra, -8
-; CHECK-NEXT: .cfi_offset s0, -16
-; CHECK-NEXT: .cfi_offset s1, -24
-; CHECK-NEXT: .cfi_offset s2, -32
-; CHECK-NEXT: .cfi_offset s3, -40
-; CHECK-NEXT: mv s0, a0
; CHECK-NEXT: slli a1, a1, 1
-; CHECK-NEXT: srli s1, a1, 1
-; CHECK-NEXT: lui s2, 32767
-; CHECK-NEXT: slli s2, s2, 36
-; CHECK-NEXT: mv a1, s1
-; CHECK-NEXT: li a2, 0
-; CHECK-NEXT: mv a3, s2
-; CHECK-NEXT: call __eqtf2
-; CHECK-NEXT: snez s3, a0
-; CHECK-NEXT: mv a0, s0
-; CHECK-NEXT: mv a1, s1
-; CHECK-NEXT: li a2, 0
-; CHECK-NEXT: mv a3, s2
-; CHECK-NEXT: call __unordtf2
-; CHECK-NEXT: seqz a0, a0
-; CHECK-NEXT: and a0, a0, s3
-; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; CHECK-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; CHECK-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; CHECK-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; CHECK-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; CHECK-NEXT: addi sp, sp, 48
+; CHECK-NEXT: srli a1, a1, 1
+; CHECK-NEXT: lui a0, 32767
+; CHECK-NEXT: slli a0, a0, 36
+; CHECK-NEXT: slt a0, a1, a0
; CHECK-NEXT: ret
%abs = tail call fp128 @llvm.fabs.f128(fp128 %arg)
%ret = fcmp one fp128 %abs, 0xL00000000000000007FFF000000000000
@@ -203,19 +137,13 @@ define i1 @test_fp128_is_not_inf_or_nan(fp128 %arg) {
define i1 @test_fp128_is_inf(fp128 %arg) {
; CHECK-LABEL: test_fp128_is_inf:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; CHECK-NEXT: .cfi_offset ra, -8
; CHECK-NEXT: slli a1, a1, 1
; CHECK-NEXT: srli a1, a1, 1
-; CHECK-NEXT: lui a3, 32767
-; CHECK-NEXT: slli a3, a3, 36
-; CHECK-NEXT: li a2, 0
-; CHECK-NEXT: call __eqtf2
+; CHECK-NEXT: lui a2, 32767
+; CHECK-NEXT: slli a2, a2, 36
+; CHECK-NEXT: xor a1, a1, a2
+; CHECK-NEXT: or a0, a0, a1
; CHECK-NEXT: seqz a0, a0
-; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%abs = tail call fp128 @llvm.fabs.f128(fp128 %arg)
%ret = fcmp oeq fp128 %abs, 0xL00000000000000007FFF000000000000
@@ -225,19 +153,13 @@ define i1 @test_fp128_is_inf(fp128 %arg) {
define i1 @test_fp128_is_not_inf(fp128 %arg) {
; CHECK-LABEL: test_fp128_is_not_inf:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; CHECK-NEXT: .cfi_offset ra, -8
; CHECK-NEXT: slli a1, a1, 1
; CHECK-NEXT: srli a1, a1, 1
-; CHECK-NEXT: lui a3, 32767
-; CHECK-NEXT: slli a3, a3, 36
-; CHECK-NEXT: li a2, 0
-; CHECK-NEXT: call __netf2
+; CHECK-NEXT: lui a2, 32767
+; CHECK-NEXT: slli a2, a2, 36
+; CHECK-NEXT: xor a1, a1, a2
+; CHECK-NEXT: or a0, a0, a1
; CHECK-NEXT: snez a0, a0
-; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%abs = tail call fp128 @llvm.fabs.f128(fp128 %arg)
%ret = fcmp une fp128 %abs, 0xL00000000000000007FFF000000000000
diff --git a/llvm/test/CodeGen/X86/compare-inf.ll b/llvm/test/CodeGen/X86/compare-inf.ll
index 5beec4d76e22e7..499ab98ab12cb0 100644
--- a/llvm/test/CodeGen/X86/compare-inf.ll
+++ b/llvm/test/CodeGen/X86/compare-inf.ll
@@ -1,14 +1,19 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s
-; Convert oeq and une to ole/oge/ule/uge when comparing with infinity
-; and negative infinity, because those are more efficient on x86.
-
declare void @f() nounwind
-; CHECK-LABEL: oeq_inff:
-; CHECK: ucomiss
-; CHECK: jb
define void @oeq_inff(float %x) nounwind {
+; CHECK-LABEL: oeq_inff:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: jb .LBB0_2
+; CHECK-NEXT: # %bb.1: # %true
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: callq f@PLT
+; CHECK-NEXT: addq $8, %rsp
+; CHECK-NEXT: .LBB0_2: # %false
+; CHECK-NEXT: retq
%t0 = fcmp oeq float %x, 0x7FF0000000000000
br i1 %t0, label %true, label %false
@@ -20,10 +25,17 @@ false:
ret void
}
-; CHECK-LABEL: oeq_inf:
-; CHECK: ucomisd
-; CHECK: jb
define void @oeq_inf(double %x) nounwind {
+; CHECK-LABEL: oeq_inf:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ucomisd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: jb .LBB1_2
+; CHECK-NEXT: # %bb.1: # %true
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: callq f@PLT
+; CHECK-NEXT: addq $8, %rsp
+; CHECK-NEXT: .LBB1_2: # %false
+; CHECK-NEXT: retq
%t0 = fcmp oeq double %x, 0x7FF0000000000000
br i1 %t0, label %true, label %false
@@ -35,10 +47,17 @@ false:
ret void
}
-; CHECK-LABEL: une_inff:
-; CHECK: ucomiss
-; CHECK: jae
define void @une_inff(float %x) nounwind {
+; CHECK-LABEL: une_inff:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: jae .LBB2_2
+; CHECK-NEXT: # %bb.1: # %true
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: callq f@PLT
+; CHECK-NEXT: addq $8, %rsp
+; CHECK-NEXT: .LBB2_2: # %false
+; CHECK-NEXT: retq
%t0 = fcmp une float %x, 0x7FF0000000000000
br i1 %t0, label %true, label %false
@@ -50,10 +69,17 @@ false:
ret void
}
-; CHECK-LABEL: une_inf:
-; CHECK: ucomisd
-; CHECK: jae
define void @une_inf(double %x) nounwind {
+; CHECK-LABEL: une_inf:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ucomisd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: jae .LBB3_2
+; CHECK-NEXT: # %bb.1: # %true
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: callq f@PLT
+; CHECK-NEXT: addq $8, %rsp
+; CHECK-NEXT: .LBB3_2: # %false
+; CHECK-NEXT: retq
%t0 = fcmp une double %x, 0x7FF0000000000000
br i1 %t0, label %true, label %false
@@ -65,10 +91,18 @@ false:
ret void
}
-; CHECK-LABEL: oeq_neg_inff:
-; CHECK: ucomiss
-; CHECK: jb
define void @oeq_neg_inff(float %x) nounwind {
+; CHECK-LABEL: oeq_neg_inff:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movss {{.*#+}} xmm1 = [-Inf,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: ucomiss %xmm0, %xmm1
+; CHECK-NEXT: jb .LBB4_2
+; CHECK-NEXT: # %bb.1: # %true
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: callq f@PLT
+; CHECK-NEXT: addq $8, %rsp
+; CHECK-NEXT: .LBB4_2: # %false
+; CHECK-NEXT: retq
%t0 = fcmp oeq float %x, 0xFFF0000000000000
br i1 %t0, label %true, label %false
@@ -80,10 +114,18 @@ false:
ret void
}
-; CHECK-LABEL: oeq_neg_inf:
-; CHECK: ucomisd
-; CHECK: jb
define void @oeq_neg_inf(double %x) nounwind {
+; CHECK-LABEL: oeq_neg_inf:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movsd {{.*#+}} xmm1 = [-Inf,0.0E+0]
+; CHECK-NEXT: ucomisd %xmm0, %xmm1
+; CHECK-NEXT: jb .LBB5_2
+; CHECK-NEXT: # %bb.1: # %true
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: callq f@PLT
+; CHECK-NEXT: addq $8, %rsp
+; CHECK-NEXT: .LBB5_2: # %false
+; CHECK-NEXT: retq
%t0 = fcmp oeq double %x, 0xFFF0000000000000
br i1 %t0, label %true, label %false
@@ -95,10 +137,18 @@ false:
ret void
}
-; CHECK-LABEL: une_neg_inff:
-; CHECK: ucomiss
-; CHECK: jae
define void @une_neg_inff(float %x) nounwind {
+; CHECK-LABEL: une_neg_inff:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movss {{.*#+}} xmm1 = [-Inf,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: ucomiss %xmm0, %xmm1
+; CHECK-NEXT: jae .LBB6_2
+; CHECK-NEXT: # %bb.1: # %true
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: callq f@PLT
+; CHECK-NEXT: addq $8, %rsp
+; CHECK-NEXT: .LBB6_2: # %false
+; CHECK-NEXT: retq
%t0 = fcmp une float %x, 0xFFF0000000000000
br i1 %t0, label %true, label %false
@@ -110,10 +160,18 @@ false:
ret void
}
-; CHECK-LABEL: une_neg_inf:
-; CHECK: ucomisd
-; CHECK: jae
define void @une_neg_inf(double %x) nounwind {
+; CHECK-LABEL: une_neg_inf:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movsd {{.*#+}} xmm1 = [-Inf,0.0E+0]
+; CHECK-NEXT: ucomisd %xmm0, %xmm1
+; CHECK-NEXT: jae .LBB7_2
+; CHECK-NEXT: # %bb.1: # %true
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: callq f@PLT
+; CHECK-NEXT: addq $8, %rsp
+; CHECK-NEXT: .LBB7_2: # %false
+; CHECK-NEXT: retq
%t0 = fcmp une double %x, 0xFFF0000000000000
br i1 %t0, label %true, label %false
diff --git a/llvm/test/CodeGen/X86/fpclass-test.ll b/llvm/test/CodeGen/X86/fpclass-test.ll
index 7d8a3f37d1b3ee..bf3e191efd24ad 100644
--- a/llvm/test/CodeGen/X86/fpclass-test.ll
+++ b/llvm/test/CodeGen/X86/fpclass-test.ll
@@ -4,9 +4,12 @@
define i1 @test_is_inf_or_nan(double %arg) {
; CHECK-LABEL: test_is_inf_or_nan:
; CHECK: # %bb.0:
-; CHECK-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; CHECK-NEXT: vucomisd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: sete %al
+; CHECK-NEXT: vmovq %xmm0, %rax
+; CHECK-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF
+; CHECK-NEXT: andq %rax, %rcx
+; CHECK-NEXT: movabsq $9218868437227405311, %rax # imm = 0x7FEFFFFFFFFFFFFF
+; CHECK-NEXT: cmpq %rax, %rcx
+; CHECK-NEXT: setg %al
; CHECK-NEXT: retq
%abs = tail call double @llvm.fabs.f64(double %arg)
%ret = fcmp ueq double %abs, 0x7FF0000000000000
@@ -16,9 +19,12 @@ define i1 @test_is_inf_or_nan(double %arg) {
define i1 @test_is_not_inf_or_nan(double %arg) {
; CHECK-LABEL: test_is_not_inf_or_nan:
; CHECK: # %bb.0:
-; CHECK-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; CHECK-NEXT: vucomisd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: setne %al
+; CHECK-NEXT: vmovq %xmm0, %rax
+; CHECK-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF
+; CHECK-NEXT: andq %rax, %rcx
+; CHECK-NEXT: movabsq $9218868437227405312, %rax # imm = 0x7FF0000000000000
+; CHECK-NEXT: cmpq %rax, %rcx
+; CHECK-NEXT: setl %al
; CHECK-NEXT: retq
%abs = tail call double @llvm.fabs.f64(double %arg)
%ret = fcmp one double %abs, 0x7FF0000000000000
@@ -28,9 +34,12 @@ define i1 @test_is_not_inf_or_nan(double %arg) {
define i1 @test_is_inf(double %arg) {
; CHECK-LABEL: test_is_inf:
; CHECK: # %bb.0:
-; CHECK-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; CHECK-NEXT: vucomisd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: setae %al
+; CHECK-NEXT: vmovq %xmm0, %rax
+; CHECK-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF
+; CHECK-NEXT: andq %rax, %rcx
+; CHECK-NEXT: movabsq $9218868437227405312, %rax # imm = 0x7FF0000000000000
+; CHECK-NEXT: cmpq %rax, %rcx
+; CHECK-NEXT: sete %al
; CHECK-NEXT: retq
%abs = tail call double @llvm.fabs.f64(double %arg)
%ret = fcmp oeq double %abs, 0x7FF0000000000000
@@ -40,9 +49,12 @@ define i1 @test_is_inf(double %arg) {
define i1 @test_is_not_inf(double %arg) {
; CHECK-LABEL: test_is_not_inf:
; CHECK: # %bb.0:
-; CHECK-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; CHECK-NEXT: vucomisd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: setb %al
+; CHECK-NEXT: vmovq %xmm0, %rax
+; CHECK-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF
+; CHECK-NEXT: andq %rax, %rcx
+; CHECK-NEXT: movabsq $9218868437227405312, %rax # imm = 0x7FF0000000000000
+; CHECK-NEXT: cmpq %rax, %rcx
+; CHECK-NEXT: setne %al
; CHECK-NEXT: retq
%abs = tail call double @llvm.fabs.f64(double %arg)
%ret = fcmp une double %abs, 0x7FF0000000000000
@@ -52,11 +64,11 @@ define i1 @test_is_not_inf(double %arg) {
define <4 x i1> @test_vec_is_inf_or_nan(<4 x double> %arg) {
; CHECK-LABEL: test_vec_is_inf_or_nan:
; CHECK: # %bb.0:
-; CHECK-NEXT: vbroadcastsd {{.*#+}} ymm1 = [NaN,NaN,NaN,NaN]
-; CHECK-NEXT: vandpd %ymm1, %ymm0, %ymm0
-; CHECK-NEXT: vbroadcastsd {{.*#+}} ymm1 = [+Inf,+Inf,+Inf,+Inf]
-; CHECK-NEXT: vcmpeq_uqpd %ymm1, %ymm0, %ymm0
-; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
+; CHECK-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775807,9223372036854775807,9223372036854775807,9223372036854775807]
+; CHECK-NEXT: vpand %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9218868437227405311,9218868437227405311,9218868437227405311,9218868437227405311]
+; CHECK-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm1
; CHECK-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
@@ -68,11 +80,11 @@ define <4 x i1> @test_vec_is_inf_or_nan(<4 x double> %arg) {
define <4 x i1> @test_vec_is_not_inf_or_nan(<4 x double> %arg) {
; CHECK-LABEL: test_vec_is_not_inf_or_nan:
; CHECK: # %bb.0:
-; CHECK-NEXT: vbroadcastsd {{.*#+}} ymm1 = [NaN,NaN,NaN,NaN]
-; CHECK-NEXT: vandpd %ymm1, %ymm0, %ymm0
-; CHECK-NEXT: vbroadcastsd {{.*#+}} ymm1 = [+Inf,+Inf,+Inf,+Inf]
-; CHECK-NEXT: vcmpneq_oqpd %ymm1, %ymm0, %ymm0
-; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
+; CHECK-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775807,9223372036854775807,9223372036854775807,9223372036854775807]
+; CHECK-NEXT: vpand %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9218868437227405312,9218868437227405312,9218868437227405312,9218868437227405312]
+; CHECK-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm1
; CHECK-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
@@ -84,11 +96,11 @@ define <4 x i1> @test_vec_is_not_inf_or_nan(<4 x double> %arg) {
define <4 x i1> @test_vec_is_inf(<4 x double> %arg) {
; CHECK-LABEL: test_vec_is_inf:
; CHECK: # %bb.0:
-; CHECK-NEXT: vbroadcastsd {{.*#+}} ymm1 = [NaN,NaN,NaN,NaN]
-; CHECK-NEXT: vandpd %ymm1, %ymm0, %ymm0
-; CHECK-NEXT: vbroadcastsd {{.*#+}} ymm1 = [+Inf,+Inf,+Inf,+Inf]
-; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0
-; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
+; CHECK-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775807,9223372036854775807,9223372036854775807,9223372036854775807]
+; CHECK-NEXT: vpand %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9218868437227405312,9218868437227405312,9218868437227405312,9218868437227405312]
+; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm1
; CHECK-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
@@ -100,11 +112,13 @@ define <4 x i1> @test_vec_is_inf(<4 x double> %arg) {
define <4 x i1> @test_vec_is_not_inf(<4 x double> %arg) {
; CHECK-LABEL: test_vec_is_not_inf:
; CHECK: # %bb.0:
-; CHECK-NEXT: vbroadcastsd {{.*#+}} ymm1 = [NaN,NaN,NaN,NaN]
-; CHECK-NEXT: vandpd %ymm1, %ymm0, %ymm0
-; CHECK-NEXT: vbroadcastsd {{.*#+}} ymm1 = [+Inf,+Inf,+Inf,+Inf]
-; CHECK-NEXT: vcmpneqpd %ymm1, %ymm0, %ymm0
-; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
+; CHECK-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775807,9223372036854775807,9223372036854775807,9223372036854775807]
+; CHECK-NEXT: vpand %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9218868437227405312,9218868437227405312,9218868437227405312,9218868437227405312]
+; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; CHECK-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm1
; CHECK-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
@@ -116,27 +130,12 @@ define <4 x i1> @test_vec_is_not_inf(<4 x double> %arg) {
define i1 @test_fp128_is_inf_or_nan(fp128 %arg) {
; CHECK-LABEL: test_fp128_is_inf_or_nan:
; CHECK: # %bb.0:
-; CHECK-NEXT: pushq %rbx
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: subq $16, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: .cfi_offset %rbx, -16
-; CHECK-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT: vmovaps {{.*#+}} xmm1 = [+Inf]
-; CHECK-NEXT: callq __eqtf2@PLT
-; CHECK-NEXT: testl %eax, %eax
-; CHECK-NEXT: sete %bl
-; CHECK-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
-; CHECK-NEXT: vmovaps {{.*#+}} xmm1 = [+Inf]
-; CHECK-NEXT: callq __unordtf2@PLT
-; CHECK-NEXT: testl %eax, %eax
-; CHECK-NEXT: setne %al
-; CHECK-NEXT: orb %bl, %al
-; CHECK-NEXT: addq $16, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: popq %rbx
-; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movabsq $9223372036854775807, %rax # imm = 0x7FFFFFFFFFFFFFFF
+; CHECK-NEXT: andq -{{[0-9]+}}(%rsp), %rax
+; CHECK-NEXT: movabsq $9223090561878065151, %rcx # imm = 0x7FFEFFFFFFFFFFFF
+; CHECK-NEXT: cmpq %rcx, %rax
+; CHECK-NEXT: setg %al
; CHECK-NEXT: retq
%abs = tail call fp128 @llvm.fabs.f128(fp128 %arg)
%ret = fcmp ueq fp128 %abs, 0xL00000000000000007FFF000000000000
@@ -146,27 +145,12 @@ define i1 @test_fp128_is_inf_or_nan(fp128 %arg) {
define i1 @test_fp128_is_not_inf_or_nan(fp128 %arg) {
; CHECK-LABEL: test_fp128_is_not_inf_or_nan:
; CHECK: # %bb.0:
-; CHECK-NEXT: pushq %rbx
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: subq $16, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: .cfi_offset %rbx, -16
-; CHECK-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT: vmovaps {{.*#+}} xmm1 = [+Inf]
-; CHECK-NEXT: callq __eqtf2@PLT
-; CHECK-NEXT: testl %eax, %eax
-; CHECK-NEXT: setne %bl
-; CHECK-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
-; CHECK-NEXT: vmovaps {{.*#+}} xmm1 = [+Inf]
-; CHECK-NEXT: callq __unordtf2@PLT
-; CHECK-NEXT: testl %eax, %eax
-; CHECK-NEXT: sete %al
-; CHECK-NEXT: andb %bl, %al
-; CHECK-NEXT: addq $16, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: popq %rbx
-; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movabsq $9223372036854775807, %rax # imm = 0x7FFFFFFFFFFFFFFF
+; CHECK-NEXT: andq -{{[0-9]+}}(%rsp), %rax
+; CHECK-NEXT: movabsq $9223090561878065152, %rcx # imm = 0x7FFF000000000000
+; CHECK-NEXT: cmpq %rcx, %rax
+; CHECK-NEXT: setl %al
; CHECK-NEXT: retq
%abs = tail call fp128 @llvm.fabs.f128(fp128 %arg)
%ret = fcmp one fp128 %abs, 0xL00000000000000007FFF000000000000
@@ -212,13 +196,18 @@ define i1 @test_fp128_is_not_inf(fp128 %arg) {
define i1 @test_x86_fp80_is_inf_or_nan(x86_fp80 %arg) {
; CHECK-LABEL: test_x86_fp80_is_inf_or_nan:
; CHECK: # %bb.0:
-; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
-; CHECK-NEXT: fabs
-; CHECK-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
-; CHECK-NEXT: fxch %st(1)
-; CHECK-NEXT: fucompi %st(1), %st
-; CHECK-NEXT: fstp %st(0)
-; CHECK-NEXT: sete %al
+; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: btq $63, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: setae %cl
+; CHECK-NEXT: andl $32767, %eax # imm = 0x7FFF
+; CHECK-NEXT: leal -1(%rax), %edx
+; CHECK-NEXT: movzwl %dx, %edx
+; CHECK-NEXT: cmpl $32766, %edx # imm = 0x7FFE
+; CHECK-NEXT: setae %dl
+; CHECK-NEXT: orb %cl, %dl
+; CHECK-NEXT: testq %rax, %rax
+; CHECK-NEXT: setne %al
+; CHECK-NEXT: andb %dl, %al
; CHECK-NEXT: retq
%abs = tail call x86_fp80 @llvm.fabs.f80(x86_fp80 %arg)
%ret = fcmp ueq x86_fp80 %abs, 0xK7FFF8000000000000000
@@ -228,13 +217,18 @@ define i1 @test_x86_fp80_is_inf_or_nan(x86_fp80 %arg) {
define i1 @test_x86_fp80_is_not_inf_or_nan(x86_fp80 %arg) {
; CHECK-LABEL: test_x86_fp80_is_not_inf_or_nan:
; CHECK: # %bb.0:
-; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
-; CHECK-NEXT: fabs
-; CHECK-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
-; CHECK-NEXT: fxch %st(1)
-; CHECK-NEXT: fucompi %st(1), %st
-; CHECK-NEXT: fstp %st(0)
-; CHECK-NEXT: setne %al
+; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rcx
+; CHECK-NEXT: shrq $63, %rcx
+; CHECK-NEXT: andl $32767, %eax # imm = 0x7FFF
+; CHECK-NEXT: leal -1(%rax), %edx
+; CHECK-NEXT: movzwl %dx, %edx
+; CHECK-NEXT: cmpl $32766, %edx # imm = 0x7FFE
+; CHECK-NEXT: setb %dl
+; CHECK-NEXT: andb %cl, %dl
+; CHECK-NEXT: testq %rax, %rax
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: orb %dl, %al
; CHECK-NEXT: retq
%abs = tail call x86_fp80 @llvm.fabs.f80(x86_fp80 %arg)
%ret = fcmp one x86_fp80 %abs, 0xK7FFF8000000000000000
@@ -244,13 +238,13 @@ define i1 @test_x86_fp80_is_not_inf_or_nan(x86_fp80 %arg) {
define i1 @test_x86_fp80_is_inf(x86_fp80 %arg) {
; CHECK-LABEL: test_x86_fp80_is_inf:
; CHECK: # %bb.0:
-; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
-; CHECK-NEXT: fabs
-; CHECK-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
-; CHECK-NEXT: fxch %st(1)
-; CHECK-NEXT: fucompi %st(1), %st
-; CHECK-NEXT: fstp %st(0)
-; CHECK-NEXT: setae %al
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: notl %eax
+; CHECK-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
+; CHECK-NEXT: xorq {{[0-9]+}}(%rsp), %rcx
+; CHECK-NEXT: andl $32767, %eax # imm = 0x7FFF
+; CHECK-NEXT: orq %rcx, %rax
+; CHECK-NEXT: sete %al
; CHECK-NEXT: retq
%abs = tail call x86_fp80 @llvm.fabs.f80(x86_fp80 %arg)
%ret = fcmp oeq x86_fp80 %abs, 0xK7FFF8000000000000000
@@ -260,13 +254,13 @@ define i1 @test_x86_fp80_is_inf(x86_fp80 %arg) {
define i1 @test_x86_fp80_is_not_inf(x86_fp80 %arg) {
; CHECK-LABEL: test_x86_fp80_is_not_inf:
; CHECK: # %bb.0:
-; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
-; CHECK-NEXT: fabs
-; CHECK-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
-; CHECK-NEXT: fxch %st(1)
-; CHECK-NEXT: fucompi %st(1), %st
-; CHECK-NEXT: fstp %st(0)
-; CHECK-NEXT: setb %al
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: notl %eax
+; CHECK-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
+; CHECK-NEXT: xorq {{[0-9]+}}(%rsp), %rcx
+; CHECK-NEXT: andl $32767, %eax # imm = 0x7FFF
+; CHECK-NEXT: orq %rcx, %rax
+; CHECK-NEXT: setne %al
; CHECK-NEXT: retq
%abs = tail call x86_fp80 @llvm.fabs.f80(x86_fp80 %arg)
%ret = fcmp une x86_fp80 %abs, 0xK7FFF8000000000000000
More information about the llvm-commits
mailing list