[llvm] [CodeGen] Add more inverse cases for is_fpclass (PR #121519)
Victor Mustya via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 2 12:15:18 PST 2025
https://github.com/vmustya created https://github.com/llvm/llvm-project/pull/121519
It's more efficient to invert the is_fpclass mask for the cases where
the zero, infinity and NaN flags are all set in the mask. In this case, we
can just check that the input value is neither normal nor subnormal.
>From 62dd5700098ede8f0ce62d75ccbe54d5ec1685a2 Mon Sep 17 00:00:00 2001
From: Victor Mustya <victor.mustya at intel.com>
Date: Thu, 2 Jan 2025 12:11:16 -0800
Subject: [PATCH] [CodeGen] Add more inverse cases for is_fpclass
It's more efficient to invert the is_fpclass mask for the cases where
the zero, infinity and NaN flags are all set in the mask. In this case, we
can just check that the input value is neither normal nor subnormal.
---
llvm/lib/CodeGen/CodeGenCommonISel.cpp | 3 +
llvm/test/CodeGen/RISCV/float-intrinsics.ll | 60 +++-----
llvm/test/CodeGen/X86/is_fpclass.ll | 151 ++++++++++++++++++++
3 files changed, 176 insertions(+), 38 deletions(-)
diff --git a/llvm/lib/CodeGen/CodeGenCommonISel.cpp b/llvm/lib/CodeGen/CodeGenCommonISel.cpp
index 4cd2f6ae2fdb11..e6cf620e71316b 100644
--- a/llvm/lib/CodeGen/CodeGenCommonISel.cpp
+++ b/llvm/lib/CodeGen/CodeGenCommonISel.cpp
@@ -200,6 +200,9 @@ FPClassTest llvm::invertFPClassTestIfSimpler(FPClassTest Test, bool UseFCmp) {
case fcZero | fcNan:
case fcSubnormal | fcZero:
case fcSubnormal | fcZero | fcNan:
+ case fcNormal | fcSubnormal:
+ case fcPosNormal | fcPosSubnormal:
+ case fcNegNormal | fcNegSubnormal:
return InvertedTest;
case fcInf | fcNan:
case fcPosInf | fcNan:
diff --git a/llvm/test/CodeGen/RISCV/float-intrinsics.ll b/llvm/test/CodeGen/RISCV/float-intrinsics.ll
index e27b5e27dec1d3..1df4f541e58d17 100644
--- a/llvm/test/CodeGen/RISCV/float-intrinsics.ll
+++ b/llvm/test/CodeGen/RISCV/float-intrinsics.ll
@@ -1640,55 +1640,39 @@ define i1 @fpclass(float %x) {
;
; RV32I-LABEL: fpclass:
; RV32I: # %bb.0:
-; RV32I-NEXT: slli a1, a0, 1
+; RV32I-NEXT: addi a1, a0, -1
; RV32I-NEXT: lui a2, 2048
+; RV32I-NEXT: slli a3, a0, 1
+; RV32I-NEXT: addi a2, a2, -2
+; RV32I-NEXT: sltu a1, a2, a1
+; RV32I-NEXT: lui a2, 1046528
+; RV32I-NEXT: srli a3, a3, 1
+; RV32I-NEXT: add a2, a3, a2
+; RV32I-NEXT: srli a2, a2, 24
+; RV32I-NEXT: sltiu a2, a2, 127
+; RV32I-NEXT: xori a2, a2, 1
; RV32I-NEXT: slti a0, a0, 0
-; RV32I-NEXT: lui a3, 522240
-; RV32I-NEXT: lui a4, 1046528
-; RV32I-NEXT: srli a1, a1, 1
-; RV32I-NEXT: addi a2, a2, -1
-; RV32I-NEXT: addi a5, a1, -1
-; RV32I-NEXT: sltu a2, a5, a2
-; RV32I-NEXT: xor a5, a1, a3
-; RV32I-NEXT: slt a3, a3, a1
-; RV32I-NEXT: add a4, a1, a4
-; RV32I-NEXT: seqz a1, a1
-; RV32I-NEXT: seqz a5, a5
-; RV32I-NEXT: srli a4, a4, 24
-; RV32I-NEXT: and a2, a2, a0
-; RV32I-NEXT: or a1, a1, a5
-; RV32I-NEXT: sltiu a4, a4, 127
-; RV32I-NEXT: or a1, a1, a2
-; RV32I-NEXT: or a1, a1, a3
-; RV32I-NEXT: and a0, a4, a0
-; RV32I-NEXT: or a0, a1, a0
+; RV32I-NEXT: or a0, a2, a0
+; RV32I-NEXT: and a0, a1, a0
; RV32I-NEXT: ret
;
; RV64I-LABEL: fpclass:
; RV64I: # %bb.0:
; RV64I-NEXT: sext.w a1, a0
+; RV64I-NEXT: addiw a2, a0, -1
+; RV64I-NEXT: lui a3, 2048
; RV64I-NEXT: slli a0, a0, 33
-; RV64I-NEXT: lui a2, 2048
-; RV64I-NEXT: lui a3, 522240
-; RV64I-NEXT: lui a4, 1046528
+; RV64I-NEXT: addiw a3, a3, -2
+; RV64I-NEXT: sltu a2, a3, a2
+; RV64I-NEXT: lui a3, 1046528
; RV64I-NEXT: srli a0, a0, 33
-; RV64I-NEXT: addiw a2, a2, -1
+; RV64I-NEXT: add a0, a0, a3
+; RV64I-NEXT: srliw a0, a0, 24
+; RV64I-NEXT: sltiu a0, a0, 127
+; RV64I-NEXT: xori a0, a0, 1
; RV64I-NEXT: slti a1, a1, 0
-; RV64I-NEXT: addi a5, a0, -1
-; RV64I-NEXT: sltu a2, a5, a2
-; RV64I-NEXT: xor a5, a0, a3
-; RV64I-NEXT: slt a3, a3, a0
-; RV64I-NEXT: add a4, a0, a4
-; RV64I-NEXT: seqz a0, a0
-; RV64I-NEXT: seqz a5, a5
-; RV64I-NEXT: srliw a4, a4, 24
-; RV64I-NEXT: and a2, a2, a1
-; RV64I-NEXT: or a0, a0, a5
-; RV64I-NEXT: sltiu a4, a4, 127
-; RV64I-NEXT: or a0, a0, a2
-; RV64I-NEXT: or a0, a0, a3
-; RV64I-NEXT: and a1, a4, a1
; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: and a0, a2, a0
; RV64I-NEXT: ret
%cmp = call i1 @llvm.is.fpclass.f32(float %x, i32 639)
ret i1 %cmp
diff --git a/llvm/test/CodeGen/X86/is_fpclass.ll b/llvm/test/CodeGen/X86/is_fpclass.ll
index 97136dafa6c2c0..bfaf58a707203f 100644
--- a/llvm/test/CodeGen/X86/is_fpclass.ll
+++ b/llvm/test/CodeGen/X86/is_fpclass.ll
@@ -2901,6 +2901,157 @@ define i1 @not_issubnormal_or_zero_or_qnan_f(float %x) {
ret i1 %class
}
+define i1 @not_ispositive_normal_or_subnormal_f(float %x) {
+; X86-LABEL: not_ispositive_normal_or_subnormal_f:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: sets %cl
+; X86-NEXT: movl %eax, %edx
+; X86-NEXT: andl $2147483647, %edx # imm = 0x7FFFFFFF
+; X86-NEXT: addl $-8388608, %edx # imm = 0xFF800000
+; X86-NEXT: cmpl $2130706432, %edx # imm = 0x7F000000
+; X86-NEXT: setae %dl
+; X86-NEXT: orb %cl, %dl
+; X86-NEXT: decl %eax
+; X86-NEXT: cmpl $8388607, %eax # imm = 0x7FFFFF
+; X86-NEXT: setae %al
+; X86-NEXT: andb %dl, %al
+; X86-NEXT: retl
+;
+; X64-GENERIC-LABEL: not_ispositive_normal_or_subnormal_f:
+; X64-GENERIC: # %bb.0:
+; X64-GENERIC-NEXT: movd %xmm0, %eax
+; X64-GENERIC-NEXT: testl %eax, %eax
+; X64-GENERIC-NEXT: sets %cl
+; X64-GENERIC-NEXT: movl %eax, %edx
+; X64-GENERIC-NEXT: andl $2147483647, %edx # imm = 0x7FFFFFFF
+; X64-GENERIC-NEXT: addl $-8388608, %edx # imm = 0xFF800000
+; X64-GENERIC-NEXT: cmpl $2130706432, %edx # imm = 0x7F000000
+; X64-GENERIC-NEXT: setae %dl
+; X64-GENERIC-NEXT: orb %cl, %dl
+; X64-GENERIC-NEXT: decl %eax
+; X64-GENERIC-NEXT: cmpl $8388607, %eax # imm = 0x7FFFFF
+; X64-GENERIC-NEXT: setae %al
+; X64-GENERIC-NEXT: andb %dl, %al
+; X64-GENERIC-NEXT: retq
+;
+; X64-NDD-LABEL: not_ispositive_normal_or_subnormal_f:
+; X64-NDD: # %bb.0:
+; X64-NDD-NEXT: movd %xmm0, %eax
+; X64-NDD-NEXT: testl %eax, %eax
+; X64-NDD-NEXT: sets %cl
+; X64-NDD-NEXT: andl $2147483647, %eax, %edx # imm = 0x7FFFFFFF
+; X64-NDD-NEXT: addl $-8388608, %edx # imm = 0xFF800000
+; X64-NDD-NEXT: cmpl $2130706432, %edx # imm = 0x7F000000
+; X64-NDD-NEXT: setae %dl
+; X64-NDD-NEXT: orb %dl, %cl
+; X64-NDD-NEXT: decl %eax
+; X64-NDD-NEXT: cmpl $8388607, %eax # imm = 0x7FFFFF
+; X64-NDD-NEXT: setae %al
+; X64-NDD-NEXT: andb %cl, %al
+; X64-NDD-NEXT: retq
+ %class = tail call i1 @llvm.is.fpclass.f32(float %x, i32 639) ; ~(0x100|0x80) = ~"posnormal|possubnormal"
+ ret i1 %class
+}
+
+define i1 @not_isnegative_normal_or_subnormal_f(float %x) {
+; X86-LABEL: not_isnegative_normal_or_subnormal_f:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: andl $2147483647, %ecx # imm = 0x7FFFFFFF
+; X86-NEXT: leal -8388608(%ecx), %edx
+; X86-NEXT: cmpl $2130706432, %edx # imm = 0x7F000000
+; X86-NEXT: setae %dl
+; X86-NEXT: decl %ecx
+; X86-NEXT: cmpl $8388607, %ecx # imm = 0x7FFFFF
+; X86-NEXT: setae %cl
+; X86-NEXT: andb %dl, %cl
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: setns %al
+; X86-NEXT: orb %cl, %al
+; X86-NEXT: retl
+;
+; X64-GENERIC-LABEL: not_isnegative_normal_or_subnormal_f:
+; X64-GENERIC: # %bb.0:
+; X64-GENERIC-NEXT: movd %xmm0, %eax
+; X64-GENERIC-NEXT: movl %eax, %ecx
+; X64-GENERIC-NEXT: andl $2147483647, %ecx # imm = 0x7FFFFFFF
+; X64-GENERIC-NEXT: leal -8388608(%rcx), %edx
+; X64-GENERIC-NEXT: cmpl $2130706432, %edx # imm = 0x7F000000
+; X64-GENERIC-NEXT: setae %dl
+; X64-GENERIC-NEXT: decl %ecx
+; X64-GENERIC-NEXT: cmpl $8388607, %ecx # imm = 0x7FFFFF
+; X64-GENERIC-NEXT: setae %cl
+; X64-GENERIC-NEXT: andb %dl, %cl
+; X64-GENERIC-NEXT: testl %eax, %eax
+; X64-GENERIC-NEXT: setns %al
+; X64-GENERIC-NEXT: orb %cl, %al
+; X64-GENERIC-NEXT: retq
+;
+; X64-NDD-LABEL: not_isnegative_normal_or_subnormal_f:
+; X64-NDD: # %bb.0:
+; X64-NDD-NEXT: movd %xmm0, %eax
+; X64-NDD-NEXT: andl $2147483647, %eax, %ecx # imm = 0x7FFFFFFF
+; X64-NDD-NEXT: addl $-8388608, %ecx, %edx # imm = 0xFF800000
+; X64-NDD-NEXT: cmpl $2130706432, %edx # imm = 0x7F000000
+; X64-NDD-NEXT: setae %dl
+; X64-NDD-NEXT: decl %ecx
+; X64-NDD-NEXT: cmpl $8388607, %ecx # imm = 0x7FFFFF
+; X64-NDD-NEXT: setae %cl
+; X64-NDD-NEXT: andb %dl, %cl
+; X64-NDD-NEXT: testl %eax, %eax
+; X64-NDD-NEXT: setns %al
+; X64-NDD-NEXT: orb %cl, %al
+; X64-NDD-NEXT: retq
+ %class = tail call i1 @llvm.is.fpclass.f32(float %x, i32 999) ; ~(0x10|0x8) = ~"negsubnormal|negnormal"
+ ret i1 %class
+}
+
+define i1 @not_isnormal_or_subnormal_f(float %x) {
+; X86-LABEL: not_isnormal_or_subnormal_f:
+; X86: # %bb.0:
+; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: leal -8388608(%eax), %ecx
+; X86-NEXT: cmpl $2130706432, %ecx # imm = 0x7F000000
+; X86-NEXT: setae %cl
+; X86-NEXT: decl %eax
+; X86-NEXT: cmpl $8388607, %eax # imm = 0x7FFFFF
+; X86-NEXT: setae %al
+; X86-NEXT: andb %cl, %al
+; X86-NEXT: retl
+;
+; X64-GENERIC-LABEL: not_isnormal_or_subnormal_f:
+; X64-GENERIC: # %bb.0:
+; X64-GENERIC-NEXT: movd %xmm0, %eax
+; X64-GENERIC-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X64-GENERIC-NEXT: leal -8388608(%rax), %ecx
+; X64-GENERIC-NEXT: cmpl $2130706432, %ecx # imm = 0x7F000000
+; X64-GENERIC-NEXT: setae %cl
+; X64-GENERIC-NEXT: decl %eax
+; X64-GENERIC-NEXT: cmpl $8388607, %eax # imm = 0x7FFFFF
+; X64-GENERIC-NEXT: setae %al
+; X64-GENERIC-NEXT: andb %cl, %al
+; X64-GENERIC-NEXT: retq
+;
+; X64-NDD-LABEL: not_isnormal_or_subnormal_f:
+; X64-NDD: # %bb.0:
+; X64-NDD-NEXT: movd %xmm0, %eax
+; X64-NDD-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X64-NDD-NEXT: addl $-8388608, %eax, %ecx # imm = 0xFF800000
+; X64-NDD-NEXT: cmpl $2130706432, %ecx # imm = 0x7F000000
+; X64-NDD-NEXT: setae %cl
+; X64-NDD-NEXT: decl %eax
+; X64-NDD-NEXT: cmpl $8388607, %eax # imm = 0x7FFFFF
+; X64-NDD-NEXT: setae %al
+; X64-NDD-NEXT: andb %cl, %al
+; X64-NDD-NEXT: retq
+ %class = tail call i1 @llvm.is.fpclass.f32(float %x, i32 615) ; ~(0x108|0x90) = ~"normal|subnormal"
+ ret i1 %class
+}
+
declare i1 @llvm.is.fpclass.f32(float, i32)
declare i1 @llvm.is.fpclass.f64(double, i32)
declare <1 x i1> @llvm.is.fpclass.v1f32(<1 x float>, i32)
More information about the llvm-commits
mailing list