[llvm] [DAGCombiner] Fold and/or of NaN SETCC - tests follow up (PR #136168)
Alex MacLean via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 17 10:39:32 PDT 2025
https://github.com/AlexMaclean created https://github.com/llvm/llvm-project/pull/136168
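Follow-up to https://github.com/llvm/llvm-project/pull/135645 addressing the test-cleanup review comments: the NVPTX test drops `-mtriple=nvptx64` from its RUN lines (the file already sets `target triple`), and the X86 test gains the `<4 x float>` cases `and_ord_vec` and `or_uno_vec`.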
From d144da63d0f5fb309ac868bb9e13b4d4ae1e882b Mon Sep 17 00:00:00 2001
From: Alex Maclean <amaclean at nvidia.com>
Date: Thu, 17 Apr 2025 17:37:39 +0000
Subject: [PATCH] [DAGCombiner] Fold and/or of NaN SETCC - tests follow up
---
llvm/test/CodeGen/NVPTX/and-or-setcc.ll | 4 +-
llvm/test/CodeGen/X86/and-or-setcc.ll | 118 ++++++++++++++++++++++++
2 files changed, 120 insertions(+), 2 deletions(-)
diff --git a/llvm/test/CodeGen/NVPTX/and-or-setcc.ll b/llvm/test/CodeGen/NVPTX/and-or-setcc.ll
index 21be9df94d553..6c3514c1ad946 100644
--- a/llvm/test/CodeGen/NVPTX/and-or-setcc.ll
+++ b/llvm/test/CodeGen/NVPTX/and-or-setcc.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc < %s -mtriple=nvptx64 | FileCheck %s
-; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 | %ptxas-verify %}
+; RUN: llc < %s | FileCheck %s
+; RUN: %if ptxas %{ llc < %s | %ptxas-verify %}
target triple = "nvptx64-nvidia-cuda"
diff --git a/llvm/test/CodeGen/X86/and-or-setcc.ll b/llvm/test/CodeGen/X86/and-or-setcc.ll
index cb8ecca9348e6..a6a9362908811 100644
--- a/llvm/test/CodeGen/X86/and-or-setcc.ll
+++ b/llvm/test/CodeGen/X86/and-or-setcc.ll
@@ -55,3 +55,121 @@ define i1 @or_uno(float %a, float %b) {
%e = or i1 %c, %d
ret i1 %e
}
+
+define <4 x i1> @and_ord_vec(<4 x float> %a, <4 x float> %b) {
+; X86-LABEL: and_ord_vec:
+; X86: # %bb.0:
+; X86-NEXT: pushl %eax
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: fstps (%esp) # 4-byte Folded Spill
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: fucompp
+; X86-NEXT: fnstsw %ax
+; X86-NEXT: fucompp
+; X86-NEXT: # kill: def $ah killed $ah killed $ax
+; X86-NEXT: sahf
+; X86-NEXT: fnstsw %ax
+; X86-NEXT: fucompp
+; X86-NEXT: setnp %dh
+; X86-NEXT: shlb $2, %dh
+; X86-NEXT: # kill: def $ah killed $ah killed $ax
+; X86-NEXT: sahf
+; X86-NEXT: fnstsw %ax
+; X86-NEXT: flds (%esp) # 4-byte Folded Reload
+; X86-NEXT: fxch %st(1)
+; X86-NEXT: fucompp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: setnp %dl
+; X86-NEXT: shlb $3, %dl
+; X86-NEXT: orb %dh, %dl
+; X86-NEXT: # kill: def $ah killed $ah killed $ax
+; X86-NEXT: sahf
+; X86-NEXT: setnp %dh
+; X86-NEXT: fnstsw %ax
+; X86-NEXT: # kill: def $ah killed $ah killed $ax
+; X86-NEXT: sahf
+; X86-NEXT: setnp %al
+; X86-NEXT: addb %al, %al
+; X86-NEXT: orb %dh, %al
+; X86-NEXT: orb %dl, %al
+; X86-NEXT: movb %al, (%ecx)
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: popl %ecx
+; X86-NEXT: .cfi_def_cfa_offset 4
+; X86-NEXT: retl $4
+;
+; X64-LABEL: and_ord_vec:
+; X64: # %bb.0:
+; X64-NEXT: cmpordps %xmm1, %xmm0
+; X64-NEXT: retq
+ %c = fcmp ord <4 x float> %a, %a
+ %d = fcmp ord <4 x float> %b, %b
+ %e = and <4 x i1> %c, %d
+ ret <4 x i1> %e
+}
+
+define <4 x i1> @or_uno_vec(<4 x float> %a, <4 x float> %b) {
+; X86-LABEL: or_uno_vec:
+; X86: # %bb.0:
+; X86-NEXT: pushl %eax
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: fstps (%esp) # 4-byte Folded Spill
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: fucompp
+; X86-NEXT: fnstsw %ax
+; X86-NEXT: fucompp
+; X86-NEXT: # kill: def $ah killed $ah killed $ax
+; X86-NEXT: sahf
+; X86-NEXT: fnstsw %ax
+; X86-NEXT: fucompp
+; X86-NEXT: setp %dh
+; X86-NEXT: shlb $2, %dh
+; X86-NEXT: # kill: def $ah killed $ah killed $ax
+; X86-NEXT: sahf
+; X86-NEXT: fnstsw %ax
+; X86-NEXT: flds (%esp) # 4-byte Folded Reload
+; X86-NEXT: fxch %st(1)
+; X86-NEXT: fucompp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: setp %dl
+; X86-NEXT: shlb $3, %dl
+; X86-NEXT: orb %dh, %dl
+; X86-NEXT: # kill: def $ah killed $ah killed $ax
+; X86-NEXT: sahf
+; X86-NEXT: setp %dh
+; X86-NEXT: fnstsw %ax
+; X86-NEXT: # kill: def $ah killed $ah killed $ax
+; X86-NEXT: sahf
+; X86-NEXT: setp %al
+; X86-NEXT: addb %al, %al
+; X86-NEXT: orb %dh, %al
+; X86-NEXT: orb %dl, %al
+; X86-NEXT: movb %al, (%ecx)
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: popl %ecx
+; X86-NEXT: .cfi_def_cfa_offset 4
+; X86-NEXT: retl $4
+;
+; X64-LABEL: or_uno_vec:
+; X64: # %bb.0:
+; X64-NEXT: cmpunordps %xmm1, %xmm0
+; X64-NEXT: retq
+ %c = fcmp uno <4 x float> %a, %a
+ %d = fcmp uno <4 x float> %b, %b
+ %e = or <4 x i1> %c, %d
+ ret <4 x i1> %e
+}
More information about the llvm-commits mailing list