[llvm] 1bfd444 - [DAGCombiner] Fold and/or of NaN SETCC (#135645)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 16 06:48:46 PDT 2025
Author: Alex MacLean
Date: 2025-04-16T06:48:42-07:00
New Revision: 1bfd44462886b167f0d82e44e6a9856a830c1f8b
URL: https://github.com/llvm/llvm-project/commit/1bfd44462886b167f0d82e44e6a9856a830c1f8b
DIFF: https://github.com/llvm/llvm-project/commit/1bfd44462886b167f0d82e44e6a9856a830c1f8b.diff
LOG: [DAGCombiner] Fold and/or of NaN SETCC (#135645)
Fold an AND or OR of two NaN SETCC nodes into a single SETCC where
possible. This optimization already exists in InstCombine, but adding it
here as well can allow for additional folding if more logical operations
are exposed.
Added:
llvm/test/CodeGen/NVPTX/and-or-setcc.ll
llvm/test/CodeGen/X86/and-or-setcc.ll
Modified:
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index d72be359867ca..ab8e18267f3f5 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -6448,6 +6448,12 @@ static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) {
}
}
+ if (LHS0 == LHS1 && RHS0 == RHS1 && CCL == CCR &&
+ LHS0.getValueType() == RHS0.getValueType() &&
+ ((LogicOp->getOpcode() == ISD::AND && CCL == ISD::SETO) ||
+ (LogicOp->getOpcode() == ISD::OR && CCL == ISD::SETUO)))
+ return DAG.getSetCC(DL, VT, LHS0, RHS0, CCL);
+
if (TargetPreference == AndOrSETCCFoldKind::None)
return SDValue();
diff --git a/llvm/test/CodeGen/NVPTX/and-or-setcc.ll b/llvm/test/CodeGen/NVPTX/and-or-setcc.ll
new file mode 100644
index 0000000000000..21be9df94d553
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/and-or-setcc.ll
@@ -0,0 +1,45 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=nvptx64 | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 | %ptxas-verify %}
+
+target triple = "nvptx64-nvidia-cuda"
+
+define i1 @and_ord(float %a, float %b) {
+; CHECK-LABEL: and_ord(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-NEXT: .reg .f32 %f<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.f32 %f1, [and_ord_param_0];
+; CHECK-NEXT: ld.param.f32 %f2, [and_ord_param_1];
+; CHECK-NEXT: setp.num.f32 %p1, %f1, %f2;
+; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT: ret;
+ %c = fcmp ord float %a, 0.0
+ %d = fcmp ord float %b, 0.0
+ %e = and i1 %c, %d
+ ret i1 %e
+}
+
+define i1 @or_uno(float %a, float %b) {
+; CHECK-LABEL: or_uno(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-NEXT: .reg .f32 %f<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.f32 %f1, [or_uno_param_0];
+; CHECK-NEXT: ld.param.f32 %f2, [or_uno_param_1];
+; CHECK-NEXT: setp.nan.f32 %p1, %f1, %f2;
+; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT: ret;
+ %c = fcmp uno float %a, 0.0
+ %d = fcmp uno float %b, 0.0
+ %e = or i1 %c, %d
+ ret i1 %e
+}
diff --git a/llvm/test/CodeGen/X86/and-or-setcc.ll b/llvm/test/CodeGen/X86/and-or-setcc.ll
new file mode 100644
index 0000000000000..cb8ecca9348e6
--- /dev/null
+++ b/llvm/test/CodeGen/X86/and-or-setcc.ll
@@ -0,0 +1,57 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64
+
+define i1 @and_ord(float %a, float %b) {
+; X86-LABEL: and_ord:
+; X86: # %bb.0:
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: fucompp
+; X86-NEXT: fnstsw %ax
+; X86-NEXT: # kill: def $ah killed $ah killed $ax
+; X86-NEXT: sahf
+; X86-NEXT: setnp %al
+; X86-NEXT: retl
+;
+; X64-LABEL: and_ord:
+; X64: # %bb.0:
+; X64-NEXT: xorps %xmm2, %xmm2
+; X64-NEXT: cmpordps %xmm2, %xmm1
+; X64-NEXT: cmpordps %xmm2, %xmm0
+; X64-NEXT: andps %xmm1, %xmm0
+; X64-NEXT: movd %xmm0, %eax
+; X64-NEXT: # kill: def $al killed $al killed $eax
+; X64-NEXT: retq
+ %c = fcmp ord float %a, 0.0
+ %d = fcmp ord float %b, 0.0
+ %e = and i1 %c, %d
+ ret i1 %e
+}
+
+define i1 @or_uno(float %a, float %b) {
+; X86-LABEL: or_uno:
+; X86: # %bb.0:
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: fucompp
+; X86-NEXT: fnstsw %ax
+; X86-NEXT: # kill: def $ah killed $ah killed $ax
+; X86-NEXT: sahf
+; X86-NEXT: setp %al
+; X86-NEXT: retl
+;
+; X64-LABEL: or_uno:
+; X64: # %bb.0:
+; X64-NEXT: xorps %xmm2, %xmm2
+; X64-NEXT: cmpunordps %xmm2, %xmm1
+; X64-NEXT: cmpunordps %xmm2, %xmm0
+; X64-NEXT: orps %xmm1, %xmm0
+; X64-NEXT: movd %xmm0, %eax
+; X64-NEXT: # kill: def $al killed $al killed $eax
+; X64-NEXT: retq
+ %c = fcmp uno float %a, 0.0
+ %d = fcmp uno float %b, 0.0
+ %e = or i1 %c, %d
+ ret i1 %e
+}
More information about the llvm-commits mailing list