[llvm] [AArch64][SelectionDAG] Fold csetm with eor even if it is fcmp (PR #184445)
via llvm-commits
llvm-commits at lists.llvm.org
Sat Mar 7 09:09:04 PST 2026
https://github.com/SiliconA-Z updated https://github.com/llvm/llvm-project/pull/184445
>From da1396e89a0fe0484d7ced9f3957a6c0950bbb5e Mon Sep 17 00:00:00 2001
From: AZero13 <gfunni234 at gmail.com>
Date: Tue, 3 Mar 2026 17:09:07 -0500
Subject: [PATCH 1/2] Pre-commit test (NFC)
---
llvm/test/CodeGen/AArch64/xor-fp-csinv.ll | 74 +++++++++++++++++++++++
1 file changed, 74 insertions(+)
create mode 100644 llvm/test/CodeGen/AArch64/xor-fp-csinv.ll
diff --git a/llvm/test/CodeGen/AArch64/xor-fp-csinv.ll b/llvm/test/CodeGen/AArch64/xor-fp-csinv.ll
new file mode 100644
index 0000000000000..4e28463c6d5d9
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/xor-fp-csinv.ll
@@ -0,0 +1,74 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64 | FileCheck %s
+
+; Test that (xor x, (select_cc fp_a, fp_b, cc, 0, -1)) folds to CSINV
+; with an FCMP instead of falling back to CSETM + EOR.
+
+define i32 @xor_fp_ogt_f32(float %a, float %b, i32 %x) {
+; CHECK-LABEL: xor_fp_ogt_f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcmp s0, s1
+; CHECK-NEXT: csetm w8, gt
+; CHECK-NEXT: eor w0, w0, w8
+; CHECK-NEXT: ret
+ %cmp = fcmp ogt float %a, %b
+ %mask = sext i1 %cmp to i32
+ %result = xor i32 %x, %mask
+ ret i32 %result
+}
+
+define i32 @xor_fp_olt_f64(double %a, double %b, i32 %x) {
+; CHECK-LABEL: xor_fp_olt_f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcmp d0, d1
+; CHECK-NEXT: csetm w8, mi
+; CHECK-NEXT: eor w0, w0, w8
+; CHECK-NEXT: ret
+ %cmp = fcmp olt double %a, %b
+ %mask = sext i1 %cmp to i32
+ %result = xor i32 %x, %mask
+ ret i32 %result
+}
+
+; Inverted pattern: TVal=-1, FVal=0 (should still fold via CC inversion)
+define i32 @xor_fp_ole_f32_inverted(float %a, float %b, i32 %x) {
+; CHECK-LABEL: xor_fp_ole_f32_inverted:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcmp s0, s1
+; CHECK-NEXT: csetm w8, ls
+; CHECK-NEXT: eor w0, w0, w8
+; CHECK-NEXT: ret
+ %cmp = fcmp ole float %a, %b
+ %mask = sext i1 %cmp to i32
+ %result = xor i32 %x, %mask
+ ret i32 %result
+}
+
+; 64-bit integer result
+define i64 @xor_fp_oge_f32_i64(float %a, float %b, i64 %x) {
+; CHECK-LABEL: xor_fp_oge_f32_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcmp s0, s1
+; CHECK-NEXT: csetm x8, ge
+; CHECK-NEXT: eor x0, x0, x8
+; CHECK-NEXT: ret
+ %cmp = fcmp oge float %a, %b
+ %mask = sext i1 %cmp to i64
+ %result = xor i64 %x, %mask
+ ret i64 %result
+}
+
+; Dual-CC condition (SETONE) — should NOT fold to CSINV (bails out)
+define i32 @xor_fp_one_f32(float %a, float %b, i32 %x) {
+; CHECK-LABEL: xor_fp_one_f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcmp s0, s1
+; CHECK-NEXT: csetm w8, mi
+; CHECK-NEXT: csinv w8, w8, wzr, le
+; CHECK-NEXT: eor w0, w0, w8
+; CHECK-NEXT: ret
+ %cmp = fcmp one float %a, %b
+ %mask = sext i1 %cmp to i32
+ %result = xor i32 %x, %mask
+ ret i32 %result
+}
>From 4dbd09cb919b6601d4d191f4611a16cefbb2f521 Mon Sep 17 00:00:00 2001
From: AZero13 <gfunni234 at gmail.com>
Date: Tue, 3 Mar 2026 17:11:11 -0500
Subject: [PATCH 2/2] [AArch64] Fold csetm with eor even if it is fcmp
---
.../Target/AArch64/AArch64ISelLowering.cpp | 30 +++++++++++++++++--
llvm/test/CodeGen/AArch64/xor-fp-csinv.ll | 12 +++-----
2 files changed, 31 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index dc5a3736ecaa1..b58e133a8b904 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -4566,10 +4566,24 @@ SDValue AArch64TargetLowering::LowerXOR(SDValue Op, SelectionDAG &DAG) const {
SDValue TVal = Sel.getOperand(2);
SDValue FVal = Sel.getOperand(3);
- // FIXME: This could be generalized to non-integer comparisons.
- if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64)
+ EVT CmpVT = LHS.getValueType();
+
+ // Only handle scalar i32/i64 or the common scalar FP types we can lower
+ // efficiently. This excludes vectors and larger/odd FP types like f128.
+ if (CmpVT.isVector() || CmpVT.isScalableVector())
return Op;
+ if (CmpVT.isInteger()) {
+ if (CmpVT != MVT::i32 && CmpVT != MVT::i64)
+ return Op;
+ } else if (CmpVT.isFloatingPoint()) {
+ if (CmpVT != MVT::f16 && CmpVT != MVT::bf16 && CmpVT != MVT::f32 &&
+ CmpVT != MVT::f64)
+ return Op;
+ } else {
+ return Op;
+ }
+
ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal);
ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal);
@@ -4588,7 +4602,17 @@ SDValue AArch64TargetLowering::LowerXOR(SDValue Op, SelectionDAG &DAG) const {
// If the constants line up, perform the transform!
if (CTVal->isZero() && CFVal->isAllOnes()) {
SDValue CCVal;
- SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, DL);
+ SDValue Cmp;
+ if (CmpVT.isInteger()) {
+ Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, DL);
+ } else {
+ AArch64CC::CondCode CC1, CC2;
+ changeFPCCToAArch64CC(CC, CC1, CC2);
+ if (CC2 != AArch64CC::AL)
+ return Op; // Bail out for conditions needing two CCs (e.g. SETONE)
+ Cmp = emitComparison(LHS, RHS, CC, DL, DAG);
+ CCVal = getCondCode(DAG, CC1);
+ }
FVal = Other;
TVal = DAG.getNode(ISD::XOR, DL, Other.getValueType(), Other,
diff --git a/llvm/test/CodeGen/AArch64/xor-fp-csinv.ll b/llvm/test/CodeGen/AArch64/xor-fp-csinv.ll
index 4e28463c6d5d9..84aed5b0350ba 100644
--- a/llvm/test/CodeGen/AArch64/xor-fp-csinv.ll
+++ b/llvm/test/CodeGen/AArch64/xor-fp-csinv.ll
@@ -8,8 +8,7 @@ define i32 @xor_fp_ogt_f32(float %a, float %b, i32 %x) {
; CHECK-LABEL: xor_fp_ogt_f32:
; CHECK: // %bb.0:
; CHECK-NEXT: fcmp s0, s1
-; CHECK-NEXT: csetm w8, gt
-; CHECK-NEXT: eor w0, w0, w8
+; CHECK-NEXT: cinv w0, w0, gt
; CHECK-NEXT: ret
%cmp = fcmp ogt float %a, %b
%mask = sext i1 %cmp to i32
@@ -21,8 +20,7 @@ define i32 @xor_fp_olt_f64(double %a, double %b, i32 %x) {
; CHECK-LABEL: xor_fp_olt_f64:
; CHECK: // %bb.0:
; CHECK-NEXT: fcmp d0, d1
-; CHECK-NEXT: csetm w8, mi
-; CHECK-NEXT: eor w0, w0, w8
+; CHECK-NEXT: cinv w0, w0, mi
; CHECK-NEXT: ret
%cmp = fcmp olt double %a, %b
%mask = sext i1 %cmp to i32
@@ -35,8 +33,7 @@ define i32 @xor_fp_ole_f32_inverted(float %a, float %b, i32 %x) {
; CHECK-LABEL: xor_fp_ole_f32_inverted:
; CHECK: // %bb.0:
; CHECK-NEXT: fcmp s0, s1
-; CHECK-NEXT: csetm w8, ls
-; CHECK-NEXT: eor w0, w0, w8
+; CHECK-NEXT: cinv w0, w0, ls
; CHECK-NEXT: ret
%cmp = fcmp ole float %a, %b
%mask = sext i1 %cmp to i32
@@ -49,8 +46,7 @@ define i64 @xor_fp_oge_f32_i64(float %a, float %b, i64 %x) {
; CHECK-LABEL: xor_fp_oge_f32_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: fcmp s0, s1
-; CHECK-NEXT: csetm x8, ge
-; CHECK-NEXT: eor x0, x0, x8
+; CHECK-NEXT: cinv x0, x0, ge
; CHECK-NEXT: ret
%cmp = fcmp oge float %a, %b
%mask = sext i1 %cmp to i64
More information about the llvm-commits
mailing list