[llvm] [AArch64][SelectionDAG] Fold csetm with eor even if it is fcmp (PR #184445)
via llvm-commits
llvm-commits at lists.llvm.org
Sat Mar 7 09:09:04 PST 2026
https://github.com/SiliconA-Z updated https://github.com/llvm/llvm-project/pull/184445
>From da1396e89a0fe0484d7ced9f3957a6c0950bbb5e Mon Sep 17 00:00:00 2001
From: AZero13 <gfunni234 at gmail.com>
Date: Tue, 3 Mar 2026 17:09:07 -0500
Subject: [PATCH 1/2] Pre-commit test (NFC)
---
llvm/test/CodeGen/AArch64/xor-fp-csinv.ll | 74 +++++++++++++++++++++++
1 file changed, 74 insertions(+)
create mode 100644 llvm/test/CodeGen/AArch64/xor-fp-csinv.ll
diff --git a/llvm/test/CodeGen/AArch64/xor-fp-csinv.ll b/llvm/test/CodeGen/AArch64/xor-fp-csinv.ll
new file mode 100644
index 0000000000000..4e28463c6d5d9
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/xor-fp-csinv.ll
@@ -0,0 +1,74 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64 | FileCheck %s
+
+; Test that (xor x, (select_cc fp_a, fp_b, cc, 0, -1)) folds to CSINV
+; with an FCMP instead of falling back to CSETM + EOR.
+
+define i32 @xor_fp_ogt_f32(float %a, float %b, i32 %x) {
+; CHECK-LABEL: xor_fp_ogt_f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcmp s0, s1
+; CHECK-NEXT: csetm w8, gt
+; CHECK-NEXT: eor w0, w0, w8
+; CHECK-NEXT: ret
+ %cmp = fcmp ogt float %a, %b
+ %mask = sext i1 %cmp to i32
+ %result = xor i32 %x, %mask
+ ret i32 %result
+}
+
+define i32 @xor_fp_olt_f64(double %a, double %b, i32 %x) {
+; CHECK-LABEL: xor_fp_olt_f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcmp d0, d1
+; CHECK-NEXT: csetm w8, mi
+; CHECK-NEXT: eor w0, w0, w8
+; CHECK-NEXT: ret
+ %cmp = fcmp olt double %a, %b
+ %mask = sext i1 %cmp to i32
+ %result = xor i32 %x, %mask
+ ret i32 %result
+}
+
+; Inverted pattern: TVal=-1, FVal=0 (should still fold via CC inversion)
+define i32 @xor_fp_ole_f32_inverted(float %a, float %b, i32 %x) {
+; CHECK-LABEL: xor_fp_ole_f32_inverted:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcmp s0, s1
+; CHECK-NEXT: csetm w8, ls
+; CHECK-NEXT: eor w0, w0, w8
+; CHECK-NEXT: ret
+ %cmp = fcmp ole float %a, %b
+ %mask = sext i1 %cmp to i32
+ %result = xor i32 %x, %mask
+ ret i32 %result
+}
+
+; 64-bit integer result
+define i64 @xor_fp_oge_f32_i64(float %a, float %b, i64 %x) {
+; CHECK-LABEL: xor_fp_oge_f32_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcmp s0, s1
+; CHECK-NEXT: csetm x8, ge
+; CHECK-NEXT: eor x0, x0, x8
+; CHECK-NEXT: ret
+ %cmp = fcmp oge float %a, %b
+ %mask = sext i1 %cmp to i64
+ %result = xor i64 %x, %mask
+ ret i64 %result
+}
+
+; Dual-CC condition (SETONE) — should NOT fold to CSINV (bails out)
+define i32 @xor_fp_one_f32(float %a, float %b, i32 %x) {
+; CHECK-LABEL: xor_fp_one_f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcmp s0, s1
+; CHECK-NEXT: csetm w8, mi
+; CHECK-NEXT: csinv w8, w8, wzr, le
+; CHECK-NEXT: eor w0, w0, w8
+; CHECK-NEXT: ret
+ %cmp = fcmp one float %a, %b
+ %mask = sext i1 %cmp to i32
+ %result = xor i32 %x, %mask
+ ret i32 %result
+}
>From 4dbd09cb919b6601d4d191f4611a16cefbb2f521 Mon Sep 17 00:00:00 2001
From: AZero13 <gfunni234 at gmail.com>
Date: Tue, 3 Mar 2026 17:11:11 -0500
Subject: [PATCH 2/2] [AArch64] Fold csetm with eor even if it is fcmp
---
.../Target/AArch64/AArch64ISelLowering.cpp | 30 +++++++++++++++++--
llvm/test/CodeGen/AArch64/xor-fp-csinv.ll | 12 +++-----
2 files changed, 31 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index dc5a3736ecaa1..b58e133a8b904 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -4566,10 +4566,24 @@ SDValue AArch64TargetLowering::LowerXOR(SDValue Op, SelectionDAG &DAG) const {
SDValue TVal = Sel.getOperand(2);
SDValue FVal = Sel.getOperand(3);
- // FIXME: This could be generalized to non-integer comparisons.
- if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64)
+ EVT CmpVT = LHS.getValueType();
+
+ // Only handle scalar i32/i64 or the common scalar FP types we can lower
+ // efficiently. This excludes vectors and larger/odd FP types like f128.
+ if (CmpVT.isVector() || CmpVT.isScalableVector())
return Op;
+ if (CmpVT.isInteger()) {
+ if (CmpVT != MVT::i32 && CmpVT != MVT::i64)
+ return Op;
+ } else if (CmpVT.isFloatingPoint()) {
+ if (CmpVT != MVT::f16 && CmpVT != MVT::bf16 && CmpVT != MVT::f32 &&
+ CmpVT != MVT::f64)
+ return Op;
+ } else {
+ return Op;
+ }
+
ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal);
ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal);
@@ -4588,7 +4602,17 @@ SDValue AArch64TargetLowering::LowerXOR(SDValue Op, SelectionDAG &DAG) const {
// If the constants line up, perform the transform!
if (CTVal->isZero() && CFVal->isAllOnes()) {
SDValue CCVal;
- SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, DL);
+ SDValue Cmp;
+ if (CmpVT.isInteger()) {
+ Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, DL);
+ } else {
+ AArch64CC::CondCode CC1, CC2;
+ changeFPCCToAArch64CC(CC, CC1, CC2);
+ if (CC2 != AArch64CC::AL)
+ return Op; // Bail out for conditions needing two CCs (e.g. SETONE)
+ Cmp = emitComparison(LHS, RHS, CC, DL, DAG);
+ CCVal = getCondCode(DAG, CC1);
+ }
FVal = Other;
TVal = DAG.getNode(ISD::XOR, DL, Other.getValueType(), Other,
diff --git a/llvm/test/CodeGen/AArch64/xor-fp-csinv.ll b/llvm/test/CodeGen/AArch64/xor-fp-csinv.ll
index 4e28463c6d5d9..84aed5b0350ba 100644
--- a/llvm/test/CodeGen/AArch64/xor-fp-csinv.ll
+++ b/llvm/test/CodeGen/AArch64/xor-fp-csinv.ll
@@ -8,8 +8,7 @@ define i32 @xor_fp_ogt_f32(float %a, float %b, i32 %x) {
; CHECK-LABEL: xor_fp_ogt_f32:
; CHECK: // %bb.0:
; CHECK-NEXT: fcmp s0, s1
-; CHECK-NEXT: csetm w8, gt
-; CHECK-NEXT: eor w0, w0, w8
+; CHECK-NEXT: cinv w0, w0, gt
; CHECK-NEXT: ret
%cmp = fcmp ogt float %a, %b
%mask = sext i1 %cmp to i32
@@ -21,8 +20,7 @@ define i32 @xor_fp_olt_f64(double %a, double %b, i32 %x) {
; CHECK-LABEL: xor_fp_olt_f64:
; CHECK: // %bb.0:
; CHECK-NEXT: fcmp d0, d1
-; CHECK-NEXT: csetm w8, mi
-; CHECK-NEXT: eor w0, w0, w8
+; CHECK-NEXT: cinv w0, w0, mi
; CHECK-NEXT: ret
%cmp = fcmp olt double %a, %b
%mask = sext i1 %cmp to i32
@@ -35,8 +33,7 @@ define i32 @xor_fp_ole_f32_inverted(float %a, float %b, i32 %x) {
; CHECK-LABEL: xor_fp_ole_f32_inverted:
; CHECK: // %bb.0:
; CHECK-NEXT: fcmp s0, s1
-; CHECK-NEXT: csetm w8, ls
-; CHECK-NEXT: eor w0, w0, w8
+; CHECK-NEXT: cinv w0, w0, ls
; CHECK-NEXT: ret
%cmp = fcmp ole float %a, %b
%mask = sext i1 %cmp to i32
@@ -49,8 +46,7 @@ define i64 @xor_fp_oge_f32_i64(float %a, float %b, i64 %x) {
; CHECK-LABEL: xor_fp_oge_f32_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: fcmp s0, s1
-; CHECK-NEXT: csetm x8, ge
-; CHECK-NEXT: eor x0, x0, x8
+; CHECK-NEXT: cinv x0, x0, ge
; CHECK-NEXT: ret
%cmp = fcmp oge float %a, %b
%mask = sext i1 %cmp to i64
More information about the llvm-commits
mailing list