[llvm] [SDAG] Do not treat fp x!=0|y!=0 as special case in branch builder. (PR #180895)

via llvm-commits llvm-commits at lists.llvm.org
Wed Feb 11 00:05:31 PST 2026


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-aarch64

Author: David Green (davemgreen)

<details>
<summary>Changes</summary>

This fixes a regression from #<!-- -->169904 triggering in _more_ cases. There was a specialisation in the branch creation code that treated x!=0|y!=0 and x==0&y==0 as special cases not to be optimised, but that should only apply to integer compares, not fast-math FP compares. This patch disables the special case for FP.

You could argue that AArch64 should be using fccmp more / better in this case, but that is a separate issue from whether this special case should act like integer compares.

---
Full diff: https://github.com/llvm/llvm-project/pull/180895.diff


3 Files Affected:

- (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (+2-2) 
- (modified) llvm/test/CodeGen/AArch64/branch-cond-split-fcmp.ll (+26-32) 
- (modified) llvm/test/CodeGen/Thumb2/arm_canberra_distance_f32.ll (+4-7) 


``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 7c762ed6d91ce..c086d5e12693f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -2789,8 +2789,8 @@ SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases) {
 
   // Handle: (X != null) | (Y != null) --> (X|Y) != 0
   // Handle: (X == null) & (Y == null) --> (X|Y) == 0
-  if (Cases[0].CmpRHS == Cases[1].CmpRHS &&
-      Cases[0].CC == Cases[1].CC &&
+  if (Cases[0].CmpRHS == Cases[1].CmpRHS && Cases[0].CC == Cases[1].CC &&
+      !Cases[0].CmpRHS->getType()->isFloatTy() &&
       isa<Constant>(Cases[0].CmpRHS) &&
       cast<Constant>(Cases[0].CmpRHS)->isNullValue()) {
     if (Cases[0].CC == ISD::SETEQ && Cases[0].TrueBB == Cases[1].ThisBB)
diff --git a/llvm/test/CodeGen/AArch64/branch-cond-split-fcmp.ll b/llvm/test/CodeGen/AArch64/branch-cond-split-fcmp.ll
index 10c958abc71d6..cdf4a4b4e525d 100644
--- a/llvm/test/CodeGen/AArch64/branch-cond-split-fcmp.ll
+++ b/llvm/test/CodeGen/AArch64/branch-cond-split-fcmp.ll
@@ -216,20 +216,19 @@ bb4:
 define i64 @test_or_fast(float %a, float %b) {
 ; CHECK-SD-LABEL: test_or_fast:
 ; CHECK-SD:       // %bb.0: // %bb1
-; CHECK-SD-NEXT:    movi d2, #0000000000000000
-; CHECK-SD-NEXT:    fcmp s1, #0.0
-; CHECK-SD-NEXT:    fccmp s0, s2, #0, eq
-; CHECK-SD-NEXT:    cset w8, eq
-; CHECK-SD-NEXT:    tbnz w8, #0, .LBB4_2
-; CHECK-SD-NEXT:  // %bb.1:
+; CHECK-SD-NEXT:    fcmp s0, #0.0
 ; CHECK-SD-NEXT:    mov x0, xzr
-; CHECK-SD-NEXT:    ret
-; CHECK-SD-NEXT:  .LBB4_2: // %bb4
+; CHECK-SD-NEXT:    b.ne .LBB4_3
+; CHECK-SD-NEXT:  // %bb.1: // %bb1
+; CHECK-SD-NEXT:    fcmp s1, #0.0
+; CHECK-SD-NEXT:    b.ne .LBB4_3
+; CHECK-SD-NEXT:  // %bb.2: // %bb4
 ; CHECK-SD-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    .cfi_offset w30, -16
 ; CHECK-SD-NEXT:    bl bar
 ; CHECK-SD-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-SD-NEXT:  .LBB4_3: // %common.ret
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_or_fast:
@@ -267,20 +266,18 @@ define i64 @test_or_select_fast (float %a, float %b) {
 ; CHECK-SD-LABEL: test_or_select_fast:
 ; CHECK-SD:       // %bb.0: // %bb1
 ; CHECK-SD-NEXT:    fcmp s0, #0.0
-; CHECK-SD-NEXT:    cset w8, ne
-; CHECK-SD-NEXT:    fcmp s1, #0.0
-; CHECK-SD-NEXT:    cset w9, ne
-; CHECK-SD-NEXT:    orr w8, w8, w9
-; CHECK-SD-NEXT:    tbz w8, #0, .LBB5_2
-; CHECK-SD-NEXT:  // %bb.1:
 ; CHECK-SD-NEXT:    mov x0, xzr
-; CHECK-SD-NEXT:    ret
-; CHECK-SD-NEXT:  .LBB5_2: // %bb4
+; CHECK-SD-NEXT:    b.ne .LBB5_3
+; CHECK-SD-NEXT:  // %bb.1: // %bb1
+; CHECK-SD-NEXT:    fcmp s1, #0.0
+; CHECK-SD-NEXT:    b.ne .LBB5_3
+; CHECK-SD-NEXT:  // %bb.2: // %bb4
 ; CHECK-SD-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    .cfi_offset w30, -16
 ; CHECK-SD-NEXT:    bl bar
 ; CHECK-SD-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-SD-NEXT:  .LBB5_3: // %common.ret
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_or_select_fast:
@@ -317,20 +314,19 @@ bb4:
 define i64 @test_and_fast(float %a, float %b) {
 ; CHECK-SD-LABEL: test_and_fast:
 ; CHECK-SD:       // %bb.0: // %bb1
-; CHECK-SD-NEXT:    movi d2, #0000000000000000
+; CHECK-SD-NEXT:    fcmp s0, #0.0
+; CHECK-SD-NEXT:    mov x0, xzr
+; CHECK-SD-NEXT:    b.ne .LBB6_3
+; CHECK-SD-NEXT:  // %bb.1: // %bb1
 ; CHECK-SD-NEXT:    fcmp s1, #0.0
-; CHECK-SD-NEXT:    fccmp s0, s2, #0, eq
-; CHECK-SD-NEXT:    cset w8, eq
-; CHECK-SD-NEXT:    tbz w8, #0, .LBB6_2
-; CHECK-SD-NEXT:  // %bb.1: // %bb4
+; CHECK-SD-NEXT:    b.ne .LBB6_3
+; CHECK-SD-NEXT:  // %bb.2: // %bb4
 ; CHECK-SD-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    .cfi_offset w30, -16
 ; CHECK-SD-NEXT:    bl bar
 ; CHECK-SD-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-SD-NEXT:    ret
-; CHECK-SD-NEXT:  .LBB6_2:
-; CHECK-SD-NEXT:    mov x0, xzr
+; CHECK-SD-NEXT:  .LBB6_3: // %common.ret
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_and_fast:
@@ -371,20 +367,18 @@ define i64 @test_and_select_fast(float %a, float %b) {
 ; CHECK-SD-LABEL: test_and_select_fast:
 ; CHECK-SD:       // %bb.0: // %bb1
 ; CHECK-SD-NEXT:    fcmp s0, #0.0
-; CHECK-SD-NEXT:    cset w8, eq
+; CHECK-SD-NEXT:    mov x0, xzr
+; CHECK-SD-NEXT:    b.ne .LBB7_3
+; CHECK-SD-NEXT:  // %bb.1: // %bb1
 ; CHECK-SD-NEXT:    fcmp s1, #0.0
-; CHECK-SD-NEXT:    cset w9, eq
-; CHECK-SD-NEXT:    and w8, w8, w9
-; CHECK-SD-NEXT:    tbz w8, #0, .LBB7_2
-; CHECK-SD-NEXT:  // %bb.1: // %bb4
+; CHECK-SD-NEXT:    b.ne .LBB7_3
+; CHECK-SD-NEXT:  // %bb.2: // %bb4
 ; CHECK-SD-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    .cfi_offset w30, -16
 ; CHECK-SD-NEXT:    bl bar
 ; CHECK-SD-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-SD-NEXT:    ret
-; CHECK-SD-NEXT:  .LBB7_2:
-; CHECK-SD-NEXT:    mov x0, xzr
+; CHECK-SD-NEXT:  .LBB7_3: // %common.ret
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_and_select_fast:
diff --git a/llvm/test/CodeGen/Thumb2/arm_canberra_distance_f32.ll b/llvm/test/CodeGen/Thumb2/arm_canberra_distance_f32.ll
index c47a0bf6c00bd..27d4e07ff4cb7 100644
--- a/llvm/test/CodeGen/Thumb2/arm_canberra_distance_f32.ll
+++ b/llvm/test/CodeGen/Thumb2/arm_canberra_distance_f32.ll
@@ -10,16 +10,13 @@ define nofpclass(nan inf) float @arm_canberra_distance_f32(ptr noundef readonly
 ; CHECK-NEXT:    bxeq lr
 ; CHECK-NEXT:  .LBB0_1: @ %while.body
 ; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vldr s2, [r1]
 ; CHECK-NEXT:    vldr s4, [r0]
-; CHECK-NEXT:    vcmp.f32 s2, #0
-; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-NEXT:    vldr s2, [r1]
 ; CHECK-NEXT:    vcmp.f32 s4, #0
-; CHECK-NEXT:    cset r12, ne
 ; CHECK-NEXT:    vmrs APSR_nzcv, fpscr
-; CHECK-NEXT:    cset r3, ne
-; CHECK-NEXT:    orr.w r3, r3, r12
-; CHECK-NEXT:    lsls r3, r3, #31
+; CHECK-NEXT:    itt eq
+; CHECK-NEXT:    vcmpeq.f32 s2, #0
+; CHECK-NEXT:    vmrseq APSR_nzcv, fpscr
 ; CHECK-NEXT:    beq .LBB0_3
 ; CHECK-NEXT:  @ %bb.2: @ %if.then
 ; CHECK-NEXT:    @ in Loop: Header=BB0_1 Depth=1

``````````

</details>


https://github.com/llvm/llvm-project/pull/180895


More information about the llvm-commits mailing list