[llvm] [AArch64] Fix register check in ConditionOptimizer cross-block logic (PR #176528)

Hussam A. via llvm-commits llvm-commits at lists.llvm.org
Sat Jan 17 08:19:07 PST 2026


https://github.com/hussam-alhassan updated https://github.com/llvm/llvm-project/pull/176528

>From 386f5e82f7ce048dcf1b553e12b52f7b42d04752 Mon Sep 17 00:00:00 2001
From: Hussam Alhassan <hsm.link at proton.me>
Date: Sat, 17 Jan 2026 00:53:57 +0000
Subject: [PATCH] [AArch64] Fix register check in ConditionOptimizer
 cross-block logic

This patch fixes a bug in the AArch64 ConditionOptimizer pass.

Previously, the cross-block optimization would not check for register
equivalence before modifying the two comparison instructions.

As a result, two cmp instructions with suitable condition codes and
immediates would be modified even if they compared different registers,
in which case the rewrite cannot enable CSE afterwards.

A negative test has also been added to confirm this fix.
---
 .../AArch64/AArch64ConditionOptimizer.cpp     | 13 ++++
 .../AArch64/combine-comparisons-by-cse.ll     | 69 +++++++++++++++++++
 2 files changed, 82 insertions(+)

diff --git a/llvm/lib/Target/AArch64/AArch64ConditionOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64ConditionOptimizer.cpp
index fef5f5a84d937..602b88f48c7de 100644
--- a/llvm/lib/Target/AArch64/AArch64ConditionOptimizer.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ConditionOptimizer.cpp
@@ -80,6 +80,7 @@
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
@@ -100,6 +101,7 @@ namespace {
 
 class AArch64ConditionOptimizer : public MachineFunctionPass {
   const TargetInstrInfo *TII;
+  const TargetRegisterInfo *TRI;
   MachineDominatorTree *DomTree;
   const MachineRegisterInfo *MRI;
 
@@ -525,6 +527,16 @@ bool AArch64ConditionOptimizer::optimizeCrossBlock(MachineBasicBlock &HBB) {
     return false;
   }
 
+  // Ensure both compares use the same register, tracing through copies.
+  Register HeadCmpReg =
+      TRI->lookThruCopyLike(HeadCmpMI->getOperand(1).getReg(), MRI);
+  Register TrueCmpReg =
+      TRI->lookThruCopyLike(TrueCmpMI->getOperand(1).getReg(), MRI);
+  if (HeadCmpReg != TrueCmpReg) {
+    LLVM_DEBUG(dbgs() << "CMPs compare different registers\n");
+    return false;
+  }
+
   AArch64CC::CondCode HeadCmp;
   if (HeadCond.empty() || !parseCond(HeadCond, HeadCmp)) {
     return false;
@@ -608,6 +620,7 @@ bool AArch64ConditionOptimizer::runOnMachineFunction(MachineFunction &MF) {
     return false;
 
   TII = MF.getSubtarget().getInstrInfo();
+  TRI = MF.getSubtarget().getRegisterInfo();
   DomTree = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
   MRI = &MF.getRegInfo();
 
diff --git a/llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll b/llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll
index 4449c2b9193a4..c261013303aa8 100644
--- a/llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll
+++ b/llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll
@@ -949,6 +949,75 @@ return:                                           ; preds = %if.end, %land.lhs.t
   ret i32 %retval.0
 }
 
+; Negative test: cross-block with different registers should not be optimized.
+; Identical to combine_gt_ge_10, but lor.lhs.false compares @b instead of @a.
+; (a > 10 && b == c) || (b >= 10 && b == d)
+define i32 @combine_gt_ge_different_regs() #0 {
+; CHECK-LABEL: combine_gt_ge_different_regs:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    adrp x8, :got:a
+; CHECK-NEXT:    ldr x8, [x8, :got_lo12:a]
+; CHECK-NEXT:    ldr w10, [x8]
+; CHECK-NEXT:    adrp x8, :got:b
+; CHECK-NEXT:    ldr x8, [x8, :got_lo12:b]
+; CHECK-NEXT:    cmp w10, #11
+; CHECK-NEXT:    ldr w9, [x8]
+; CHECK-NEXT:    b.lt .LBB15_3
+; CHECK-NEXT:  // %bb.1: // %land.lhs.true
+; CHECK-NEXT:    adrp x10, :got:c
+; CHECK-NEXT:    ldr x10, [x10, :got_lo12:c]
+; CHECK-NEXT:    ldr w10, [x10]
+; CHECK-NEXT:    cmp w9, w10
+; CHECK-NEXT:    b.ne .LBB15_4
+; CHECK-NEXT:  // %bb.2:
+; CHECK-NEXT:    mov w0, #1 // =0x1
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB15_3: // %lor.lhs.false
+; CHECK-NEXT:    cmp w9, #10
+; CHECK-NEXT:    b.lt .LBB15_6
+; CHECK-NEXT:  .LBB15_4: // %land.lhs.true3
+; CHECK-NEXT:    adrp x9, :got:d
+; CHECK-NEXT:    ldr x9, [x9, :got_lo12:d]
+; CHECK-NEXT:    ldr w8, [x8]
+; CHECK-NEXT:    ldr w9, [x9]
+; CHECK-NEXT:    cmp w8, w9
+; CHECK-NEXT:    b.ne .LBB15_6
+; CHECK-NEXT:  // %bb.5:
+; CHECK-NEXT:    mov w0, #1 // =0x1
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB15_6: // %if.end
+; CHECK-NEXT:    mov w0, wzr
+; CHECK-NEXT:    ret
+entry:
+  %0 = load i32, ptr @a, align 4
+  %cmp = icmp sgt i32 %0, 10
+  br i1 %cmp, label %land.lhs.true, label %lor.lhs.false
+
+land.lhs.true:                                    ; preds = %entry
+  %1 = load i32, ptr @b, align 4
+  %2 = load i32, ptr @c, align 4
+  %cmp1 = icmp eq i32 %1, %2
+  br i1 %cmp1, label %return, label %land.lhs.true3
+
+lor.lhs.false:                                    ; preds = %entry
+  %3 = load i32, ptr @b, align 4
+  %cmp2 = icmp sgt i32 %3, 9
+  br i1 %cmp2, label %land.lhs.true3, label %if.end
+
+land.lhs.true3:                                   ; preds = %lor.lhs.false, %land.lhs.true
+  %4 = load i32, ptr @b, align 4
+  %5 = load i32, ptr @d, align 4
+  %cmp4 = icmp eq i32 %4, %5
+  br i1 %cmp4, label %return, label %if.end
+
+if.end:                                           ; preds = %land.lhs.true3, %lor.lhs.false
+  br label %return
+
+return:                                           ; preds = %if.end, %land.lhs.true3, %land.lhs.true
+  %retval.0 = phi i32 [ 0, %if.end ], [ 1, %land.lhs.true3 ], [ 1, %land.lhs.true ]
+  ret i32 %retval.0
+}
+
 declare i32 @zoo(i32)
 
 declare double @yoo(i32)



More information about the llvm-commits mailing list