[llvm] [AArch64] optimize lowering for icmp on i128 when RHS is an immediate (PR #181822)
Cheng Lingfei via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 17 05:46:22 PST 2026
https://github.com/clingfei created https://github.com/llvm/llvm-project/pull/181822
Optimize the predicates eq, ne, ult, ule, ugt, and uge when the i128 RHS is a constant immediate, splitting the compare into 64-bit halves so AArch64 can select CMP+CCMP.
Closes https://github.com/llvm/llvm-project/issues/161273.
>From 6c5b2bd5fcdb9df3ea28ea46f42a3538e1c36c9d Mon Sep 17 00:00:00 2001
From: clingfei <1599101385 at qq.com>
Date: Tue, 17 Feb 2026 21:22:02 +0800
Subject: [PATCH] [AArch64] optimize lowering for icmp on i128 when RHS is an
immediate
---
.../Target/AArch64/AArch64ISelLowering.cpp | 100 +++++++++++++++++-
.../CodeGen/AArch64/i128-imm-compare-ccmp.ll | 61 +++++++++++
llvm/test/CodeGen/AArch64/isinf.ll | 10 +-
3 files changed, 165 insertions(+), 6 deletions(-)
create mode 100644 llvm/test/CodeGen/AArch64/i128-imm-compare-ccmp.ll
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 66c22db0491d1..7b0f79a4b0a9f 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -27145,6 +27145,101 @@ performVecReduceBitwiseCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
return SDValue();
}
+// Split a scalar 128-bit integer value into its low and high 64-bit halves.
+// Returns false (leaving Lo/Hi untouched) when V is not a 128-bit scalar
+// integer.
+static bool splitI128ValueToI64Halves(SDValue V, SelectionDAG &DAG,
+                                      const SDLoc &DL, SDValue &Lo,
+                                      SDValue &Hi) {
+  EVT VT = V.getValueType();
+  if (!VT.isInteger() || VT.getFixedSizeInBits() != 128)
+    return false;
+  Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, V);
+  SDValue HiShift = DAG.getNode(ISD::SRL, DL, VT, V,
+                                DAG.getShiftAmountConstant(64, VT, DL));
+  Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, HiShift);
+  return true;
+}
+
+// Combine a scalar i128 SETCC against a constant RHS into compares on the two
+// 64-bit halves, so AArch64 can select a CMP+CCMP+CSET sequence instead of
+// materialising the wide subtraction.  Handles eq/ne and the unsigned
+// orderings ult/ule/ugt/uge; returns SDValue() when the pattern does not
+// apply.
+static SDValue
+performExpandedI128CmpCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
+                              SelectionDAG &DAG, ISD::CondCode CCCode) {
+  SDValue LHS = N->getOperand(0);
+  SDValue RHS = N->getOperand(1);
+  if (N->hasOneUse()) {
+    // Leave branch conditions alone; BRCOND lowering already produces a good
+    // sequence (see fshl_i128 in funnel-shift.ll).
+    if (N->user_begin()->getOpcode() == ISD::BRCOND)
+      return SDValue();
+  }
+  // Keep the dedicated shift+cmp-zero combine opportunities for wide integers.
+  // Canonicalizing these into split compares here tends to introduce an extra
+  // EXTR on AArch64 (see icmp-shift-opt.ll).
+  if ((CCCode == ISD::SETEQ || CCCode == ISD::SETNE) && isNullConstant(RHS) &&
+      (LHS.getOpcode() == ISD::SRL || LHS.getOpcode() == ISD::SHL))
+    return SDValue();
+
+  // Only handle scalar i128 compares whose LHS is a two-operand node and whose
+  // RHS is a constant.  A constant LHS would have been canonicalized/folded
+  // already, so bail out on it.
+  if (LHS.getValueType().isScalableVT() || RHS.getValueType().isScalableVT() ||
+      LHS.getNumOperands() != 2 ||
+      !(LHS.getValueType().isInteger() && LHS.getValueSizeInBits() == 128) ||
+      !(RHS.getValueType().isInteger() && RHS.getValueSizeInBits() == 128))
+    return SDValue();
+  if (isa<ConstantSDNode>(LHS.getNode()))
+    return SDValue();
+  auto *ConstRHS = dyn_cast<ConstantSDNode>(RHS.getNode());
+  if (!ConstRHS)
+    return SDValue();
+
+  SDLoc DL(N);
+  EVT VT = N->getValueType(0);
+  const APInt &K = ConstRHS->getAPIntValue();
+  SDValue RHSLo = DAG.getConstant(K.trunc(64), DL, MVT::i64);
+  SDValue RHSHi = DAG.getConstant(K.lshr(64).trunc(64), DL, MVT::i64);
+  SDValue LHSLo, LHSHi;
+  if (!splitI128ValueToI64Halves(LHS, DAG, DL, LHSLo, LHSHi))
+    return SDValue();
+
+  if (CCCode == ISD::SETEQ || CCCode == ISD::SETNE) {
+    // x ==/!= K  <=>  (xlo ==/!= klo) AND/OR (xhi ==/!= khi).
+    // If either half compare would constant-fold, leave it to the generic
+    // combiner.
+    SDValue LoCmpF = DAG.FoldSetCC(VT, LHSLo, RHSLo, CCCode, DL);
+    SDValue HiCmpF = DAG.FoldSetCC(VT, LHSHi, RHSHi, CCCode, DL);
+    if (isa_and_nonnull<ConstantSDNode>(LoCmpF.getNode()) ||
+        isa_and_nonnull<ConstantSDNode>(HiCmpF.getNode()))
+      return SDValue();
+
+    SDValue LoCmp = DAG.getSetCC(DL, VT, LHSLo, RHSLo, CCCode);
+    SDValue HiCmp = DAG.getSetCC(DL, VT, LHSHi, RHSHi, CCCode);
+    unsigned Opcode = (CCCode == ISD::SETEQ) ? ISD::AND : ISD::OR;
+    return DAG.getNode(Opcode, DL, LoCmp.getValueType(), LoCmp, HiCmp);
+  }
+
+  if (CCCode == ISD::SETUGT || CCCode == ISD::SETUGE) {
+    // x >  K <=> (xhi > khi) || (xhi == khi && xlo >  klo)
+    // x >= K <=> (xhi > khi) || (xhi == khi && xlo >= klo)
+    // The inverted form built below compares the high half against zero, which
+    // is only equivalent when khi == 0; bail out for constants with a nonzero
+    // high half.
+    if (!K.lshr(64).isZero())
+      return SDValue();
+    SDValue ZeroHi = DAG.getConstant(0, DL, LHSHi.getValueType());
+
+    // With khi == 0:  x > K  <=>  !(xhi == 0 && xlo <= klo), and likewise
+    // x >= K <=> !(xhi == 0 && xlo < klo).
+    ISD::CondCode LoCC = (CCCode == ISD::SETUGT) ? ISD::SETULE : ISD::SETULT;
+
+    SDValue HiEq = DAG.getSetCC(DL, VT, LHSHi, ZeroHi, ISD::SETEQ);
+    SDValue LoCmp = DAG.getSetCC(DL, VT, LHSLo, RHSLo, LoCC);
+    SDValue Tree = DAG.getNode(ISD::AND, DL, VT, HiEq, LoCmp);
+
+    SDValue Zero = DAG.getConstant(0, DL, VT);
+    return DAG.getSetCC(DL, VT, Tree, Zero, ISD::SETEQ);
+  }
+
+  if (CCCode == ISD::SETULT || CCCode == ISD::SETULE) {
+    // x <  K <=> (xhi < khi) || (xhi == khi && xlo <  klo)
+    // x <= K <=> (xhi < khi) || (xhi == khi && xlo <= klo)
+    SDValue HiCmp = DAG.getSetCC(DL, VT, LHSHi, RHSHi, ISD::SETULT);
+    SDValue HiEq = DAG.getSetCC(DL, VT, LHSHi, RHSHi, ISD::SETEQ);
+    SDValue LoCmp = DAG.getSetCC(DL, VT, LHSLo, RHSLo, CCCode);
+
+    SDValue LoAnd = DAG.getNode(ISD::AND, DL, HiEq.getValueType(), HiEq, LoCmp);
+    SDValue Tree = DAG.getNode(ISD::OR, DL, VT, HiCmp, LoAnd);
+
+    SDValue Zero = DAG.getConstant(0, DL, VT);
+    return DAG.getSetCC(DL, VT, Tree, Zero, ISD::SETNE);
+  }
+  return SDValue();
+}
+
static SDValue performSETCCCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
@@ -27224,7 +27319,10 @@ static SDValue performSETCCCombine(SDNode *N,
ISD::isConstantSplatVector(LHS.getNode(), SplatLHSVal) &&
SplatLHSVal.isOne())
return DAG.getSetCC(DL, VT, DAG.getConstant(0, DL, CmpVT), RHS, ISD::SETGE);
-
+
+ if (SDValue V = performExpandedI128CmpCombine(N, DCI, DAG, Cond))
+ return V;
+
return SDValue();
}
diff --git a/llvm/test/CodeGen/AArch64/i128-imm-compare-ccmp.ll b/llvm/test/CodeGen/AArch64/i128-imm-compare-ccmp.ll
new file mode 100644
index 0000000000000..7422cbbfd19a0
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/i128-imm-compare-ccmp.ll
@@ -0,0 +1,61 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -O3 -o - < %s | FileCheck %s
+
+define i1 @eq_imm(i128 %x) {
+; CHECK-LABEL: eq_imm:
+; CHECK: cmp x0, #5
+; CHECK-NEXT: ccmp x1, #0, #0, eq
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ret
+ %cmp = icmp eq i128 %x, 5
+ ret i1 %cmp
+}
+
+define i1 @ne_imm(i128 %x) {
+; CHECK-LABEL: ne_imm:
+; CHECK: cmp x0, #5
+; CHECK-NEXT: ccmp x1, #0, #0, eq
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+ %cmp = icmp ne i128 %x, 5
+ ret i1 %cmp
+}
+
+define i1 @ult_imm(i128 %x) {
+; CHECK-LABEL: ult_imm:
+; CHECK: cmp x1, #0
+; CHECK-NEXT: ccmp x0, #5, #2, eq
+; CHECK-NEXT: cset w0, {{cc|lo}}
+; CHECK-NEXT: ret
+ %cmp = icmp ult i128 %x, 5
+ ret i1 %cmp
+}
+
+define i1 @ule_imm(i128 %x) {
+; CHECK-LABEL: ule_imm:
+; CHECK: cmp x1, #0
+; CHECK-NEXT: ccmp x0, #6, #2, eq
+; CHECK-NEXT: cset w0, {{cc|lo}}
+; CHECK-NEXT: ret
+ %cmp = icmp ule i128 %x, 5
+ ret i1 %cmp
+}
+
+define i1 @ugt_imm(i128 %x) {
+; CHECK-LABEL: ugt_imm:
+; CHECK: cmp x1, #0
+; CHECK-NEXT: ccmp x0, #5, #2, eq
+; CHECK-NEXT: cset w0, hi
+; CHECK-NEXT: ret
+ %cmp = icmp ugt i128 %x, 5
+ ret i1 %cmp
+}
+
+define i1 @uge_imm(i128 %x) {
+; CHECK-LABEL: uge_imm:
+; CHECK: cmp x1, #0
+; CHECK-NEXT: ccmp x0, #4, #2, eq
+; CHECK-NEXT: cset w0, hi
+; CHECK-NEXT: ret
+ %cmp = icmp uge i128 %x, 5
+ ret i1 %cmp
+}
\ No newline at end of file
diff --git a/llvm/test/CodeGen/AArch64/isinf.ll b/llvm/test/CodeGen/AArch64/isinf.ll
index e8bbaf96395f0..59e1ab44f16b4 100644
--- a/llvm/test/CodeGen/AArch64/isinf.ll
+++ b/llvm/test/CodeGen/AArch64/isinf.ll
@@ -58,11 +58,11 @@ define i32 @replace_isinf_call_f128(fp128 %x) {
; CHECK: // %bb.0:
; CHECK-NEXT: str q0, [sp, #-16]!
; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: ldp x9, x8, [sp], #16
-; CHECK-NEXT: and x8, x8, #0x7fffffffffffffff
-; CHECK-NEXT: eor x8, x8, #0x7fff000000000000
-; CHECK-NEXT: orr x8, x9, x8
-; CHECK-NEXT: cmp x8, #0
+; CHECK-NEXT: mov x8, #-562949953421312
+; CHECK-NEXT: ldp x10, x9, [sp], #16
+; CHECK-NEXT: lsl x9, x9, #1
+; CHECK-NEXT: cmp x10, #0
+; CHECK-NEXT: ccmp x8, x9, #0, eq
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%abs = tail call fp128 @llvm.fabs.f128(fp128 %x)
More information about the llvm-commits
mailing list