[llvm] [AArch64] optimize lowering for icmp on i128 when RHS is an immediate (PR #181822)
Cheng Lingfei via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 17 05:46:22 PST 2026
https://github.com/clingfei created https://github.com/llvm/llvm-project/pull/181822
Optimize the predicates eq, ne, ult, ule, ugt, and uge when the i128 RHS is a constant immediate, splitting the compare into 64-bit halves so AArch64 can select CMP+CCMP.
Closes https://github.com/llvm/llvm-project/issues/161273.
>From 6c5b2bd5fcdb9df3ea28ea46f42a3538e1c36c9d Mon Sep 17 00:00:00 2001
From: clingfei <1599101385 at qq.com>
Date: Tue, 17 Feb 2026 21:22:02 +0800
Subject: [PATCH] [AArch64] optimize lowering for icmp on i128 when RHS is an
immediate
---
.../Target/AArch64/AArch64ISelLowering.cpp | 100 +++++++++++++++++-
.../CodeGen/AArch64/i128-imm-compare-ccmp.ll | 61 +++++++++++
llvm/test/CodeGen/AArch64/isinf.ll | 10 +-
3 files changed, 165 insertions(+), 6 deletions(-)
create mode 100644 llvm/test/CodeGen/AArch64/i128-imm-compare-ccmp.ll
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 66c22db0491d1..7b0f79a4b0a9f 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -27145,6 +27145,101 @@ performVecReduceBitwiseCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
return SDValue();
}
+// Split a scalar 128-bit integer value into its low and high 64-bit halves.
+// Returns false (leaving Lo/Hi untouched) when V is not a 128-bit scalar
+// integer.
+static bool splitI128ValueToI64Halves(SDValue V, SelectionDAG &DAG,
+                                      const SDLoc &DL, SDValue &Lo,
+                                      SDValue &Hi) {
+  EVT VT = V.getValueType();
+  if (!VT.isInteger() || VT.getFixedSizeInBits() != 128)
+    return false;
+  Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, V);
+  SDValue HiShift = DAG.getNode(ISD::SRL, DL, VT, V,
+                                DAG.getShiftAmountConstant(64, VT, DL));
+  Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, HiShift);
+  return true;
+}
+
+// Combine a scalar i128 SETCC against a constant RHS into compares on the two
+// 64-bit halves, so AArch64 can select a CMP+CCMP+CSET sequence instead of
+// materialising the wide subtraction.  Handles eq/ne and the unsigned
+// orderings ult/ule/ugt/uge; returns SDValue() when the pattern does not
+// apply.
+static SDValue
+performExpandedI128CmpCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
+                              SelectionDAG &DAG, ISD::CondCode CCCode) {
+  SDValue LHS = N->getOperand(0);
+  SDValue RHS = N->getOperand(1);
+  if (N->hasOneUse()) {
+    // Leave branch conditions alone; BRCOND lowering already produces a good
+    // sequence (see fshl_i128 in funnel-shift.ll).
+    if (N->user_begin()->getOpcode() == ISD::BRCOND)
+      return SDValue();
+  }
+  // Keep the dedicated shift+cmp-zero combine opportunities for wide integers.
+  // Canonicalizing these into split compares here tends to introduce an extra
+  // EXTR on AArch64 (see icmp-shift-opt.ll).
+  if ((CCCode == ISD::SETEQ || CCCode == ISD::SETNE) && isNullConstant(RHS) &&
+      (LHS.getOpcode() == ISD::SRL || LHS.getOpcode() == ISD::SHL))
+    return SDValue();
+
+  // Only handle scalar i128 compares whose LHS is a two-operand node and whose
+  // RHS is a constant.  A constant LHS would have been canonicalized/folded
+  // already, so bail out on it.
+  if (LHS.getValueType().isScalableVT() || RHS.getValueType().isScalableVT() ||
+      LHS.getNumOperands() != 2 ||
+      !(LHS.getValueType().isInteger() && LHS.getValueSizeInBits() == 128) ||
+      !(RHS.getValueType().isInteger() && RHS.getValueSizeInBits() == 128))
+    return SDValue();
+  if (isa<ConstantSDNode>(LHS.getNode()))
+    return SDValue();
+  auto *ConstRHS = dyn_cast<ConstantSDNode>(RHS.getNode());
+  if (!ConstRHS)
+    return SDValue();
+
+  SDLoc DL(N);
+  EVT VT = N->getValueType(0);
+  const APInt &K = ConstRHS->getAPIntValue();
+  SDValue RHSLo = DAG.getConstant(K.trunc(64), DL, MVT::i64);
+  SDValue RHSHi = DAG.getConstant(K.lshr(64).trunc(64), DL, MVT::i64);
+  SDValue LHSLo, LHSHi;
+  if (!splitI128ValueToI64Halves(LHS, DAG, DL, LHSLo, LHSHi))
+    return SDValue();
+
+  if (CCCode == ISD::SETEQ || CCCode == ISD::SETNE) {
+    // x ==/!= K  <=>  (xlo ==/!= klo) AND/OR (xhi ==/!= khi).
+    // If either half compare would constant-fold, leave it to the generic
+    // combiner.
+    SDValue LoCmpF = DAG.FoldSetCC(VT, LHSLo, RHSLo, CCCode, DL);
+    SDValue HiCmpF = DAG.FoldSetCC(VT, LHSHi, RHSHi, CCCode, DL);
+    if (isa_and_nonnull<ConstantSDNode>(LoCmpF.getNode()) ||
+        isa_and_nonnull<ConstantSDNode>(HiCmpF.getNode()))
+      return SDValue();
+
+    SDValue LoCmp = DAG.getSetCC(DL, VT, LHSLo, RHSLo, CCCode);
+    SDValue HiCmp = DAG.getSetCC(DL, VT, LHSHi, RHSHi, CCCode);
+    unsigned Opcode = (CCCode == ISD::SETEQ) ? ISD::AND : ISD::OR;
+    return DAG.getNode(Opcode, DL, LoCmp.getValueType(), LoCmp, HiCmp);
+  }
+
+  if (CCCode == ISD::SETUGT || CCCode == ISD::SETUGE) {
+    // x >  K <=> (xhi > khi) || (xhi == khi && xlo >  klo)
+    // x >= K <=> (xhi > khi) || (xhi == khi && xlo >= klo)
+    // The inverted form built below compares the high half against zero, which
+    // is only equivalent when khi == 0; bail out for constants with a nonzero
+    // high half.
+    if (!K.lshr(64).isZero())
+      return SDValue();
+    SDValue ZeroHi = DAG.getConstant(0, DL, LHSHi.getValueType());
+
+    // With khi == 0:  x > K  <=>  !(xhi == 0 && xlo <= klo), and likewise
+    // x >= K <=> !(xhi == 0 && xlo < klo).
+    ISD::CondCode LoCC = (CCCode == ISD::SETUGT) ? ISD::SETULE : ISD::SETULT;
+
+    SDValue HiEq = DAG.getSetCC(DL, VT, LHSHi, ZeroHi, ISD::SETEQ);
+    SDValue LoCmp = DAG.getSetCC(DL, VT, LHSLo, RHSLo, LoCC);
+    SDValue Tree = DAG.getNode(ISD::AND, DL, VT, HiEq, LoCmp);
+
+    SDValue Zero = DAG.getConstant(0, DL, VT);
+    return DAG.getSetCC(DL, VT, Tree, Zero, ISD::SETEQ);
+  }
+
+  if (CCCode == ISD::SETULT || CCCode == ISD::SETULE) {
+    // x <  K <=> (xhi < khi) || (xhi == khi && xlo <  klo)
+    // x <= K <=> (xhi < khi) || (xhi == khi && xlo <= klo)
+    SDValue HiCmp = DAG.getSetCC(DL, VT, LHSHi, RHSHi, ISD::SETULT);
+    SDValue HiEq = DAG.getSetCC(DL, VT, LHSHi, RHSHi, ISD::SETEQ);
+    SDValue LoCmp = DAG.getSetCC(DL, VT, LHSLo, RHSLo, CCCode);
+
+    SDValue LoAnd = DAG.getNode(ISD::AND, DL, HiEq.getValueType(), HiEq, LoCmp);
+    SDValue Tree = DAG.getNode(ISD::OR, DL, VT, HiCmp, LoAnd);
+
+    SDValue Zero = DAG.getConstant(0, DL, VT);
+    return DAG.getSetCC(DL, VT, Tree, Zero, ISD::SETNE);
+  }
+  return SDValue();
+}
+
static SDValue performSETCCCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
@@ -27224,7 +27319,10 @@ static SDValue performSETCCCombine(SDNode *N,
ISD::isConstantSplatVector(LHS.getNode(), SplatLHSVal) &&
SplatLHSVal.isOne())
return DAG.getSetCC(DL, VT, DAG.getConstant(0, DL, CmpVT), RHS, ISD::SETGE);
-
+
+ if (SDValue V = performExpandedI128CmpCombine(N, DCI, DAG, Cond))
+ return V;
+
return SDValue();
}
diff --git a/llvm/test/CodeGen/AArch64/i128-imm-compare-ccmp.ll b/llvm/test/CodeGen/AArch64/i128-imm-compare-ccmp.ll
new file mode 100644
index 0000000000000..7422cbbfd19a0
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/i128-imm-compare-ccmp.ll
@@ -0,0 +1,61 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -O3 -o - < %s | FileCheck %s
+
+define i1 @eq_imm(i128 %x) {
+; CHECK-LABEL: eq_imm:
+; CHECK: cmp x0, #5
+; CHECK-NEXT: ccmp x1, #0, #0, eq
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ret
+ %cmp = icmp eq i128 %x, 5
+ ret i1 %cmp
+}
+
+define i1 @ne_imm(i128 %x) {
+; CHECK-LABEL: ne_imm:
+; CHECK: cmp x0, #5
+; CHECK-NEXT: ccmp x1, #0, #0, eq
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+ %cmp = icmp ne i128 %x, 5
+ ret i1 %cmp
+}
+
+define i1 @ult_imm(i128 %x) {
+; CHECK-LABEL: ult_imm:
+; CHECK: cmp x1, #0
+; CHECK-NEXT: ccmp x0, #5, #2, eq
+; CHECK-NEXT: cset w0, {{cc|lo}}
+; CHECK-NEXT: ret
+ %cmp = icmp ult i128 %x, 5
+ ret i1 %cmp
+}
+
+define i1 @ule_imm(i128 %x) {
+; CHECK-LABEL: ule_imm:
+; CHECK: cmp x1, #0
+; CHECK-NEXT: ccmp x0, #6, #2, eq
+; CHECK-NEXT: cset w0, {{cc|lo}}
+; CHECK-NEXT: ret
+ %cmp = icmp ule i128 %x, 5
+ ret i1 %cmp
+}
+
+define i1 @ugt_imm(i128 %x) {
+; CHECK-LABEL: ugt_imm:
+; CHECK: cmp x1, #0
+; CHECK-NEXT: ccmp x0, #5, #2, eq
+; CHECK-NEXT: cset w0, hi
+; CHECK-NEXT: ret
+ %cmp = icmp ugt i128 %x, 5
+ ret i1 %cmp
+}
+
+define i1 @uge_imm(i128 %x) {
+; CHECK-LABEL: uge_imm:
+; CHECK: cmp x1, #0
+; CHECK-NEXT: ccmp x0, #4, #2, eq
+; CHECK-NEXT: cset w0, hi
+; CHECK-NEXT: ret
+ %cmp = icmp uge i128 %x, 5
+ ret i1 %cmp
+}
\ No newline at end of file
diff --git a/llvm/test/CodeGen/AArch64/isinf.ll b/llvm/test/CodeGen/AArch64/isinf.ll
index e8bbaf96395f0..59e1ab44f16b4 100644
--- a/llvm/test/CodeGen/AArch64/isinf.ll
+++ b/llvm/test/CodeGen/AArch64/isinf.ll
@@ -58,11 +58,11 @@ define i32 @replace_isinf_call_f128(fp128 %x) {
; CHECK: // %bb.0:
; CHECK-NEXT: str q0, [sp, #-16]!
; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: ldp x9, x8, [sp], #16
-; CHECK-NEXT: and x8, x8, #0x7fffffffffffffff
-; CHECK-NEXT: eor x8, x8, #0x7fff000000000000
-; CHECK-NEXT: orr x8, x9, x8
-; CHECK-NEXT: cmp x8, #0
+; CHECK-NEXT: mov x8, #-562949953421312
+; CHECK-NEXT: ldp x10, x9, [sp], #16
+; CHECK-NEXT: lsl x9, x9, #1
+; CHECK-NEXT: cmp x10, #0
+; CHECK-NEXT: ccmp x8, x9, #0, eq
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%abs = tail call fp128 @llvm.fabs.f128(fp128 %x)
More information about the llvm-commits
mailing list