[llvm] [ARM] Have custom lowering for ucmp and scmp (PR #149315)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 17 07:06:20 PDT 2025
https://github.com/AZero13 created https://github.com/llvm/llvm-project/pull/149315
Currently limited to non-Thumb targets. For i32, the comparison can be done in 3 instructions, using subs to set the flags first.
From dd3d30a9097f7452e276a8e730a66850438fbd0e Mon Sep 17 00:00:00 2001
From: Rose <gfunni234 at gmail.com>
Date: Thu, 17 Jul 2025 10:05:46 -0400
Subject: [PATCH] [ARM] Have custom lowering for ucmp and scmp
Currently limited to non-Thumb targets. For i32, the comparison can be done in 3 instructions, using subs to set the flags first.
---
llvm/lib/Target/ARM/ARMISelLowering.cpp | 145 ++++++++++++++++++++++++
llvm/lib/Target/ARM/ARMISelLowering.h | 2 +
llvm/test/CodeGen/ARM/scmp.ll | 48 ++++----
llvm/test/CodeGen/ARM/ucmp.ll | 36 ++----
4 files changed, 183 insertions(+), 48 deletions(-)
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 65d1c4e2d6515..9681eab17518b 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -802,6 +802,11 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
setOperationAction(ISD::BSWAP, VT, Expand);
}
+ if (!Subtarget->isThumb()) {
+ setOperationAction(ISD::SCMP, MVT::i32, Custom);
+ setOperationAction(ISD::UCMP, MVT::i32, Custom);
+ }
+
setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
@@ -10614,6 +10619,142 @@ SDValue ARMTargetLowering::LowerFP_TO_BF16(SDValue Op,
return DAG.getBitcast(MVT::i32, Res);
}
+SDValue ARMTargetLowering::LowerSCMP(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+
+ // Lower the signed three-way compare of LHS and RHS to this ARM sequence:
+ // subs r0, r0, r1 ; r0 = LHS - RHS, setting the flags
+ // movgt r0, #1 ; if LHS > RHS (signed), set result to 1
+ // mvnlt r0, #0 ; if LHS < RHS (signed), set result to -1 (mvn #0 = -1)
+ // ; if LHS == RHS, the subs result is already 0
+
+ // Optimization: if RHS is a negation (0 - X), compute LHS + X with ADDC
+ // instead of LHS - (0 - X) with SUBC, so the compare uses X directly.
+ SDValue SubResult, Flags;
+ bool CanUseAdd = false;
+ SDValue AddOperand;
+
+ // Match RHS of the form (0 - X).
+ if (RHS.getOpcode() == ISD::SUB) {
+ SDValue SubLHS = RHS.getOperand(0);
+ SDValue SubRHS = RHS.getOperand(1);
+
+ // The left operand of the subtraction must be the constant zero.
+ if (isNullConstant(SubLHS)) {
+ // For SCMP: valid only if 0 - X cannot overflow, i.e. X is never INT_MIN.
+ if (RHS->getFlags().hasNoSignedWrap() || !DAG.computeKnownBits(SubRHS)
+ .getSignedMinValue()
+ .isMinSignedValue()) {
+ CanUseAdd = true;
+ AddOperand = SubRHS; // Use X itself, so we compute LHS + X instead of
+ // LHS - (0 - X)
+ }
+ }
+ }
+
+ if (CanUseAdd) {
+ // Use ADDC: LHS + AddOperand (where RHS was 0 - AddOperand)
+ SDValue AddWithFlags = DAG.getNode(
+ ARMISD::ADDC, dl, DAG.getVTList(MVT::i32, FlagsVT), LHS, AddOperand);
+ SubResult = AddWithFlags.getValue(0); // Value 0: the i32 sum
+ Flags = AddWithFlags.getValue(1); // Value 1: the flags set by ADDS
+ } else {
+ // Use ARMISD::SUBC to generate a SUBS instruction (subtract, setting flags)
+ SDValue SubWithFlags = DAG.getNode(
+ ARMISD::SUBC, dl, DAG.getVTList(MVT::i32, FlagsVT), LHS, RHS);
+ SubResult = SubWithFlags.getValue(0); // Value 0: the i32 difference
+ Flags = SubWithFlags.getValue(1); // Value 1: the flags set by SUBS
+ }
+
+ // Constants selected by the two conditional moves below
+ SDValue One = DAG.getConstant(1, dl, MVT::i32);
+ SDValue MinusOne = DAG.getConstant(0xFFFFFFFF, dl, MVT::i32);
+
+ // movgt: select 1 on signed greater-than, otherwise keep SubResult
+ SDValue GTCond = DAG.getConstant(ARMCC::GT, dl, MVT::i32);
+ SDValue Result1 =
+ DAG.getNode(ARMISD::CMOV, dl, MVT::i32, SubResult, One, GTCond, Flags);
+
+ // mvnlt: select -1 on signed less-than (mvn #0), otherwise keep Result1
+ SDValue LTCond = DAG.getConstant(ARMCC::LT, dl, MVT::i32);
+ SDValue Result2 =
+ DAG.getNode(ARMISD::CMOV, dl, MVT::i32, Result1, MinusOne, LTCond, Flags);
+
+ if (Op.getValueType() != MVT::i32)
+ Result2 = DAG.getSExtOrTrunc(Result2, dl, Op.getValueType());
+
+ return Result2;
+}
+
+SDValue ARMTargetLowering::LowerUCMP(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+
+ // Lower the unsigned three-way compare of LHS and RHS to this ARM sequence:
+ // subs r0, r0, r1 ; r0 = LHS - RHS, setting the flags
+ // movhi r0, #1 ; if LHS > RHS (unsigned), set result to 1
+ // mvnlo r0, #0 ; if LHS < RHS (unsigned), set result to -1
+ // ; if LHS == RHS, the subs result is already 0
+
+ // Optimization: if RHS is a negation (0 - X), compute LHS + X with ADDC
+ // instead of LHS - (0 - X) with SUBC, so the compare uses X directly.
+ SDValue SubResult, Flags;
+ bool CanUseAdd = false;
+ SDValue AddOperand;
+
+ // Match RHS of the form (0 - X).
+ if (RHS.getOpcode() == ISD::SUB) {
+ SDValue SubLHS = RHS.getOperand(0);
+ SDValue SubRHS = RHS.getOperand(1);
+
+ // The left operand of the subtraction must be the constant zero.
+ if (isNullConstant(SubLHS)) {
+ // For UCMP: valid only if X is never zero (ADDS/SUBS carry differs at X == 0)
+ if (DAG.isKnownNeverZero(SubRHS)) {
+ CanUseAdd = true;
+ AddOperand = SubRHS; // Use X itself, so we compute LHS + X instead of
+ // LHS - (0 - X)
+ }
+ }
+ }
+
+ if (CanUseAdd) {
+ // Use ADDC: LHS + AddOperand (where RHS was 0 - AddOperand)
+ SDValue AddWithFlags = DAG.getNode(
+ ARMISD::ADDC, dl, DAG.getVTList(MVT::i32, FlagsVT), LHS, AddOperand);
+ SubResult = AddWithFlags.getValue(0); // Value 0: the i32 sum
+ Flags = AddWithFlags.getValue(1); // Value 1: the flags set by ADDS
+ } else {
+ // Use ARMISD::SUBC to generate a SUBS instruction (subtract, setting flags)
+ SDValue SubWithFlags = DAG.getNode(
+ ARMISD::SUBC, dl, DAG.getVTList(MVT::i32, FlagsVT), LHS, RHS);
+ SubResult = SubWithFlags.getValue(0); // Value 0: the i32 difference
+ Flags = SubWithFlags.getValue(1); // Value 1: the flags set by SUBS
+ }
+
+ // Constants selected by the two conditional moves below
+ SDValue One = DAG.getConstant(1, dl, MVT::i32);
+ SDValue MinusOne = DAG.getConstant(0xFFFFFFFF, dl, MVT::i32);
+
+ // movhi: select 1 on unsigned higher, otherwise keep SubResult
+ SDValue HICond = DAG.getConstant(ARMCC::HI, dl, MVT::i32);
+ SDValue Result1 =
+ DAG.getNode(ARMISD::CMOV, dl, MVT::i32, SubResult, One, HICond, Flags);
+
+ // mvnlo: select -1 on unsigned lower, otherwise keep Result1
+ SDValue LOCond = DAG.getConstant(ARMCC::LO, dl, MVT::i32);
+ SDValue Result2 =
+ DAG.getNode(ARMISD::CMOV, dl, MVT::i32, Result1, MinusOne, LOCond, Flags);
+
+ if (Op.getValueType() != MVT::i32)
+ Result2 = DAG.getSExtOrTrunc(Result2, dl, Op.getValueType());
+
+ return Result2;
+}
+
SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
LLVM_DEBUG(dbgs() << "Lowering node: "; Op.dump());
switch (Op.getOpcode()) {
@@ -10742,6 +10883,10 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::FP_TO_BF16:
return LowerFP_TO_BF16(Op, DAG);
case ARMISD::WIN__DBZCHK: return SDValue();
+ case ISD::SCMP:
+ return LowerSCMP(Op, DAG);
+ case ISD::UCMP:
+ return LowerUCMP(Op, DAG);
}
}
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index 5f4aef55b22c9..1cb7edd041b32 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -903,6 +903,8 @@ class VectorType;
void LowerLOAD(SDNode *N, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const;
SDValue LowerFP_TO_BF16(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSCMP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerUCMP(SDValue Op, SelectionDAG &DAG) const;
Register getRegisterByName(const char* RegName, LLT VT,
const MachineFunction &MF) const override;
diff --git a/llvm/test/CodeGen/ARM/scmp.ll b/llvm/test/CodeGen/ARM/scmp.ll
index 6e493c993751c..9189aee6aaf43 100644
--- a/llvm/test/CodeGen/ARM/scmp.ll
+++ b/llvm/test/CodeGen/ARM/scmp.ll
@@ -4,12 +4,9 @@
define i8 @scmp_8_8(i8 signext %x, i8 signext %y) nounwind {
; CHECK-LABEL: scmp_8_8:
; CHECK: @ %bb.0:
-; CHECK-NEXT: cmp r0, r1
-; CHECK-NEXT: mov r0, #0
-; CHECK-NEXT: mov r2, #0
-; CHECK-NEXT: movwlt r0, #1
-; CHECK-NEXT: movwgt r2, #1
-; CHECK-NEXT: sub r0, r2, r0
+; CHECK-NEXT: subs r0, r0, r1
+; CHECK-NEXT: movwgt r0, #1
+; CHECK-NEXT: mvnlt r0, #0
; CHECK-NEXT: bx lr
%1 = call i8 @llvm.scmp(i8 %x, i8 %y)
ret i8 %1
@@ -18,12 +15,9 @@ define i8 @scmp_8_8(i8 signext %x, i8 signext %y) nounwind {
define i8 @scmp_8_16(i16 signext %x, i16 signext %y) nounwind {
; CHECK-LABEL: scmp_8_16:
; CHECK: @ %bb.0:
-; CHECK-NEXT: cmp r0, r1
-; CHECK-NEXT: mov r0, #0
-; CHECK-NEXT: mov r2, #0
-; CHECK-NEXT: movwlt r0, #1
-; CHECK-NEXT: movwgt r2, #1
-; CHECK-NEXT: sub r0, r2, r0
+; CHECK-NEXT: subs r0, r0, r1
+; CHECK-NEXT: movwgt r0, #1
+; CHECK-NEXT: mvnlt r0, #0
; CHECK-NEXT: bx lr
%1 = call i8 @llvm.scmp(i16 %x, i16 %y)
ret i8 %1
@@ -32,12 +26,9 @@ define i8 @scmp_8_16(i16 signext %x, i16 signext %y) nounwind {
define i8 @scmp_8_32(i32 %x, i32 %y) nounwind {
; CHECK-LABEL: scmp_8_32:
; CHECK: @ %bb.0:
-; CHECK-NEXT: cmp r0, r1
-; CHECK-NEXT: mov r0, #0
-; CHECK-NEXT: mov r2, #0
-; CHECK-NEXT: movwlt r0, #1
-; CHECK-NEXT: movwgt r2, #1
-; CHECK-NEXT: sub r0, r2, r0
+; CHECK-NEXT: subs r0, r0, r1
+; CHECK-NEXT: movwgt r0, #1
+; CHECK-NEXT: mvnlt r0, #0
; CHECK-NEXT: bx lr
%1 = call i8 @llvm.scmp(i32 %x, i32 %y)
ret i8 %1
@@ -92,17 +83,26 @@ define i8 @scmp_8_128(i128 %x, i128 %y) nounwind {
define i32 @scmp_32_32(i32 %x, i32 %y) nounwind {
; CHECK-LABEL: scmp_32_32:
; CHECK: @ %bb.0:
-; CHECK-NEXT: cmp r0, r1
-; CHECK-NEXT: mov r0, #0
-; CHECK-NEXT: mov r2, #0
-; CHECK-NEXT: movwlt r0, #1
-; CHECK-NEXT: movwgt r2, #1
-; CHECK-NEXT: sub r0, r2, r0
+; CHECK-NEXT: subs r0, r0, r1
+; CHECK-NEXT: movwgt r0, #1
+; CHECK-NEXT: mvnlt r0, #0
; CHECK-NEXT: bx lr
%1 = call i32 @llvm.scmp(i32 %x, i32 %y)
ret i32 %1
}
+define i32 @scmp_neg(i32 %x, i32 %y) nounwind {
+; CHECK-LABEL: scmp_neg:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: adds r0, r0, r1
+; CHECK-NEXT: movwgt r0, #1
+; CHECK-NEXT: mvnlt r0, #0
+; CHECK-NEXT: bx lr
+ %yy = sub nsw i32 0, %y
+ %1 = call i32 @llvm.scmp(i32 %x, i32 %yy)
+ ret i32 %1
+}
+
define i32 @scmp_32_64(i64 %x, i64 %y) nounwind {
; CHECK-LABEL: scmp_32_64:
; CHECK: @ %bb.0:
diff --git a/llvm/test/CodeGen/ARM/ucmp.ll b/llvm/test/CodeGen/ARM/ucmp.ll
index ad4af534ee8fe..bb0201454d1ea 100644
--- a/llvm/test/CodeGen/ARM/ucmp.ll
+++ b/llvm/test/CodeGen/ARM/ucmp.ll
@@ -4,12 +4,9 @@
define i8 @ucmp_8_8(i8 zeroext %x, i8 zeroext %y) nounwind {
; CHECK-LABEL: ucmp_8_8:
; CHECK: @ %bb.0:
-; CHECK-NEXT: cmp r0, r1
-; CHECK-NEXT: mov r0, #0
-; CHECK-NEXT: mov r2, #0
-; CHECK-NEXT: movwlo r0, #1
-; CHECK-NEXT: movwhi r2, #1
-; CHECK-NEXT: sub r0, r2, r0
+; CHECK-NEXT: subs r0, r0, r1
+; CHECK-NEXT: movwhi r0, #1
+; CHECK-NEXT: mvnlo r0, #0
; CHECK-NEXT: bx lr
%1 = call i8 @llvm.ucmp(i8 %x, i8 %y)
ret i8 %1
@@ -18,12 +15,9 @@ define i8 @ucmp_8_8(i8 zeroext %x, i8 zeroext %y) nounwind {
define i8 @ucmp_8_16(i16 zeroext %x, i16 zeroext %y) nounwind {
; CHECK-LABEL: ucmp_8_16:
; CHECK: @ %bb.0:
-; CHECK-NEXT: cmp r0, r1
-; CHECK-NEXT: mov r0, #0
-; CHECK-NEXT: mov r2, #0
-; CHECK-NEXT: movwlo r0, #1
-; CHECK-NEXT: movwhi r2, #1
-; CHECK-NEXT: sub r0, r2, r0
+; CHECK-NEXT: subs r0, r0, r1
+; CHECK-NEXT: movwhi r0, #1
+; CHECK-NEXT: mvnlo r0, #0
; CHECK-NEXT: bx lr
%1 = call i8 @llvm.ucmp(i16 %x, i16 %y)
ret i8 %1
@@ -32,12 +26,9 @@ define i8 @ucmp_8_16(i16 zeroext %x, i16 zeroext %y) nounwind {
define i8 @ucmp_8_32(i32 %x, i32 %y) nounwind {
; CHECK-LABEL: ucmp_8_32:
; CHECK: @ %bb.0:
-; CHECK-NEXT: cmp r0, r1
-; CHECK-NEXT: mov r0, #0
-; CHECK-NEXT: mov r2, #0
-; CHECK-NEXT: movwlo r0, #1
-; CHECK-NEXT: movwhi r2, #1
-; CHECK-NEXT: sub r0, r2, r0
+; CHECK-NEXT: subs r0, r0, r1
+; CHECK-NEXT: movwhi r0, #1
+; CHECK-NEXT: mvnlo r0, #0
; CHECK-NEXT: bx lr
%1 = call i8 @llvm.ucmp(i32 %x, i32 %y)
ret i8 %1
@@ -92,12 +83,9 @@ define i8 @ucmp_8_128(i128 %x, i128 %y) nounwind {
define i32 @ucmp_32_32(i32 %x, i32 %y) nounwind {
; CHECK-LABEL: ucmp_32_32:
; CHECK: @ %bb.0:
-; CHECK-NEXT: cmp r0, r1
-; CHECK-NEXT: mov r0, #0
-; CHECK-NEXT: mov r2, #0
-; CHECK-NEXT: movwlo r0, #1
-; CHECK-NEXT: movwhi r2, #1
-; CHECK-NEXT: sub r0, r2, r0
+; CHECK-NEXT: subs r0, r0, r1
+; CHECK-NEXT: movwhi r0, #1
+; CHECK-NEXT: mvnlo r0, #0
; CHECK-NEXT: bx lr
%1 = call i32 @llvm.ucmp(i32 %x, i32 %y)
ret i32 %1
More information about the llvm-commits
mailing list