[llvm] [ARM] Have custom lowering for ucmp and scmp (PR #149315)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 7 07:49:34 PDT 2025
https://github.com/AZero13 updated https://github.com/llvm/llvm-project/pull/149315
>From 1e8912f583cd4805c6c09b18bacab97726e34258 Mon Sep 17 00:00:00 2001
From: AZero13 <gfunni234 at gmail.com>
Date: Mon, 4 Aug 2025 16:01:00 -0400
Subject: [PATCH] [ARM] Have custom lowering for ucmp and scmp
Lower i32 scmp/ucmp in 3 steps, using subs to set the flags initially. Thumb1 is only handled for ucmp at the moment (via a subs/sbc sequence), and v8.1-M Mainline targets keep the existing lowering.
---
llvm/lib/Target/ARM/ARMISelLowering.cpp | 140 +++++++
llvm/lib/Target/ARM/ARMISelLowering.h | 3 +
llvm/test/CodeGen/ARM/scmp.ll | 48 +--
llvm/test/CodeGen/ARM/ucmp.ll | 36 +-
llvm/test/CodeGen/Thumb/scmp.ll | 489 ++++++++++++++++++------
llvm/test/CodeGen/Thumb/ucmp.ll | 445 +++++++++++++++------
6 files changed, 893 insertions(+), 268 deletions(-)
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 936625606e315..3bbd078dba6e8 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -807,6 +807,12 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
setOperationAction(ISD::BSWAP, VT, Expand);
}
+ if (!Subtarget->isThumb1Only() && !Subtarget->hasV8_1MMainlineOps())
+ setOperationAction(ISD::SCMP, MVT::i32, Custom);
+
+ if (!Subtarget->hasV8_1MMainlineOps())
+ setOperationAction(ISD::UCMP, MVT::i32, Custom);
+
setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
@@ -1639,6 +1645,10 @@ bool ARMTargetLowering::useSoftFloat() const {
return Subtarget->useSoftFloat();
}
+// Select-based expansion of three-way compares is requested for every
+// subtarget except Thumb1-only ones, and only for types of at most 32 bits.
+bool ARMTargetLowering::shouldExpandCmpUsingSelects(EVT VT) const {
+  if (Subtarget->isThumb1Only())
+    return false;
+  return VT.getSizeInBits() <= 32;
+}
+
// FIXME: It might make sense to define the representative register class as the
// nearest super-register that has a non-null superset. For example, DPR_VFP2 is
// a super-register of SPR, and DPR is a superset if DPR_VFP2. Consequently,
@@ -10617,6 +10627,133 @@ SDValue ARMTargetLowering::LowerFP_TO_BF16(SDValue Op,
return DAG.getBitcast(MVT::i32, Res);
}
+// Custom lowering for ISD::SCMP / ISD::UCMP: produces -1, 0 or 1 depending on
+// whether operand 0 is less than, equal to, or greater than operand 1.
+//
+// Thumb1-only UCMP is built from a SUBS/SBC sequence; every other case sets
+// the flags with a single SUBS (or ADDS when RHS is a suitable negation) and
+// then overwrites the difference with two conditional moves.
+SDValue ARMTargetLowering::LowerCMP(SDValue Op, SelectionDAG &DAG) const {
+  SDLoc dl(Op);
+  SDValue LHS = Op.getOperand(0);
+  SDValue RHS = Op.getOperand(1);
+  const EVT ResultVT = Op.getValueType();
+  // Every flag-setting node below yields an i32 value plus the flags.
+  SDVTList ValueAndFlags = DAG.getVTList(MVT::i32, FlagsVT);
+
+  // Determine if this is signed or unsigned comparison
+  const bool IsSigned = Op.getOpcode() == ISD::SCMP;
+
+  // Special case for Thumb1 UCMP only
+  if (!IsSigned && Subtarget->isThumb1Only()) {
+    // For Thumb unsigned comparison, use this sequence:
+    //   subs r2, r0, r1   ; r2 = LHS - RHS, sets flags
+    //   sbc  r2, r2       ; r2 = r2 - r2 - !carry = -(LHS < RHS)
+    //   cmp  r1, r0       ; compare RHS with LHS
+    //   sbc  r1, r1       ; r1 = r1 - r1 - !carry = -(RHS < LHS)
+    //   subs r0, r2, r1   ; r0 = r2 - r1 (final result)
+
+    // First subtraction: LHS - RHS
+    SDValue Sub1WithFlags =
+        DAG.getNode(ARMISD::SUBC, dl, ValueAndFlags, LHS, RHS);
+    SDValue Sub1Result = Sub1WithFlags.getValue(0);
+    SDValue Flags1 = Sub1WithFlags.getValue(1);
+
+    // SUBE: Sub1Result - Sub1Result - !carry
+    // This gives 0 if LHS >= RHS (unsigned), -1 if LHS < RHS (unsigned)
+    SDValue Sbc1 = DAG.getNode(ARMISD::SUBE, dl, ValueAndFlags, Sub1Result,
+                               Sub1Result, Flags1);
+    SDValue Sbc1Result = Sbc1.getValue(0);
+
+    // Second comparison: RHS vs LHS (reverse comparison)
+    SDValue CmpFlags = DAG.getNode(ARMISD::CMP, dl, FlagsVT, RHS, LHS);
+
+    // SUBE: RHS - RHS - !carry
+    // This gives 0 if RHS >= LHS (unsigned), -1 if RHS < LHS (unsigned),
+    // i.e. -1 exactly when LHS > RHS.
+    SDValue Sbc2 =
+        DAG.getNode(ARMISD::SUBE, dl, ValueAndFlags, RHS, RHS, CmpFlags);
+    SDValue Sbc2Result = Sbc2.getValue(0);
+
+    // Final subtraction: Sbc1Result - Sbc2Result (no flags needed).
+    // LHS < RHS: -1 - 0 = -1; LHS > RHS: 0 - (-1) = 1; equal: 0 - 0 = 0.
+    SDValue Result =
+        DAG.getNode(ISD::SUB, dl, MVT::i32, Sbc1Result, Sbc2Result);
+    if (ResultVT != MVT::i32)
+      Result = DAG.getSExtOrTrunc(Result, dl, ResultVT);
+
+    return Result;
+  }
+
+  // For the ARM assembly pattern (GT/LT for signed, HI/LO for unsigned):
+  //   subs  r0, r0, r1   ; subtract RHS from LHS and set flags
+  //   movgt r0, #1       ; if LHS > RHS, set result to 1
+  //   mvnlt r0, #0       ; if LHS < RHS, set result to -1
+  //                      ; if LHS == RHS, result remains 0 from the subs
+
+  // Optimization: if RHS is a subtraction against 0, i.e. (0 - X), use ADDC
+  // on X instead of SUBC on the negation.
+  unsigned Opcode = ARMISD::SUBC;
+  if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0))) {
+    SDValue Negated = RHS.getOperand(1);
+
+    bool CanUseAdd;
+    if (IsSigned) {
+      // For SCMP: only if X is known to never be INT_MIN (to avoid overflow)
+      CanUseAdd = RHS->getFlags().hasNoSignedWrap() ||
+                  !DAG.computeKnownBits(Negated)
+                       .getSignedMinValue()
+                       .isMinSignedValue();
+    } else {
+      // For UCMP: only if X is known to never be zero
+      CanUseAdd = DAG.isKnownNeverZero(Negated);
+    }
+
+    if (CanUseAdd) {
+      Opcode = ARMISD::ADDC;
+      RHS = Negated; // Do LHS + X instead of LHS - (0 - X)
+    }
+  }
+
+  // Generate the flag-setting operation: SUBC (SUBS) by default, or ADDC when
+  // the negation above was folded away.
+  SDValue OpWithFlags = DAG.getNode(Opcode, dl, ValueAndFlags, LHS, RHS);
+  SDValue OpResult = OpWithFlags.getValue(0); // The operation result
+  SDValue Flags = OpWithFlags.getValue(1);    // The flags
+
+  // Constants for conditional moves
+  SDValue One = DAG.getConstant(1, dl, MVT::i32);
+  SDValue MinusOne = DAG.getAllOnesConstant(dl, MVT::i32);
+
+  // Select condition codes based on signed vs unsigned
+  ARMCC::CondCodes GTCond = IsSigned ? ARMCC::GT : ARMCC::HI;
+  ARMCC::CondCodes LTCond = IsSigned ? ARMCC::LT : ARMCC::LO;
+
+  // First conditional move: if greater than, set to 1; otherwise keep the
+  // difference (which is 0 in the equal case).
+  SDValue GTCondValue = DAG.getConstant(GTCond, dl, MVT::i32);
+  SDValue Result1 = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, OpResult, One,
+                                GTCondValue, Flags);
+
+  // Second conditional move: if less than, set to -1
+  SDValue LTCondValue = DAG.getConstant(LTCond, dl, MVT::i32);
+  SDValue Result2 = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, Result1, MinusOne,
+                                LTCondValue, Flags);
+
+  if (ResultVT != MVT::i32)
+    Result2 = DAG.getSExtOrTrunc(Result2, dl, ResultVT);
+
+  return Result2;
+}
+
SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
LLVM_DEBUG(dbgs() << "Lowering node: "; Op.dump());
switch (Op.getOpcode()) {
@@ -10745,6 +10882,9 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::FP_TO_BF16:
return LowerFP_TO_BF16(Op, DAG);
case ARMISD::WIN__DBZCHK: return SDValue();
+ case ISD::UCMP:
+ case ISD::SCMP:
+ return LowerCMP(Op, DAG);
}
}
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index 825145d813fb1..a84a3cb001d0a 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -607,6 +607,8 @@ class VectorType;
bool preferZeroCompareBranch() const override { return true; }
+ bool shouldExpandCmpUsingSelects(EVT VT) const override;
+
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
bool hasAndNotCompare(SDValue V) const override {
@@ -904,6 +906,7 @@ class VectorType;
void LowerLOAD(SDNode *N, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const;
SDValue LowerFP_TO_BF16(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerCMP(SDValue Op, SelectionDAG &DAG) const;
Register getRegisterByName(const char* RegName, LLT VT,
const MachineFunction &MF) const override;
diff --git a/llvm/test/CodeGen/ARM/scmp.ll b/llvm/test/CodeGen/ARM/scmp.ll
index 6e493c993751c..9189aee6aaf43 100644
--- a/llvm/test/CodeGen/ARM/scmp.ll
+++ b/llvm/test/CodeGen/ARM/scmp.ll
@@ -4,12 +4,9 @@
define i8 @scmp_8_8(i8 signext %x, i8 signext %y) nounwind {
; CHECK-LABEL: scmp_8_8:
; CHECK: @ %bb.0:
-; CHECK-NEXT: cmp r0, r1
-; CHECK-NEXT: mov r0, #0
-; CHECK-NEXT: mov r2, #0
-; CHECK-NEXT: movwlt r0, #1
-; CHECK-NEXT: movwgt r2, #1
-; CHECK-NEXT: sub r0, r2, r0
+; CHECK-NEXT: subs r0, r0, r1
+; CHECK-NEXT: movwgt r0, #1
+; CHECK-NEXT: mvnlt r0, #0
; CHECK-NEXT: bx lr
%1 = call i8 @llvm.scmp(i8 %x, i8 %y)
ret i8 %1
@@ -18,12 +15,9 @@ define i8 @scmp_8_8(i8 signext %x, i8 signext %y) nounwind {
define i8 @scmp_8_16(i16 signext %x, i16 signext %y) nounwind {
; CHECK-LABEL: scmp_8_16:
; CHECK: @ %bb.0:
-; CHECK-NEXT: cmp r0, r1
-; CHECK-NEXT: mov r0, #0
-; CHECK-NEXT: mov r2, #0
-; CHECK-NEXT: movwlt r0, #1
-; CHECK-NEXT: movwgt r2, #1
-; CHECK-NEXT: sub r0, r2, r0
+; CHECK-NEXT: subs r0, r0, r1
+; CHECK-NEXT: movwgt r0, #1
+; CHECK-NEXT: mvnlt r0, #0
; CHECK-NEXT: bx lr
%1 = call i8 @llvm.scmp(i16 %x, i16 %y)
ret i8 %1
@@ -32,12 +26,9 @@ define i8 @scmp_8_16(i16 signext %x, i16 signext %y) nounwind {
define i8 @scmp_8_32(i32 %x, i32 %y) nounwind {
; CHECK-LABEL: scmp_8_32:
; CHECK: @ %bb.0:
-; CHECK-NEXT: cmp r0, r1
-; CHECK-NEXT: mov r0, #0
-; CHECK-NEXT: mov r2, #0
-; CHECK-NEXT: movwlt r0, #1
-; CHECK-NEXT: movwgt r2, #1
-; CHECK-NEXT: sub r0, r2, r0
+; CHECK-NEXT: subs r0, r0, r1
+; CHECK-NEXT: movwgt r0, #1
+; CHECK-NEXT: mvnlt r0, #0
; CHECK-NEXT: bx lr
%1 = call i8 @llvm.scmp(i32 %x, i32 %y)
ret i8 %1
@@ -92,17 +83,26 @@ define i8 @scmp_8_128(i128 %x, i128 %y) nounwind {
define i32 @scmp_32_32(i32 %x, i32 %y) nounwind {
; CHECK-LABEL: scmp_32_32:
; CHECK: @ %bb.0:
-; CHECK-NEXT: cmp r0, r1
-; CHECK-NEXT: mov r0, #0
-; CHECK-NEXT: mov r2, #0
-; CHECK-NEXT: movwlt r0, #1
-; CHECK-NEXT: movwgt r2, #1
-; CHECK-NEXT: sub r0, r2, r0
+; CHECK-NEXT: subs r0, r0, r1
+; CHECK-NEXT: movwgt r0, #1
+; CHECK-NEXT: mvnlt r0, #0
; CHECK-NEXT: bx lr
%1 = call i32 @llvm.scmp(i32 %x, i32 %y)
ret i32 %1
}
+define i32 @scmp_neg(i32 %x, i32 %y) nounwind {
+; CHECK-LABEL: scmp_neg:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: adds r0, r0, r1
+; CHECK-NEXT: movwgt r0, #1
+; CHECK-NEXT: mvnlt r0, #0
+; CHECK-NEXT: bx lr
+ %yy = sub nsw i32 0, %y
+ %1 = call i32 @llvm.scmp(i32 %x, i32 %yy)
+ ret i32 %1
+}
+
define i32 @scmp_32_64(i64 %x, i64 %y) nounwind {
; CHECK-LABEL: scmp_32_64:
; CHECK: @ %bb.0:
diff --git a/llvm/test/CodeGen/ARM/ucmp.ll b/llvm/test/CodeGen/ARM/ucmp.ll
index ad4af534ee8fe..bb0201454d1ea 100644
--- a/llvm/test/CodeGen/ARM/ucmp.ll
+++ b/llvm/test/CodeGen/ARM/ucmp.ll
@@ -4,12 +4,9 @@
define i8 @ucmp_8_8(i8 zeroext %x, i8 zeroext %y) nounwind {
; CHECK-LABEL: ucmp_8_8:
; CHECK: @ %bb.0:
-; CHECK-NEXT: cmp r0, r1
-; CHECK-NEXT: mov r0, #0
-; CHECK-NEXT: mov r2, #0
-; CHECK-NEXT: movwlo r0, #1
-; CHECK-NEXT: movwhi r2, #1
-; CHECK-NEXT: sub r0, r2, r0
+; CHECK-NEXT: subs r0, r0, r1
+; CHECK-NEXT: movwhi r0, #1
+; CHECK-NEXT: mvnlo r0, #0
; CHECK-NEXT: bx lr
%1 = call i8 @llvm.ucmp(i8 %x, i8 %y)
ret i8 %1
@@ -18,12 +15,9 @@ define i8 @ucmp_8_8(i8 zeroext %x, i8 zeroext %y) nounwind {
define i8 @ucmp_8_16(i16 zeroext %x, i16 zeroext %y) nounwind {
; CHECK-LABEL: ucmp_8_16:
; CHECK: @ %bb.0:
-; CHECK-NEXT: cmp r0, r1
-; CHECK-NEXT: mov r0, #0
-; CHECK-NEXT: mov r2, #0
-; CHECK-NEXT: movwlo r0, #1
-; CHECK-NEXT: movwhi r2, #1
-; CHECK-NEXT: sub r0, r2, r0
+; CHECK-NEXT: subs r0, r0, r1
+; CHECK-NEXT: movwhi r0, #1
+; CHECK-NEXT: mvnlo r0, #0
; CHECK-NEXT: bx lr
%1 = call i8 @llvm.ucmp(i16 %x, i16 %y)
ret i8 %1
@@ -32,12 +26,9 @@ define i8 @ucmp_8_16(i16 zeroext %x, i16 zeroext %y) nounwind {
define i8 @ucmp_8_32(i32 %x, i32 %y) nounwind {
; CHECK-LABEL: ucmp_8_32:
; CHECK: @ %bb.0:
-; CHECK-NEXT: cmp r0, r1
-; CHECK-NEXT: mov r0, #0
-; CHECK-NEXT: mov r2, #0
-; CHECK-NEXT: movwlo r0, #1
-; CHECK-NEXT: movwhi r2, #1
-; CHECK-NEXT: sub r0, r2, r0
+; CHECK-NEXT: subs r0, r0, r1
+; CHECK-NEXT: movwhi r0, #1
+; CHECK-NEXT: mvnlo r0, #0
; CHECK-NEXT: bx lr
%1 = call i8 @llvm.ucmp(i32 %x, i32 %y)
ret i8 %1
@@ -92,12 +83,9 @@ define i8 @ucmp_8_128(i128 %x, i128 %y) nounwind {
define i32 @ucmp_32_32(i32 %x, i32 %y) nounwind {
; CHECK-LABEL: ucmp_32_32:
; CHECK: @ %bb.0:
-; CHECK-NEXT: cmp r0, r1
-; CHECK-NEXT: mov r0, #0
-; CHECK-NEXT: mov r2, #0
-; CHECK-NEXT: movwlo r0, #1
-; CHECK-NEXT: movwhi r2, #1
-; CHECK-NEXT: sub r0, r2, r0
+; CHECK-NEXT: subs r0, r0, r1
+; CHECK-NEXT: movwhi r0, #1
+; CHECK-NEXT: mvnlo r0, #0
; CHECK-NEXT: bx lr
%1 = call i32 @llvm.ucmp(i32 %x, i32 %y)
ret i32 %1
diff --git a/llvm/test/CodeGen/Thumb/scmp.ll b/llvm/test/CodeGen/Thumb/scmp.ll
index 661dbe97cdb3c..c0024492b3a6d 100644
--- a/llvm/test/CodeGen/Thumb/scmp.ll
+++ b/llvm/test/CodeGen/Thumb/scmp.ll
@@ -1,151 +1,420 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mtriple=thumbv7-apple-darwin %s -o - | FileCheck %s
+; RUN: llc -mtriple=thumbv6m-eabi %s -o - | FileCheck %s -check-prefix=THUMB1
+; RUN: llc -mtriple=thumbv7-apple-darwin %s -o - | FileCheck %s -check-prefix=THUMB2
+; RUN: llc -mtriple thumbv8.1m.main-none-eabi -o - %s | FileCheck %s --check-prefix=V81M
define i8 @scmp_8_8(i8 signext %x, i8 signext %y) nounwind {
-; CHECK-LABEL: scmp_8_8:
-; CHECK: @ %bb.0:
-; CHECK-NEXT: cmp r0, r1
-; CHECK-NEXT: mov.w r0, #0
-; CHECK-NEXT: mov.w r2, #0
-; CHECK-NEXT: it lt
-; CHECK-NEXT: movlt r0, #1
-; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt r2, #1
-; CHECK-NEXT: subs r0, r2, r0
-; CHECK-NEXT: bx lr
+; THUMB1-LABEL: scmp_8_8:
+; THUMB1: @ %bb.0:
+; THUMB1-NEXT: movs r2, #1
+; THUMB1-NEXT: movs r3, #0
+; THUMB1-NEXT: cmp r0, r1
+; THUMB1-NEXT: mov r0, r2
+; THUMB1-NEXT: bge .LBB0_3
+; THUMB1-NEXT: @ %bb.1:
+; THUMB1-NEXT: ble .LBB0_4
+; THUMB1-NEXT: .LBB0_2:
+; THUMB1-NEXT: subs r0, r2, r0
+; THUMB1-NEXT: bx lr
+; THUMB1-NEXT: .LBB0_3:
+; THUMB1-NEXT: mov r0, r3
+; THUMB1-NEXT: bgt .LBB0_2
+; THUMB1-NEXT: .LBB0_4:
+; THUMB1-NEXT: mov r2, r3
+; THUMB1-NEXT: subs r0, r2, r0
+; THUMB1-NEXT: bx lr
+;
+; THUMB2-LABEL: scmp_8_8:
+; THUMB2: @ %bb.0:
+; THUMB2-NEXT: subs r0, r0, r1
+; THUMB2-NEXT: it gt
+; THUMB2-NEXT: movgt r0, #1
+; THUMB2-NEXT: it lt
+; THUMB2-NEXT: movlt.w r0, #-1
+; THUMB2-NEXT: bx lr
+;
+; V81M-LABEL: scmp_8_8:
+; V81M: @ %bb.0:
+; V81M-NEXT: cmp r0, r1
+; V81M-NEXT: cset r0, gt
+; V81M-NEXT: it lt
+; V81M-NEXT: movlt.w r0, #-1
+; V81M-NEXT: bx lr
%1 = call i8 @llvm.scmp(i8 %x, i8 %y)
ret i8 %1
}
define i8 @scmp_8_16(i16 signext %x, i16 signext %y) nounwind {
-; CHECK-LABEL: scmp_8_16:
-; CHECK: @ %bb.0:
-; CHECK-NEXT: cmp r0, r1
-; CHECK-NEXT: mov.w r0, #0
-; CHECK-NEXT: mov.w r2, #0
-; CHECK-NEXT: it lt
-; CHECK-NEXT: movlt r0, #1
-; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt r2, #1
-; CHECK-NEXT: subs r0, r2, r0
-; CHECK-NEXT: bx lr
+; THUMB1-LABEL: scmp_8_16:
+; THUMB1: @ %bb.0:
+; THUMB1-NEXT: movs r2, #1
+; THUMB1-NEXT: movs r3, #0
+; THUMB1-NEXT: cmp r0, r1
+; THUMB1-NEXT: mov r0, r2
+; THUMB1-NEXT: bge .LBB1_3
+; THUMB1-NEXT: @ %bb.1:
+; THUMB1-NEXT: ble .LBB1_4
+; THUMB1-NEXT: .LBB1_2:
+; THUMB1-NEXT: subs r0, r2, r0
+; THUMB1-NEXT: bx lr
+; THUMB1-NEXT: .LBB1_3:
+; THUMB1-NEXT: mov r0, r3
+; THUMB1-NEXT: bgt .LBB1_2
+; THUMB1-NEXT: .LBB1_4:
+; THUMB1-NEXT: mov r2, r3
+; THUMB1-NEXT: subs r0, r2, r0
+; THUMB1-NEXT: bx lr
+;
+; THUMB2-LABEL: scmp_8_16:
+; THUMB2: @ %bb.0:
+; THUMB2-NEXT: subs r0, r0, r1
+; THUMB2-NEXT: it gt
+; THUMB2-NEXT: movgt r0, #1
+; THUMB2-NEXT: it lt
+; THUMB2-NEXT: movlt.w r0, #-1
+; THUMB2-NEXT: bx lr
+;
+; V81M-LABEL: scmp_8_16:
+; V81M: @ %bb.0:
+; V81M-NEXT: cmp r0, r1
+; V81M-NEXT: cset r0, gt
+; V81M-NEXT: it lt
+; V81M-NEXT: movlt.w r0, #-1
+; V81M-NEXT: bx lr
%1 = call i8 @llvm.scmp(i16 %x, i16 %y)
ret i8 %1
}
define i8 @scmp_8_32(i32 %x, i32 %y) nounwind {
-; CHECK-LABEL: scmp_8_32:
-; CHECK: @ %bb.0:
-; CHECK-NEXT: cmp r0, r1
-; CHECK-NEXT: mov.w r0, #0
-; CHECK-NEXT: mov.w r2, #0
-; CHECK-NEXT: it lt
-; CHECK-NEXT: movlt r0, #1
-; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt r2, #1
-; CHECK-NEXT: subs r0, r2, r0
-; CHECK-NEXT: bx lr
+; THUMB1-LABEL: scmp_8_32:
+; THUMB1: @ %bb.0:
+; THUMB1-NEXT: movs r2, #1
+; THUMB1-NEXT: movs r3, #0
+; THUMB1-NEXT: cmp r0, r1
+; THUMB1-NEXT: mov r0, r2
+; THUMB1-NEXT: bge .LBB2_3
+; THUMB1-NEXT: @ %bb.1:
+; THUMB1-NEXT: ble .LBB2_4
+; THUMB1-NEXT: .LBB2_2:
+; THUMB1-NEXT: subs r0, r2, r0
+; THUMB1-NEXT: bx lr
+; THUMB1-NEXT: .LBB2_3:
+; THUMB1-NEXT: mov r0, r3
+; THUMB1-NEXT: bgt .LBB2_2
+; THUMB1-NEXT: .LBB2_4:
+; THUMB1-NEXT: mov r2, r3
+; THUMB1-NEXT: subs r0, r2, r0
+; THUMB1-NEXT: bx lr
+;
+; THUMB2-LABEL: scmp_8_32:
+; THUMB2: @ %bb.0:
+; THUMB2-NEXT: subs r0, r0, r1
+; THUMB2-NEXT: it gt
+; THUMB2-NEXT: movgt r0, #1
+; THUMB2-NEXT: it lt
+; THUMB2-NEXT: movlt.w r0, #-1
+; THUMB2-NEXT: bx lr
+;
+; V81M-LABEL: scmp_8_32:
+; V81M: @ %bb.0:
+; V81M-NEXT: cmp r0, r1
+; V81M-NEXT: cset r0, gt
+; V81M-NEXT: it lt
+; V81M-NEXT: movlt.w r0, #-1
+; V81M-NEXT: bx lr
%1 = call i8 @llvm.scmp(i32 %x, i32 %y)
ret i8 %1
}
define i8 @scmp_8_64(i64 %x, i64 %y) nounwind {
-; CHECK-LABEL: scmp_8_64:
-; CHECK: @ %bb.0:
-; CHECK-NEXT: subs.w r12, r0, r2
-; CHECK-NEXT: mov.w r9, #0
-; CHECK-NEXT: sbcs.w r12, r1, r3
-; CHECK-NEXT: mov.w r12, #0
-; CHECK-NEXT: it lt
-; CHECK-NEXT: movlt.w r12, #1
-; CHECK-NEXT: subs r0, r2, r0
-; CHECK-NEXT: sbcs.w r0, r3, r1
-; CHECK-NEXT: it lt
-; CHECK-NEXT: movlt.w r9, #1
-; CHECK-NEXT: sub.w r0, r9, r12
-; CHECK-NEXT: bx lr
+; THUMB1-LABEL: scmp_8_64:
+; THUMB1: @ %bb.0:
+; THUMB1-NEXT: .save {r4, r5, r6, lr}
+; THUMB1-NEXT: push {r4, r5, r6, lr}
+; THUMB1-NEXT: movs r4, #1
+; THUMB1-NEXT: movs r5, #0
+; THUMB1-NEXT: subs r6, r0, r2
+; THUMB1-NEXT: mov r6, r1
+; THUMB1-NEXT: sbcs r6, r3
+; THUMB1-NEXT: mov r6, r4
+; THUMB1-NEXT: blt .LBB3_2
+; THUMB1-NEXT: @ %bb.1:
+; THUMB1-NEXT: mov r6, r5
+; THUMB1-NEXT: .LBB3_2:
+; THUMB1-NEXT: subs r0, r2, r0
+; THUMB1-NEXT: sbcs r3, r1
+; THUMB1-NEXT: blt .LBB3_4
+; THUMB1-NEXT: @ %bb.3:
+; THUMB1-NEXT: mov r4, r5
+; THUMB1-NEXT: .LBB3_4:
+; THUMB1-NEXT: subs r0, r4, r6
+; THUMB1-NEXT: pop {r4, r5, r6, pc}
+;
+; THUMB2-LABEL: scmp_8_64:
+; THUMB2: @ %bb.0:
+; THUMB2-NEXT: subs.w r12, r0, r2
+; THUMB2-NEXT: mov.w r9, #0
+; THUMB2-NEXT: sbcs.w r12, r1, r3
+; THUMB2-NEXT: mov.w r12, #0
+; THUMB2-NEXT: it lt
+; THUMB2-NEXT: movlt.w r12, #1
+; THUMB2-NEXT: subs r0, r2, r0
+; THUMB2-NEXT: sbcs.w r0, r3, r1
+; THUMB2-NEXT: it lt
+; THUMB2-NEXT: movlt.w r9, #1
+; THUMB2-NEXT: sub.w r0, r9, r12
+; THUMB2-NEXT: bx lr
+;
+; V81M-LABEL: scmp_8_64:
+; V81M: @ %bb.0:
+; V81M-NEXT: subs.w r12, r0, r2
+; V81M-NEXT: sbcs.w r12, r1, r3
+; V81M-NEXT: cset r12, lt
+; V81M-NEXT: subs r0, r2, r0
+; V81M-NEXT: sbcs.w r0, r3, r1
+; V81M-NEXT: cset r0, lt
+; V81M-NEXT: sub.w r0, r0, r12
+; V81M-NEXT: bx lr
%1 = call i8 @llvm.scmp(i64 %x, i64 %y)
ret i8 %1
}
define i8 @scmp_8_128(i128 %x, i128 %y) nounwind {
-; CHECK-LABEL: scmp_8_128:
-; CHECK: @ %bb.0:
-; CHECK-NEXT: push {r4, r5, r6, lr}
-; CHECK-NEXT: add.w lr, sp, #16
-; CHECK-NEXT: ldr r4, [sp, #28]
-; CHECK-NEXT: movs r5, #0
-; CHECK-NEXT: ldm.w lr, {r9, r12, lr}
-; CHECK-NEXT: subs.w r6, r0, r9
-; CHECK-NEXT: sbcs.w r6, r1, r12
-; CHECK-NEXT: sbcs.w r6, r2, lr
-; CHECK-NEXT: sbcs.w r6, r3, r4
-; CHECK-NEXT: mov.w r6, #0
-; CHECK-NEXT: it lt
-; CHECK-NEXT: movlt r6, #1
-; CHECK-NEXT: subs.w r0, r9, r0
-; CHECK-NEXT: sbcs.w r0, r12, r1
-; CHECK-NEXT: sbcs.w r0, lr, r2
-; CHECK-NEXT: sbcs.w r0, r4, r3
-; CHECK-NEXT: it lt
-; CHECK-NEXT: movlt r5, #1
-; CHECK-NEXT: subs r0, r5, r6
-; CHECK-NEXT: pop {r4, r5, r6, pc}
+; THUMB1-LABEL: scmp_8_128:
+; THUMB1: @ %bb.0:
+; THUMB1-NEXT: .save {r4, r5, r6, r7, lr}
+; THUMB1-NEXT: push {r4, r5, r6, r7, lr}
+; THUMB1-NEXT: .pad #20
+; THUMB1-NEXT: sub sp, #20
+; THUMB1-NEXT: str r3, [sp, #16] @ 4-byte Spill
+; THUMB1-NEXT: movs r3, #1
+; THUMB1-NEXT: str r3, [sp] @ 4-byte Spill
+; THUMB1-NEXT: movs r3, #0
+; THUMB1-NEXT: str r3, [sp, #12] @ 4-byte Spill
+; THUMB1-NEXT: ldr r6, [sp, #52]
+; THUMB1-NEXT: add r7, sp, #40
+; THUMB1-NEXT: ldm r7, {r3, r5, r7}
+; THUMB1-NEXT: subs r4, r0, r3
+; THUMB1-NEXT: str r1, [sp, #4] @ 4-byte Spill
+; THUMB1-NEXT: mov r4, r1
+; THUMB1-NEXT: ldr r1, [sp] @ 4-byte Reload
+; THUMB1-NEXT: sbcs r4, r5
+; THUMB1-NEXT: str r2, [sp, #8] @ 4-byte Spill
+; THUMB1-NEXT: mov r4, r2
+; THUMB1-NEXT: sbcs r4, r7
+; THUMB1-NEXT: ldr r4, [sp, #16] @ 4-byte Reload
+; THUMB1-NEXT: sbcs r4, r6
+; THUMB1-NEXT: mov r2, r1
+; THUMB1-NEXT: blt .LBB4_2
+; THUMB1-NEXT: @ %bb.1:
+; THUMB1-NEXT: ldr r2, [sp, #12] @ 4-byte Reload
+; THUMB1-NEXT: .LBB4_2:
+; THUMB1-NEXT: subs r0, r3, r0
+; THUMB1-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
+; THUMB1-NEXT: sbcs r5, r0
+; THUMB1-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
+; THUMB1-NEXT: sbcs r7, r0
+; THUMB1-NEXT: ldr r0, [sp, #16] @ 4-byte Reload
+; THUMB1-NEXT: sbcs r6, r0
+; THUMB1-NEXT: blt .LBB4_4
+; THUMB1-NEXT: @ %bb.3:
+; THUMB1-NEXT: ldr r1, [sp, #12] @ 4-byte Reload
+; THUMB1-NEXT: .LBB4_4:
+; THUMB1-NEXT: subs r0, r1, r2
+; THUMB1-NEXT: add sp, #20
+; THUMB1-NEXT: pop {r4, r5, r6, r7, pc}
+;
+; THUMB2-LABEL: scmp_8_128:
+; THUMB2: @ %bb.0:
+; THUMB2-NEXT: push {r4, r5, r6, lr}
+; THUMB2-NEXT: add.w lr, sp, #16
+; THUMB2-NEXT: ldr r4, [sp, #28]
+; THUMB2-NEXT: movs r5, #0
+; THUMB2-NEXT: ldm.w lr, {r9, r12, lr}
+; THUMB2-NEXT: subs.w r6, r0, r9
+; THUMB2-NEXT: sbcs.w r6, r1, r12
+; THUMB2-NEXT: sbcs.w r6, r2, lr
+; THUMB2-NEXT: sbcs.w r6, r3, r4
+; THUMB2-NEXT: mov.w r6, #0
+; THUMB2-NEXT: it lt
+; THUMB2-NEXT: movlt r6, #1
+; THUMB2-NEXT: subs.w r0, r9, r0
+; THUMB2-NEXT: sbcs.w r0, r12, r1
+; THUMB2-NEXT: sbcs.w r0, lr, r2
+; THUMB2-NEXT: sbcs.w r0, r4, r3
+; THUMB2-NEXT: it lt
+; THUMB2-NEXT: movlt r5, #1
+; THUMB2-NEXT: subs r0, r5, r6
+; THUMB2-NEXT: pop {r4, r5, r6, pc}
+;
+; V81M-LABEL: scmp_8_128:
+; V81M: @ %bb.0:
+; V81M-NEXT: .save {r4, r5, r6, lr}
+; V81M-NEXT: push {r4, r5, r6, lr}
+; V81M-NEXT: ldrd r5, r4, [sp, #16]
+; V81M-NEXT: ldrd lr, r12, [sp, #24]
+; V81M-NEXT: subs r6, r0, r5
+; V81M-NEXT: sbcs.w r6, r1, r4
+; V81M-NEXT: sbcs.w r6, r2, lr
+; V81M-NEXT: sbcs.w r6, r3, r12
+; V81M-NEXT: cset r6, lt
+; V81M-NEXT: subs r0, r5, r0
+; V81M-NEXT: sbcs.w r0, r4, r1
+; V81M-NEXT: sbcs.w r0, lr, r2
+; V81M-NEXT: sbcs.w r0, r12, r3
+; V81M-NEXT: cset r0, lt
+; V81M-NEXT: subs r0, r0, r6
+; V81M-NEXT: pop {r4, r5, r6, pc}
%1 = call i8 @llvm.scmp(i128 %x, i128 %y)
ret i8 %1
}
define i32 @scmp_32_32(i32 %x, i32 %y) nounwind {
-; CHECK-LABEL: scmp_32_32:
-; CHECK: @ %bb.0:
-; CHECK-NEXT: cmp r0, r1
-; CHECK-NEXT: mov.w r0, #0
-; CHECK-NEXT: mov.w r2, #0
-; CHECK-NEXT: it lt
-; CHECK-NEXT: movlt r0, #1
-; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt r2, #1
-; CHECK-NEXT: subs r0, r2, r0
-; CHECK-NEXT: bx lr
+; THUMB1-LABEL: scmp_32_32:
+; THUMB1: @ %bb.0:
+; THUMB1-NEXT: movs r2, #1
+; THUMB1-NEXT: movs r3, #0
+; THUMB1-NEXT: cmp r0, r1
+; THUMB1-NEXT: mov r0, r2
+; THUMB1-NEXT: bge .LBB5_3
+; THUMB1-NEXT: @ %bb.1:
+; THUMB1-NEXT: ble .LBB5_4
+; THUMB1-NEXT: .LBB5_2:
+; THUMB1-NEXT: subs r0, r2, r0
+; THUMB1-NEXT: bx lr
+; THUMB1-NEXT: .LBB5_3:
+; THUMB1-NEXT: mov r0, r3
+; THUMB1-NEXT: bgt .LBB5_2
+; THUMB1-NEXT: .LBB5_4:
+; THUMB1-NEXT: mov r2, r3
+; THUMB1-NEXT: subs r0, r2, r0
+; THUMB1-NEXT: bx lr
+;
+; THUMB2-LABEL: scmp_32_32:
+; THUMB2: @ %bb.0:
+; THUMB2-NEXT: subs r0, r0, r1
+; THUMB2-NEXT: it gt
+; THUMB2-NEXT: movgt r0, #1
+; THUMB2-NEXT: it lt
+; THUMB2-NEXT: movlt.w r0, #-1
+; THUMB2-NEXT: bx lr
+;
+; V81M-LABEL: scmp_32_32:
+; V81M: @ %bb.0:
+; V81M-NEXT: cmp r0, r1
+; V81M-NEXT: cset r0, gt
+; V81M-NEXT: it lt
+; V81M-NEXT: movlt.w r0, #-1
+; V81M-NEXT: bx lr
%1 = call i32 @llvm.scmp(i32 %x, i32 %y)
ret i32 %1
}
define i32 @scmp_32_64(i64 %x, i64 %y) nounwind {
-; CHECK-LABEL: scmp_32_64:
-; CHECK: @ %bb.0:
-; CHECK-NEXT: subs.w r12, r0, r2
-; CHECK-NEXT: mov.w r9, #0
-; CHECK-NEXT: sbcs.w r12, r1, r3
-; CHECK-NEXT: mov.w r12, #0
-; CHECK-NEXT: it lt
-; CHECK-NEXT: movlt.w r12, #1
-; CHECK-NEXT: subs r0, r2, r0
-; CHECK-NEXT: sbcs.w r0, r3, r1
-; CHECK-NEXT: it lt
-; CHECK-NEXT: movlt.w r9, #1
-; CHECK-NEXT: sub.w r0, r9, r12
-; CHECK-NEXT: bx lr
+; THUMB1-LABEL: scmp_32_64:
+; THUMB1: @ %bb.0:
+; THUMB1-NEXT: .save {r4, r5, r6, lr}
+; THUMB1-NEXT: push {r4, r5, r6, lr}
+; THUMB1-NEXT: movs r4, #1
+; THUMB1-NEXT: movs r5, #0
+; THUMB1-NEXT: subs r6, r0, r2
+; THUMB1-NEXT: mov r6, r1
+; THUMB1-NEXT: sbcs r6, r3
+; THUMB1-NEXT: mov r6, r4
+; THUMB1-NEXT: blt .LBB6_2
+; THUMB1-NEXT: @ %bb.1:
+; THUMB1-NEXT: mov r6, r5
+; THUMB1-NEXT: .LBB6_2:
+; THUMB1-NEXT: subs r0, r2, r0
+; THUMB1-NEXT: sbcs r3, r1
+; THUMB1-NEXT: blt .LBB6_4
+; THUMB1-NEXT: @ %bb.3:
+; THUMB1-NEXT: mov r4, r5
+; THUMB1-NEXT: .LBB6_4:
+; THUMB1-NEXT: subs r0, r4, r6
+; THUMB1-NEXT: pop {r4, r5, r6, pc}
+;
+; THUMB2-LABEL: scmp_32_64:
+; THUMB2: @ %bb.0:
+; THUMB2-NEXT: subs.w r12, r0, r2
+; THUMB2-NEXT: mov.w r9, #0
+; THUMB2-NEXT: sbcs.w r12, r1, r3
+; THUMB2-NEXT: mov.w r12, #0
+; THUMB2-NEXT: it lt
+; THUMB2-NEXT: movlt.w r12, #1
+; THUMB2-NEXT: subs r0, r2, r0
+; THUMB2-NEXT: sbcs.w r0, r3, r1
+; THUMB2-NEXT: it lt
+; THUMB2-NEXT: movlt.w r9, #1
+; THUMB2-NEXT: sub.w r0, r9, r12
+; THUMB2-NEXT: bx lr
+;
+; V81M-LABEL: scmp_32_64:
+; V81M: @ %bb.0:
+; V81M-NEXT: subs.w r12, r0, r2
+; V81M-NEXT: sbcs.w r12, r1, r3
+; V81M-NEXT: cset r12, lt
+; V81M-NEXT: subs r0, r2, r0
+; V81M-NEXT: sbcs.w r0, r3, r1
+; V81M-NEXT: cset r0, lt
+; V81M-NEXT: sub.w r0, r0, r12
+; V81M-NEXT: bx lr
%1 = call i32 @llvm.scmp(i64 %x, i64 %y)
ret i32 %1
}
define i64 @scmp_64_64(i64 %x, i64 %y) nounwind {
-; CHECK-LABEL: scmp_64_64:
-; CHECK: @ %bb.0:
-; CHECK-NEXT: subs.w r12, r0, r2
-; CHECK-NEXT: mov.w r9, #0
-; CHECK-NEXT: sbcs.w r12, r1, r3
-; CHECK-NEXT: mov.w r12, #0
-; CHECK-NEXT: it lt
-; CHECK-NEXT: movlt.w r12, #1
-; CHECK-NEXT: subs r0, r2, r0
-; CHECK-NEXT: sbcs.w r0, r3, r1
-; CHECK-NEXT: it lt
-; CHECK-NEXT: movlt.w r9, #1
-; CHECK-NEXT: sub.w r0, r9, r12
-; CHECK-NEXT: asrs r1, r0, #31
-; CHECK-NEXT: bx lr
+; THUMB1-LABEL: scmp_64_64:
+; THUMB1: @ %bb.0:
+; THUMB1-NEXT: .save {r4, r5, r6, lr}
+; THUMB1-NEXT: push {r4, r5, r6, lr}
+; THUMB1-NEXT: movs r4, #1
+; THUMB1-NEXT: movs r5, #0
+; THUMB1-NEXT: subs r6, r0, r2
+; THUMB1-NEXT: mov r6, r1
+; THUMB1-NEXT: sbcs r6, r3
+; THUMB1-NEXT: mov r6, r4
+; THUMB1-NEXT: blt .LBB7_2
+; THUMB1-NEXT: @ %bb.1:
+; THUMB1-NEXT: mov r6, r5
+; THUMB1-NEXT: .LBB7_2:
+; THUMB1-NEXT: subs r0, r2, r0
+; THUMB1-NEXT: sbcs r3, r1
+; THUMB1-NEXT: blt .LBB7_4
+; THUMB1-NEXT: @ %bb.3:
+; THUMB1-NEXT: mov r4, r5
+; THUMB1-NEXT: .LBB7_4:
+; THUMB1-NEXT: subs r0, r4, r6
+; THUMB1-NEXT: asrs r1, r0, #31
+; THUMB1-NEXT: pop {r4, r5, r6, pc}
+;
+; THUMB2-LABEL: scmp_64_64:
+; THUMB2: @ %bb.0:
+; THUMB2-NEXT: subs.w r12, r0, r2
+; THUMB2-NEXT: mov.w r9, #0
+; THUMB2-NEXT: sbcs.w r12, r1, r3
+; THUMB2-NEXT: mov.w r12, #0
+; THUMB2-NEXT: it lt
+; THUMB2-NEXT: movlt.w r12, #1
+; THUMB2-NEXT: subs r0, r2, r0
+; THUMB2-NEXT: sbcs.w r0, r3, r1
+; THUMB2-NEXT: it lt
+; THUMB2-NEXT: movlt.w r9, #1
+; THUMB2-NEXT: sub.w r0, r9, r12
+; THUMB2-NEXT: asrs r1, r0, #31
+; THUMB2-NEXT: bx lr
+;
+; V81M-LABEL: scmp_64_64:
+; V81M: @ %bb.0:
+; V81M-NEXT: subs.w r12, r0, r2
+; V81M-NEXT: sbcs.w r12, r1, r3
+; V81M-NEXT: cset r12, lt
+; V81M-NEXT: subs r0, r2, r0
+; V81M-NEXT: sbcs.w r0, r3, r1
+; V81M-NEXT: cset r0, lt
+; V81M-NEXT: sub.w r0, r0, r12
+; V81M-NEXT: asrs r1, r0, #31
+; V81M-NEXT: bx lr
%1 = call i64 @llvm.scmp(i64 %x, i64 %y)
ret i64 %1
}
diff --git a/llvm/test/CodeGen/Thumb/ucmp.ll b/llvm/test/CodeGen/Thumb/ucmp.ll
index 7e6d0a323b11c..5d0f57e2a9d72 100644
--- a/llvm/test/CodeGen/Thumb/ucmp.ll
+++ b/llvm/test/CodeGen/Thumb/ucmp.ll
@@ -1,151 +1,376 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mtriple=thumbv7-apple-darwin %s -o - | FileCheck %s
+; RUN: llc -mtriple=thumbv6m-eabi %s -o - | FileCheck %s -check-prefix=THUMB1
+; RUN: llc -mtriple=thumbv7-apple-darwin %s -o - | FileCheck %s -check-prefix=THUMB2
+; RUN: llc -mtriple thumbv8.1m.main-none-eabi -o - %s | FileCheck %s --check-prefix=V81M
define i8 @ucmp_8_8(i8 zeroext %x, i8 zeroext %y) nounwind {
-; CHECK-LABEL: ucmp_8_8:
-; CHECK: @ %bb.0:
-; CHECK-NEXT: cmp r0, r1
-; CHECK-NEXT: mov.w r0, #0
-; CHECK-NEXT: mov.w r2, #0
-; CHECK-NEXT: it lo
-; CHECK-NEXT: movlo r0, #1
-; CHECK-NEXT: it hi
-; CHECK-NEXT: movhi r2, #1
-; CHECK-NEXT: subs r0, r2, r0
-; CHECK-NEXT: bx lr
+; THUMB1-LABEL: ucmp_8_8:
+; THUMB1: @ %bb.0:
+; THUMB1-NEXT: subs r2, r0, r1
+; THUMB1-NEXT: sbcs r2, r2
+; THUMB1-NEXT: cmp r1, r0
+; THUMB1-NEXT: sbcs r1, r1
+; THUMB1-NEXT: subs r0, r2, r1
+; THUMB1-NEXT: bx lr
+;
+; THUMB2-LABEL: ucmp_8_8:
+; THUMB2: @ %bb.0:
+; THUMB2-NEXT: subs r0, r0, r1
+; THUMB2-NEXT: it hi
+; THUMB2-NEXT: movhi r0, #1
+; THUMB2-NEXT: it lo
+; THUMB2-NEXT: movlo.w r0, #-1
+; THUMB2-NEXT: bx lr
+;
+; V81M-LABEL: ucmp_8_8:
+; V81M: @ %bb.0:
+; V81M-NEXT: cmp r0, r1
+; V81M-NEXT: cset r0, hi
+; V81M-NEXT: it lo
+; V81M-NEXT: movlo.w r0, #-1
+; V81M-NEXT: bx lr
%1 = call i8 @llvm.ucmp(i8 %x, i8 %y)
ret i8 %1
}
define i8 @ucmp_8_16(i16 zeroext %x, i16 zeroext %y) nounwind {
-; CHECK-LABEL: ucmp_8_16:
-; CHECK: @ %bb.0:
-; CHECK-NEXT: cmp r0, r1
-; CHECK-NEXT: mov.w r0, #0
-; CHECK-NEXT: mov.w r2, #0
-; CHECK-NEXT: it lo
-; CHECK-NEXT: movlo r0, #1
-; CHECK-NEXT: it hi
-; CHECK-NEXT: movhi r2, #1
-; CHECK-NEXT: subs r0, r2, r0
-; CHECK-NEXT: bx lr
+; THUMB1-LABEL: ucmp_8_16:
+; THUMB1: @ %bb.0:
+; THUMB1-NEXT: subs r2, r0, r1
+; THUMB1-NEXT: sbcs r2, r2
+; THUMB1-NEXT: cmp r1, r0
+; THUMB1-NEXT: sbcs r1, r1
+; THUMB1-NEXT: subs r0, r2, r1
+; THUMB1-NEXT: bx lr
+;
+; THUMB2-LABEL: ucmp_8_16:
+; THUMB2: @ %bb.0:
+; THUMB2-NEXT: subs r0, r0, r1
+; THUMB2-NEXT: it hi
+; THUMB2-NEXT: movhi r0, #1
+; THUMB2-NEXT: it lo
+; THUMB2-NEXT: movlo.w r0, #-1
+; THUMB2-NEXT: bx lr
+;
+; V81M-LABEL: ucmp_8_16:
+; V81M: @ %bb.0:
+; V81M-NEXT: cmp r0, r1
+; V81M-NEXT: cset r0, hi
+; V81M-NEXT: it lo
+; V81M-NEXT: movlo.w r0, #-1
+; V81M-NEXT: bx lr
%1 = call i8 @llvm.ucmp(i16 %x, i16 %y)
ret i8 %1
}
define i8 @ucmp_8_32(i32 %x, i32 %y) nounwind {
-; CHECK-LABEL: ucmp_8_32:
-; CHECK: @ %bb.0:
-; CHECK-NEXT: cmp r0, r1
-; CHECK-NEXT: mov.w r0, #0
-; CHECK-NEXT: mov.w r2, #0
-; CHECK-NEXT: it lo
-; CHECK-NEXT: movlo r0, #1
-; CHECK-NEXT: it hi
-; CHECK-NEXT: movhi r2, #1
-; CHECK-NEXT: subs r0, r2, r0
-; CHECK-NEXT: bx lr
+; THUMB1-LABEL: ucmp_8_32:
+; THUMB1: @ %bb.0:
+; THUMB1-NEXT: subs r2, r0, r1
+; THUMB1-NEXT: sbcs r2, r2
+; THUMB1-NEXT: cmp r1, r0
+; THUMB1-NEXT: sbcs r1, r1
+; THUMB1-NEXT: subs r0, r2, r1
+; THUMB1-NEXT: bx lr
+;
+; THUMB2-LABEL: ucmp_8_32:
+; THUMB2: @ %bb.0:
+; THUMB2-NEXT: subs r0, r0, r1
+; THUMB2-NEXT: it hi
+; THUMB2-NEXT: movhi r0, #1
+; THUMB2-NEXT: it lo
+; THUMB2-NEXT: movlo.w r0, #-1
+; THUMB2-NEXT: bx lr
+;
+; V81M-LABEL: ucmp_8_32:
+; V81M: @ %bb.0:
+; V81M-NEXT: cmp r0, r1
+; V81M-NEXT: cset r0, hi
+; V81M-NEXT: it lo
+; V81M-NEXT: movlo.w r0, #-1
+; V81M-NEXT: bx lr
%1 = call i8 @llvm.ucmp(i32 %x, i32 %y)
ret i8 %1
}
define i8 @ucmp_8_64(i64 %x, i64 %y) nounwind {
-; CHECK-LABEL: ucmp_8_64:
-; CHECK: @ %bb.0:
-; CHECK-NEXT: subs.w r12, r0, r2
-; CHECK-NEXT: mov.w r9, #0
-; CHECK-NEXT: sbcs.w r12, r1, r3
-; CHECK-NEXT: mov.w r12, #0
-; CHECK-NEXT: it lo
-; CHECK-NEXT: movlo.w r12, #1
-; CHECK-NEXT: subs r0, r2, r0
-; CHECK-NEXT: sbcs.w r0, r3, r1
-; CHECK-NEXT: it lo
-; CHECK-NEXT: movlo.w r9, #1
-; CHECK-NEXT: sub.w r0, r9, r12
-; CHECK-NEXT: bx lr
+; THUMB1-LABEL: ucmp_8_64:
+; THUMB1: @ %bb.0:
+; THUMB1-NEXT: .save {r4, r5, r6, lr}
+; THUMB1-NEXT: push {r4, r5, r6, lr}
+; THUMB1-NEXT: movs r4, #1
+; THUMB1-NEXT: movs r5, #0
+; THUMB1-NEXT: subs r6, r0, r2
+; THUMB1-NEXT: mov r6, r1
+; THUMB1-NEXT: sbcs r6, r3
+; THUMB1-NEXT: mov r6, r4
+; THUMB1-NEXT: blo .LBB3_2
+; THUMB1-NEXT: @ %bb.1:
+; THUMB1-NEXT: mov r6, r5
+; THUMB1-NEXT: .LBB3_2:
+; THUMB1-NEXT: subs r0, r2, r0
+; THUMB1-NEXT: sbcs r3, r1
+; THUMB1-NEXT: blo .LBB3_4
+; THUMB1-NEXT: @ %bb.3:
+; THUMB1-NEXT: mov r4, r5
+; THUMB1-NEXT: .LBB3_4:
+; THUMB1-NEXT: subs r0, r4, r6
+; THUMB1-NEXT: pop {r4, r5, r6, pc}
+;
+; THUMB2-LABEL: ucmp_8_64:
+; THUMB2: @ %bb.0:
+; THUMB2-NEXT: subs.w r12, r0, r2
+; THUMB2-NEXT: mov.w r9, #0
+; THUMB2-NEXT: sbcs.w r12, r1, r3
+; THUMB2-NEXT: mov.w r12, #0
+; THUMB2-NEXT: it lo
+; THUMB2-NEXT: movlo.w r12, #1
+; THUMB2-NEXT: subs r0, r2, r0
+; THUMB2-NEXT: sbcs.w r0, r3, r1
+; THUMB2-NEXT: it lo
+; THUMB2-NEXT: movlo.w r9, #1
+; THUMB2-NEXT: sub.w r0, r9, r12
+; THUMB2-NEXT: bx lr
+;
+; V81M-LABEL: ucmp_8_64:
+; V81M: @ %bb.0:
+; V81M-NEXT: subs.w r12, r0, r2
+; V81M-NEXT: sbcs.w r12, r1, r3
+; V81M-NEXT: cset r12, lo
+; V81M-NEXT: subs r0, r2, r0
+; V81M-NEXT: sbcs.w r0, r3, r1
+; V81M-NEXT: cset r0, lo
+; V81M-NEXT: sub.w r0, r0, r12
+; V81M-NEXT: bx lr
%1 = call i8 @llvm.ucmp(i64 %x, i64 %y)
ret i8 %1
}
define i8 @ucmp_8_128(i128 %x, i128 %y) nounwind {
-; CHECK-LABEL: ucmp_8_128:
-; CHECK: @ %bb.0:
-; CHECK-NEXT: push {r4, r5, r6, lr}
-; CHECK-NEXT: add.w lr, sp, #16
-; CHECK-NEXT: ldr r4, [sp, #28]
-; CHECK-NEXT: movs r5, #0
-; CHECK-NEXT: ldm.w lr, {r9, r12, lr}
-; CHECK-NEXT: subs.w r6, r0, r9
-; CHECK-NEXT: sbcs.w r6, r1, r12
-; CHECK-NEXT: sbcs.w r6, r2, lr
-; CHECK-NEXT: sbcs.w r6, r3, r4
-; CHECK-NEXT: mov.w r6, #0
-; CHECK-NEXT: it lo
-; CHECK-NEXT: movlo r6, #1
-; CHECK-NEXT: subs.w r0, r9, r0
-; CHECK-NEXT: sbcs.w r0, r12, r1
-; CHECK-NEXT: sbcs.w r0, lr, r2
-; CHECK-NEXT: sbcs.w r0, r4, r3
-; CHECK-NEXT: it lo
-; CHECK-NEXT: movlo r5, #1
-; CHECK-NEXT: subs r0, r5, r6
-; CHECK-NEXT: pop {r4, r5, r6, pc}
+; THUMB1-LABEL: ucmp_8_128:
+; THUMB1: @ %bb.0:
+; THUMB1-NEXT: .save {r4, r5, r6, r7, lr}
+; THUMB1-NEXT: push {r4, r5, r6, r7, lr}
+; THUMB1-NEXT: .pad #20
+; THUMB1-NEXT: sub sp, #20
+; THUMB1-NEXT: str r3, [sp, #16] @ 4-byte Spill
+; THUMB1-NEXT: movs r3, #1
+; THUMB1-NEXT: str r3, [sp] @ 4-byte Spill
+; THUMB1-NEXT: movs r3, #0
+; THUMB1-NEXT: str r3, [sp, #12] @ 4-byte Spill
+; THUMB1-NEXT: ldr r6, [sp, #52]
+; THUMB1-NEXT: add r7, sp, #40
+; THUMB1-NEXT: ldm r7, {r3, r5, r7}
+; THUMB1-NEXT: subs r4, r0, r3
+; THUMB1-NEXT: str r1, [sp, #4] @ 4-byte Spill
+; THUMB1-NEXT: mov r4, r1
+; THUMB1-NEXT: ldr r1, [sp] @ 4-byte Reload
+; THUMB1-NEXT: sbcs r4, r5
+; THUMB1-NEXT: str r2, [sp, #8] @ 4-byte Spill
+; THUMB1-NEXT: mov r4, r2
+; THUMB1-NEXT: sbcs r4, r7
+; THUMB1-NEXT: ldr r4, [sp, #16] @ 4-byte Reload
+; THUMB1-NEXT: sbcs r4, r6
+; THUMB1-NEXT: mov r2, r1
+; THUMB1-NEXT: blo .LBB4_2
+; THUMB1-NEXT: @ %bb.1:
+; THUMB1-NEXT: ldr r2, [sp, #12] @ 4-byte Reload
+; THUMB1-NEXT: .LBB4_2:
+; THUMB1-NEXT: subs r0, r3, r0
+; THUMB1-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
+; THUMB1-NEXT: sbcs r5, r0
+; THUMB1-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
+; THUMB1-NEXT: sbcs r7, r0
+; THUMB1-NEXT: ldr r0, [sp, #16] @ 4-byte Reload
+; THUMB1-NEXT: sbcs r6, r0
+; THUMB1-NEXT: blo .LBB4_4
+; THUMB1-NEXT: @ %bb.3:
+; THUMB1-NEXT: ldr r1, [sp, #12] @ 4-byte Reload
+; THUMB1-NEXT: .LBB4_4:
+; THUMB1-NEXT: subs r0, r1, r2
+; THUMB1-NEXT: add sp, #20
+; THUMB1-NEXT: pop {r4, r5, r6, r7, pc}
+;
+; THUMB2-LABEL: ucmp_8_128:
+; THUMB2: @ %bb.0:
+; THUMB2-NEXT: push {r4, r5, r6, lr}
+; THUMB2-NEXT: add.w lr, sp, #16
+; THUMB2-NEXT: ldr r4, [sp, #28]
+; THUMB2-NEXT: movs r5, #0
+; THUMB2-NEXT: ldm.w lr, {r9, r12, lr}
+; THUMB2-NEXT: subs.w r6, r0, r9
+; THUMB2-NEXT: sbcs.w r6, r1, r12
+; THUMB2-NEXT: sbcs.w r6, r2, lr
+; THUMB2-NEXT: sbcs.w r6, r3, r4
+; THUMB2-NEXT: mov.w r6, #0
+; THUMB2-NEXT: it lo
+; THUMB2-NEXT: movlo r6, #1
+; THUMB2-NEXT: subs.w r0, r9, r0
+; THUMB2-NEXT: sbcs.w r0, r12, r1
+; THUMB2-NEXT: sbcs.w r0, lr, r2
+; THUMB2-NEXT: sbcs.w r0, r4, r3
+; THUMB2-NEXT: it lo
+; THUMB2-NEXT: movlo r5, #1
+; THUMB2-NEXT: subs r0, r5, r6
+; THUMB2-NEXT: pop {r4, r5, r6, pc}
+;
+; V81M-LABEL: ucmp_8_128:
+; V81M: @ %bb.0:
+; V81M-NEXT: .save {r4, r5, r6, lr}
+; V81M-NEXT: push {r4, r5, r6, lr}
+; V81M-NEXT: ldrd r5, r4, [sp, #16]
+; V81M-NEXT: ldrd lr, r12, [sp, #24]
+; V81M-NEXT: subs r6, r0, r5
+; V81M-NEXT: sbcs.w r6, r1, r4
+; V81M-NEXT: sbcs.w r6, r2, lr
+; V81M-NEXT: sbcs.w r6, r3, r12
+; V81M-NEXT: cset r6, lo
+; V81M-NEXT: subs r0, r5, r0
+; V81M-NEXT: sbcs.w r0, r4, r1
+; V81M-NEXT: sbcs.w r0, lr, r2
+; V81M-NEXT: sbcs.w r0, r12, r3
+; V81M-NEXT: cset r0, lo
+; V81M-NEXT: subs r0, r0, r6
+; V81M-NEXT: pop {r4, r5, r6, pc}
%1 = call i8 @llvm.ucmp(i128 %x, i128 %y)
ret i8 %1
}
define i32 @ucmp_32_32(i32 %x, i32 %y) nounwind {
-; CHECK-LABEL: ucmp_32_32:
-; CHECK: @ %bb.0:
-; CHECK-NEXT: cmp r0, r1
-; CHECK-NEXT: mov.w r0, #0
-; CHECK-NEXT: mov.w r2, #0
-; CHECK-NEXT: it lo
-; CHECK-NEXT: movlo r0, #1
-; CHECK-NEXT: it hi
-; CHECK-NEXT: movhi r2, #1
-; CHECK-NEXT: subs r0, r2, r0
-; CHECK-NEXT: bx lr
+; THUMB1-LABEL: ucmp_32_32:
+; THUMB1: @ %bb.0:
+; THUMB1-NEXT: subs r2, r0, r1
+; THUMB1-NEXT: sbcs r2, r2
+; THUMB1-NEXT: cmp r1, r0
+; THUMB1-NEXT: sbcs r1, r1
+; THUMB1-NEXT: subs r0, r2, r1
+; THUMB1-NEXT: bx lr
+;
+; THUMB2-LABEL: ucmp_32_32:
+; THUMB2: @ %bb.0:
+; THUMB2-NEXT: subs r0, r0, r1
+; THUMB2-NEXT: it hi
+; THUMB2-NEXT: movhi r0, #1
+; THUMB2-NEXT: it lo
+; THUMB2-NEXT: movlo.w r0, #-1
+; THUMB2-NEXT: bx lr
+;
+; V81M-LABEL: ucmp_32_32:
+; V81M: @ %bb.0:
+; V81M-NEXT: cmp r0, r1
+; V81M-NEXT: cset r0, hi
+; V81M-NEXT: it lo
+; V81M-NEXT: movlo.w r0, #-1
+; V81M-NEXT: bx lr
%1 = call i32 @llvm.ucmp(i32 %x, i32 %y)
ret i32 %1
}
define i32 @ucmp_32_64(i64 %x, i64 %y) nounwind {
-; CHECK-LABEL: ucmp_32_64:
-; CHECK: @ %bb.0:
-; CHECK-NEXT: subs.w r12, r0, r2
-; CHECK-NEXT: mov.w r9, #0
-; CHECK-NEXT: sbcs.w r12, r1, r3
-; CHECK-NEXT: mov.w r12, #0
-; CHECK-NEXT: it lo
-; CHECK-NEXT: movlo.w r12, #1
-; CHECK-NEXT: subs r0, r2, r0
-; CHECK-NEXT: sbcs.w r0, r3, r1
-; CHECK-NEXT: it lo
-; CHECK-NEXT: movlo.w r9, #1
-; CHECK-NEXT: sub.w r0, r9, r12
-; CHECK-NEXT: bx lr
+; THUMB1-LABEL: ucmp_32_64:
+; THUMB1: @ %bb.0:
+; THUMB1-NEXT: .save {r4, r5, r6, lr}
+; THUMB1-NEXT: push {r4, r5, r6, lr}
+; THUMB1-NEXT: movs r4, #1
+; THUMB1-NEXT: movs r5, #0
+; THUMB1-NEXT: subs r6, r0, r2
+; THUMB1-NEXT: mov r6, r1
+; THUMB1-NEXT: sbcs r6, r3
+; THUMB1-NEXT: mov r6, r4
+; THUMB1-NEXT: blo .LBB6_2
+; THUMB1-NEXT: @ %bb.1:
+; THUMB1-NEXT: mov r6, r5
+; THUMB1-NEXT: .LBB6_2:
+; THUMB1-NEXT: subs r0, r2, r0
+; THUMB1-NEXT: sbcs r3, r1
+; THUMB1-NEXT: blo .LBB6_4
+; THUMB1-NEXT: @ %bb.3:
+; THUMB1-NEXT: mov r4, r5
+; THUMB1-NEXT: .LBB6_4:
+; THUMB1-NEXT: subs r0, r4, r6
+; THUMB1-NEXT: pop {r4, r5, r6, pc}
+;
+; THUMB2-LABEL: ucmp_32_64:
+; THUMB2: @ %bb.0:
+; THUMB2-NEXT: subs.w r12, r0, r2
+; THUMB2-NEXT: mov.w r9, #0
+; THUMB2-NEXT: sbcs.w r12, r1, r3
+; THUMB2-NEXT: mov.w r12, #0
+; THUMB2-NEXT: it lo
+; THUMB2-NEXT: movlo.w r12, #1
+; THUMB2-NEXT: subs r0, r2, r0
+; THUMB2-NEXT: sbcs.w r0, r3, r1
+; THUMB2-NEXT: it lo
+; THUMB2-NEXT: movlo.w r9, #1
+; THUMB2-NEXT: sub.w r0, r9, r12
+; THUMB2-NEXT: bx lr
+;
+; V81M-LABEL: ucmp_32_64:
+; V81M: @ %bb.0:
+; V81M-NEXT: subs.w r12, r0, r2
+; V81M-NEXT: sbcs.w r12, r1, r3
+; V81M-NEXT: cset r12, lo
+; V81M-NEXT: subs r0, r2, r0
+; V81M-NEXT: sbcs.w r0, r3, r1
+; V81M-NEXT: cset r0, lo
+; V81M-NEXT: sub.w r0, r0, r12
+; V81M-NEXT: bx lr
%1 = call i32 @llvm.ucmp(i64 %x, i64 %y)
ret i32 %1
}
define i64 @ucmp_64_64(i64 %x, i64 %y) nounwind {
-; CHECK-LABEL: ucmp_64_64:
-; CHECK: @ %bb.0:
-; CHECK-NEXT: subs.w r12, r0, r2
-; CHECK-NEXT: mov.w r9, #0
-; CHECK-NEXT: sbcs.w r12, r1, r3
-; CHECK-NEXT: mov.w r12, #0
-; CHECK-NEXT: it lo
-; CHECK-NEXT: movlo.w r12, #1
-; CHECK-NEXT: subs r0, r2, r0
-; CHECK-NEXT: sbcs.w r0, r3, r1
-; CHECK-NEXT: it lo
-; CHECK-NEXT: movlo.w r9, #1
-; CHECK-NEXT: sub.w r0, r9, r12
-; CHECK-NEXT: asrs r1, r0, #31
-; CHECK-NEXT: bx lr
+; THUMB1-LABEL: ucmp_64_64:
+; THUMB1: @ %bb.0:
+; THUMB1-NEXT: .save {r4, r5, r6, lr}
+; THUMB1-NEXT: push {r4, r5, r6, lr}
+; THUMB1-NEXT: movs r4, #1
+; THUMB1-NEXT: movs r5, #0
+; THUMB1-NEXT: subs r6, r0, r2
+; THUMB1-NEXT: mov r6, r1
+; THUMB1-NEXT: sbcs r6, r3
+; THUMB1-NEXT: mov r6, r4
+; THUMB1-NEXT: blo .LBB7_2
+; THUMB1-NEXT: @ %bb.1:
+; THUMB1-NEXT: mov r6, r5
+; THUMB1-NEXT: .LBB7_2:
+; THUMB1-NEXT: subs r0, r2, r0
+; THUMB1-NEXT: sbcs r3, r1
+; THUMB1-NEXT: blo .LBB7_4
+; THUMB1-NEXT: @ %bb.3:
+; THUMB1-NEXT: mov r4, r5
+; THUMB1-NEXT: .LBB7_4:
+; THUMB1-NEXT: subs r0, r4, r6
+; THUMB1-NEXT: asrs r1, r0, #31
+; THUMB1-NEXT: pop {r4, r5, r6, pc}
+;
+; THUMB2-LABEL: ucmp_64_64:
+; THUMB2: @ %bb.0:
+; THUMB2-NEXT: subs.w r12, r0, r2
+; THUMB2-NEXT: mov.w r9, #0
+; THUMB2-NEXT: sbcs.w r12, r1, r3
+; THUMB2-NEXT: mov.w r12, #0
+; THUMB2-NEXT: it lo
+; THUMB2-NEXT: movlo.w r12, #1
+; THUMB2-NEXT: subs r0, r2, r0
+; THUMB2-NEXT: sbcs.w r0, r3, r1
+; THUMB2-NEXT: it lo
+; THUMB2-NEXT: movlo.w r9, #1
+; THUMB2-NEXT: sub.w r0, r9, r12
+; THUMB2-NEXT: asrs r1, r0, #31
+; THUMB2-NEXT: bx lr
+;
+; V81M-LABEL: ucmp_64_64:
+; V81M: @ %bb.0:
+; V81M-NEXT: subs.w r12, r0, r2
+; V81M-NEXT: sbcs.w r12, r1, r3
+; V81M-NEXT: cset r12, lo
+; V81M-NEXT: subs r0, r2, r0
+; V81M-NEXT: sbcs.w r0, r3, r1
+; V81M-NEXT: cset r0, lo
+; V81M-NEXT: sub.w r0, r0, r12
+; V81M-NEXT: asrs r1, r0, #31
+; V81M-NEXT: bx lr
%1 = call i64 @llvm.ucmp(i64 %x, i64 %y)
ret i64 %1
}
More information about the llvm-commits
mailing list