[llvm] [ARM] Set operation action for UMULO and SMULO as Custom if not Thumb1 (PR #154253)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 18 19:54:34 PDT 2025
https://github.com/AZero13 updated https://github.com/llvm/llvm-project/pull/154253
>From 87edae960080a479c02023c5a09e3e1a12d54d04 Mon Sep 17 00:00:00 2001
From: AZero13 <gfunni234 at gmail.com>
Date: Mon, 18 Aug 2025 16:42:15 -0400
Subject: [PATCH] [ARM] Set operation action for UMULO and SMULO as Custom if
not Thumb1
Specify a custom lowering for SMULO and UMULO, as is already done for AArch64, but only when the subtarget is not Thumb1-only.
---
llvm/lib/Target/ARM/ARMISelLowering.cpp | 101 ++++--
llvm/lib/Target/ARM/ARMISelLowering.h | 3 +-
llvm/test/CodeGen/ARM/select.ll | 10 +-
.../ARM/umulo-128-legalisation-lowering.ll | 335 +++++++++---------
.../Thumb2/umulo-128-legalisation-lowering.ll | 180 +++++-----
.../Thumb2/umulo-64-legalisation-lowering.ll | 33 +-
6 files changed, 336 insertions(+), 326 deletions(-)
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 830156359e9e8..66e452126ff41 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -986,6 +986,11 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
setOperationAction(ISD::SSUBO, MVT::i32, Custom);
setOperationAction(ISD::USUBO, MVT::i32, Custom);
+ if (!Subtarget->isThumb1Only()) {
+ setOperationAction(ISD::UMULO, MVT::i32, Custom);
+ setOperationAction(ISD::SMULO, MVT::i32, Custom);
+ }
+
setOperationAction(ISD::UADDO_CARRY, MVT::i32, Custom);
setOperationAction(ISD::USUBO_CARRY, MVT::i32, Custom);
if (Subtarget->hasDSP()) {
@@ -4879,7 +4884,7 @@ ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
Value = DAG.getNode(ISD::UMUL_LOHI, dl,
DAG.getVTList(Op.getValueType(), Op.getValueType()),
LHS, RHS);
- OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, Value.getValue(1),
+ OverflowCmp = DAG.getNode(ARMISD::CMPZ, dl, FlagsVT, Value.getValue(1),
DAG.getConstant(0, dl, MVT::i32));
Value = Value.getValue(0); // We only want the low 32 bits for the result.
break;
@@ -4890,7 +4895,7 @@ ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
Value = DAG.getNode(ISD::SMUL_LOHI, dl,
DAG.getVTList(Op.getValueType(), Op.getValueType()),
LHS, RHS);
- OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, Value.getValue(1),
+ OverflowCmp = DAG.getNode(ARMISD::CMPZ, dl, FlagsVT, Value.getValue(1),
DAG.getNode(ISD::SRA, dl, Op.getValueType(),
Value.getValue(0),
DAG.getConstant(31, dl, MVT::i32)));
@@ -4901,28 +4906,6 @@ ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
return std::make_pair(Value, OverflowCmp);
}
-SDValue
-ARMTargetLowering::LowerSignedALUO(SDValue Op, SelectionDAG &DAG) const {
- // Let legalize expand this if it isn't a legal type yet.
- if (!isTypeLegal(Op.getValueType()))
- return SDValue();
-
- SDValue Value, OverflowCmp;
- SDValue ARMcc;
- std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc);
- SDLoc dl(Op);
- // We use 0 and 1 as false and true values.
- SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
- SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
- EVT VT = Op.getValueType();
-
- SDValue Overflow =
- DAG.getNode(ARMISD::CMOV, dl, VT, TVal, FVal, ARMcc, OverflowCmp);
-
- SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
- return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
-}
-
static SDValue ConvertBooleanCarryToCarryFlag(SDValue BoolCarry,
SelectionDAG &DAG) {
SDLoc DL(BoolCarry);
@@ -4947,8 +4930,7 @@ static SDValue ConvertCarryFlagToBooleanCarry(SDValue Flags, EVT VT,
DAG.getConstant(0, DL, MVT::i32), Flags);
}
-SDValue ARMTargetLowering::LowerUnsignedALUO(SDValue Op,
- SelectionDAG &DAG) const {
+SDValue ARMTargetLowering::LowerALUO(SDValue Op, SelectionDAG &DAG) const {
// Let legalize expand this if it isn't a legal type yet.
if (!isTypeLegal(Op.getValueType()))
return SDValue();
@@ -4962,14 +4944,12 @@ SDValue ARMTargetLowering::LowerUnsignedALUO(SDValue Op,
SDValue Value;
SDValue Overflow;
switch (Op.getOpcode()) {
- default:
- llvm_unreachable("Unknown overflow instruction!");
case ISD::UADDO:
Value = DAG.getNode(ARMISD::ADDC, dl, VTs, LHS, RHS);
// Convert the carry flag into a boolean value.
Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG);
break;
- case ISD::USUBO: {
+ case ISD::USUBO:
Value = DAG.getNode(ARMISD::SUBC, dl, VTs, LHS, RHS);
// Convert the carry flag into a boolean value.
Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG);
@@ -4978,6 +4958,56 @@ SDValue ARMTargetLowering::LowerUnsignedALUO(SDValue Op,
Overflow = DAG.getNode(ISD::SUB, dl, MVT::i32,
DAG.getConstant(1, dl, MVT::i32), Overflow);
break;
+ case ISD::UMULO: {
+ // We generate a UMUL_LOHI and then check if the high word is 0.
+ Value = DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), LHS, RHS);
+ SDValue HighWord = Value.getValue(1);
+ Value = Value.getValue(0); // We only want the low 32 bits for the result.
+
+ // Compare high word with 0
+ SDValue Flags = DAG.getNode(ARMISD::CMPZ, dl, FlagsVT, HighWord,
+ DAG.getConstant(0, dl, MVT::i32));
+
+ ARMCC::CondCodes NECond = ARMCC::NE;
+ SDValue NECondValue = DAG.getConstant(NECond, dl, MVT::i32);
+ Overflow =
+ DAG.getNode(ARMISD::CMOV, dl, MVT::i32, HighWord,
+ DAG.getConstant(1, dl, MVT::i32), NECondValue, Flags);
+ break;
+ }
+ case ISD::SMULO: {
+ // We generate a SMUL_LOHI and then check if all the bits of the high word
+ // are the same as the sign bit of the low word.
+ Value = DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), LHS, RHS);
+ SDValue HighWord = Value.getValue(1);
+ SDValue LowWord = Value.getValue(0);
+ Value = LowWord; // We only want the low 32 bits for the result.
+
+ // Compute sign bit of low word: LowWord >> 31
+ SDValue SignBit = DAG.getNode(ISD::SRA, dl, VT, LowWord,
+ DAG.getConstant(31, dl, MVT::i32));
+
+ // Subtract sign bit from high word: HighWord - SignBit
+ SDValue SubResult = DAG.getNode(ARMISD::SUBC, dl, VTs, HighWord, SignBit);
+ SDValue Flags = SubResult.getValue(1);
+
+ ARMCC::CondCodes NECond = ARMCC::NE;
+ SDValue NECondValue = DAG.getConstant(NECond, dl, MVT::i32);
+ Overflow =
+ DAG.getNode(ARMISD::CMOV, dl, MVT::i32, SubResult, DAG.getConstant(1, dl, MVT::i32), NECondValue, Flags);
+ break;
+ }
+ default: {
+ // Handle other operations with getARMXALUOOp
+ SDValue OverflowCmp, ARMcc;
+ std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc);
+ // We use 0 and 1 as false and true values.
+ SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
+ SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
+
+ Overflow =
+ DAG.getNode(ARMISD::CMOV, dl, MVT::i32, TVal, FVal, ARMcc, OverflowCmp);
+ break;
}
}
@@ -5045,9 +5075,11 @@ SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
unsigned Opc = Cond.getOpcode();
+ bool OptimizeMul =
+ (Opc == ISD::SMULO || Opc == ISD::UMULO) && !Subtarget->isThumb1Only();
if (Cond.getResNo() == 1 &&
(Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
- Opc == ISD::USUBO)) {
+ Opc == ISD::USUBO || OptimizeMul)) {
if (!isTypeLegal(Cond->getValueType(0)))
return SDValue();
@@ -10653,12 +10685,13 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::UADDO_CARRY:
case ISD::USUBO_CARRY:
return LowerUADDSUBO_CARRY(Op, DAG);
- case ISD::SADDO:
- case ISD::SSUBO:
- return LowerSignedALUO(Op, DAG);
case ISD::UADDO:
case ISD::USUBO:
- return LowerUnsignedALUO(Op, DAG);
+ case ISD::UMULO:
+ case ISD::SADDO:
+ case ISD::SSUBO:
+ case ISD::SMULO:
+ return LowerALUO(Op, DAG);
case ISD::SADDSAT:
case ISD::SSUBSAT:
case ISD::UADDSAT:
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index 778595e93f84a..3e710f6d60a12 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -866,8 +866,7 @@ class VectorType;
SDValue LowerGlobalTLSAddressDarwin(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalTLSAddressWindows(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSignedALUO(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerUnsignedALUO(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerALUO(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/test/CodeGen/ARM/select.ll b/llvm/test/CodeGen/ARM/select.ll
index 48d6ee925d939..3f186f69c5aa3 100644
--- a/llvm/test/CodeGen/ARM/select.ll
+++ b/llvm/test/CodeGen/ARM/select.ll
@@ -440,17 +440,15 @@ define float @f12(i32 %a, i32 %b) nounwind uwtable readnone ssp {
define i1 @test_overflow_recombine(i32 %in1, i32 %in2) {
; CHECK-LABEL: test_overflow_recombine:
; CHECK: @ %bb.0:
-; CHECK-NEXT: mul r2, r0, r1
-; CHECK-NEXT: smmul r0, r0, r1
-; CHECK-NEXT: subs r0, r0, r2, asr #31
+; CHECK-NEXT: smull r0, r1, r0, r1
+; CHECK-NEXT: subs r0, r1, r0, asr #31
; CHECK-NEXT: movwne r0, #1
; CHECK-NEXT: bx lr
;
; CHECK-NEON-LABEL: test_overflow_recombine:
; CHECK-NEON: @ %bb.0:
-; CHECK-NEON-NEXT: mul r2, r0, r1
-; CHECK-NEON-NEXT: smmul r0, r0, r1
-; CHECK-NEON-NEXT: subs.w r0, r0, r2, asr #31
+; CHECK-NEON-NEXT: smull r0, r1, r0, r1
+; CHECK-NEON-NEXT: subs.w r0, r1, r0, asr #31
; CHECK-NEON-NEXT: it ne
; CHECK-NEON-NEXT: movne r0, #1
; CHECK-NEON-NEXT: bx lr
diff --git a/llvm/test/CodeGen/ARM/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/ARM/umulo-128-legalisation-lowering.ll
index 4eb82c80e2bff..1bacd3258f698 100644
--- a/llvm/test/CodeGen/ARM/umulo-128-legalisation-lowering.ll
+++ b/llvm/test/CodeGen/ARM/umulo-128-legalisation-lowering.ll
@@ -6,210 +6,201 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
; ARMV6-LABEL: muloti_test:
; ARMV6: @ %bb.0: @ %start
; ARMV6-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; ARMV6-NEXT: sub sp, sp, #28
-; ARMV6-NEXT: ldr r4, [sp, #72]
-; ARMV6-NEXT: mov r7, r0
-; ARMV6-NEXT: str r0, [sp, #4] @ 4-byte Spill
-; ARMV6-NEXT: ldr r12, [sp, #64]
-; ARMV6-NEXT: umull r1, r0, r2, r4
+; ARMV6-NEXT: sub sp, sp, #20
+; ARMV6-NEXT: ldr r4, [sp, #64]
+; ARMV6-NEXT: mov lr, r0
; ARMV6-NEXT: ldr r5, [sp, #68]
-; ARMV6-NEXT: str r1, [r7]
-; ARMV6-NEXT: ldr r1, [sp, #76]
-; ARMV6-NEXT: umull r7, r6, r1, r12
-; ARMV6-NEXT: str r6, [sp, #8] @ 4-byte Spill
-; ARMV6-NEXT: umull r6, r9, r5, r4
-; ARMV6-NEXT: add r7, r6, r7
-; ARMV6-NEXT: umull r4, r6, r12, r4
-; ARMV6-NEXT: str r4, [sp, #16] @ 4-byte Spill
-; ARMV6-NEXT: mov r4, #0
-; ARMV6-NEXT: adds r8, r6, r7
-; ARMV6-NEXT: ldr r6, [sp, #80]
-; ARMV6-NEXT: adc r7, r4, #0
-; ARMV6-NEXT: ldr r4, [sp, #84]
-; ARMV6-NEXT: str r7, [sp, #24] @ 4-byte Spill
-; ARMV6-NEXT: umull r12, lr, r3, r6
-; ARMV6-NEXT: umull r11, r7, r4, r2
-; ARMV6-NEXT: add r12, r11, r12
-; ARMV6-NEXT: umull r11, r10, r6, r2
-; ARMV6-NEXT: adds r12, r10, r12
-; ARMV6-NEXT: mov r10, #0
-; ARMV6-NEXT: adc r6, r10, #0
-; ARMV6-NEXT: str r6, [sp, #20] @ 4-byte Spill
-; ARMV6-NEXT: ldr r6, [sp, #16] @ 4-byte Reload
-; ARMV6-NEXT: adds r6, r6, r11
-; ARMV6-NEXT: str r6, [sp, #12] @ 4-byte Spill
-; ARMV6-NEXT: adc r6, r8, r12
-; ARMV6-NEXT: str r6, [sp, #16] @ 4-byte Spill
-; ARMV6-NEXT: ldr r6, [sp, #72]
-; ARMV6-NEXT: mov r12, #0
-; ARMV6-NEXT: umull r2, r8, r2, r1
-; ARMV6-NEXT: umlal r0, r12, r3, r6
-; ARMV6-NEXT: adds r0, r2, r0
-; ARMV6-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
-; ARMV6-NEXT: adcs r8, r12, r8
-; ARMV6-NEXT: adc r12, r10, #0
-; ARMV6-NEXT: cmp lr, #0
-; ARMV6-NEXT: str r0, [r2, #4]
-; ARMV6-NEXT: movne lr, #1
-; ARMV6-NEXT: ldr r11, [sp, #8] @ 4-byte Reload
-; ARMV6-NEXT: cmp r7, #0
-; ARMV6-NEXT: movne r7, #1
-; ARMV6-NEXT: ldr r0, [sp, #64]
-; ARMV6-NEXT: cmp r11, #0
-; ARMV6-NEXT: umlal r8, r12, r3, r1
+; ARMV6-NEXT: umull r1, r12, r2, r4
+; ARMV6-NEXT: str r1, [r0]
+; ARMV6-NEXT: ldr r1, [sp, #60]
+; ARMV6-NEXT: mul r7, r1, r4
+; ARMV6-NEXT: subs r6, r7, #0
+; ARMV6-NEXT: mov r0, r6
+; ARMV6-NEXT: movne r0, #1
+; ARMV6-NEXT: str r0, [sp, #16] @ 4-byte Spill
+; ARMV6-NEXT: ldr r0, [sp, #56]
+; ARMV6-NEXT: mul r7, r5, r0
+; ARMV6-NEXT: subs r11, r7, #0
+; ARMV6-NEXT: umull r0, r7, r0, r4
+; ARMV6-NEXT: add r6, r6, r11
; ARMV6-NEXT: movne r11, #1
-; ARMV6-NEXT: cmp r9, #0
-; ARMV6-NEXT: movne r9, #1
-; ARMV6-NEXT: orrs r10, r0, r5
-; ARMV6-NEXT: ldr r0, [sp, #80]
+; ARMV6-NEXT: str r0, [sp, #4] @ 4-byte Spill
+; ARMV6-NEXT: mov r0, #0
+; ARMV6-NEXT: adds r4, r7, r6
+; ARMV6-NEXT: ldr r6, [sp, #76]
+; ARMV6-NEXT: adc r0, r0, #0
+; ARMV6-NEXT: str r0, [sp, #12] @ 4-byte Spill
+; ARMV6-NEXT: ldr r0, [sp, #72]
+; ARMV6-NEXT: mul r7, r6, r2
+; ARMV6-NEXT: str r4, [sp] @ 4-byte Spill
+; ARMV6-NEXT: mov r4, #0
+; ARMV6-NEXT: subs r9, r7, #0
+; ARMV6-NEXT: mul r7, r3, r0
+; ARMV6-NEXT: umull r0, r8, r0, r2
+; ARMV6-NEXT: mov r10, r9
; ARMV6-NEXT: movne r10, #1
-; ARMV6-NEXT: ldr r6, [sp, #12] @ 4-byte Reload
-; ARMV6-NEXT: orrs r0, r0, r4
-; ARMV6-NEXT: movne r0, #1
-; ARMV6-NEXT: cmp r4, #0
+; ARMV6-NEXT: subs r7, r7, #0
+; ARMV6-NEXT: add r9, r9, r7
+; ARMV6-NEXT: movne r7, #1
+; ARMV6-NEXT: adds r9, r8, r9
+; ARMV6-NEXT: adc r8, r4, #0
+; ARMV6-NEXT: ldr r4, [sp, #4] @ 4-byte Reload
+; ARMV6-NEXT: str r8, [sp, #8] @ 4-byte Spill
+; ARMV6-NEXT: adds r0, r4, r0
+; ARMV6-NEXT: str r0, [sp, #4] @ 4-byte Spill
+; ARMV6-NEXT: ldr r0, [sp] @ 4-byte Reload
+; ARMV6-NEXT: ldr r4, [sp, #64]
+; ARMV6-NEXT: adc r8, r0, r9
+; ARMV6-NEXT: mov r0, #0
+; ARMV6-NEXT: umull r2, r9, r2, r5
+; ARMV6-NEXT: umlal r12, r0, r3, r4
+; ARMV6-NEXT: adds r2, r2, r12
+; ARMV6-NEXT: str r2, [lr, #4]
+; ARMV6-NEXT: adcs r0, r0, r9
+; ARMV6-NEXT: mov r2, #0
+; ARMV6-NEXT: adc r9, r2, #0
+; ARMV6-NEXT: ldr r2, [sp, #56]
+; ARMV6-NEXT: umlal r0, r9, r3, r5
+; ARMV6-NEXT: ldr r12, [sp, #4] @ 4-byte Reload
+; ARMV6-NEXT: orrs r4, r2, r1
+; ARMV6-NEXT: ldr r2, [sp, #72]
; ARMV6-NEXT: movne r4, #1
+; ARMV6-NEXT: orrs r2, r2, r6
+; ARMV6-NEXT: movne r2, #1
+; ARMV6-NEXT: cmp r6, #0
+; ARMV6-NEXT: movne r6, #1
; ARMV6-NEXT: cmp r3, #0
; ARMV6-NEXT: movne r3, #1
-; ARMV6-NEXT: cmp r5, #0
-; ARMV6-NEXT: movne r5, #1
; ARMV6-NEXT: cmp r1, #0
; ARMV6-NEXT: movne r1, #1
-; ARMV6-NEXT: adds r6, r8, r6
-; ARMV6-NEXT: str r6, [r2, #8]
-; ARMV6-NEXT: and r1, r5, r1
-; ARMV6-NEXT: ldr r6, [sp, #16] @ 4-byte Reload
-; ARMV6-NEXT: orr r1, r1, r9
+; ARMV6-NEXT: cmp r5, #0
+; ARMV6-NEXT: movne r5, #1
+; ARMV6-NEXT: adds r0, r0, r12
+; ARMV6-NEXT: str r0, [lr, #8]
+; ARMV6-NEXT: adcs r0, r9, r8
+; ARMV6-NEXT: str r0, [lr, #12]
+; ARMV6-NEXT: and r0, r4, r2
+; ARMV6-NEXT: ldr r2, [sp, #16] @ 4-byte Reload
+; ARMV6-NEXT: and r1, r1, r5
+; ARMV6-NEXT: orr r1, r1, r2
+; ARMV6-NEXT: ldr r2, [sp, #12] @ 4-byte Reload
; ARMV6-NEXT: orr r1, r1, r11
-; ARMV6-NEXT: and r0, r10, r0
-; ARMV6-NEXT: adcs r6, r12, r6
-; ARMV6-NEXT: str r6, [r2, #12]
-; ARMV6-NEXT: ldr r6, [sp, #24] @ 4-byte Reload
-; ARMV6-NEXT: orr r1, r1, r6
+; ARMV6-NEXT: orr r1, r1, r2
+; ARMV6-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
; ARMV6-NEXT: orr r0, r0, r1
-; ARMV6-NEXT: and r1, r4, r3
+; ARMV6-NEXT: and r1, r6, r3
+; ARMV6-NEXT: orr r1, r1, r10
; ARMV6-NEXT: orr r1, r1, r7
-; ARMV6-NEXT: ldr r3, [sp, #20] @ 4-byte Reload
-; ARMV6-NEXT: orr r1, r1, lr
-; ARMV6-NEXT: orr r1, r1, r3
+; ARMV6-NEXT: orr r1, r1, r2
; ARMV6-NEXT: orr r0, r0, r1
; ARMV6-NEXT: mov r1, #0
; ARMV6-NEXT: adc r1, r1, #0
; ARMV6-NEXT: orr r0, r0, r1
; ARMV6-NEXT: and r0, r0, #1
-; ARMV6-NEXT: strb r0, [r2, #16]
-; ARMV6-NEXT: add sp, sp, #28
+; ARMV6-NEXT: strb r0, [lr, #16]
+; ARMV6-NEXT: add sp, sp, #20
; ARMV6-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
;
; ARMV7-LABEL: muloti_test:
; ARMV7: @ %bb.0: @ %start
; ARMV7-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; ARMV7-NEXT: sub sp, sp, #44
+; ARMV7-NEXT: sub sp, sp, #36
+; ARMV7-NEXT: ldr r5, [sp, #84]
+; ARMV7-NEXT: mov r12, r0
+; ARMV7-NEXT: ldr r1, [sp, #80]
+; ARMV7-NEXT: ldr r10, [sp, #72]
+; ARMV7-NEXT: umull r7, r6, r2, r5
+; ARMV7-NEXT: ldr r4, [sp, #76]
; ARMV7-NEXT: ldr r8, [sp, #88]
-; ARMV7-NEXT: mov r9, r0
-; ARMV7-NEXT: ldr r7, [sp, #96]
-; ARMV7-NEXT: ldr lr, [sp, #100]
-; ARMV7-NEXT: umull r0, r5, r2, r8
-; ARMV7-NEXT: ldr r4, [sp, #80]
-; ARMV7-NEXT: str r0, [sp, #32] @ 4-byte Spill
-; ARMV7-NEXT: umull r1, r0, r3, r7
-; ARMV7-NEXT: str r0, [sp, #4] @ 4-byte Spill
-; ARMV7-NEXT: umull r0, r11, lr, r2
-; ARMV7-NEXT: str r1, [sp, #20] @ 4-byte Spill
-; ARMV7-NEXT: ldr r1, [sp, #92]
-; ARMV7-NEXT: str r0, [sp] @ 4-byte Spill
-; ARMV7-NEXT: umull r0, r10, r7, r2
-; ARMV7-NEXT: mov r7, r1
-; ARMV7-NEXT: umull r6, r12, r1, r4
-; ARMV7-NEXT: str r0, [sp, #40] @ 4-byte Spill
-; ARMV7-NEXT: ldr r0, [sp, #84]
-; ARMV7-NEXT: str r6, [sp, #24] @ 4-byte Spill
-; ARMV7-NEXT: umull r6, r1, r0, r8
+; ARMV7-NEXT: umull lr, r0, r2, r1
; ARMV7-NEXT: str r6, [sp, #16] @ 4-byte Spill
-; ARMV7-NEXT: umull r6, r2, r2, r7
-; ARMV7-NEXT: mov r7, r4
-; ARMV7-NEXT: str r6, [sp, #8] @ 4-byte Spill
-; ARMV7-NEXT: str r2, [sp, #12] @ 4-byte Spill
-; ARMV7-NEXT: umull r2, r6, r4, r8
-; ARMV7-NEXT: str r2, [sp, #36] @ 4-byte Spill
-; ARMV7-NEXT: ldr r2, [sp, #32] @ 4-byte Reload
-; ARMV7-NEXT: str r6, [sp, #28] @ 4-byte Spill
; ARMV7-NEXT: mov r6, #0
-; ARMV7-NEXT: str r2, [r9]
-; ARMV7-NEXT: umlal r5, r6, r3, r8
-; ARMV7-NEXT: ldr r2, [sp, #20] @ 4-byte Reload
-; ARMV7-NEXT: ldr r4, [sp] @ 4-byte Reload
-; ARMV7-NEXT: add r4, r4, r2
-; ARMV7-NEXT: adds r2, r10, r4
-; ARMV7-NEXT: str r2, [sp, #20] @ 4-byte Spill
-; ARMV7-NEXT: mov r2, #0
-; ARMV7-NEXT: adc r2, r2, #0
-; ARMV7-NEXT: cmp r12, #0
-; ARMV7-NEXT: str r2, [sp, #32] @ 4-byte Spill
-; ARMV7-NEXT: movwne r12, #1
-; ARMV7-NEXT: cmp r1, #0
-; ARMV7-NEXT: ldr r2, [sp, #96]
-; ARMV7-NEXT: movwne r1, #1
-; ARMV7-NEXT: orrs r10, r7, r0
-; ARMV7-NEXT: movwne r10, #1
-; ARMV7-NEXT: orrs r7, r2, lr
-; ARMV7-NEXT: ldr r2, [sp, #92]
+; ARMV7-NEXT: mul r9, r4, r1
+; ARMV7-NEXT: str r7, [sp, #4] @ 4-byte Spill
+; ARMV7-NEXT: umlal r0, r6, r3, r1
+; ARMV7-NEXT: umull r1, r7, r10, r1
+; ARMV7-NEXT: mul r11, r5, r10
+; ARMV7-NEXT: str r1, [sp, #32] @ 4-byte Spill
+; ARMV7-NEXT: mul r1, r3, r8
+; ARMV7-NEXT: str r7, [sp, #12] @ 4-byte Spill
+; ARMV7-NEXT: umull r7, r10, r8, r2
+; ARMV7-NEXT: subs r5, r11, #0
+; ARMV7-NEXT: str r1, [sp, #20] @ 4-byte Spill
+; ARMV7-NEXT: ldr r1, [sp, #92]
+; ARMV7-NEXT: str r10, [sp, #24] @ 4-byte Spill
+; ARMV7-NEXT: str r7, [sp, #28] @ 4-byte Spill
+; ARMV7-NEXT: mul r2, r1, r2
+; ARMV7-NEXT: str r2, [sp, #8] @ 4-byte Spill
+; ARMV7-NEXT: str lr, [r12]
+; ARMV7-NEXT: str r5, [sp] @ 4-byte Spill
+; ARMV7-NEXT: movwne r5, #1
+; ARMV7-NEXT: subs lr, r9, #0
+; ARMV7-NEXT: ldr r2, [sp, #72]
+; ARMV7-NEXT: ldr r9, [sp, #84]
+; ARMV7-NEXT: mov r7, lr
+; ARMV7-NEXT: ldr r11, [sp, #4] @ 4-byte Reload
; ARMV7-NEXT: movwne r7, #1
-; ARMV7-NEXT: cmp r0, #0
-; ARMV7-NEXT: movwne r0, #1
-; ARMV7-NEXT: cmp r2, #0
-; ARMV7-NEXT: mov r4, r2
-; ARMV7-NEXT: mov r8, r2
-; ARMV7-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
-; ARMV7-NEXT: movwne r4, #1
-; ARMV7-NEXT: and r0, r0, r4
-; ARMV7-NEXT: mov r4, #0
-; ARMV7-NEXT: adds r5, r2, r5
-; ARMV7-NEXT: str r5, [r9, #4]
-; ARMV7-NEXT: orr r0, r0, r1
-; ARMV7-NEXT: ldr r1, [sp, #24] @ 4-byte Reload
-; ARMV7-NEXT: ldr r2, [sp, #16] @ 4-byte Reload
-; ARMV7-NEXT: and r5, r10, r7
-; ARMV7-NEXT: orr r0, r0, r12
-; ARMV7-NEXT: mov r12, #0
-; ARMV7-NEXT: add r1, r2, r1
-; ARMV7-NEXT: ldr r2, [sp, #12] @ 4-byte Reload
-; ARMV7-NEXT: adcs r2, r6, r2
-; ARMV7-NEXT: ldr r6, [sp, #28] @ 4-byte Reload
-; ARMV7-NEXT: adc r7, r4, #0
-; ARMV7-NEXT: adds r1, r6, r1
-; ARMV7-NEXT: umlal r2, r7, r3, r8
-; ARMV7-NEXT: adc r4, r4, #0
-; ARMV7-NEXT: orr r0, r0, r4
-; ARMV7-NEXT: orr r0, r5, r0
-; ARMV7-NEXT: ldr r4, [sp, #40] @ 4-byte Reload
-; ARMV7-NEXT: ldr r5, [sp, #36] @ 4-byte Reload
-; ARMV7-NEXT: adds r5, r5, r4
-; ARMV7-NEXT: ldr r4, [sp, #20] @ 4-byte Reload
-; ARMV7-NEXT: adc r1, r1, r4
-; ARMV7-NEXT: ldr r4, [sp, #4] @ 4-byte Reload
+; ARMV7-NEXT: orrs r10, r2, r4
+; ARMV7-NEXT: movwne r10, #1
+; ARMV7-NEXT: orrs r8, r8, r1
+; ARMV7-NEXT: movwne r8, #1
; ARMV7-NEXT: cmp r4, #0
; ARMV7-NEXT: movwne r4, #1
+; ARMV7-NEXT: cmp r9, #0
+; ARMV7-NEXT: movwne r9, #1
+; ARMV7-NEXT: and r4, r4, r9
+; ARMV7-NEXT: adds r0, r11, r0
+; ARMV7-NEXT: str r0, [r12, #4]
+; ARMV7-NEXT: orr r4, r4, r7
+; ARMV7-NEXT: ldr r7, [sp, #16] @ 4-byte Reload
+; ARMV7-NEXT: ldr r2, [sp] @ 4-byte Reload
+; ARMV7-NEXT: orr r4, r4, r5
+; ARMV7-NEXT: ldr r5, [sp, #84]
+; ARMV7-NEXT: adcs r7, r6, r7
+; ARMV7-NEXT: add r2, lr, r2
+; ARMV7-NEXT: mov lr, #0
+; ARMV7-NEXT: adc r6, lr, #0
+; ARMV7-NEXT: and r0, r10, r8
+; ARMV7-NEXT: umlal r7, r6, r3, r5
+; ARMV7-NEXT: ldr r5, [sp, #12] @ 4-byte Reload
+; ARMV7-NEXT: mov r10, #0
+; ARMV7-NEXT: adds r9, r5, r2
+; ARMV7-NEXT: adc r5, lr, #0
+; ARMV7-NEXT: ldr r2, [sp, #24] @ 4-byte Reload
+; ARMV7-NEXT: orr r5, r4, r5
+; ARMV7-NEXT: orr lr, r0, r5
+; ARMV7-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
+; ARMV7-NEXT: subs r5, r0, #0
+; ARMV7-NEXT: ldr r0, [sp, #20] @ 4-byte Reload
+; ARMV7-NEXT: mov r4, r5
+; ARMV7-NEXT: movwne r4, #1
+; ARMV7-NEXT: subs r0, r0, #0
+; ARMV7-NEXT: add r5, r5, r0
+; ARMV7-NEXT: movwne r0, #1
+; ARMV7-NEXT: adds r11, r2, r5
+; ARMV7-NEXT: ldr r2, [sp, #32] @ 4-byte Reload
+; ARMV7-NEXT: ldr r5, [sp, #28] @ 4-byte Reload
+; ARMV7-NEXT: adc r8, r10, #0
+; ARMV7-NEXT: adds r2, r2, r5
+; ARMV7-NEXT: adc r5, r9, r11
; ARMV7-NEXT: cmp r3, #0
; ARMV7-NEXT: movwne r3, #1
-; ARMV7-NEXT: cmp lr, #0
-; ARMV7-NEXT: movwne lr, #1
-; ARMV7-NEXT: cmp r11, #0
-; ARMV7-NEXT: movwne r11, #1
-; ARMV7-NEXT: adds r2, r2, r5
-; ARMV7-NEXT: and r3, lr, r3
-; ARMV7-NEXT: str r2, [r9, #8]
-; ARMV7-NEXT: adcs r1, r7, r1
-; ARMV7-NEXT: str r1, [r9, #12]
-; ARMV7-NEXT: orr r1, r3, r11
-; ARMV7-NEXT: ldr r2, [sp, #32] @ 4-byte Reload
+; ARMV7-NEXT: cmp r1, #0
+; ARMV7-NEXT: movwne r1, #1
+; ARMV7-NEXT: adds r2, r7, r2
+; ARMV7-NEXT: and r1, r1, r3
+; ARMV7-NEXT: str r2, [r12, #8]
; ARMV7-NEXT: orr r1, r1, r4
-; ARMV7-NEXT: orr r1, r1, r2
-; ARMV7-NEXT: orr r0, r0, r1
-; ARMV7-NEXT: adc r1, r12, #0
+; ARMV7-NEXT: adcs r2, r6, r5
+; ARMV7-NEXT: orr r0, r1, r0
+; ARMV7-NEXT: adc r1, r10, #0
+; ARMV7-NEXT: orr r0, r0, r8
+; ARMV7-NEXT: str r2, [r12, #12]
+; ARMV7-NEXT: orr r0, lr, r0
; ARMV7-NEXT: orr r0, r0, r1
; ARMV7-NEXT: and r0, r0, #1
-; ARMV7-NEXT: strb r0, [r9, #16]
-; ARMV7-NEXT: add sp, sp, #44
+; ARMV7-NEXT: strb r0, [r12, #16]
+; ARMV7-NEXT: add sp, sp, #36
; ARMV7-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
start:
%0 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %l, i128 %r) #2
diff --git a/llvm/test/CodeGen/Thumb2/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/Thumb2/umulo-128-legalisation-lowering.ll
index fe1d06cb39e16..d5d8c333712e4 100644
--- a/llvm/test/CodeGen/Thumb2/umulo-128-legalisation-lowering.ll
+++ b/llvm/test/CodeGen/Thumb2/umulo-128-legalisation-lowering.ll
@@ -6,122 +6,110 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
; THUMBV7: @ %bb.0: @ %start
; THUMBV7-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; THUMBV7-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; THUMBV7-NEXT: .pad #44
-; THUMBV7-NEXT: sub sp, #44
+; THUMBV7-NEXT: .pad #36
+; THUMBV7-NEXT: sub sp, #36
+; THUMBV7-NEXT: ldrd r1, r5, [sp, #80]
+; THUMBV7-NEXT: mov r12, r0
+; THUMBV7-NEXT: umull r7, r6, r2, r5
+; THUMBV7-NEXT: ldrd r10, r4, [sp, #72]
; THUMBV7-NEXT: ldr.w r8, [sp, #88]
-; THUMBV7-NEXT: mov r9, r0
-; THUMBV7-NEXT: ldr r7, [sp, #96]
-; THUMBV7-NEXT: ldr.w lr, [sp, #100]
-; THUMBV7-NEXT: umull r0, r5, r2, r8
-; THUMBV7-NEXT: ldr r4, [sp, #80]
-; THUMBV7-NEXT: str r0, [sp, #32] @ 4-byte Spill
-; THUMBV7-NEXT: umull r1, r0, r3, r7
-; THUMBV7-NEXT: str r0, [sp, #4] @ 4-byte Spill
-; THUMBV7-NEXT: umull r0, r11, lr, r2
-; THUMBV7-NEXT: str r1, [sp, #20] @ 4-byte Spill
-; THUMBV7-NEXT: ldr r1, [sp, #92]
-; THUMBV7-NEXT: str r0, [sp] @ 4-byte Spill
-; THUMBV7-NEXT: umull r0, r10, r7, r2
-; THUMBV7-NEXT: mov r7, r1
-; THUMBV7-NEXT: umull r6, r12, r1, r4
-; THUMBV7-NEXT: str r0, [sp, #40] @ 4-byte Spill
-; THUMBV7-NEXT: ldr r0, [sp, #84]
-; THUMBV7-NEXT: str r6, [sp, #24] @ 4-byte Spill
-; THUMBV7-NEXT: umull r6, r1, r0, r8
+; THUMBV7-NEXT: umull lr, r0, r2, r1
; THUMBV7-NEXT: str r6, [sp, #16] @ 4-byte Spill
-; THUMBV7-NEXT: umull r6, r2, r2, r7
-; THUMBV7-NEXT: mov r7, r4
-; THUMBV7-NEXT: strd r6, r2, [sp, #8] @ 8-byte Folded Spill
-; THUMBV7-NEXT: umull r2, r6, r4, r8
-; THUMBV7-NEXT: str r2, [sp, #36] @ 4-byte Spill
-; THUMBV7-NEXT: ldr r2, [sp, #32] @ 4-byte Reload
-; THUMBV7-NEXT: str r6, [sp, #28] @ 4-byte Spill
; THUMBV7-NEXT: movs r6, #0
-; THUMBV7-NEXT: str.w r2, [r9]
-; THUMBV7-NEXT: umlal r5, r6, r3, r8
-; THUMBV7-NEXT: ldr r2, [sp, #20] @ 4-byte Reload
-; THUMBV7-NEXT: ldr r4, [sp] @ 4-byte Reload
-; THUMBV7-NEXT: add r4, r2
-; THUMBV7-NEXT: adds.w r2, r10, r4
-; THUMBV7-NEXT: str r2, [sp, #20] @ 4-byte Spill
-; THUMBV7-NEXT: mov.w r2, #0
-; THUMBV7-NEXT: adc r2, r2, #0
-; THUMBV7-NEXT: cmp.w r12, #0
-; THUMBV7-NEXT: str r2, [sp, #32] @ 4-byte Spill
+; THUMBV7-NEXT: mul r9, r4, r1
+; THUMBV7-NEXT: str r7, [sp, #4] @ 4-byte Spill
+; THUMBV7-NEXT: umlal r0, r6, r3, r1
+; THUMBV7-NEXT: umull r1, r7, r10, r1
+; THUMBV7-NEXT: mul r11, r5, r10
+; THUMBV7-NEXT: str r1, [sp, #32] @ 4-byte Spill
+; THUMBV7-NEXT: mul r1, r3, r8
+; THUMBV7-NEXT: str r7, [sp, #12] @ 4-byte Spill
+; THUMBV7-NEXT: umull r7, r10, r8, r2
+; THUMBV7-NEXT: subs.w r5, r11, #0
+; THUMBV7-NEXT: str r1, [sp, #20] @ 4-byte Spill
+; THUMBV7-NEXT: ldr r1, [sp, #92]
+; THUMBV7-NEXT: mul r2, r1, r2
+; THUMBV7-NEXT: strd r10, r7, [sp, #24] @ 8-byte Folded Spill
+; THUMBV7-NEXT: str r2, [sp, #8] @ 4-byte Spill
+; THUMBV7-NEXT: str.w lr, [r12]
+; THUMBV7-NEXT: str r5, [sp] @ 4-byte Spill
; THUMBV7-NEXT: it ne
-; THUMBV7-NEXT: movne.w r12, #1
-; THUMBV7-NEXT: cmp r1, #0
-; THUMBV7-NEXT: ldr r2, [sp, #96]
+; THUMBV7-NEXT: movne r5, #1
+; THUMBV7-NEXT: subs.w lr, r9, #0
+; THUMBV7-NEXT: ldr r2, [sp, #72]
+; THUMBV7-NEXT: ldr.w r9, [sp, #84]
+; THUMBV7-NEXT: mov r7, lr
; THUMBV7-NEXT: it ne
-; THUMBV7-NEXT: movne r1, #1
-; THUMBV7-NEXT: orrs.w r10, r7, r0
+; THUMBV7-NEXT: movne r7, #1
+; THUMBV7-NEXT: orrs.w r10, r2, r4
; THUMBV7-NEXT: it ne
; THUMBV7-NEXT: movne.w r10, #1
-; THUMBV7-NEXT: orrs.w r7, r2, lr
-; THUMBV7-NEXT: ldr r2, [sp, #92]
-; THUMBV7-NEXT: it ne
-; THUMBV7-NEXT: movne r7, #1
-; THUMBV7-NEXT: cmp r0, #0
+; THUMBV7-NEXT: orrs.w r8, r8, r1
; THUMBV7-NEXT: it ne
-; THUMBV7-NEXT: movne r0, #1
-; THUMBV7-NEXT: cmp r2, #0
-; THUMBV7-NEXT: mov r4, r2
-; THUMBV7-NEXT: mov r8, r2
+; THUMBV7-NEXT: movne.w r8, #1
+; THUMBV7-NEXT: cmp r4, #0
; THUMBV7-NEXT: it ne
; THUMBV7-NEXT: movne r4, #1
-; THUMBV7-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
-; THUMBV7-NEXT: ands r0, r4
-; THUMBV7-NEXT: movs r4, #0
-; THUMBV7-NEXT: adds r5, r5, r2
-; THUMBV7-NEXT: str.w r5, [r9, #4]
-; THUMBV7-NEXT: orr.w r0, r0, r1
-; THUMBV7-NEXT: ldr r1, [sp, #24] @ 4-byte Reload
-; THUMBV7-NEXT: ldr r2, [sp, #16] @ 4-byte Reload
-; THUMBV7-NEXT: and.w r5, r10, r7
-; THUMBV7-NEXT: orr.w r0, r0, r12
-; THUMBV7-NEXT: mov.w r12, #0
-; THUMBV7-NEXT: add r1, r2
-; THUMBV7-NEXT: ldr r2, [sp, #12] @ 4-byte Reload
-; THUMBV7-NEXT: adcs r2, r6
-; THUMBV7-NEXT: ldr r6, [sp, #28] @ 4-byte Reload
-; THUMBV7-NEXT: adc r7, r4, #0
-; THUMBV7-NEXT: adds r1, r1, r6
-; THUMBV7-NEXT: umlal r2, r7, r3, r8
-; THUMBV7-NEXT: adc r4, r4, #0
-; THUMBV7-NEXT: orrs r0, r4
-; THUMBV7-NEXT: orrs r0, r5
-; THUMBV7-NEXT: ldrd r5, r4, [sp, #36] @ 8-byte Folded Reload
-; THUMBV7-NEXT: adds r5, r5, r4
-; THUMBV7-NEXT: ldr r4, [sp, #20] @ 4-byte Reload
-; THUMBV7-NEXT: adcs r1, r4
-; THUMBV7-NEXT: ldr r4, [sp, #4] @ 4-byte Reload
-; THUMBV7-NEXT: cmp r4, #0
+; THUMBV7-NEXT: cmp.w r9, #0
+; THUMBV7-NEXT: it ne
+; THUMBV7-NEXT: movne.w r9, #1
+; THUMBV7-NEXT: ldr.w r11, [sp, #4] @ 4-byte Reload
+; THUMBV7-NEXT: and.w r4, r4, r9
+; THUMBV7-NEXT: adds.w r0, r0, r11
+; THUMBV7-NEXT: str.w r0, [r12, #4]
+; THUMBV7-NEXT: orr.w r4, r4, r7
+; THUMBV7-NEXT: ldr r7, [sp, #16] @ 4-byte Reload
+; THUMBV7-NEXT: ldr r2, [sp] @ 4-byte Reload
+; THUMBV7-NEXT: orr.w r4, r4, r5
+; THUMBV7-NEXT: ldr r5, [sp, #84]
+; THUMBV7-NEXT: adcs r7, r6
+; THUMBV7-NEXT: add r2, lr
+; THUMBV7-NEXT: mov.w lr, #0
+; THUMBV7-NEXT: adc r6, lr, #0
+; THUMBV7-NEXT: and.w r0, r10, r8
+; THUMBV7-NEXT: umlal r7, r6, r3, r5
+; THUMBV7-NEXT: ldr r5, [sp, #12] @ 4-byte Reload
+; THUMBV7-NEXT: mov.w r10, #0
+; THUMBV7-NEXT: adds.w r9, r5, r2
+; THUMBV7-NEXT: adc r5, lr, #0
+; THUMBV7-NEXT: orrs r5, r4
+; THUMBV7-NEXT: orr.w lr, r0, r5
+; THUMBV7-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
+; THUMBV7-NEXT: subs r5, r0, #0
+; THUMBV7-NEXT: mov r4, r5
; THUMBV7-NEXT: it ne
; THUMBV7-NEXT: movne r4, #1
+; THUMBV7-NEXT: ldr r0, [sp, #20] @ 4-byte Reload
+; THUMBV7-NEXT: subs r0, #0
+; THUMBV7-NEXT: add r5, r0
+; THUMBV7-NEXT: it ne
+; THUMBV7-NEXT: movne r0, #1
+; THUMBV7-NEXT: ldr r2, [sp, #24] @ 4-byte Reload
+; THUMBV7-NEXT: adds.w r11, r2, r5
+; THUMBV7-NEXT: ldrd r5, r2, [sp, #28] @ 8-byte Folded Reload
+; THUMBV7-NEXT: adc r8, r10, #0
+; THUMBV7-NEXT: adds r2, r2, r5
+; THUMBV7-NEXT: adc.w r5, r9, r11
; THUMBV7-NEXT: cmp r3, #0
; THUMBV7-NEXT: it ne
; THUMBV7-NEXT: movne r3, #1
-; THUMBV7-NEXT: cmp.w lr, #0
-; THUMBV7-NEXT: it ne
-; THUMBV7-NEXT: movne.w lr, #1
-; THUMBV7-NEXT: cmp.w r11, #0
+; THUMBV7-NEXT: cmp r1, #0
; THUMBV7-NEXT: it ne
-; THUMBV7-NEXT: movne.w r11, #1
-; THUMBV7-NEXT: adds r2, r2, r5
-; THUMBV7-NEXT: and.w r3, r3, lr
-; THUMBV7-NEXT: str.w r2, [r9, #8]
-; THUMBV7-NEXT: adcs r1, r7
-; THUMBV7-NEXT: str.w r1, [r9, #12]
-; THUMBV7-NEXT: orr.w r1, r3, r11
-; THUMBV7-NEXT: ldr r2, [sp, #32] @ 4-byte Reload
+; THUMBV7-NEXT: movne r1, #1
+; THUMBV7-NEXT: adds r2, r2, r7
+; THUMBV7-NEXT: and.w r1, r1, r3
+; THUMBV7-NEXT: str.w r2, [r12, #8]
; THUMBV7-NEXT: orr.w r1, r1, r4
-; THUMBV7-NEXT: orr.w r1, r1, r2
+; THUMBV7-NEXT: adcs.w r2, r6, r5
; THUMBV7-NEXT: orr.w r0, r0, r1
-; THUMBV7-NEXT: adc r1, r12, #0
+; THUMBV7-NEXT: adc r1, r10, #0
+; THUMBV7-NEXT: orr.w r0, r0, r8
+; THUMBV7-NEXT: str.w r2, [r12, #12]
+; THUMBV7-NEXT: orr.w r0, r0, lr
; THUMBV7-NEXT: orrs r0, r1
; THUMBV7-NEXT: and r0, r0, #1
-; THUMBV7-NEXT: strb.w r0, [r9, #16]
-; THUMBV7-NEXT: add sp, #44
+; THUMBV7-NEXT: strb.w r0, [r12, #16]
+; THUMBV7-NEXT: add sp, #36
; THUMBV7-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
start:
%0 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %l, i128 %r) #2
diff --git a/llvm/test/CodeGen/Thumb2/umulo-64-legalisation-lowering.ll b/llvm/test/CodeGen/Thumb2/umulo-64-legalisation-lowering.ll
index 55e917159fce9..167ffec6be119 100644
--- a/llvm/test/CodeGen/Thumb2/umulo-64-legalisation-lowering.ll
+++ b/llvm/test/CodeGen/Thumb2/umulo-64-legalisation-lowering.ll
@@ -4,32 +4,33 @@
define { i64, i8 } @mulodi_test(i64 %l, i64 %r) unnamed_addr #0 {
; THUMBV7-LABEL: mulodi_test:
; THUMBV7: @ %bb.0: @ %start
-; THUMBV7-NEXT: .save {r4, r5, r7, lr}
-; THUMBV7-NEXT: push {r4, r5, r7, lr}
-; THUMBV7-NEXT: umull r12, lr, r3, r0
+; THUMBV7-NEXT: .save {r4, lr}
+; THUMBV7-NEXT: push {r4, lr}
+; THUMBV7-NEXT: mul r12, r1, r2
; THUMBV7-NEXT: cmp r3, #0
+; THUMBV7-NEXT: mul lr, r3, r0
; THUMBV7-NEXT: it ne
; THUMBV7-NEXT: movne r3, #1
; THUMBV7-NEXT: cmp r1, #0
-; THUMBV7-NEXT: umull r0, r4, r0, r2
-; THUMBV7-NEXT: umull r2, r5, r1, r2
; THUMBV7-NEXT: it ne
; THUMBV7-NEXT: movne r1, #1
+; THUMBV7-NEXT: umull r0, r4, r0, r2
; THUMBV7-NEXT: ands r1, r3
-; THUMBV7-NEXT: cmp r5, #0
+; THUMBV7-NEXT: subs.w r2, r12, #0
+; THUMBV7-NEXT: mov.w r12, #0
+; THUMBV7-NEXT: mov r3, r2
; THUMBV7-NEXT: it ne
-; THUMBV7-NEXT: movne r5, #1
-; THUMBV7-NEXT: orrs r1, r5
-; THUMBV7-NEXT: cmp.w lr, #0
+; THUMBV7-NEXT: movne r3, #1
+; THUMBV7-NEXT: orrs r1, r3
+; THUMBV7-NEXT: subs.w r3, lr, #0
+; THUMBV7-NEXT: add r2, r3
; THUMBV7-NEXT: it ne
-; THUMBV7-NEXT: movne.w lr, #1
-; THUMBV7-NEXT: orr.w r3, r1, lr
-; THUMBV7-NEXT: add.w r1, r2, r12
-; THUMBV7-NEXT: movs r2, #0
-; THUMBV7-NEXT: adds r1, r1, r4
-; THUMBV7-NEXT: adc r2, r2, #0
+; THUMBV7-NEXT: movne r3, #1
+; THUMBV7-NEXT: orrs r3, r1
+; THUMBV7-NEXT: adds r1, r4, r2
+; THUMBV7-NEXT: adc r2, r12, #0
; THUMBV7-NEXT: orrs r2, r3
-; THUMBV7-NEXT: pop {r4, r5, r7, pc}
+; THUMBV7-NEXT: pop {r4, pc}
start:
%0 = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %l, i64 %r) #2
%1 = extractvalue { i64, i1 } %0, 0
More information about the llvm-commits
mailing list