[llvm] [ARM] Set operation action for UMULO and SMULO as Custom if not Thumb1 (PR #154253)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 18 19:59:37 PDT 2025
https://github.com/AZero13 updated https://github.com/llvm/llvm-project/pull/154253
>From 79f7844360d85f555e649278aa3f43aec45612b8 Mon Sep 17 00:00:00 2001
From: AZero13 <gfunni234 at gmail.com>
Date: Mon, 18 Aug 2025 16:42:15 -0400
Subject: [PATCH] [ARM] Set operation action for UMULO and SMULO as Custom if
not Thumb1
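Mark SMULO and UMULO on i32 as Custom so that ARM lowers them itself, as AArch64 already does. This is only done when the subtarget is not Thumb1-only; the custom lowering is expressed in terms of SMUL_LOHI/UMUL_LOHI, so Thumb1-only targets keep the default handling.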
---
llvm/lib/Target/ARM/ARMISelLowering.cpp | 102 ++++++----
llvm/lib/Target/ARM/ARMISelLowering.h | 3 +-
llvm/test/CodeGen/ARM/select.ll | 10 +-
.../Thumb2/umulo-128-legalisation-lowering.ll | 180 ++++++++----------
.../Thumb2/umulo-64-legalisation-lowering.ll | 33 ++--
5 files changed, 174 insertions(+), 154 deletions(-)
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 830156359e9e8..cb036995fc243 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -986,6 +986,11 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
setOperationAction(ISD::SSUBO, MVT::i32, Custom);
setOperationAction(ISD::USUBO, MVT::i32, Custom);
+ if (!Subtarget->isThumb1Only()) {
+ setOperationAction(ISD::UMULO, MVT::i32, Custom);
+ setOperationAction(ISD::SMULO, MVT::i32, Custom);
+ }
+
setOperationAction(ISD::UADDO_CARRY, MVT::i32, Custom);
setOperationAction(ISD::USUBO_CARRY, MVT::i32, Custom);
if (Subtarget->hasDSP()) {
@@ -4879,7 +4884,7 @@ ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
Value = DAG.getNode(ISD::UMUL_LOHI, dl,
DAG.getVTList(Op.getValueType(), Op.getValueType()),
LHS, RHS);
- OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, Value.getValue(1),
+ OverflowCmp = DAG.getNode(ARMISD::CMPZ, dl, FlagsVT, Value.getValue(1),
DAG.getConstant(0, dl, MVT::i32));
Value = Value.getValue(0); // We only want the low 32 bits for the result.
break;
@@ -4890,7 +4895,7 @@ ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
Value = DAG.getNode(ISD::SMUL_LOHI, dl,
DAG.getVTList(Op.getValueType(), Op.getValueType()),
LHS, RHS);
- OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, Value.getValue(1),
+ OverflowCmp = DAG.getNode(ARMISD::CMPZ, dl, FlagsVT, Value.getValue(1),
DAG.getNode(ISD::SRA, dl, Op.getValueType(),
Value.getValue(0),
DAG.getConstant(31, dl, MVT::i32)));
@@ -4901,28 +4906,6 @@ ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
return std::make_pair(Value, OverflowCmp);
}
-SDValue
-ARMTargetLowering::LowerSignedALUO(SDValue Op, SelectionDAG &DAG) const {
- // Let legalize expand this if it isn't a legal type yet.
- if (!isTypeLegal(Op.getValueType()))
- return SDValue();
-
- SDValue Value, OverflowCmp;
- SDValue ARMcc;
- std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc);
- SDLoc dl(Op);
- // We use 0 and 1 as false and true values.
- SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
- SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
- EVT VT = Op.getValueType();
-
- SDValue Overflow =
- DAG.getNode(ARMISD::CMOV, dl, VT, TVal, FVal, ARMcc, OverflowCmp);
-
- SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
- return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
-}
-
static SDValue ConvertBooleanCarryToCarryFlag(SDValue BoolCarry,
SelectionDAG &DAG) {
SDLoc DL(BoolCarry);
@@ -4947,8 +4930,7 @@ static SDValue ConvertCarryFlagToBooleanCarry(SDValue Flags, EVT VT,
DAG.getConstant(0, DL, MVT::i32), Flags);
}
-SDValue ARMTargetLowering::LowerUnsignedALUO(SDValue Op,
- SelectionDAG &DAG) const {
+SDValue ARMTargetLowering::LowerALUO(SDValue Op, SelectionDAG &DAG) const {
// Let legalize expand this if it isn't a legal type yet.
if (!isTypeLegal(Op.getValueType()))
return SDValue();
@@ -4962,14 +4944,12 @@ SDValue ARMTargetLowering::LowerUnsignedALUO(SDValue Op,
SDValue Value;
SDValue Overflow;
switch (Op.getOpcode()) {
- default:
- llvm_unreachable("Unknown overflow instruction!");
case ISD::UADDO:
Value = DAG.getNode(ARMISD::ADDC, dl, VTs, LHS, RHS);
// Convert the carry flag into a boolean value.
Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG);
break;
- case ISD::USUBO: {
+ case ISD::USUBO:
Value = DAG.getNode(ARMISD::SUBC, dl, VTs, LHS, RHS);
// Convert the carry flag into a boolean value.
Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG);
@@ -4978,6 +4958,57 @@ SDValue ARMTargetLowering::LowerUnsignedALUO(SDValue Op,
Overflow = DAG.getNode(ISD::SUB, dl, MVT::i32,
DAG.getConstant(1, dl, MVT::i32), Overflow);
break;
+ case ISD::UMULO: {
+ // We generate a UMUL_LOHI and then check if the high word is 0.
+ Value = DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), LHS, RHS);
+ SDValue HighWord = Value.getValue(1);
+ Value = Value.getValue(0); // We only want the low 32 bits for the result.
+
+ // Compare high word with 0
+ SDValue Flags = DAG.getNode(ARMISD::CMPZ, dl, FlagsVT, HighWord,
+ DAG.getConstant(0, dl, MVT::i32));
+
+ ARMCC::CondCodes NECond = ARMCC::NE;
+ SDValue NECondValue = DAG.getConstant(NECond, dl, MVT::i32);
+ Overflow =
+ DAG.getNode(ARMISD::CMOV, dl, MVT::i32, HighWord,
+ DAG.getConstant(1, dl, MVT::i32), NECondValue, Flags);
+ break;
+ }
+ case ISD::SMULO: {
+ // We generate a SMUL_LOHI and then check if all the bits of the high word
+ // are the same as the sign bit of the low word.
+ Value = DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), LHS, RHS);
+ SDValue HighWord = Value.getValue(1);
+ SDValue LowWord = Value.getValue(0);
+ Value = LowWord; // We only want the low 32 bits for the result.
+
+ // Compute sign bit of low word: LowWord >> 31
+ SDValue SignBit = DAG.getNode(ISD::SRA, dl, VT, LowWord,
+ DAG.getConstant(31, dl, MVT::i32));
+
+ // Subtract sign bit from high word: HighWord - SignBit
+ SDValue SubResult = DAG.getNode(ARMISD::SUBC, dl, VTs, HighWord, SignBit);
+ SDValue Flags = SubResult.getValue(1);
+
+ ARMCC::CondCodes NECond = ARMCC::NE;
+ SDValue NECondValue = DAG.getConstant(NECond, dl, MVT::i32);
+ Overflow =
+ DAG.getNode(ARMISD::CMOV, dl, MVT::i32, SubResult,
+ DAG.getConstant(1, dl, MVT::i32), NECondValue, Flags);
+ break;
+ }
+ default: {
+ // Handle other operations with getARMXALUOOp
+ SDValue OverflowCmp, ARMcc;
+ std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc);
+ // We use 0 and 1 as false and true values.
+ SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
+ SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
+
+ Overflow =
+ DAG.getNode(ARMISD::CMOV, dl, MVT::i32, TVal, FVal, ARMcc, OverflowCmp);
+ break;
}
}
@@ -5045,9 +5076,11 @@ SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
unsigned Opc = Cond.getOpcode();
+ bool OptimizeMul =
+ (Opc == ISD::SMULO || Opc == ISD::UMULO) && !Subtarget->isThumb1Only();
if (Cond.getResNo() == 1 &&
(Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
- Opc == ISD::USUBO)) {
+ Opc == ISD::USUBO || OptimizeMul)) {
if (!isTypeLegal(Cond->getValueType(0)))
return SDValue();
@@ -10653,12 +10686,13 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::UADDO_CARRY:
case ISD::USUBO_CARRY:
return LowerUADDSUBO_CARRY(Op, DAG);
- case ISD::SADDO:
- case ISD::SSUBO:
- return LowerSignedALUO(Op, DAG);
case ISD::UADDO:
case ISD::USUBO:
- return LowerUnsignedALUO(Op, DAG);
+ case ISD::UMULO:
+ case ISD::SADDO:
+ case ISD::SSUBO:
+ case ISD::SMULO:
+ return LowerALUO(Op, DAG);
case ISD::SADDSAT:
case ISD::SSUBSAT:
case ISD::UADDSAT:
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index 778595e93f84a..3e710f6d60a12 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -866,8 +866,7 @@ class VectorType;
SDValue LowerGlobalTLSAddressDarwin(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalTLSAddressWindows(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSignedALUO(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerUnsignedALUO(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerALUO(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/test/CodeGen/ARM/select.ll b/llvm/test/CodeGen/ARM/select.ll
index 48d6ee925d939..3f186f69c5aa3 100644
--- a/llvm/test/CodeGen/ARM/select.ll
+++ b/llvm/test/CodeGen/ARM/select.ll
@@ -440,17 +440,15 @@ define float @f12(i32 %a, i32 %b) nounwind uwtable readnone ssp {
define i1 @test_overflow_recombine(i32 %in1, i32 %in2) {
; CHECK-LABEL: test_overflow_recombine:
; CHECK: @ %bb.0:
-; CHECK-NEXT: mul r2, r0, r1
-; CHECK-NEXT: smmul r0, r0, r1
-; CHECK-NEXT: subs r0, r0, r2, asr #31
+; CHECK-NEXT: smull r0, r1, r0, r1
+; CHECK-NEXT: subs r0, r1, r0, asr #31
; CHECK-NEXT: movwne r0, #1
; CHECK-NEXT: bx lr
;
; CHECK-NEON-LABEL: test_overflow_recombine:
; CHECK-NEON: @ %bb.0:
-; CHECK-NEON-NEXT: mul r2, r0, r1
-; CHECK-NEON-NEXT: smmul r0, r0, r1
-; CHECK-NEON-NEXT: subs.w r0, r0, r2, asr #31
+; CHECK-NEON-NEXT: smull r0, r1, r0, r1
+; CHECK-NEON-NEXT: subs.w r0, r1, r0, asr #31
; CHECK-NEON-NEXT: it ne
; CHECK-NEON-NEXT: movne r0, #1
; CHECK-NEON-NEXT: bx lr
diff --git a/llvm/test/CodeGen/Thumb2/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/Thumb2/umulo-128-legalisation-lowering.ll
index fe1d06cb39e16..d5d8c333712e4 100644
--- a/llvm/test/CodeGen/Thumb2/umulo-128-legalisation-lowering.ll
+++ b/llvm/test/CodeGen/Thumb2/umulo-128-legalisation-lowering.ll
@@ -6,122 +6,110 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
; THUMBV7: @ %bb.0: @ %start
; THUMBV7-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; THUMBV7-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; THUMBV7-NEXT: .pad #44
-; THUMBV7-NEXT: sub sp, #44
+; THUMBV7-NEXT: .pad #36
+; THUMBV7-NEXT: sub sp, #36
+; THUMBV7-NEXT: ldrd r1, r5, [sp, #80]
+; THUMBV7-NEXT: mov r12, r0
+; THUMBV7-NEXT: umull r7, r6, r2, r5
+; THUMBV7-NEXT: ldrd r10, r4, [sp, #72]
; THUMBV7-NEXT: ldr.w r8, [sp, #88]
-; THUMBV7-NEXT: mov r9, r0
-; THUMBV7-NEXT: ldr r7, [sp, #96]
-; THUMBV7-NEXT: ldr.w lr, [sp, #100]
-; THUMBV7-NEXT: umull r0, r5, r2, r8
-; THUMBV7-NEXT: ldr r4, [sp, #80]
-; THUMBV7-NEXT: str r0, [sp, #32] @ 4-byte Spill
-; THUMBV7-NEXT: umull r1, r0, r3, r7
-; THUMBV7-NEXT: str r0, [sp, #4] @ 4-byte Spill
-; THUMBV7-NEXT: umull r0, r11, lr, r2
-; THUMBV7-NEXT: str r1, [sp, #20] @ 4-byte Spill
-; THUMBV7-NEXT: ldr r1, [sp, #92]
-; THUMBV7-NEXT: str r0, [sp] @ 4-byte Spill
-; THUMBV7-NEXT: umull r0, r10, r7, r2
-; THUMBV7-NEXT: mov r7, r1
-; THUMBV7-NEXT: umull r6, r12, r1, r4
-; THUMBV7-NEXT: str r0, [sp, #40] @ 4-byte Spill
-; THUMBV7-NEXT: ldr r0, [sp, #84]
-; THUMBV7-NEXT: str r6, [sp, #24] @ 4-byte Spill
-; THUMBV7-NEXT: umull r6, r1, r0, r8
+; THUMBV7-NEXT: umull lr, r0, r2, r1
; THUMBV7-NEXT: str r6, [sp, #16] @ 4-byte Spill
-; THUMBV7-NEXT: umull r6, r2, r2, r7
-; THUMBV7-NEXT: mov r7, r4
-; THUMBV7-NEXT: strd r6, r2, [sp, #8] @ 8-byte Folded Spill
-; THUMBV7-NEXT: umull r2, r6, r4, r8
-; THUMBV7-NEXT: str r2, [sp, #36] @ 4-byte Spill
-; THUMBV7-NEXT: ldr r2, [sp, #32] @ 4-byte Reload
-; THUMBV7-NEXT: str r6, [sp, #28] @ 4-byte Spill
; THUMBV7-NEXT: movs r6, #0
-; THUMBV7-NEXT: str.w r2, [r9]
-; THUMBV7-NEXT: umlal r5, r6, r3, r8
-; THUMBV7-NEXT: ldr r2, [sp, #20] @ 4-byte Reload
-; THUMBV7-NEXT: ldr r4, [sp] @ 4-byte Reload
-; THUMBV7-NEXT: add r4, r2
-; THUMBV7-NEXT: adds.w r2, r10, r4
-; THUMBV7-NEXT: str r2, [sp, #20] @ 4-byte Spill
-; THUMBV7-NEXT: mov.w r2, #0
-; THUMBV7-NEXT: adc r2, r2, #0
-; THUMBV7-NEXT: cmp.w r12, #0
-; THUMBV7-NEXT: str r2, [sp, #32] @ 4-byte Spill
+; THUMBV7-NEXT: mul r9, r4, r1
+; THUMBV7-NEXT: str r7, [sp, #4] @ 4-byte Spill
+; THUMBV7-NEXT: umlal r0, r6, r3, r1
+; THUMBV7-NEXT: umull r1, r7, r10, r1
+; THUMBV7-NEXT: mul r11, r5, r10
+; THUMBV7-NEXT: str r1, [sp, #32] @ 4-byte Spill
+; THUMBV7-NEXT: mul r1, r3, r8
+; THUMBV7-NEXT: str r7, [sp, #12] @ 4-byte Spill
+; THUMBV7-NEXT: umull r7, r10, r8, r2
+; THUMBV7-NEXT: subs.w r5, r11, #0
+; THUMBV7-NEXT: str r1, [sp, #20] @ 4-byte Spill
+; THUMBV7-NEXT: ldr r1, [sp, #92]
+; THUMBV7-NEXT: mul r2, r1, r2
+; THUMBV7-NEXT: strd r10, r7, [sp, #24] @ 8-byte Folded Spill
+; THUMBV7-NEXT: str r2, [sp, #8] @ 4-byte Spill
+; THUMBV7-NEXT: str.w lr, [r12]
+; THUMBV7-NEXT: str r5, [sp] @ 4-byte Spill
; THUMBV7-NEXT: it ne
-; THUMBV7-NEXT: movne.w r12, #1
-; THUMBV7-NEXT: cmp r1, #0
-; THUMBV7-NEXT: ldr r2, [sp, #96]
+; THUMBV7-NEXT: movne r5, #1
+; THUMBV7-NEXT: subs.w lr, r9, #0
+; THUMBV7-NEXT: ldr r2, [sp, #72]
+; THUMBV7-NEXT: ldr.w r9, [sp, #84]
+; THUMBV7-NEXT: mov r7, lr
; THUMBV7-NEXT: it ne
-; THUMBV7-NEXT: movne r1, #1
-; THUMBV7-NEXT: orrs.w r10, r7, r0
+; THUMBV7-NEXT: movne r7, #1
+; THUMBV7-NEXT: orrs.w r10, r2, r4
; THUMBV7-NEXT: it ne
; THUMBV7-NEXT: movne.w r10, #1
-; THUMBV7-NEXT: orrs.w r7, r2, lr
-; THUMBV7-NEXT: ldr r2, [sp, #92]
-; THUMBV7-NEXT: it ne
-; THUMBV7-NEXT: movne r7, #1
-; THUMBV7-NEXT: cmp r0, #0
+; THUMBV7-NEXT: orrs.w r8, r8, r1
; THUMBV7-NEXT: it ne
-; THUMBV7-NEXT: movne r0, #1
-; THUMBV7-NEXT: cmp r2, #0
-; THUMBV7-NEXT: mov r4, r2
-; THUMBV7-NEXT: mov r8, r2
+; THUMBV7-NEXT: movne.w r8, #1
+; THUMBV7-NEXT: cmp r4, #0
; THUMBV7-NEXT: it ne
; THUMBV7-NEXT: movne r4, #1
-; THUMBV7-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
-; THUMBV7-NEXT: ands r0, r4
-; THUMBV7-NEXT: movs r4, #0
-; THUMBV7-NEXT: adds r5, r5, r2
-; THUMBV7-NEXT: str.w r5, [r9, #4]
-; THUMBV7-NEXT: orr.w r0, r0, r1
-; THUMBV7-NEXT: ldr r1, [sp, #24] @ 4-byte Reload
-; THUMBV7-NEXT: ldr r2, [sp, #16] @ 4-byte Reload
-; THUMBV7-NEXT: and.w r5, r10, r7
-; THUMBV7-NEXT: orr.w r0, r0, r12
-; THUMBV7-NEXT: mov.w r12, #0
-; THUMBV7-NEXT: add r1, r2
-; THUMBV7-NEXT: ldr r2, [sp, #12] @ 4-byte Reload
-; THUMBV7-NEXT: adcs r2, r6
-; THUMBV7-NEXT: ldr r6, [sp, #28] @ 4-byte Reload
-; THUMBV7-NEXT: adc r7, r4, #0
-; THUMBV7-NEXT: adds r1, r1, r6
-; THUMBV7-NEXT: umlal r2, r7, r3, r8
-; THUMBV7-NEXT: adc r4, r4, #0
-; THUMBV7-NEXT: orrs r0, r4
-; THUMBV7-NEXT: orrs r0, r5
-; THUMBV7-NEXT: ldrd r5, r4, [sp, #36] @ 8-byte Folded Reload
-; THUMBV7-NEXT: adds r5, r5, r4
-; THUMBV7-NEXT: ldr r4, [sp, #20] @ 4-byte Reload
-; THUMBV7-NEXT: adcs r1, r4
-; THUMBV7-NEXT: ldr r4, [sp, #4] @ 4-byte Reload
-; THUMBV7-NEXT: cmp r4, #0
+; THUMBV7-NEXT: cmp.w r9, #0
+; THUMBV7-NEXT: it ne
+; THUMBV7-NEXT: movne.w r9, #1
+; THUMBV7-NEXT: ldr.w r11, [sp, #4] @ 4-byte Reload
+; THUMBV7-NEXT: and.w r4, r4, r9
+; THUMBV7-NEXT: adds.w r0, r0, r11
+; THUMBV7-NEXT: str.w r0, [r12, #4]
+; THUMBV7-NEXT: orr.w r4, r4, r7
+; THUMBV7-NEXT: ldr r7, [sp, #16] @ 4-byte Reload
+; THUMBV7-NEXT: ldr r2, [sp] @ 4-byte Reload
+; THUMBV7-NEXT: orr.w r4, r4, r5
+; THUMBV7-NEXT: ldr r5, [sp, #84]
+; THUMBV7-NEXT: adcs r7, r6
+; THUMBV7-NEXT: add r2, lr
+; THUMBV7-NEXT: mov.w lr, #0
+; THUMBV7-NEXT: adc r6, lr, #0
+; THUMBV7-NEXT: and.w r0, r10, r8
+; THUMBV7-NEXT: umlal r7, r6, r3, r5
+; THUMBV7-NEXT: ldr r5, [sp, #12] @ 4-byte Reload
+; THUMBV7-NEXT: mov.w r10, #0
+; THUMBV7-NEXT: adds.w r9, r5, r2
+; THUMBV7-NEXT: adc r5, lr, #0
+; THUMBV7-NEXT: orrs r5, r4
+; THUMBV7-NEXT: orr.w lr, r0, r5
+; THUMBV7-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
+; THUMBV7-NEXT: subs r5, r0, #0
+; THUMBV7-NEXT: mov r4, r5
; THUMBV7-NEXT: it ne
; THUMBV7-NEXT: movne r4, #1
+; THUMBV7-NEXT: ldr r0, [sp, #20] @ 4-byte Reload
+; THUMBV7-NEXT: subs r0, #0
+; THUMBV7-NEXT: add r5, r0
+; THUMBV7-NEXT: it ne
+; THUMBV7-NEXT: movne r0, #1
+; THUMBV7-NEXT: ldr r2, [sp, #24] @ 4-byte Reload
+; THUMBV7-NEXT: adds.w r11, r2, r5
+; THUMBV7-NEXT: ldrd r5, r2, [sp, #28] @ 8-byte Folded Reload
+; THUMBV7-NEXT: adc r8, r10, #0
+; THUMBV7-NEXT: adds r2, r2, r5
+; THUMBV7-NEXT: adc.w r5, r9, r11
; THUMBV7-NEXT: cmp r3, #0
; THUMBV7-NEXT: it ne
; THUMBV7-NEXT: movne r3, #1
-; THUMBV7-NEXT: cmp.w lr, #0
-; THUMBV7-NEXT: it ne
-; THUMBV7-NEXT: movne.w lr, #1
-; THUMBV7-NEXT: cmp.w r11, #0
+; THUMBV7-NEXT: cmp r1, #0
; THUMBV7-NEXT: it ne
-; THUMBV7-NEXT: movne.w r11, #1
-; THUMBV7-NEXT: adds r2, r2, r5
-; THUMBV7-NEXT: and.w r3, r3, lr
-; THUMBV7-NEXT: str.w r2, [r9, #8]
-; THUMBV7-NEXT: adcs r1, r7
-; THUMBV7-NEXT: str.w r1, [r9, #12]
-; THUMBV7-NEXT: orr.w r1, r3, r11
-; THUMBV7-NEXT: ldr r2, [sp, #32] @ 4-byte Reload
+; THUMBV7-NEXT: movne r1, #1
+; THUMBV7-NEXT: adds r2, r2, r7
+; THUMBV7-NEXT: and.w r1, r1, r3
+; THUMBV7-NEXT: str.w r2, [r12, #8]
; THUMBV7-NEXT: orr.w r1, r1, r4
-; THUMBV7-NEXT: orr.w r1, r1, r2
+; THUMBV7-NEXT: adcs.w r2, r6, r5
; THUMBV7-NEXT: orr.w r0, r0, r1
-; THUMBV7-NEXT: adc r1, r12, #0
+; THUMBV7-NEXT: adc r1, r10, #0
+; THUMBV7-NEXT: orr.w r0, r0, r8
+; THUMBV7-NEXT: str.w r2, [r12, #12]
+; THUMBV7-NEXT: orr.w r0, r0, lr
; THUMBV7-NEXT: orrs r0, r1
; THUMBV7-NEXT: and r0, r0, #1
-; THUMBV7-NEXT: strb.w r0, [r9, #16]
-; THUMBV7-NEXT: add sp, #44
+; THUMBV7-NEXT: strb.w r0, [r12, #16]
+; THUMBV7-NEXT: add sp, #36
; THUMBV7-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
start:
%0 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %l, i128 %r) #2
diff --git a/llvm/test/CodeGen/Thumb2/umulo-64-legalisation-lowering.ll b/llvm/test/CodeGen/Thumb2/umulo-64-legalisation-lowering.ll
index 55e917159fce9..167ffec6be119 100644
--- a/llvm/test/CodeGen/Thumb2/umulo-64-legalisation-lowering.ll
+++ b/llvm/test/CodeGen/Thumb2/umulo-64-legalisation-lowering.ll
@@ -4,32 +4,33 @@
define { i64, i8 } @mulodi_test(i64 %l, i64 %r) unnamed_addr #0 {
; THUMBV7-LABEL: mulodi_test:
; THUMBV7: @ %bb.0: @ %start
-; THUMBV7-NEXT: .save {r4, r5, r7, lr}
-; THUMBV7-NEXT: push {r4, r5, r7, lr}
-; THUMBV7-NEXT: umull r12, lr, r3, r0
+; THUMBV7-NEXT: .save {r4, lr}
+; THUMBV7-NEXT: push {r4, lr}
+; THUMBV7-NEXT: mul r12, r1, r2
; THUMBV7-NEXT: cmp r3, #0
+; THUMBV7-NEXT: mul lr, r3, r0
; THUMBV7-NEXT: it ne
; THUMBV7-NEXT: movne r3, #1
; THUMBV7-NEXT: cmp r1, #0
-; THUMBV7-NEXT: umull r0, r4, r0, r2
-; THUMBV7-NEXT: umull r2, r5, r1, r2
; THUMBV7-NEXT: it ne
; THUMBV7-NEXT: movne r1, #1
+; THUMBV7-NEXT: umull r0, r4, r0, r2
; THUMBV7-NEXT: ands r1, r3
-; THUMBV7-NEXT: cmp r5, #0
+; THUMBV7-NEXT: subs.w r2, r12, #0
+; THUMBV7-NEXT: mov.w r12, #0
+; THUMBV7-NEXT: mov r3, r2
; THUMBV7-NEXT: it ne
-; THUMBV7-NEXT: movne r5, #1
-; THUMBV7-NEXT: orrs r1, r5
-; THUMBV7-NEXT: cmp.w lr, #0
+; THUMBV7-NEXT: movne r3, #1
+; THUMBV7-NEXT: orrs r1, r3
+; THUMBV7-NEXT: subs.w r3, lr, #0
+; THUMBV7-NEXT: add r2, r3
; THUMBV7-NEXT: it ne
-; THUMBV7-NEXT: movne.w lr, #1
-; THUMBV7-NEXT: orr.w r3, r1, lr
-; THUMBV7-NEXT: add.w r1, r2, r12
-; THUMBV7-NEXT: movs r2, #0
-; THUMBV7-NEXT: adds r1, r1, r4
-; THUMBV7-NEXT: adc r2, r2, #0
+; THUMBV7-NEXT: movne r3, #1
+; THUMBV7-NEXT: orrs r3, r1
+; THUMBV7-NEXT: adds r1, r4, r2
+; THUMBV7-NEXT: adc r2, r12, #0
; THUMBV7-NEXT: orrs r2, r3
-; THUMBV7-NEXT: pop {r4, r5, r7, pc}
+; THUMBV7-NEXT: pop {r4, pc}
start:
%0 = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %l, i64 %r) #2
%1 = extractvalue { i64, i1 } %0, 0
More information about the llvm-commits mailing list