[llvm] [ARM] Set operation action for UMULO and SMULO as Custom if not Thumb1 (PR #154253)

via llvm-commits llvm-commits at lists.llvm.org
Mon Aug 18 19:59:37 PDT 2025


https://github.com/AZero13 updated https://github.com/llvm/llvm-project/pull/154253

>From 79f7844360d85f555e649278aa3f43aec45612b8 Mon Sep 17 00:00:00 2001
From: AZero13 <gfunni234 at gmail.com>
Date: Mon, 18 Aug 2025 16:42:15 -0400
Subject: [PATCH] [ARM] Set operation action for UMULO and SMULO as Custom if
 not Thumb1

We should specify a custom lowering for SMULO and UMULO, as we already do for AArch64, but only when the subtarget is not Thumb1-only.
---
 llvm/lib/Target/ARM/ARMISelLowering.cpp       | 102 ++++++----
 llvm/lib/Target/ARM/ARMISelLowering.h         |   3 +-
 llvm/test/CodeGen/ARM/select.ll               |  10 +-
 .../Thumb2/umulo-128-legalisation-lowering.ll | 180 ++++++++----------
 .../Thumb2/umulo-64-legalisation-lowering.ll  |  33 ++--
 5 files changed, 174 insertions(+), 154 deletions(-)

diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 830156359e9e8..cb036995fc243 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -986,6 +986,11 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
   setOperationAction(ISD::SSUBO, MVT::i32, Custom);
   setOperationAction(ISD::USUBO, MVT::i32, Custom);
 
+  if (!Subtarget->isThumb1Only()) {
+    setOperationAction(ISD::UMULO, MVT::i32, Custom);
+    setOperationAction(ISD::SMULO, MVT::i32, Custom);
+  }
+
   setOperationAction(ISD::UADDO_CARRY, MVT::i32, Custom);
   setOperationAction(ISD::USUBO_CARRY, MVT::i32, Custom);
   if (Subtarget->hasDSP()) {
@@ -4879,7 +4884,7 @@ ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
     Value = DAG.getNode(ISD::UMUL_LOHI, dl,
                         DAG.getVTList(Op.getValueType(), Op.getValueType()),
                         LHS, RHS);
-    OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, Value.getValue(1),
+    OverflowCmp = DAG.getNode(ARMISD::CMPZ, dl, FlagsVT, Value.getValue(1),
                               DAG.getConstant(0, dl, MVT::i32));
     Value = Value.getValue(0); // We only want the low 32 bits for the result.
     break;
@@ -4890,7 +4895,7 @@ ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
     Value = DAG.getNode(ISD::SMUL_LOHI, dl,
                         DAG.getVTList(Op.getValueType(), Op.getValueType()),
                         LHS, RHS);
-    OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, Value.getValue(1),
+    OverflowCmp = DAG.getNode(ARMISD::CMPZ, dl, FlagsVT, Value.getValue(1),
                               DAG.getNode(ISD::SRA, dl, Op.getValueType(),
                                           Value.getValue(0),
                                           DAG.getConstant(31, dl, MVT::i32)));
@@ -4901,28 +4906,6 @@ ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
   return std::make_pair(Value, OverflowCmp);
 }
 
-SDValue
-ARMTargetLowering::LowerSignedALUO(SDValue Op, SelectionDAG &DAG) const {
-  // Let legalize expand this if it isn't a legal type yet.
-  if (!isTypeLegal(Op.getValueType()))
-    return SDValue();
-
-  SDValue Value, OverflowCmp;
-  SDValue ARMcc;
-  std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc);
-  SDLoc dl(Op);
-  // We use 0 and 1 as false and true values.
-  SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
-  SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
-  EVT VT = Op.getValueType();
-
-  SDValue Overflow =
-      DAG.getNode(ARMISD::CMOV, dl, VT, TVal, FVal, ARMcc, OverflowCmp);
-
-  SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
-  return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
-}
-
 static SDValue ConvertBooleanCarryToCarryFlag(SDValue BoolCarry,
                                               SelectionDAG &DAG) {
   SDLoc DL(BoolCarry);
@@ -4947,8 +4930,7 @@ static SDValue ConvertCarryFlagToBooleanCarry(SDValue Flags, EVT VT,
                      DAG.getConstant(0, DL, MVT::i32), Flags);
 }
 
-SDValue ARMTargetLowering::LowerUnsignedALUO(SDValue Op,
-                                             SelectionDAG &DAG) const {
+SDValue ARMTargetLowering::LowerALUO(SDValue Op, SelectionDAG &DAG) const {
   // Let legalize expand this if it isn't a legal type yet.
   if (!isTypeLegal(Op.getValueType()))
     return SDValue();
@@ -4962,14 +4944,12 @@ SDValue ARMTargetLowering::LowerUnsignedALUO(SDValue Op,
   SDValue Value;
   SDValue Overflow;
   switch (Op.getOpcode()) {
-  default:
-    llvm_unreachable("Unknown overflow instruction!");
   case ISD::UADDO:
     Value = DAG.getNode(ARMISD::ADDC, dl, VTs, LHS, RHS);
     // Convert the carry flag into a boolean value.
     Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG);
     break;
-  case ISD::USUBO: {
+  case ISD::USUBO:
     Value = DAG.getNode(ARMISD::SUBC, dl, VTs, LHS, RHS);
     // Convert the carry flag into a boolean value.
     Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG);
@@ -4978,6 +4958,57 @@ SDValue ARMTargetLowering::LowerUnsignedALUO(SDValue Op,
     Overflow = DAG.getNode(ISD::SUB, dl, MVT::i32,
                            DAG.getConstant(1, dl, MVT::i32), Overflow);
     break;
+  case ISD::UMULO: {
+    // We generate a UMUL_LOHI and then check if the high word is 0.
+    Value = DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), LHS, RHS);
+    SDValue HighWord = Value.getValue(1);
+    Value = Value.getValue(0); // We only want the low 32 bits for the result.
+
+    // Compare high word with 0
+    SDValue Flags = DAG.getNode(ARMISD::CMPZ, dl, FlagsVT, HighWord,
+                                DAG.getConstant(0, dl, MVT::i32));
+
+    ARMCC::CondCodes NECond = ARMCC::NE;
+    SDValue NECondValue = DAG.getConstant(NECond, dl, MVT::i32);
+    Overflow =
+        DAG.getNode(ARMISD::CMOV, dl, MVT::i32, HighWord,
+                    DAG.getConstant(1, dl, MVT::i32), NECondValue, Flags);
+    break;
+  }
+  case ISD::SMULO: {
+    // We generate a SMUL_LOHI and then check if all the bits of the high word
+    // are the same as the sign bit of the low word.
+    Value = DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), LHS, RHS);
+    SDValue HighWord = Value.getValue(1);
+    SDValue LowWord = Value.getValue(0);
+    Value = LowWord; // We only want the low 32 bits for the result.
+
+    // Compute sign bit of low word: LowWord >> 31
+    SDValue SignBit = DAG.getNode(ISD::SRA, dl, VT, LowWord,
+                                  DAG.getConstant(31, dl, MVT::i32));
+
+    // Subtract sign bit from high word: HighWord - SignBit
+    SDValue SubResult = DAG.getNode(ARMISD::SUBC, dl, VTs, HighWord, SignBit);
+    SDValue Flags = SubResult.getValue(1);
+
+    ARMCC::CondCodes NECond = ARMCC::NE;
+    SDValue NECondValue = DAG.getConstant(NECond, dl, MVT::i32);
+    Overflow =
+        DAG.getNode(ARMISD::CMOV, dl, MVT::i32, SubResult,
+                    DAG.getConstant(1, dl, MVT::i32), NECondValue, Flags);
+    break;
+  }
+  default: {
+    // Handle other operations with getARMXALUOOp
+    SDValue OverflowCmp, ARMcc;
+    std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc);
+    // We use 0 and 1 as false and true values.
+    SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
+    SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
+
+    Overflow =
+        DAG.getNode(ARMISD::CMOV, dl, MVT::i32, TVal, FVal, ARMcc, OverflowCmp);
+    break;
   }
   }
 
@@ -5045,9 +5076,11 @@ SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
   SDLoc dl(Op);
   unsigned Opc = Cond.getOpcode();
 
+  bool OptimizeMul =
+      (Opc == ISD::SMULO || Opc == ISD::UMULO) && !Subtarget->isThumb1Only();
   if (Cond.getResNo() == 1 &&
       (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
-       Opc == ISD::USUBO)) {
+       Opc == ISD::USUBO || OptimizeMul)) {
     if (!isTypeLegal(Cond->getValueType(0)))
       return SDValue();
 
@@ -10653,12 +10686,13 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::UADDO_CARRY:
   case ISD::USUBO_CARRY:
     return LowerUADDSUBO_CARRY(Op, DAG);
-  case ISD::SADDO:
-  case ISD::SSUBO:
-    return LowerSignedALUO(Op, DAG);
   case ISD::UADDO:
   case ISD::USUBO:
-    return LowerUnsignedALUO(Op, DAG);
+  case ISD::UMULO:
+  case ISD::SADDO:
+  case ISD::SSUBO:
+  case ISD::SMULO:
+    return LowerALUO(Op, DAG);
   case ISD::SADDSAT:
   case ISD::SSUBSAT:
   case ISD::UADDSAT:
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index 778595e93f84a..3e710f6d60a12 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -866,8 +866,7 @@ class VectorType;
     SDValue LowerGlobalTLSAddressDarwin(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerGlobalTLSAddressWindows(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
-    SDValue LowerSignedALUO(SDValue Op, SelectionDAG &DAG) const;
-    SDValue LowerUnsignedALUO(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerALUO(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/test/CodeGen/ARM/select.ll b/llvm/test/CodeGen/ARM/select.ll
index 48d6ee925d939..3f186f69c5aa3 100644
--- a/llvm/test/CodeGen/ARM/select.ll
+++ b/llvm/test/CodeGen/ARM/select.ll
@@ -440,17 +440,15 @@ define float @f12(i32 %a, i32 %b) nounwind uwtable readnone ssp {
 define i1 @test_overflow_recombine(i32 %in1, i32 %in2) {
 ; CHECK-LABEL: test_overflow_recombine:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    mul r2, r0, r1
-; CHECK-NEXT:    smmul r0, r0, r1
-; CHECK-NEXT:    subs r0, r0, r2, asr #31
+; CHECK-NEXT:    smull r0, r1, r0, r1
+; CHECK-NEXT:    subs r0, r1, r0, asr #31
 ; CHECK-NEXT:    movwne r0, #1
 ; CHECK-NEXT:    bx lr
 ;
 ; CHECK-NEON-LABEL: test_overflow_recombine:
 ; CHECK-NEON:       @ %bb.0:
-; CHECK-NEON-NEXT:    mul r2, r0, r1
-; CHECK-NEON-NEXT:    smmul r0, r0, r1
-; CHECK-NEON-NEXT:    subs.w r0, r0, r2, asr #31
+; CHECK-NEON-NEXT:    smull r0, r1, r0, r1
+; CHECK-NEON-NEXT:    subs.w r0, r1, r0, asr #31
 ; CHECK-NEON-NEXT:    it ne
 ; CHECK-NEON-NEXT:    movne r0, #1
 ; CHECK-NEON-NEXT:    bx lr
diff --git a/llvm/test/CodeGen/Thumb2/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/Thumb2/umulo-128-legalisation-lowering.ll
index fe1d06cb39e16..d5d8c333712e4 100644
--- a/llvm/test/CodeGen/Thumb2/umulo-128-legalisation-lowering.ll
+++ b/llvm/test/CodeGen/Thumb2/umulo-128-legalisation-lowering.ll
@@ -6,122 +6,110 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
 ; THUMBV7:       @ %bb.0: @ %start
 ; THUMBV7-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
 ; THUMBV7-NEXT:    push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; THUMBV7-NEXT:    .pad #44
-; THUMBV7-NEXT:    sub sp, #44
+; THUMBV7-NEXT:    .pad #36
+; THUMBV7-NEXT:    sub sp, #36
+; THUMBV7-NEXT:    ldrd r1, r5, [sp, #80]
+; THUMBV7-NEXT:    mov r12, r0
+; THUMBV7-NEXT:    umull r7, r6, r2, r5
+; THUMBV7-NEXT:    ldrd r10, r4, [sp, #72]
 ; THUMBV7-NEXT:    ldr.w r8, [sp, #88]
-; THUMBV7-NEXT:    mov r9, r0
-; THUMBV7-NEXT:    ldr r7, [sp, #96]
-; THUMBV7-NEXT:    ldr.w lr, [sp, #100]
-; THUMBV7-NEXT:    umull r0, r5, r2, r8
-; THUMBV7-NEXT:    ldr r4, [sp, #80]
-; THUMBV7-NEXT:    str r0, [sp, #32] @ 4-byte Spill
-; THUMBV7-NEXT:    umull r1, r0, r3, r7
-; THUMBV7-NEXT:    str r0, [sp, #4] @ 4-byte Spill
-; THUMBV7-NEXT:    umull r0, r11, lr, r2
-; THUMBV7-NEXT:    str r1, [sp, #20] @ 4-byte Spill
-; THUMBV7-NEXT:    ldr r1, [sp, #92]
-; THUMBV7-NEXT:    str r0, [sp] @ 4-byte Spill
-; THUMBV7-NEXT:    umull r0, r10, r7, r2
-; THUMBV7-NEXT:    mov r7, r1
-; THUMBV7-NEXT:    umull r6, r12, r1, r4
-; THUMBV7-NEXT:    str r0, [sp, #40] @ 4-byte Spill
-; THUMBV7-NEXT:    ldr r0, [sp, #84]
-; THUMBV7-NEXT:    str r6, [sp, #24] @ 4-byte Spill
-; THUMBV7-NEXT:    umull r6, r1, r0, r8
+; THUMBV7-NEXT:    umull lr, r0, r2, r1
 ; THUMBV7-NEXT:    str r6, [sp, #16] @ 4-byte Spill
-; THUMBV7-NEXT:    umull r6, r2, r2, r7
-; THUMBV7-NEXT:    mov r7, r4
-; THUMBV7-NEXT:    strd r6, r2, [sp, #8] @ 8-byte Folded Spill
-; THUMBV7-NEXT:    umull r2, r6, r4, r8
-; THUMBV7-NEXT:    str r2, [sp, #36] @ 4-byte Spill
-; THUMBV7-NEXT:    ldr r2, [sp, #32] @ 4-byte Reload
-; THUMBV7-NEXT:    str r6, [sp, #28] @ 4-byte Spill
 ; THUMBV7-NEXT:    movs r6, #0
-; THUMBV7-NEXT:    str.w r2, [r9]
-; THUMBV7-NEXT:    umlal r5, r6, r3, r8
-; THUMBV7-NEXT:    ldr r2, [sp, #20] @ 4-byte Reload
-; THUMBV7-NEXT:    ldr r4, [sp] @ 4-byte Reload
-; THUMBV7-NEXT:    add r4, r2
-; THUMBV7-NEXT:    adds.w r2, r10, r4
-; THUMBV7-NEXT:    str r2, [sp, #20] @ 4-byte Spill
-; THUMBV7-NEXT:    mov.w r2, #0
-; THUMBV7-NEXT:    adc r2, r2, #0
-; THUMBV7-NEXT:    cmp.w r12, #0
-; THUMBV7-NEXT:    str r2, [sp, #32] @ 4-byte Spill
+; THUMBV7-NEXT:    mul r9, r4, r1
+; THUMBV7-NEXT:    str r7, [sp, #4] @ 4-byte Spill
+; THUMBV7-NEXT:    umlal r0, r6, r3, r1
+; THUMBV7-NEXT:    umull r1, r7, r10, r1
+; THUMBV7-NEXT:    mul r11, r5, r10
+; THUMBV7-NEXT:    str r1, [sp, #32] @ 4-byte Spill
+; THUMBV7-NEXT:    mul r1, r3, r8
+; THUMBV7-NEXT:    str r7, [sp, #12] @ 4-byte Spill
+; THUMBV7-NEXT:    umull r7, r10, r8, r2
+; THUMBV7-NEXT:    subs.w r5, r11, #0
+; THUMBV7-NEXT:    str r1, [sp, #20] @ 4-byte Spill
+; THUMBV7-NEXT:    ldr r1, [sp, #92]
+; THUMBV7-NEXT:    mul r2, r1, r2
+; THUMBV7-NEXT:    strd r10, r7, [sp, #24] @ 8-byte Folded Spill
+; THUMBV7-NEXT:    str r2, [sp, #8] @ 4-byte Spill
+; THUMBV7-NEXT:    str.w lr, [r12]
+; THUMBV7-NEXT:    str r5, [sp] @ 4-byte Spill
 ; THUMBV7-NEXT:    it ne
-; THUMBV7-NEXT:    movne.w r12, #1
-; THUMBV7-NEXT:    cmp r1, #0
-; THUMBV7-NEXT:    ldr r2, [sp, #96]
+; THUMBV7-NEXT:    movne r5, #1
+; THUMBV7-NEXT:    subs.w lr, r9, #0
+; THUMBV7-NEXT:    ldr r2, [sp, #72]
+; THUMBV7-NEXT:    ldr.w r9, [sp, #84]
+; THUMBV7-NEXT:    mov r7, lr
 ; THUMBV7-NEXT:    it ne
-; THUMBV7-NEXT:    movne r1, #1
-; THUMBV7-NEXT:    orrs.w r10, r7, r0
+; THUMBV7-NEXT:    movne r7, #1
+; THUMBV7-NEXT:    orrs.w r10, r2, r4
 ; THUMBV7-NEXT:    it ne
 ; THUMBV7-NEXT:    movne.w r10, #1
-; THUMBV7-NEXT:    orrs.w r7, r2, lr
-; THUMBV7-NEXT:    ldr r2, [sp, #92]
-; THUMBV7-NEXT:    it ne
-; THUMBV7-NEXT:    movne r7, #1
-; THUMBV7-NEXT:    cmp r0, #0
+; THUMBV7-NEXT:    orrs.w r8, r8, r1
 ; THUMBV7-NEXT:    it ne
-; THUMBV7-NEXT:    movne r0, #1
-; THUMBV7-NEXT:    cmp r2, #0
-; THUMBV7-NEXT:    mov r4, r2
-; THUMBV7-NEXT:    mov r8, r2
+; THUMBV7-NEXT:    movne.w r8, #1
+; THUMBV7-NEXT:    cmp r4, #0
 ; THUMBV7-NEXT:    it ne
 ; THUMBV7-NEXT:    movne r4, #1
-; THUMBV7-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
-; THUMBV7-NEXT:    ands r0, r4
-; THUMBV7-NEXT:    movs r4, #0
-; THUMBV7-NEXT:    adds r5, r5, r2
-; THUMBV7-NEXT:    str.w r5, [r9, #4]
-; THUMBV7-NEXT:    orr.w r0, r0, r1
-; THUMBV7-NEXT:    ldr r1, [sp, #24] @ 4-byte Reload
-; THUMBV7-NEXT:    ldr r2, [sp, #16] @ 4-byte Reload
-; THUMBV7-NEXT:    and.w r5, r10, r7
-; THUMBV7-NEXT:    orr.w r0, r0, r12
-; THUMBV7-NEXT:    mov.w r12, #0
-; THUMBV7-NEXT:    add r1, r2
-; THUMBV7-NEXT:    ldr r2, [sp, #12] @ 4-byte Reload
-; THUMBV7-NEXT:    adcs r2, r6
-; THUMBV7-NEXT:    ldr r6, [sp, #28] @ 4-byte Reload
-; THUMBV7-NEXT:    adc r7, r4, #0
-; THUMBV7-NEXT:    adds r1, r1, r6
-; THUMBV7-NEXT:    umlal r2, r7, r3, r8
-; THUMBV7-NEXT:    adc r4, r4, #0
-; THUMBV7-NEXT:    orrs r0, r4
-; THUMBV7-NEXT:    orrs r0, r5
-; THUMBV7-NEXT:    ldrd r5, r4, [sp, #36] @ 8-byte Folded Reload
-; THUMBV7-NEXT:    adds r5, r5, r4
-; THUMBV7-NEXT:    ldr r4, [sp, #20] @ 4-byte Reload
-; THUMBV7-NEXT:    adcs r1, r4
-; THUMBV7-NEXT:    ldr r4, [sp, #4] @ 4-byte Reload
-; THUMBV7-NEXT:    cmp r4, #0
+; THUMBV7-NEXT:    cmp.w r9, #0
+; THUMBV7-NEXT:    it ne
+; THUMBV7-NEXT:    movne.w r9, #1
+; THUMBV7-NEXT:    ldr.w r11, [sp, #4] @ 4-byte Reload
+; THUMBV7-NEXT:    and.w r4, r4, r9
+; THUMBV7-NEXT:    adds.w r0, r0, r11
+; THUMBV7-NEXT:    str.w r0, [r12, #4]
+; THUMBV7-NEXT:    orr.w r4, r4, r7
+; THUMBV7-NEXT:    ldr r7, [sp, #16] @ 4-byte Reload
+; THUMBV7-NEXT:    ldr r2, [sp] @ 4-byte Reload
+; THUMBV7-NEXT:    orr.w r4, r4, r5
+; THUMBV7-NEXT:    ldr r5, [sp, #84]
+; THUMBV7-NEXT:    adcs r7, r6
+; THUMBV7-NEXT:    add r2, lr
+; THUMBV7-NEXT:    mov.w lr, #0
+; THUMBV7-NEXT:    adc r6, lr, #0
+; THUMBV7-NEXT:    and.w r0, r10, r8
+; THUMBV7-NEXT:    umlal r7, r6, r3, r5
+; THUMBV7-NEXT:    ldr r5, [sp, #12] @ 4-byte Reload
+; THUMBV7-NEXT:    mov.w r10, #0
+; THUMBV7-NEXT:    adds.w r9, r5, r2
+; THUMBV7-NEXT:    adc r5, lr, #0
+; THUMBV7-NEXT:    orrs r5, r4
+; THUMBV7-NEXT:    orr.w lr, r0, r5
+; THUMBV7-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
+; THUMBV7-NEXT:    subs r5, r0, #0
+; THUMBV7-NEXT:    mov r4, r5
 ; THUMBV7-NEXT:    it ne
 ; THUMBV7-NEXT:    movne r4, #1
+; THUMBV7-NEXT:    ldr r0, [sp, #20] @ 4-byte Reload
+; THUMBV7-NEXT:    subs r0, #0
+; THUMBV7-NEXT:    add r5, r0
+; THUMBV7-NEXT:    it ne
+; THUMBV7-NEXT:    movne r0, #1
+; THUMBV7-NEXT:    ldr r2, [sp, #24] @ 4-byte Reload
+; THUMBV7-NEXT:    adds.w r11, r2, r5
+; THUMBV7-NEXT:    ldrd r5, r2, [sp, #28] @ 8-byte Folded Reload
+; THUMBV7-NEXT:    adc r8, r10, #0
+; THUMBV7-NEXT:    adds r2, r2, r5
+; THUMBV7-NEXT:    adc.w r5, r9, r11
 ; THUMBV7-NEXT:    cmp r3, #0
 ; THUMBV7-NEXT:    it ne
 ; THUMBV7-NEXT:    movne r3, #1
-; THUMBV7-NEXT:    cmp.w lr, #0
-; THUMBV7-NEXT:    it ne
-; THUMBV7-NEXT:    movne.w lr, #1
-; THUMBV7-NEXT:    cmp.w r11, #0
+; THUMBV7-NEXT:    cmp r1, #0
 ; THUMBV7-NEXT:    it ne
-; THUMBV7-NEXT:    movne.w r11, #1
-; THUMBV7-NEXT:    adds r2, r2, r5
-; THUMBV7-NEXT:    and.w r3, r3, lr
-; THUMBV7-NEXT:    str.w r2, [r9, #8]
-; THUMBV7-NEXT:    adcs r1, r7
-; THUMBV7-NEXT:    str.w r1, [r9, #12]
-; THUMBV7-NEXT:    orr.w r1, r3, r11
-; THUMBV7-NEXT:    ldr r2, [sp, #32] @ 4-byte Reload
+; THUMBV7-NEXT:    movne r1, #1
+; THUMBV7-NEXT:    adds r2, r2, r7
+; THUMBV7-NEXT:    and.w r1, r1, r3
+; THUMBV7-NEXT:    str.w r2, [r12, #8]
 ; THUMBV7-NEXT:    orr.w r1, r1, r4
-; THUMBV7-NEXT:    orr.w r1, r1, r2
+; THUMBV7-NEXT:    adcs.w r2, r6, r5
 ; THUMBV7-NEXT:    orr.w r0, r0, r1
-; THUMBV7-NEXT:    adc r1, r12, #0
+; THUMBV7-NEXT:    adc r1, r10, #0
+; THUMBV7-NEXT:    orr.w r0, r0, r8
+; THUMBV7-NEXT:    str.w r2, [r12, #12]
+; THUMBV7-NEXT:    orr.w r0, r0, lr
 ; THUMBV7-NEXT:    orrs r0, r1
 ; THUMBV7-NEXT:    and r0, r0, #1
-; THUMBV7-NEXT:    strb.w r0, [r9, #16]
-; THUMBV7-NEXT:    add sp, #44
+; THUMBV7-NEXT:    strb.w r0, [r12, #16]
+; THUMBV7-NEXT:    add sp, #36
 ; THUMBV7-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 start:
   %0 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %l, i128 %r) #2
diff --git a/llvm/test/CodeGen/Thumb2/umulo-64-legalisation-lowering.ll b/llvm/test/CodeGen/Thumb2/umulo-64-legalisation-lowering.ll
index 55e917159fce9..167ffec6be119 100644
--- a/llvm/test/CodeGen/Thumb2/umulo-64-legalisation-lowering.ll
+++ b/llvm/test/CodeGen/Thumb2/umulo-64-legalisation-lowering.ll
@@ -4,32 +4,33 @@
 define { i64, i8 } @mulodi_test(i64 %l, i64 %r) unnamed_addr #0 {
 ; THUMBV7-LABEL: mulodi_test:
 ; THUMBV7:       @ %bb.0: @ %start
-; THUMBV7-NEXT:    .save {r4, r5, r7, lr}
-; THUMBV7-NEXT:    push {r4, r5, r7, lr}
-; THUMBV7-NEXT:    umull r12, lr, r3, r0
+; THUMBV7-NEXT:    .save {r4, lr}
+; THUMBV7-NEXT:    push {r4, lr}
+; THUMBV7-NEXT:    mul r12, r1, r2
 ; THUMBV7-NEXT:    cmp r3, #0
+; THUMBV7-NEXT:    mul lr, r3, r0
 ; THUMBV7-NEXT:    it ne
 ; THUMBV7-NEXT:    movne r3, #1
 ; THUMBV7-NEXT:    cmp r1, #0
-; THUMBV7-NEXT:    umull r0, r4, r0, r2
-; THUMBV7-NEXT:    umull r2, r5, r1, r2
 ; THUMBV7-NEXT:    it ne
 ; THUMBV7-NEXT:    movne r1, #1
+; THUMBV7-NEXT:    umull r0, r4, r0, r2
 ; THUMBV7-NEXT:    ands r1, r3
-; THUMBV7-NEXT:    cmp r5, #0
+; THUMBV7-NEXT:    subs.w r2, r12, #0
+; THUMBV7-NEXT:    mov.w r12, #0
+; THUMBV7-NEXT:    mov r3, r2
 ; THUMBV7-NEXT:    it ne
-; THUMBV7-NEXT:    movne r5, #1
-; THUMBV7-NEXT:    orrs r1, r5
-; THUMBV7-NEXT:    cmp.w lr, #0
+; THUMBV7-NEXT:    movne r3, #1
+; THUMBV7-NEXT:    orrs r1, r3
+; THUMBV7-NEXT:    subs.w r3, lr, #0
+; THUMBV7-NEXT:    add r2, r3
 ; THUMBV7-NEXT:    it ne
-; THUMBV7-NEXT:    movne.w lr, #1
-; THUMBV7-NEXT:    orr.w r3, r1, lr
-; THUMBV7-NEXT:    add.w r1, r2, r12
-; THUMBV7-NEXT:    movs r2, #0
-; THUMBV7-NEXT:    adds r1, r1, r4
-; THUMBV7-NEXT:    adc r2, r2, #0
+; THUMBV7-NEXT:    movne r3, #1
+; THUMBV7-NEXT:    orrs r3, r1
+; THUMBV7-NEXT:    adds r1, r4, r2
+; THUMBV7-NEXT:    adc r2, r12, #0
 ; THUMBV7-NEXT:    orrs r2, r3
-; THUMBV7-NEXT:    pop {r4, r5, r7, pc}
+; THUMBV7-NEXT:    pop {r4, pc}
 start:
   %0 = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %l, i64 %r) #2
   %1 = extractvalue { i64, i1 } %0, 0



More information about the llvm-commits mailing list