[llvm] 7897086 - Do not generate calls to the 128-bit function __multi3() on 32-bit ARM

Renato Golin via llvm-commits llvm-commits at lists.llvm.org
Fri Jun 11 03:46:23 PDT 2021


Author: Koutheir Attouchi
Date: 2021-06-11T11:45:21+01:00
New Revision: 789708617d2023dbb343c65120556b2e6a97044d

URL: https://github.com/llvm/llvm-project/commit/789708617d2023dbb343c65120556b2e6a97044d
DIFF: https://github.com/llvm/llvm-project/commit/789708617d2023dbb343c65120556b2e6a97044d.diff

LOG: Do not generate calls to the 128-bit function __multi3() on 32-bit ARM

Re-applying this patch after bot failures. Should be fine now.

The function __multi3() is undefined on 32-bit ARM, so a call to it should
never be emitted. Instead, plain instructions need to be generated to
perform 128-bit multiplications.

Differential Revision: https://reviews.llvm.org/D103906

Added: 
    

Modified: 
    llvm/lib/Target/ARM/ARMISelLowering.cpp
    llvm/test/CodeGen/ARM/umulo-128-legalisation-lowering.ll
    llvm/test/CodeGen/Thumb/pr35836_2.ll
    llvm/test/CodeGen/Thumb/umulo-128-legalisation-lowering.ll
    llvm/test/CodeGen/Thumb2/umulo-128-legalisation-lowering.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 52b18e92661c0..81c0565f4813f 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -544,6 +544,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
   setLibcallName(RTLIB::SHL_I128, nullptr);
   setLibcallName(RTLIB::SRL_I128, nullptr);
   setLibcallName(RTLIB::SRA_I128, nullptr);
+  setLibcallName(RTLIB::MUL_I128, nullptr);
 
   // RTLIB
   if (Subtarget->isAAPCS_ABI() &&

diff  --git a/llvm/test/CodeGen/ARM/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/ARM/umulo-128-legalisation-lowering.ll
index c237a396bf914..d488599a42b40 100644
--- a/llvm/test/CodeGen/ARM/umulo-128-legalisation-lowering.ll
+++ b/llvm/test/CodeGen/ARM/umulo-128-legalisation-lowering.ll
@@ -7,190 +7,208 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
 ; ARMV6:       @ %bb.0: @ %start
 ; ARMV6-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
 ; ARMV6-NEXT:    sub sp, sp, #28
-; ARMV6-NEXT:    mov r9, #0
-; ARMV6-NEXT:    mov r11, r0
+; ARMV6-NEXT:    ldr r7, [sp, #72]
+; ARMV6-NEXT:    mov r6, r0
+; ARMV6-NEXT:    str r0, [sp, #8]            @ 4-byte Spill
+; ARMV6-NEXT:    ldr r4, [sp, #84]
+; ARMV6-NEXT:    umull r1, r0, r2, r7
+; ARMV6-NEXT:    mov lr, r7
+; ARMV6-NEXT:    umull r5, r10, r4, r2
+; ARMV6-NEXT:    str r1, [r6]
+; ARMV6-NEXT:    ldr r6, [sp, #80]
+; ARMV6-NEXT:    umull r1, r7, r3, r6
+; ARMV6-NEXT:    str r7, [sp, #12]           @ 4-byte Spill
+; ARMV6-NEXT:    add r1, r5, r1
+; ARMV6-NEXT:    umull r7, r5, r6, r2
+; ARMV6-NEXT:    mov r6, lr
+; ARMV6-NEXT:    str r7, [sp, #16]           @ 4-byte Spill
+; ARMV6-NEXT:    mov r7, #0
+; ARMV6-NEXT:    adds r1, r5, r1
+; ARMV6-NEXT:    str r1, [sp, #4]            @ 4-byte Spill
+; ARMV6-NEXT:    adc r1, r7, #0
+; ARMV6-NEXT:    str r1, [sp, #24]           @ 4-byte Spill
+; ARMV6-NEXT:    ldr r1, [sp, #64]
 ; ARMV6-NEXT:    ldr r7, [sp, #76]
-; ARMV6-NEXT:    mov r5, r3
-; ARMV6-NEXT:    ldr r10, [sp, #72]
-; ARMV6-NEXT:    mov r1, r3
-; ARMV6-NEXT:    mov r6, r2
-; ARMV6-NEXT:    mov r0, r2
+; ARMV6-NEXT:    ldr r5, [sp, #64]
+; ARMV6-NEXT:    umull r12, r9, r7, r1
+; ARMV6-NEXT:    ldr r1, [sp, #68]
+; ARMV6-NEXT:    umull r11, r8, r1, lr
+; ARMV6-NEXT:    add r12, r11, r12
+; ARMV6-NEXT:    umull r11, lr, r5, lr
+; ARMV6-NEXT:    mov r5, r6
+; ARMV6-NEXT:    mov r6, #0
+; ARMV6-NEXT:    adds r12, lr, r12
+; ARMV6-NEXT:    umull r2, lr, r2, r7
+; ARMV6-NEXT:    adc r6, r6, #0
+; ARMV6-NEXT:    str r6, [sp, #20]           @ 4-byte Spill
+; ARMV6-NEXT:    ldr r6, [sp, #16]           @ 4-byte Reload
+; ARMV6-NEXT:    adds r11, r11, r6
+; ARMV6-NEXT:    ldr r6, [sp, #4]            @ 4-byte Reload
+; ARMV6-NEXT:    adc r6, r12, r6
+; ARMV6-NEXT:    mov r12, #0
+; ARMV6-NEXT:    umlal r0, r12, r3, r5
+; ARMV6-NEXT:    ldr r5, [sp, #8]            @ 4-byte Reload
+; ARMV6-NEXT:    str r6, [sp, #16]           @ 4-byte Spill
+; ARMV6-NEXT:    ldr r6, [sp, #64]
+; ARMV6-NEXT:    adds r0, r2, r0
+; ARMV6-NEXT:    str r0, [r5, #4]
+; ARMV6-NEXT:    adcs r0, r12, lr
 ; ARMV6-NEXT:    mov r2, #0
-; ARMV6-NEXT:    mov r3, #0
-; ARMV6-NEXT:    str r9, [sp, #12]
-; ARMV6-NEXT:    str r9, [sp, #8]
-; ARMV6-NEXT:    str r7, [sp, #4]
-; ARMV6-NEXT:    str r10, [sp]
-; ARMV6-NEXT:    bl __multi3
-; ARMV6-NEXT:    str r3, [sp, #20] @ 4-byte Spill
-; ARMV6-NEXT:    str r2, [sp, #16] @ 4-byte Spill
-; ARMV6-NEXT:    stm r11, {r0, r1}
-; ARMV6-NEXT:    ldr r0, [sp, #84]
-; ARMV6-NEXT:    ldr r3, [sp, #80]
-; ARMV6-NEXT:    ldr r8, [sp, #64]
-; ARMV6-NEXT:    umull r4, r0, r0, r6
-; ARMV6-NEXT:    umull r2, r1, r5, r3
-; ARMV6-NEXT:    add r2, r4, r2
-; ARMV6-NEXT:    umull lr, r4, r3, r6
-; ARMV6-NEXT:    umull r3, r6, r7, r8
-; ARMV6-NEXT:    adds r12, r4, r2
-; ARMV6-NEXT:    adc r2, r9, #0
-; ARMV6-NEXT:    str r2, [sp, #24] @ 4-byte Spill
-; ARMV6-NEXT:    ldr r2, [sp, #68]
-; ARMV6-NEXT:    umull r4, r2, r2, r10
-; ARMV6-NEXT:    add r3, r4, r3
-; ARMV6-NEXT:    umull r4, r10, r8, r10
-; ARMV6-NEXT:    adds r3, r10, r3
-; ARMV6-NEXT:    adc r10, r9, #0
-; ARMV6-NEXT:    adds r4, r4, lr
-; ARMV6-NEXT:    adc r12, r3, r12
-; ARMV6-NEXT:    ldr r3, [sp, #16] @ 4-byte Reload
-; ARMV6-NEXT:    adds r4, r3, r4
-; ARMV6-NEXT:    str r4, [r11, #8]
-; ARMV6-NEXT:    ldr r4, [sp, #20] @ 4-byte Reload
-; ARMV6-NEXT:    adcs r3, r4, r12
-; ARMV6-NEXT:    str r3, [r11, #12]
-; ARMV6-NEXT:    ldr r3, [sp, #84]
-; ARMV6-NEXT:    adc r12, r9, #0
-; ARMV6-NEXT:    cmp r5, #0
-; ARMV6-NEXT:    movne r5, #1
-; ARMV6-NEXT:    cmp r3, #0
-; ARMV6-NEXT:    mov r4, r3
-; ARMV6-NEXT:    movne r4, #1
-; ARMV6-NEXT:    cmp r0, #0
-; ARMV6-NEXT:    movne r0, #1
+; ARMV6-NEXT:    adc r2, r2, #0
+; ARMV6-NEXT:    orrs lr, r6, r1
+; ARMV6-NEXT:    ldr r6, [sp, #80]
+; ARMV6-NEXT:    movne lr, #1
+; ARMV6-NEXT:    umlal r0, r2, r3, r7
+; ARMV6-NEXT:    orrs r12, r6, r4
+; ARMV6-NEXT:    movne r12, #1
+; ARMV6-NEXT:    cmp r9, #0
+; ARMV6-NEXT:    ldr r6, [sp, #12]           @ 4-byte Reload
+; ARMV6-NEXT:    movne r9, #1
+; ARMV6-NEXT:    cmp r8, #0
+; ARMV6-NEXT:    movne r8, #1
+; ARMV6-NEXT:    cmp r6, #0
+; ARMV6-NEXT:    movne r6, #1
+; ARMV6-NEXT:    cmp r10, #0
+; ARMV6-NEXT:    movne r10, #1
 ; ARMV6-NEXT:    cmp r1, #0
-; ARMV6-NEXT:    and r5, r4, r5
 ; ARMV6-NEXT:    movne r1, #1
-; ARMV6-NEXT:    orr r0, r5, r0
-; ARMV6-NEXT:    ldr r5, [sp, #68]
-; ARMV6-NEXT:    orr r0, r0, r1
-; ARMV6-NEXT:    ldr r1, [sp, #24] @ 4-byte Reload
 ; ARMV6-NEXT:    cmp r7, #0
-; ARMV6-NEXT:    orr r0, r0, r1
 ; ARMV6-NEXT:    movne r7, #1
-; ARMV6-NEXT:    cmp r5, #0
-; ARMV6-NEXT:    mov r1, r5
-; ARMV6-NEXT:    movne r1, #1
-; ARMV6-NEXT:    cmp r2, #0
-; ARMV6-NEXT:    movne r2, #1
+; ARMV6-NEXT:    cmp r4, #0
+; ARMV6-NEXT:    movne r4, #1
+; ARMV6-NEXT:    cmp r3, #0
+; ARMV6-NEXT:    movne r3, #1
+; ARMV6-NEXT:    adds r0, r0, r11
+; ARMV6-NEXT:    str r0, [r5, #8]
 ; ARMV6-NEXT:    and r1, r1, r7
+; ARMV6-NEXT:    ldr r0, [sp, #16]           @ 4-byte Reload
+; ARMV6-NEXT:    orr r1, r1, r8
+; ARMV6-NEXT:    orr r1, r1, r9
+; ARMV6-NEXT:    adcs r0, r2, r0
+; ARMV6-NEXT:    str r0, [r5, #12]
+; ARMV6-NEXT:    and r0, r4, r3
+; ARMV6-NEXT:    ldr r2, [sp, #24]           @ 4-byte Reload
+; ARMV6-NEXT:    orr r0, r0, r10
+; ARMV6-NEXT:    orr r0, r0, r6
+; ARMV6-NEXT:    orr r0, r0, r2
+; ARMV6-NEXT:    ldr r2, [sp, #20]           @ 4-byte Reload
 ; ARMV6-NEXT:    orr r1, r1, r2
-; ARMV6-NEXT:    ldr r2, [sp, #80]
-; ARMV6-NEXT:    cmp r6, #0
-; ARMV6-NEXT:    movne r6, #1
-; ARMV6-NEXT:    orrs r2, r2, r3
-; ARMV6-NEXT:    orr r1, r1, r6
-; ARMV6-NEXT:    movne r2, #1
-; ARMV6-NEXT:    orrs r7, r8, r5
-; ARMV6-NEXT:    orr r1, r1, r10
-; ARMV6-NEXT:    movne r7, #1
-; ARMV6-NEXT:    and r2, r7, r2
+; ARMV6-NEXT:    and r2, lr, r12
 ; ARMV6-NEXT:    orr r1, r2, r1
 ; ARMV6-NEXT:    orr r0, r1, r0
-; ARMV6-NEXT:    orr r0, r0, r12
+; ARMV6-NEXT:    mov r1, #0
+; ARMV6-NEXT:    adc r1, r1, #0
+; ARMV6-NEXT:    orr r0, r0, r1
 ; ARMV6-NEXT:    and r0, r0, #1
-; ARMV6-NEXT:    strb r0, [r11, #16]
+; ARMV6-NEXT:    strb r0, [r5, #16]
 ; ARMV6-NEXT:    add sp, sp, #28
 ; ARMV6-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ;
 ; ARMV7-LABEL: muloti_test:
 ; ARMV7:       @ %bb.0: @ %start
 ; ARMV7-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; ARMV7-NEXT:    sub sp, sp, #44
-; ARMV7-NEXT:    str r0, [sp, #40] @ 4-byte Spill
+; ARMV7-NEXT:    sub sp, sp, #36
+; ARMV7-NEXT:    ldr r5, [sp, #84]
+; ARMV7-NEXT:    mov r8, r0
+; ARMV7-NEXT:    ldr r1, [sp, #72]
+; ARMV7-NEXT:    ldr r10, [sp, #80]
+; ARMV7-NEXT:    ldr r9, [sp, #76]
+; ARMV7-NEXT:    umull r4, lr, r5, r1
+; ARMV7-NEXT:    umull r0, r7, r2, r10
+; ARMV7-NEXT:    str r4, [sp, #24]           @ 4-byte Spill
+; ARMV7-NEXT:    ldr r4, [sp, #88]
+; ARMV7-NEXT:    umull r1, r6, r1, r10
+; ARMV7-NEXT:    str r0, [sp, #32]           @ 4-byte Spill
+; ARMV7-NEXT:    umull r11, r0, r2, r5
+; ARMV7-NEXT:    str r6, [sp, #20]           @ 4-byte Spill
+; ARMV7-NEXT:    str r1, [sp, #28]           @ 4-byte Spill
+; ARMV7-NEXT:    umull r6, r12, r3, r4
+; ARMV7-NEXT:    ldr r1, [sp, #92]
+; ARMV7-NEXT:    str r0, [sp, #8]            @ 4-byte Spill
 ; ARMV7-NEXT:    mov r0, #0
-; ARMV7-NEXT:    ldr r8, [sp, #88]
-; ARMV7-NEXT:    mov r5, r3
-; ARMV7-NEXT:    ldr r7, [sp, #92]
-; ARMV7-NEXT:    mov r1, r3
-; ARMV7-NEXT:    mov r6, r2
-; ARMV7-NEXT:    str r0, [sp, #8]
-; ARMV7-NEXT:    str r0, [sp, #12]
-; ARMV7-NEXT:    mov r0, r2
-; ARMV7-NEXT:    mov r2, #0
-; ARMV7-NEXT:    mov r3, #0
-; ARMV7-NEXT:    str r8, [sp]
-; ARMV7-NEXT:    str r7, [sp, #4]
-; ARMV7-NEXT:    bl __multi3
-; ARMV7-NEXT:    str r1, [sp, #28] @ 4-byte Spill
-; ARMV7-NEXT:    ldr r1, [sp, #80]
-; ARMV7-NEXT:    str r2, [sp, #24] @ 4-byte Spill
-; ARMV7-NEXT:    str r3, [sp, #20] @ 4-byte Spill
-; ARMV7-NEXT:    umull r2, r9, r7, r1
-; ARMV7-NEXT:    str r0, [sp, #32] @ 4-byte Spill
-; ARMV7-NEXT:    ldr r4, [sp, #84]
-; ARMV7-NEXT:    ldr r0, [sp, #96]
-; ARMV7-NEXT:    umull r1, r3, r1, r8
-; ARMV7-NEXT:    umull r12, r10, r4, r8
-; ARMV7-NEXT:    str r1, [sp, #16] @ 4-byte Spill
-; ARMV7-NEXT:    umull lr, r1, r5, r0
-; ARMV7-NEXT:    add r2, r12, r2
-; ARMV7-NEXT:    umull r11, r8, r0, r6
-; ARMV7-NEXT:    ldr r0, [sp, #100]
-; ARMV7-NEXT:    adds r2, r3, r2
-; ARMV7-NEXT:    mov r12, #0
-; ARMV7-NEXT:    umull r6, r0, r0, r6
-; ARMV7-NEXT:    adc r3, r12, #0
-; ARMV7-NEXT:    str r3, [sp, #36] @ 4-byte Spill
-; ARMV7-NEXT:    add r3, r6, lr
-; ARMV7-NEXT:    ldr r6, [sp, #16] @ 4-byte Reload
-; ARMV7-NEXT:    adds r3, r8, r3
-; ARMV7-NEXT:    adc lr, r12, #0
-; ARMV7-NEXT:    adds r6, r6, r11
-; ARMV7-NEXT:    adc r2, r2, r3
-; ARMV7-NEXT:    ldr r3, [sp, #24] @ 4-byte Reload
-; ARMV7-NEXT:    mov r12, #0
-; ARMV7-NEXT:    adds r3, r3, r6
-; ARMV7-NEXT:    ldr r6, [sp, #20] @ 4-byte Reload
-; ARMV7-NEXT:    adcs r8, r6, r2
-; ARMV7-NEXT:    ldr r6, [sp, #40] @ 4-byte Reload
-; ARMV7-NEXT:    ldr r2, [sp, #32] @ 4-byte Reload
-; ARMV7-NEXT:    str r2, [r6]
-; ARMV7-NEXT:    ldr r2, [sp, #28] @ 4-byte Reload
-; ARMV7-NEXT:    stmib r6, {r2, r3, r8}
-; ARMV7-NEXT:    adc r8, r12, #0
-; ARMV7-NEXT:    cmp r5, #0
-; ARMV7-NEXT:    ldr r2, [sp, #100]
-; ARMV7-NEXT:    movwne r5, #1
-; ARMV7-NEXT:    cmp r2, #0
-; ARMV7-NEXT:    mov r3, r2
+; ARMV7-NEXT:    umlal r7, r0, r3, r10
+; ARMV7-NEXT:    str r6, [sp, #16]           @ 4-byte Spill
+; ARMV7-NEXT:    umull r6, r1, r1, r2
+; ARMV7-NEXT:    umull r2, r4, r4, r2
+; ARMV7-NEXT:    str r6, [sp, #4]            @ 4-byte Spill
+; ARMV7-NEXT:    str r2, [sp, #12]           @ 4-byte Spill
+; ARMV7-NEXT:    adds r2, r11, r7
+; ARMV7-NEXT:    ldr r7, [sp, #8]            @ 4-byte Reload
+; ARMV7-NEXT:    mov r11, #0
+; ARMV7-NEXT:    str r4, [sp]                @ 4-byte Spill
+; ARMV7-NEXT:    umull r6, r4, r9, r10
+; ARMV7-NEXT:    adcs r9, r0, r7
+; ARMV7-NEXT:    ldr r0, [sp, #32]           @ 4-byte Reload
+; ARMV7-NEXT:    adc r10, r11, #0
+; ARMV7-NEXT:    stm r8, {r0, r2}
+; ARMV7-NEXT:    ldr r0, [sp, #24]           @ 4-byte Reload
+; ARMV7-NEXT:    umlal r9, r10, r3, r5
+; ARMV7-NEXT:    ldr r2, [sp, #20]           @ 4-byte Reload
+; ARMV7-NEXT:    add r0, r6, r0
+; ARMV7-NEXT:    adds r2, r2, r0
+; ARMV7-NEXT:    ldr r6, [sp, #4]            @ 4-byte Reload
+; ARMV7-NEXT:    adc r0, r11, #0
+; ARMV7-NEXT:    str r0, [sp, #32]           @ 4-byte Spill
+; ARMV7-NEXT:    ldr r0, [sp, #16]           @ 4-byte Reload
+; ARMV7-NEXT:    ldr r7, [sp, #28]           @ 4-byte Reload
+; ARMV7-NEXT:    add r0, r6, r0
+; ARMV7-NEXT:    ldr r6, [sp]                @ 4-byte Reload
+; ARMV7-NEXT:    adds r0, r6, r0
+; ARMV7-NEXT:    ldr r6, [sp, #12]           @ 4-byte Reload
+; ARMV7-NEXT:    adc r11, r11, #0
+; ARMV7-NEXT:    adds r7, r7, r6
+; ARMV7-NEXT:    ldr r6, [sp, #92]
+; ARMV7-NEXT:    adc r0, r2, r0
+; ARMV7-NEXT:    str r0, [sp, #28]           @ 4-byte Spill
+; ARMV7-NEXT:    ldr r0, [sp, #92]
+; ARMV7-NEXT:    cmp r3, #0
 ; ARMV7-NEXT:    movwne r3, #1
+; ARMV7-NEXT:    ldr r2, [sp, #76]
 ; ARMV7-NEXT:    cmp r0, #0
 ; ARMV7-NEXT:    movwne r0, #1
 ; ARMV7-NEXT:    cmp r1, #0
-; ARMV7-NEXT:    and r3, r3, r5
 ; ARMV7-NEXT:    movwne r1, #1
-; ARMV7-NEXT:    orr r0, r3, r0
-; ARMV7-NEXT:    cmp r7, #0
+; ARMV7-NEXT:    cmp r12, #0
+; ARMV7-NEXT:    and r0, r0, r3
+; ARMV7-NEXT:    movwne r12, #1
+; ARMV7-NEXT:    cmp r5, #0
 ; ARMV7-NEXT:    orr r0, r0, r1
-; ARMV7-NEXT:    ldr r1, [sp, #80]
-; ARMV7-NEXT:    movwne r7, #1
+; ARMV7-NEXT:    movwne r5, #1
+; ARMV7-NEXT:    cmp r2, #0
+; ARMV7-NEXT:    mov r1, r2
+; ARMV7-NEXT:    mov r3, r2
+; ARMV7-NEXT:    movwne r1, #1
 ; ARMV7-NEXT:    cmp r4, #0
-; ARMV7-NEXT:    orr r1, r1, r4
+; ARMV7-NEXT:    ldr r2, [sp, #72]
 ; ARMV7-NEXT:    movwne r4, #1
-; ARMV7-NEXT:    cmp r10, #0
-; ARMV7-NEXT:    and r3, r4, r7
-; ARMV7-NEXT:    movwne r10, #1
-; ARMV7-NEXT:    cmp r9, #0
-; ARMV7-NEXT:    orr r3, r3, r10
-; ARMV7-NEXT:    ldr r7, [sp, #36] @ 4-byte Reload
-; ARMV7-NEXT:    movwne r9, #1
-; ARMV7-NEXT:    orr r3, r3, r9
-; ARMV7-NEXT:    orr r3, r3, r7
-; ARMV7-NEXT:    ldr r7, [sp, #96]
-; ARMV7-NEXT:    orr r0, r0, lr
-; ARMV7-NEXT:    orrs r7, r7, r2
-; ARMV7-NEXT:    movwne r7, #1
-; ARMV7-NEXT:    cmp r1, #0
-; ARMV7-NEXT:    movwne r1, #1
-; ARMV7-NEXT:    and r1, r1, r7
-; ARMV7-NEXT:    orr r1, r1, r3
+; ARMV7-NEXT:    cmp lr, #0
+; ARMV7-NEXT:    and r1, r1, r5
+; ARMV7-NEXT:    movwne lr, #1
+; ARMV7-NEXT:    orrs r2, r2, r3
+; ARMV7-NEXT:    ldr r3, [sp, #88]
+; ARMV7-NEXT:    movwne r2, #1
+; ARMV7-NEXT:    orr r1, r1, r4
+; ARMV7-NEXT:    orr r0, r0, r12
+; ARMV7-NEXT:    orrs r3, r3, r6
+; ARMV7-NEXT:    orr r1, r1, lr
+; ARMV7-NEXT:    movwne r3, #1
+; ARMV7-NEXT:    adds r7, r9, r7
+; ARMV7-NEXT:    str r7, [r8, #8]
+; ARMV7-NEXT:    and r2, r2, r3
+; ARMV7-NEXT:    ldr r7, [sp, #28]           @ 4-byte Reload
+; ARMV7-NEXT:    orr r0, r0, r11
+; ARMV7-NEXT:    adcs r7, r10, r7
+; ARMV7-NEXT:    str r7, [r8, #12]
+; ARMV7-NEXT:    ldr r7, [sp, #32]           @ 4-byte Reload
+; ARMV7-NEXT:    orr r1, r1, r7
+; ARMV7-NEXT:    orr r1, r2, r1
 ; ARMV7-NEXT:    orr r0, r1, r0
-; ARMV7-NEXT:    orr r0, r0, r8
+; ARMV7-NEXT:    mov r1, #0
+; ARMV7-NEXT:    adc r1, r1, #0
+; ARMV7-NEXT:    orr r0, r0, r1
 ; ARMV7-NEXT:    and r0, r0, #1
-; ARMV7-NEXT:    strb r0, [r6, #16]
-; ARMV7-NEXT:    add sp, sp, #44
+; ARMV7-NEXT:    strb r0, [r8, #16]
+; ARMV7-NEXT:    add sp, sp, #36
 ; ARMV7-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 start:
   %0 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %l, i128 %r) #2

diff  --git a/llvm/test/CodeGen/Thumb/pr35836_2.ll b/llvm/test/CodeGen/Thumb/pr35836_2.ll
index 26e27293dc784..3738f0c684958 100644
--- a/llvm/test/CodeGen/Thumb/pr35836_2.ll
+++ b/llvm/test/CodeGen/Thumb/pr35836_2.ll
@@ -36,21 +36,20 @@ entry:
   %mul = mul i128 %add18, %add
   ret i128 %mul
 }
-; CHECK: adds	r4, r2, r7
-; CHECK: mov	r4, r1
-; CHECK: adcs	r4, r6
-; CHECK: ldr	r4, [sp, #20]           @ 4-byte Reload
-; CHECK: adcs	r5, r4
-; CHECK: ldr	r4, [sp, #24]           @ 4-byte Reload
-; CHECK: adcs	r3, r4
-; CHECK: adds	r4, r2, r7
-; CHECK: adcs	r1, r6
-; CHECK: str	r4, [sp]
-; CHECK: str	r1, [sp, #4]
-; CHECK: ldr	r2, [r0, #16]
-; CHECK: ldr	r6, [r0, #24]
-; CHECK: adcs	r6, r2
-; CHECK: str	r6, [sp, #8]
-; CHECK: ldr	r2, [r0, #20]
-; CHECK: ldr	r0, [r0, #28]
-; CHECK: adcs	r0, r2
+; CHECK: adds r5, r1, r7
+; CHECK: mov r5, r4
+; CHECK: adcs r5, r6
+; CHECK: ldr r5, [sp, #12]                   @ 4-byte Reload
+; CHECK: adcs r2, r5
+; CHECK: ldr r5, [sp, #16]                   @ 4-byte Reload
+; CHECK: adcs r3, r5
+; CHECK: adds r5, r1, r7
+; CHECK: adcs r4, r6
+; CHECK: ldr r1, [r0, #20]
+; CHECK: str r1, [sp, #16]                   @ 4-byte Spill
+; CHECK: ldr r6, [r0, #28]
+; CHECK: ldr r1, [r0, #16]
+; CHECK: ldr r7, [r0, #24]
+; CHECK: adcs r7, r1
+; CHECK: ldr r0, [sp, #16]                   @ 4-byte Reload
+; CHECK: adcs r6, r0

diff  --git a/llvm/test/CodeGen/Thumb/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/Thumb/umulo-128-legalisation-lowering.ll
index 8b966b536c944..9b5fa1c2bc811 100644
--- a/llvm/test/CodeGen/Thumb/umulo-128-legalisation-lowering.ll
+++ b/llvm/test/CodeGen/Thumb/umulo-128-legalisation-lowering.ll
@@ -6,140 +6,197 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
 ; THUMBV6:       @ %bb.0: @ %start
 ; THUMBV6-NEXT:    .save {r4, r5, r6, r7, lr}
 ; THUMBV6-NEXT:    push {r4, r5, r6, r7, lr}
-; THUMBV6-NEXT:    .pad #68
-; THUMBV6-NEXT:    sub sp, #68
-; THUMBV6-NEXT:    mov r4, r3
-; THUMBV6-NEXT:    str r2, [sp, #56] @ 4-byte Spill
-; THUMBV6-NEXT:    mov r6, r0
+; THUMBV6-NEXT:    .pad #60
+; THUMBV6-NEXT:    sub sp, #60
+; THUMBV6-NEXT:    mov r6, r3
+; THUMBV6-NEXT:    mov r1, r2
+; THUMBV6-NEXT:    str r2, [sp, #52] @ 4-byte Spill
+; THUMBV6-NEXT:    mov r4, r0
+; THUMBV6-NEXT:    str r0, [sp, #40] @ 4-byte Spill
+; THUMBV6-NEXT:    ldr r2, [sp, #88]
+; THUMBV6-NEXT:    str r2, [sp, #48] @ 4-byte Spill
 ; THUMBV6-NEXT:    movs r5, #0
-; THUMBV6-NEXT:    str r5, [sp, #12]
-; THUMBV6-NEXT:    str r5, [sp, #8]
-; THUMBV6-NEXT:    ldr r0, [sp, #100]
-; THUMBV6-NEXT:    str r0, [sp, #28] @ 4-byte Spill
-; THUMBV6-NEXT:    str r0, [sp, #4]
-; THUMBV6-NEXT:    ldr r0, [sp, #96]
-; THUMBV6-NEXT:    str r0, [sp, #64] @ 4-byte Spill
-; THUMBV6-NEXT:    str r0, [sp]
-; THUMBV6-NEXT:    mov r0, r2
-; THUMBV6-NEXT:    mov r1, r3
-; THUMBV6-NEXT:    mov r2, r5
+; THUMBV6-NEXT:    mov r0, r1
+; THUMBV6-NEXT:    mov r1, r5
 ; THUMBV6-NEXT:    mov r3, r5
-; THUMBV6-NEXT:    bl __multi3
-; THUMBV6-NEXT:    str r2, [sp, #48] @ 4-byte Spill
-; THUMBV6-NEXT:    str r3, [sp, #52] @ 4-byte Spill
-; THUMBV6-NEXT:    str r6, [sp, #44] @ 4-byte Spill
-; THUMBV6-NEXT:    stm r6!, {r0, r1}
-; THUMBV6-NEXT:    ldr r2, [sp, #104]
-; THUMBV6-NEXT:    str r2, [sp, #60] @ 4-byte Spill
-; THUMBV6-NEXT:    mov r0, r4
+; THUMBV6-NEXT:    bl __aeabi_lmul
+; THUMBV6-NEXT:    str r1, [sp, #28] @ 4-byte Spill
+; THUMBV6-NEXT:    str r0, [r4]
+; THUMBV6-NEXT:    ldr r2, [sp, #96]
+; THUMBV6-NEXT:    str r2, [sp, #36] @ 4-byte Spill
+; THUMBV6-NEXT:    mov r4, r6
+; THUMBV6-NEXT:    str r6, [sp, #56] @ 4-byte Spill
+; THUMBV6-NEXT:    mov r0, r6
 ; THUMBV6-NEXT:    mov r1, r5
 ; THUMBV6-NEXT:    mov r3, r5
 ; THUMBV6-NEXT:    bl __aeabi_lmul
-; THUMBV6-NEXT:    str r0, [sp, #36] @ 4-byte Spill
+; THUMBV6-NEXT:    str r0, [sp, #44] @ 4-byte Spill
 ; THUMBV6-NEXT:    mov r7, r1
 ; THUMBV6-NEXT:    subs r0, r1, #1
 ; THUMBV6-NEXT:    sbcs r7, r0
-; THUMBV6-NEXT:    ldr r6, [sp, #108]
-; THUMBV6-NEXT:    mov r0, r6
+; THUMBV6-NEXT:    ldr r0, [sp, #100]
+; THUMBV6-NEXT:    str r0, [sp, #32] @ 4-byte Spill
 ; THUMBV6-NEXT:    mov r1, r5
-; THUMBV6-NEXT:    ldr r2, [sp, #56] @ 4-byte Reload
+; THUMBV6-NEXT:    ldr r6, [sp, #52] @ 4-byte Reload
+; THUMBV6-NEXT:    mov r2, r6
 ; THUMBV6-NEXT:    mov r3, r5
 ; THUMBV6-NEXT:    bl __aeabi_lmul
+; THUMBV6-NEXT:    str r0, [sp, #24] @ 4-byte Spill
 ; THUMBV6-NEXT:    subs r2, r1, #1
 ; THUMBV6-NEXT:    sbcs r1, r2
 ; THUMBV6-NEXT:    subs r2, r4, #1
+; THUMBV6-NEXT:    mov r3, r4
+; THUMBV6-NEXT:    sbcs r3, r2
+; THUMBV6-NEXT:    ldr r4, [sp, #32] @ 4-byte Reload
+; THUMBV6-NEXT:    subs r2, r4, #1
 ; THUMBV6-NEXT:    sbcs r4, r2
-; THUMBV6-NEXT:    str r6, [sp, #40] @ 4-byte Spill
-; THUMBV6-NEXT:    subs r2, r6, #1
-; THUMBV6-NEXT:    sbcs r6, r2
-; THUMBV6-NEXT:    ands r6, r4
-; THUMBV6-NEXT:    orrs r6, r1
-; THUMBV6-NEXT:    orrs r6, r7
-; THUMBV6-NEXT:    ldr r1, [sp, #36] @ 4-byte Reload
-; THUMBV6-NEXT:    adds r4, r0, r1
-; THUMBV6-NEXT:    ldr r0, [sp, #60] @ 4-byte Reload
+; THUMBV6-NEXT:    ands r4, r3
+; THUMBV6-NEXT:    orrs r4, r1
+; THUMBV6-NEXT:    orrs r4, r7
+; THUMBV6-NEXT:    ldr r0, [sp, #44] @ 4-byte Reload
+; THUMBV6-NEXT:    ldr r1, [sp, #24] @ 4-byte Reload
+; THUMBV6-NEXT:    adds r7, r1, r0
+; THUMBV6-NEXT:    ldr r0, [sp, #36] @ 4-byte Reload
 ; THUMBV6-NEXT:    mov r1, r5
-; THUMBV6-NEXT:    ldr r2, [sp, #56] @ 4-byte Reload
+; THUMBV6-NEXT:    mov r2, r6
 ; THUMBV6-NEXT:    mov r3, r5
 ; THUMBV6-NEXT:    bl __aeabi_lmul
-; THUMBV6-NEXT:    str r0, [sp, #36] @ 4-byte Spill
-; THUMBV6-NEXT:    adds r0, r1, r4
-; THUMBV6-NEXT:    str r0, [sp, #32] @ 4-byte Spill
+; THUMBV6-NEXT:    str r0, [sp, #24] @ 4-byte Spill
+; THUMBV6-NEXT:    adds r0, r1, r7
+; THUMBV6-NEXT:    str r0, [sp, #20] @ 4-byte Spill
 ; THUMBV6-NEXT:    mov r0, r5
 ; THUMBV6-NEXT:    adcs r0, r5
-; THUMBV6-NEXT:    orrs r0, r6
-; THUMBV6-NEXT:    str r0, [sp, #24] @ 4-byte Spill
-; THUMBV6-NEXT:    ldr r4, [sp, #88]
-; THUMBV6-NEXT:    ldr r7, [sp, #28] @ 4-byte Reload
-; THUMBV6-NEXT:    mov r0, r7
+; THUMBV6-NEXT:    orrs r0, r4
+; THUMBV6-NEXT:    str r0, [sp, #16] @ 4-byte Spill
+; THUMBV6-NEXT:    ldr r0, [sp, #92]
+; THUMBV6-NEXT:    str r0, [sp, #44] @ 4-byte Spill
+; THUMBV6-NEXT:    ldr r7, [sp, #80]
 ; THUMBV6-NEXT:    mov r1, r5
-; THUMBV6-NEXT:    mov r2, r4
+; THUMBV6-NEXT:    mov r2, r7
 ; THUMBV6-NEXT:    mov r3, r5
 ; THUMBV6-NEXT:    bl __aeabi_lmul
-; THUMBV6-NEXT:    str r0, [sp, #20] @ 4-byte Spill
-; THUMBV6-NEXT:    mov r6, r1
+; THUMBV6-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; THUMBV6-NEXT:    mov r4, r1
 ; THUMBV6-NEXT:    subs r0, r1, #1
-; THUMBV6-NEXT:    sbcs r6, r0
-; THUMBV6-NEXT:    ldr r0, [sp, #92]
-; THUMBV6-NEXT:    str r0, [sp, #56] @ 4-byte Spill
+; THUMBV6-NEXT:    sbcs r4, r0
+; THUMBV6-NEXT:    ldr r6, [sp, #84]
+; THUMBV6-NEXT:    mov r0, r6
 ; THUMBV6-NEXT:    mov r1, r5
-; THUMBV6-NEXT:    ldr r2, [sp, #64] @ 4-byte Reload
+; THUMBV6-NEXT:    ldr r2, [sp, #48] @ 4-byte Reload
 ; THUMBV6-NEXT:    mov r3, r5
 ; THUMBV6-NEXT:    bl __aeabi_lmul
-; THUMBV6-NEXT:    str r0, [sp, #16] @ 4-byte Spill
+; THUMBV6-NEXT:    str r0, [sp, #4] @ 4-byte Spill
 ; THUMBV6-NEXT:    subs r2, r1, #1
 ; THUMBV6-NEXT:    sbcs r1, r2
-; THUMBV6-NEXT:    subs r2, r7, #1
-; THUMBV6-NEXT:    sbcs r7, r2
-; THUMBV6-NEXT:    mov r3, r7
-; THUMBV6-NEXT:    ldr r7, [sp, #56] @ 4-byte Reload
+; THUMBV6-NEXT:    ldr r3, [sp, #44] @ 4-byte Reload
+; THUMBV6-NEXT:    subs r2, r3, #1
+; THUMBV6-NEXT:    sbcs r3, r2
+; THUMBV6-NEXT:    str r6, [sp, #8] @ 4-byte Spill
+; THUMBV6-NEXT:    subs r2, r6, #1
+; THUMBV6-NEXT:    sbcs r6, r2
+; THUMBV6-NEXT:    ands r6, r3
+; THUMBV6-NEXT:    orrs r6, r1
+; THUMBV6-NEXT:    orrs r6, r4
+; THUMBV6-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
+; THUMBV6-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; THUMBV6-NEXT:    adds r0, r1, r0
+; THUMBV6-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; THUMBV6-NEXT:    mov r0, r7
+; THUMBV6-NEXT:    mov r1, r5
+; THUMBV6-NEXT:    ldr r4, [sp, #48] @ 4-byte Reload
+; THUMBV6-NEXT:    mov r2, r4
+; THUMBV6-NEXT:    mov r3, r5
+; THUMBV6-NEXT:    bl __aeabi_lmul
+; THUMBV6-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; THUMBV6-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
+; THUMBV6-NEXT:    adds r0, r1, r0
+; THUMBV6-NEXT:    mov r1, r5
+; THUMBV6-NEXT:    adcs r1, r5
+; THUMBV6-NEXT:    orrs r1, r6
+; THUMBV6-NEXT:    ldr r3, [sp, #36] @ 4-byte Reload
+; THUMBV6-NEXT:    ldr r2, [sp, #32] @ 4-byte Reload
+; THUMBV6-NEXT:    orrs r3, r2
+; THUMBV6-NEXT:    subs r2, r3, #1
+; THUMBV6-NEXT:    sbcs r3, r2
+; THUMBV6-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
+; THUMBV6-NEXT:    orrs r7, r2
 ; THUMBV6-NEXT:    subs r2, r7, #1
 ; THUMBV6-NEXT:    sbcs r7, r2
 ; THUMBV6-NEXT:    ands r7, r3
 ; THUMBV6-NEXT:    orrs r7, r1
-; THUMBV6-NEXT:    orrs r7, r6
-; THUMBV6-NEXT:    ldr r0, [sp, #20] @ 4-byte Reload
 ; THUMBV6-NEXT:    ldr r1, [sp, #16] @ 4-byte Reload
-; THUMBV6-NEXT:    adds r6, r1, r0
-; THUMBV6-NEXT:    mov r0, r4
+; THUMBV6-NEXT:    orrs r7, r1
+; THUMBV6-NEXT:    ldr r1, [sp, #24] @ 4-byte Reload
+; THUMBV6-NEXT:    ldr r2, [sp, #12] @ 4-byte Reload
+; THUMBV6-NEXT:    adds r1, r2, r1
+; THUMBV6-NEXT:    str r1, [sp, #32] @ 4-byte Spill
+; THUMBV6-NEXT:    ldr r1, [sp, #20] @ 4-byte Reload
+; THUMBV6-NEXT:    adcs r0, r1
+; THUMBV6-NEXT:    str r0, [sp, #36] @ 4-byte Spill
+; THUMBV6-NEXT:    ldr r0, [sp, #56] @ 4-byte Reload
 ; THUMBV6-NEXT:    mov r1, r5
-; THUMBV6-NEXT:    ldr r2, [sp, #64] @ 4-byte Reload
+; THUMBV6-NEXT:    mov r2, r4
+; THUMBV6-NEXT:    mov r3, r5
+; THUMBV6-NEXT:    bl __aeabi_lmul
+; THUMBV6-NEXT:    mov r4, r1
+; THUMBV6-NEXT:    ldr r1, [sp, #28] @ 4-byte Reload
+; THUMBV6-NEXT:    adds r6, r0, r1
+; THUMBV6-NEXT:    adcs r4, r5
+; THUMBV6-NEXT:    ldr r0, [sp, #52] @ 4-byte Reload
+; THUMBV6-NEXT:    mov r1, r5
+; THUMBV6-NEXT:    ldr r2, [sp, #44] @ 4-byte Reload
+; THUMBV6-NEXT:    mov r3, r5
+; THUMBV6-NEXT:    bl __aeabi_lmul
+; THUMBV6-NEXT:    adds r0, r0, r6
+; THUMBV6-NEXT:    ldr r2, [sp, #40] @ 4-byte Reload
+; THUMBV6-NEXT:    str r0, [r2, #4]
+; THUMBV6-NEXT:    adcs r1, r5
+; THUMBV6-NEXT:    adds r0, r4, r1
+; THUMBV6-NEXT:    str r0, [sp, #28] @ 4-byte Spill
+; THUMBV6-NEXT:    mov r6, r5
+; THUMBV6-NEXT:    adcs r6, r5
+; THUMBV6-NEXT:    ldr r0, [sp, #56] @ 4-byte Reload
+; THUMBV6-NEXT:    mov r1, r5
+; THUMBV6-NEXT:    ldr r4, [sp, #44] @ 4-byte Reload
+; THUMBV6-NEXT:    mov r2, r4
 ; THUMBV6-NEXT:    mov r3, r5
 ; THUMBV6-NEXT:    bl __aeabi_lmul
-; THUMBV6-NEXT:    adds r1, r1, r6
+; THUMBV6-NEXT:    ldr r2, [sp, #28] @ 4-byte Reload
+; THUMBV6-NEXT:    adds r0, r0, r2
+; THUMBV6-NEXT:    str r0, [sp, #28] @ 4-byte Spill
+; THUMBV6-NEXT:    adcs r1, r6
+; THUMBV6-NEXT:    str r1, [sp, #24] @ 4-byte Spill
+; THUMBV6-NEXT:    ldr r0, [sp, #48] @ 4-byte Reload
+; THUMBV6-NEXT:    mov r1, r4
 ; THUMBV6-NEXT:    mov r2, r5
-; THUMBV6-NEXT:    adcs r2, r5
-; THUMBV6-NEXT:    orrs r2, r7
-; THUMBV6-NEXT:    ldr r6, [sp, #60] @ 4-byte Reload
-; THUMBV6-NEXT:    ldr r3, [sp, #40] @ 4-byte Reload
-; THUMBV6-NEXT:    orrs r6, r3
-; THUMBV6-NEXT:    subs r3, r6, #1
-; THUMBV6-NEXT:    sbcs r6, r3
-; THUMBV6-NEXT:    ldr r3, [sp, #56] @ 4-byte Reload
-; THUMBV6-NEXT:    orrs r4, r3
-; THUMBV6-NEXT:    subs r3, r4, #1
-; THUMBV6-NEXT:    sbcs r4, r3
-; THUMBV6-NEXT:    ands r4, r6
-; THUMBV6-NEXT:    orrs r4, r2
+; THUMBV6-NEXT:    mov r3, r5
+; THUMBV6-NEXT:    bl __aeabi_lmul
+; THUMBV6-NEXT:    mov r6, r0
+; THUMBV6-NEXT:    mov r4, r1
+; THUMBV6-NEXT:    ldr r0, [sp, #52] @ 4-byte Reload
+; THUMBV6-NEXT:    ldr r1, [sp, #56] @ 4-byte Reload
+; THUMBV6-NEXT:    mov r2, r5
+; THUMBV6-NEXT:    mov r3, r5
+; THUMBV6-NEXT:    bl __aeabi_lmul
+; THUMBV6-NEXT:    adds r0, r0, r6
+; THUMBV6-NEXT:    adcs r1, r4
+; THUMBV6-NEXT:    ldr r2, [sp, #28] @ 4-byte Reload
+; THUMBV6-NEXT:    adds r0, r2, r0
 ; THUMBV6-NEXT:    ldr r2, [sp, #24] @ 4-byte Reload
-; THUMBV6-NEXT:    orrs r4, r2
-; THUMBV6-NEXT:    ldr r2, [sp, #36] @ 4-byte Reload
-; THUMBV6-NEXT:    adds r0, r0, r2
-; THUMBV6-NEXT:    ldr r2, [sp, #32] @ 4-byte Reload
 ; THUMBV6-NEXT:    adcs r1, r2
-; THUMBV6-NEXT:    ldr r2, [sp, #48] @ 4-byte Reload
-; THUMBV6-NEXT:    adds r0, r2, r0
-; THUMBV6-NEXT:    ldr r2, [sp, #44] @ 4-byte Reload
+; THUMBV6-NEXT:    ldr r2, [sp, #32] @ 4-byte Reload
+; THUMBV6-NEXT:    adds r0, r0, r2
+; THUMBV6-NEXT:    ldr r2, [sp, #40] @ 4-byte Reload
 ; THUMBV6-NEXT:    str r0, [r2, #8]
-; THUMBV6-NEXT:    ldr r0, [sp, #52] @ 4-byte Reload
+; THUMBV6-NEXT:    ldr r0, [sp, #36] @ 4-byte Reload
 ; THUMBV6-NEXT:    adcs r1, r0
 ; THUMBV6-NEXT:    str r1, [r2, #12]
 ; THUMBV6-NEXT:    adcs r5, r5
-; THUMBV6-NEXT:    orrs r5, r4
+; THUMBV6-NEXT:    orrs r5, r7
 ; THUMBV6-NEXT:    movs r0, #1
 ; THUMBV6-NEXT:    ands r0, r5
 ; THUMBV6-NEXT:    strb r0, [r2, #16]
-; THUMBV6-NEXT:    add sp, #68
+; THUMBV6-NEXT:    add sp, #60
 ; THUMBV6-NEXT:    pop {r4, r5, r6, r7, pc}
 start:
   %0 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %l, i128 %r) #2

diff  --git a/llvm/test/CodeGen/Thumb2/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/Thumb2/umulo-128-legalisation-lowering.ll
index f57c9226179b5..cff16c300e703 100644
--- a/llvm/test/CodeGen/Thumb2/umulo-128-legalisation-lowering.ll
+++ b/llvm/test/CodeGen/Thumb2/umulo-128-legalisation-lowering.ll
@@ -8,100 +8,121 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
 ; THUMBV7-NEXT:    push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
 ; THUMBV7-NEXT:    .pad #44
 ; THUMBV7-NEXT:    sub sp, #44
+; THUMBV7-NEXT:    ldr.w lr, [sp, #88]
+; THUMBV7-NEXT:    mov r11, r0
+; THUMBV7-NEXT:    ldr r4, [sp, #96]
+; THUMBV7-NEXT:    ldr.w r12, [sp, #80]
+; THUMBV7-NEXT:    umull r1, r5, r2, lr
+; THUMBV7-NEXT:    umull r7, r6, r3, r4
+; THUMBV7-NEXT:    str r1, [sp, #40] @ 4-byte Spill
+; THUMBV7-NEXT:    ldr r1, [sp, #100]
+; THUMBV7-NEXT:    umull r4, r0, r4, r2
+; THUMBV7-NEXT:    str r7, [sp, #32] @ 4-byte Spill
+; THUMBV7-NEXT:    umull r7, r1, r1, r2
+; THUMBV7-NEXT:    str r4, [sp, #24] @ 4-byte Spill
+; THUMBV7-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; THUMBV7-NEXT:    ldr r0, [sp, #84]
+; THUMBV7-NEXT:    str r7, [sp, #20] @ 4-byte Spill
+; THUMBV7-NEXT:    ldr r7, [sp, #92]
+; THUMBV7-NEXT:    umull r10, r8, r0, lr
+; THUMBV7-NEXT:    umull r4, r9, r7, r12
+; THUMBV7-NEXT:    str r4, [sp, #8] @ 4-byte Spill
+; THUMBV7-NEXT:    umull r4, r0, r12, lr
+; THUMBV7-NEXT:    mov.w r12, #0
+; THUMBV7-NEXT:    umlal r5, r12, r3, lr
+; THUMBV7-NEXT:    str r4, [sp, #16] @ 4-byte Spill
+; THUMBV7-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; THUMBV7-NEXT:    umull r4, r2, r2, r7
+; THUMBV7-NEXT:    ldr r0, [sp, #40] @ 4-byte Reload
+; THUMBV7-NEXT:    str r4, [sp, #28] @ 4-byte Spill
+; THUMBV7-NEXT:    str r2, [sp, #36] @ 4-byte Spill
+; THUMBV7-NEXT:    str.w r0, [r11]
+; THUMBV7-NEXT:    ldr r0, [sp, #32] @ 4-byte Reload
+; THUMBV7-NEXT:    ldr r2, [sp, #20] @ 4-byte Reload
+; THUMBV7-NEXT:    add r2, r0
+; THUMBV7-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
+; THUMBV7-NEXT:    adds.w lr, r0, r2
+; THUMBV7-NEXT:    mov.w r2, #0
+; THUMBV7-NEXT:    adc r0, r2, #0
+; THUMBV7-NEXT:    str r0, [sp, #32] @ 4-byte Spill
+; THUMBV7-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
+; THUMBV7-NEXT:    add.w r4, r10, r0
+; THUMBV7-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
+; THUMBV7-NEXT:    adds r4, r4, r0
+; THUMBV7-NEXT:    adc r0, r2, #0
 ; THUMBV7-NEXT:    str r0, [sp, #40] @ 4-byte Spill
-; THUMBV7-NEXT:    movs r0, #0
-; THUMBV7-NEXT:    ldrd r4, r7, [sp, #88]
-; THUMBV7-NEXT:    mov r5, r3
-; THUMBV7-NEXT:    strd r0, r0, [sp, #8]
-; THUMBV7-NEXT:    mov r1, r3
-; THUMBV7-NEXT:    mov r6, r2
-; THUMBV7-NEXT:    mov r0, r2
-; THUMBV7-NEXT:    movs r2, #0
-; THUMBV7-NEXT:    movs r3, #0
-; THUMBV7-NEXT:    strd r4, r7, [sp]
-; THUMBV7-NEXT:    bl __multi3
-; THUMBV7-NEXT:    strd r1, r0, [sp, #32] @ 8-byte Folded Spill
-; THUMBV7-NEXT:    strd r3, r2, [sp, #24] @ 8-byte Folded Spill
-; THUMBV7-NEXT:    ldrd r2, r0, [sp, #96]
-; THUMBV7-NEXT:    ldr.w r9, [sp, #80]
-; THUMBV7-NEXT:    umull lr, r0, r0, r6
-; THUMBV7-NEXT:    ldr.w r11, [sp, #84]
-; THUMBV7-NEXT:    umull r3, r1, r5, r2
-; THUMBV7-NEXT:    umull r2, r12, r2, r6
-; THUMBV7-NEXT:    add r3, lr
-; THUMBV7-NEXT:    umull r8, r10, r7, r9
-; THUMBV7-NEXT:    str r2, [sp, #20] @ 4-byte Spill
-; THUMBV7-NEXT:    adds.w lr, r12, r3
-; THUMBV7-NEXT:    umull r6, r9, r9, r4
-; THUMBV7-NEXT:    mov.w r3, #0
-; THUMBV7-NEXT:    adc r12, r3, #0
-; THUMBV7-NEXT:    umull r2, r4, r11, r4
-; THUMBV7-NEXT:    add r2, r8
-; THUMBV7-NEXT:    mov.w r8, #0
-; THUMBV7-NEXT:    adds.w r2, r2, r9
-; THUMBV7-NEXT:    adc r9, r3, #0
-; THUMBV7-NEXT:    ldr r3, [sp, #20] @ 4-byte Reload
-; THUMBV7-NEXT:    adds r3, r3, r6
-; THUMBV7-NEXT:    ldr r6, [sp, #28] @ 4-byte Reload
-; THUMBV7-NEXT:    adc.w r2, r2, lr
-; THUMBV7-NEXT:    adds r3, r3, r6
-; THUMBV7-NEXT:    ldr r6, [sp, #24] @ 4-byte Reload
-; THUMBV7-NEXT:    adcs r2, r6
-; THUMBV7-NEXT:    ldrd r6, lr, [sp, #36] @ 8-byte Folded Reload
-; THUMBV7-NEXT:    str.w r6, [lr]
-; THUMBV7-NEXT:    adc r8, r8, #0
-; THUMBV7-NEXT:    ldr r6, [sp, #32] @ 4-byte Reload
-; THUMBV7-NEXT:    cmp r5, #0
-; THUMBV7-NEXT:    strd r6, r3, [lr, #4]
-; THUMBV7-NEXT:    str.w r2, [lr, #12]
+; THUMBV7-NEXT:    ldr r0, [sp, #24] @ 4-byte Reload
+; THUMBV7-NEXT:    ldr r2, [sp, #16] @ 4-byte Reload
+; THUMBV7-NEXT:    adds.w r10, r2, r0
+; THUMBV7-NEXT:    mov r2, r3
+; THUMBV7-NEXT:    adc.w r0, r4, lr
+; THUMBV7-NEXT:    ldr.w lr, [sp, #100]
+; THUMBV7-NEXT:    cmp r1, #0
+; THUMBV7-NEXT:    str r0, [sp, #24] @ 4-byte Spill
 ; THUMBV7-NEXT:    it ne
-; THUMBV7-NEXT:    movne r5, #1
-; THUMBV7-NEXT:    ldr r2, [sp, #100]
-; THUMBV7-NEXT:    cmp r2, #0
-; THUMBV7-NEXT:    mov r3, r2
+; THUMBV7-NEXT:    movne r1, #1
+; THUMBV7-NEXT:    cmp r3, #0
+; THUMBV7-NEXT:    mov r0, lr
 ; THUMBV7-NEXT:    it ne
-; THUMBV7-NEXT:    movne r3, #1
-; THUMBV7-NEXT:    cmp r0, #0
+; THUMBV7-NEXT:    movne r2, #1
+; THUMBV7-NEXT:    cmp.w lr, #0
 ; THUMBV7-NEXT:    it ne
 ; THUMBV7-NEXT:    movne r0, #1
-; THUMBV7-NEXT:    cmp r1, #0
-; THUMBV7-NEXT:    and.w r3, r3, r5
+; THUMBV7-NEXT:    ldr r4, [sp, #28] @ 4-byte Reload
+; THUMBV7-NEXT:    ands r0, r2
+; THUMBV7-NEXT:    orrs r1, r0
+; THUMBV7-NEXT:    adds r5, r5, r4
+; THUMBV7-NEXT:    str.w r5, [r11, #4]
+; THUMBV7-NEXT:    ldr r0, [sp, #36] @ 4-byte Reload
+; THUMBV7-NEXT:    mov.w r5, #0
+; THUMBV7-NEXT:    adcs.w r0, r0, r12
+; THUMBV7-NEXT:    adc r2, r5, #0
+; THUMBV7-NEXT:    cmp r6, #0
 ; THUMBV7-NEXT:    it ne
-; THUMBV7-NEXT:    movne r1, #1
-; THUMBV7-NEXT:    orrs r0, r3
+; THUMBV7-NEXT:    movne r6, #1
+; THUMBV7-NEXT:    orrs r1, r6
+; THUMBV7-NEXT:    ldr r6, [sp, #84]
+; THUMBV7-NEXT:    umlal r0, r2, r3, r7
+; THUMBV7-NEXT:    ldr r3, [sp, #32] @ 4-byte Reload
 ; THUMBV7-NEXT:    cmp r7, #0
-; THUMBV7-NEXT:    orr.w r0, r0, r1
 ; THUMBV7-NEXT:    it ne
 ; THUMBV7-NEXT:    movne r7, #1
-; THUMBV7-NEXT:    cmp.w r11, #0
-; THUMBV7-NEXT:    mov r1, r11
-; THUMBV7-NEXT:    it ne
-; THUMBV7-NEXT:    movne r1, #1
-; THUMBV7-NEXT:    cmp r4, #0
-; THUMBV7-NEXT:    ldr r3, [sp, #96]
+; THUMBV7-NEXT:    orrs r1, r3
+; THUMBV7-NEXT:    mov r3, r6
+; THUMBV7-NEXT:    cmp r6, #0
 ; THUMBV7-NEXT:    it ne
-; THUMBV7-NEXT:    movne r4, #1
-; THUMBV7-NEXT:    cmp.w r10, #0
-; THUMBV7-NEXT:    and.w r1, r1, r7
+; THUMBV7-NEXT:    movne r3, #1
+; THUMBV7-NEXT:    cmp.w r8, #0
+; THUMBV7-NEXT:    and.w r3, r3, r7
+; THUMBV7-NEXT:    ldr r7, [sp, #80]
 ; THUMBV7-NEXT:    it ne
-; THUMBV7-NEXT:    movne.w r10, #1
-; THUMBV7-NEXT:    orrs r3, r2
-; THUMBV7-NEXT:    ldr r2, [sp, #80]
-; THUMBV7-NEXT:    orr.w r1, r1, r4
-; THUMBV7-NEXT:    orr.w r1, r1, r10
+; THUMBV7-NEXT:    movne.w r8, #1
+; THUMBV7-NEXT:    cmp.w r9, #0
 ; THUMBV7-NEXT:    it ne
-; THUMBV7-NEXT:    movne r3, #1
-; THUMBV7-NEXT:    orrs.w r7, r2, r11
-; THUMBV7-NEXT:    orr.w r1, r1, r9
+; THUMBV7-NEXT:    movne.w r9, #1
+; THUMBV7-NEXT:    orrs r7, r6
+; THUMBV7-NEXT:    ldr r6, [sp, #96]
 ; THUMBV7-NEXT:    it ne
 ; THUMBV7-NEXT:    movne r7, #1
-; THUMBV7-NEXT:    ands r3, r7
-; THUMBV7-NEXT:    orr.w r0, r0, r12
-; THUMBV7-NEXT:    orrs r1, r3
+; THUMBV7-NEXT:    orr.w r3, r3, r8
+; THUMBV7-NEXT:    orrs.w r6, r6, lr
+; THUMBV7-NEXT:    orr.w r3, r3, r9
+; THUMBV7-NEXT:    it ne
+; THUMBV7-NEXT:    movne r6, #1
+; THUMBV7-NEXT:    adds.w r0, r0, r10
+; THUMBV7-NEXT:    str.w r0, [r11, #8]
+; THUMBV7-NEXT:    ldr r0, [sp, #24] @ 4-byte Reload
+; THUMBV7-NEXT:    adcs r0, r2
+; THUMBV7-NEXT:    str.w r0, [r11, #12]
+; THUMBV7-NEXT:    ldr r0, [sp, #40] @ 4-byte Reload
+; THUMBV7-NEXT:    and.w r2, r7, r6
+; THUMBV7-NEXT:    orr.w r0, r0, r3
+; THUMBV7-NEXT:    orr.w r0, r0, r2
+; THUMBV7-NEXT:    orr.w r0, r0, r1
+; THUMBV7-NEXT:    adc r1, r5, #0
 ; THUMBV7-NEXT:    orrs r0, r1
-; THUMBV7-NEXT:    orr.w r0, r0, r8
 ; THUMBV7-NEXT:    and r0, r0, #1
-; THUMBV7-NEXT:    strb.w r0, [lr, #16]
+; THUMBV7-NEXT:    strb.w r0, [r11, #16]
 ; THUMBV7-NEXT:    add sp, #44
 ; THUMBV7-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 start:


        


More information about the llvm-commits mailing list