[llvm] [Thumb1] Resolve FIXME: use 'mov hi, $src; mov $dst, hi' (PR #81908)

via llvm-commits llvm-commits at lists.llvm.org
Fri Feb 16 08:26:06 PST 2024


https://github.com/AtariDreams updated https://github.com/llvm/llvm-project/pull/81908

>From 1b017b546c9f537e5c8a2e0920132c2f2f9670d7 Mon Sep 17 00:00:00 2001
From: Rose <83477269+AtariDreams at users.noreply.github.com>
Date: Thu, 15 Feb 2024 14:21:56 -0500
Subject: [PATCH] [Thumb1] Resolve FIXME: use 'mov hi, $src; mov $dst, hi'

Consider the following:

        ldr     r0, [r4]
        ldr     r7, [r0, #4]
        cmp     r7, r3
        bhi     .LBB0_6
        cmp     r0, r2
        push    {r0}
        pop     {r4}
        bne     .LBB0_3
        movs    r0, r6
        pop     {r4, r5, r6, r7}
        pop     {r1}
        bx      r1

Here is a snippet of the Thumb1 code that clang currently generates for the K&R malloc function.

The value pushed by push {r0} is immediately popped by pop {r4}; the pair merely copies r0 into r4.

movs r4, r0 would destroy the flags set by cmp right above.

The compiler has only one alternative in this case:
transferring the value through a high register.

However, it seems that LLVM does not consider this a valid approach, even though clobbering an otherwise unused high register is free.

This patch addresses the FIXME so that the compiler performs the copy through r10 or r11 whenever one of them is available.
---
 llvm/lib/Target/ARM/Thumb1InstrInfo.cpp       |  46 ++++-
 llvm/test/CodeGen/ARM/sadd_sat.ll             | 169 ++++++++++++------
 llvm/test/CodeGen/ARM/select_const.ll         |  16 +-
 llvm/test/CodeGen/ARM/wide-compares.ll        |   8 +-
 llvm/test/CodeGen/Thumb/pr35836.ll            |  47 +++++
 .../CodeGen/Thumb/urem-seteq-illegal-types.ll |   8 +-
 6 files changed, 220 insertions(+), 74 deletions(-)

diff --git a/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp b/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
index 85eabdb17ad190..3d93d60dbc45da 100644
--- a/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -53,10 +53,12 @@ void Thumb1InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
         .addReg(SrcReg, getKillRegState(KillSrc))
         .add(predOps(ARMCC::AL));
   else {
-    // FIXME: Can also use 'mov hi, $src; mov $dst, hi',
-    // with hi as either r10 or r11.
+
+    bool canUseR10 = true;
+    bool canUseR11 = true;
 
     const TargetRegisterInfo *RegInfo = st.getRegisterInfo();
+
     if (MBB.computeRegisterLiveness(RegInfo, ARM::CPSR, I)
         == MachineBasicBlock::LQR_Dead) {
       BuildMI(MBB, I, DL, get(ARM::tMOVSr), DestReg)
@@ -65,6 +67,46 @@ void Thumb1InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
       return;
     }
 
+    const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
+    if (CSRegs) {
+      for (const MCPhysReg *I = CSRegs; *I && (canUseR10 || canUseR11); ++I) {
+        if (*I == ARM::R10) {
+          canUseR10 = false;
+          continue;
+        }
+
+        if (*I == ARM::R11) {
+          canUseR11 = false;
+        }
+      }
+    }
+
+    // Can also use 'mov hi, $src; mov $dst, hi',
+    // with hi as either r10 or r11.
+    if (canUseR10 && MBB.computeRegisterLiveness(RegInfo, ARM::R10, I) ==
+                         MachineBasicBlock::LQR_Dead) {
+      // Use high register to move source to destination
+      BuildMI(MBB, I, DL, get(ARM::tMOVr), ARM::R10)
+          .addReg(SrcReg, getKillRegState(KillSrc))
+          .add(predOps(ARMCC::AL));
+      BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg)
+          .addReg(ARM::R10, RegState::Kill)
+          .add(predOps(ARMCC::AL));
+      return;
+    }
+
+    if (canUseR11 && MBB.computeRegisterLiveness(RegInfo, ARM::R11, I) ==
+                         MachineBasicBlock::LQR_Dead) {
+      // Use high register to move source to destination
+      BuildMI(MBB, I, DL, get(ARM::tMOVr), ARM::R11)
+          .addReg(SrcReg, getKillRegState(KillSrc))
+          .add(predOps(ARMCC::AL));
+      BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg)
+          .addReg(ARM::R11, RegState::Kill)
+          .add(predOps(ARMCC::AL));
+      return;
+    }
+
     // 'MOV lo, lo' is unpredictable on < v6, so use the stack to do it
     BuildMI(MBB, I, DL, get(ARM::tPUSH))
         .add(predOps(ARMCC::AL))
diff --git a/llvm/test/CodeGen/ARM/sadd_sat.ll b/llvm/test/CodeGen/ARM/sadd_sat.ll
index 1632c4e86c7629..b6e3cf6bedfbb3 100644
--- a/llvm/test/CodeGen/ARM/sadd_sat.ll
+++ b/llvm/test/CodeGen/ARM/sadd_sat.ll
@@ -130,8 +130,8 @@ define i64 @func2(i64 %x, i64 %y) nounwind {
 ; CHECK-T15TE-NEXT:    bics r4, r1
 ; CHECK-T15TE-NEXT:    asrs r1, r3, #31
 ; CHECK-T15TE-NEXT:    cmp r4, #0
-; CHECK-T15TE-NEXT:    push {r1}
-; CHECK-T15TE-NEXT:    pop {r0}
+; CHECK-T15TE-NEXT:    mov r10, r1
+; CHECK-T15TE-NEXT:    mov r0, r10
 ; CHECK-T15TE-NEXT:    bmi .LBB1_2
 ; CHECK-T15TE-NEXT:  @ %bb.1:
 ; CHECK-T15TE-NEXT:    movs r0, r2
@@ -151,28 +151,28 @@ define i64 @func2(i64 %x, i64 %y) nounwind {
 }
 
 define signext i16 @func16(i16 signext %x, i16 signext %y) nounwind {
-; CHECK-T1-LABEL: func16:
-; CHECK-T1:       @ %bb.0:
-; CHECK-T1-NEXT:    adds r0, r0, r1
-; CHECK-T1-NEXT:    ldr r1, .LCPI2_0
-; CHECK-T1-NEXT:    cmp r0, r1
-; CHECK-T1-NEXT:    blt .LBB2_2
-; CHECK-T1-NEXT:  @ %bb.1:
-; CHECK-T1-NEXT:    {{movs|mov}} r0, r1
-; CHECK-T1-NEXT:  .LBB2_2:
-; CHECK-T1-NEXT:    ldr r1, .LCPI2_1
-; CHECK-T1-NEXT:    cmp r0, r1
-; CHECK-T1-NEXT:    bgt .LBB2_4
-; CHECK-T1-NEXT:  @ %bb.3:
-; CHECK-T1-NEXT:    {{movs|mov}} r0, r1
-; CHECK-T1-NEXT:  .LBB2_4:
-; CHECK-T1-NEXT:    bx lr
-; CHECK-T1-NEXT:    .p2align 2
-; CHECK-T1-NEXT:  @ %bb.5:
-; CHECK-T1-NEXT:  .LCPI2_0:
-; CHECK-T1-NEXT:    .long 32767 @ 0x7fff
-; CHECK-T1-NEXT:  .LCPI2_1:
-; CHECK-T1-NEXT:    .long 4294934528 @ 0xffff8000
+; CHECK-T16-LABEL: func16:
+; CHECK-T16:       @ %bb.0:
+; CHECK-T16-NEXT:    adds r0, r0, r1
+; CHECK-T16-NEXT:    ldr r1, .LCPI2_0
+; CHECK-T16-NEXT:    cmp r0, r1
+; CHECK-T16-NEXT:    blt .LBB2_2
+; CHECK-T16-NEXT:  @ %bb.1:
+; CHECK-T16-NEXT:    mov r0, r1
+; CHECK-T16-NEXT:  .LBB2_2:
+; CHECK-T16-NEXT:    ldr r1, .LCPI2_1
+; CHECK-T16-NEXT:    cmp r0, r1
+; CHECK-T16-NEXT:    bgt .LBB2_4
+; CHECK-T16-NEXT:  @ %bb.3:
+; CHECK-T16-NEXT:    mov r0, r1
+; CHECK-T16-NEXT:  .LBB2_4:
+; CHECK-T16-NEXT:    bx lr
+; CHECK-T16-NEXT:    .p2align 2
+; CHECK-T16-NEXT:  @ %bb.5:
+; CHECK-T16-NEXT:  .LCPI2_0:
+; CHECK-T16-NEXT:    .long 32767 @ 0x7fff
+; CHECK-T16-NEXT:  .LCPI2_1:
+; CHECK-T16-NEXT:    .long 4294934528 @ 0xffff8000
 ;
 ; CHECK-T2NODSP-LABEL: func16:
 ; CHECK-T2NODSP:       @ %bb.0:
@@ -210,6 +210,29 @@ define signext i16 @func16(i16 signext %x, i16 signext %y) nounwind {
 ; CHECK-ARMBASEDSP-NEXT:    asr r0, r0, #16
 ; CHECK-ARMBASEDSP-NEXT:    bx lr
 ;
+; CHECK-T15TE-LABEL: func16:
+; CHECK-T15TE:       @ %bb.0:
+; CHECK-T15TE-NEXT:    adds r0, r0, r1
+; CHECK-T15TE-NEXT:    ldr r1, .LCPI2_0
+; CHECK-T15TE-NEXT:    cmp r0, r1
+; CHECK-T15TE-NEXT:    blt .LBB2_2
+; CHECK-T15TE-NEXT:  @ %bb.1:
+; CHECK-T15TE-NEXT:    movs r0, r1
+; CHECK-T15TE-NEXT:  .LBB2_2:
+; CHECK-T15TE-NEXT:    ldr r1, .LCPI2_1
+; CHECK-T15TE-NEXT:    cmp r0, r1
+; CHECK-T15TE-NEXT:    bgt .LBB2_4
+; CHECK-T15TE-NEXT:  @ %bb.3:
+; CHECK-T15TE-NEXT:    movs r0, r1
+; CHECK-T15TE-NEXT:  .LBB2_4:
+; CHECK-T15TE-NEXT:    bx lr
+; CHECK-T15TE-NEXT:    .p2align 2
+; CHECK-T15TE-NEXT:  @ %bb.5:
+; CHECK-T15TE-NEXT:  .LCPI2_0:
+; CHECK-T15TE-NEXT:    .long 32767 @ 0x7fff
+; CHECK-T15TE-NEXT:  .LCPI2_1:
+; CHECK-T15TE-NEXT:    .long 4294934528 @ 0xffff8000
+;
 ; CHECK-ARMDSP-LABEL: func16:
 ; CHECK-ARMDSP:       @ %bb.0:
 ; CHECK-ARMDSP-NEXT:    qadd16 r0, r0, r1
@@ -220,22 +243,22 @@ define signext i16 @func16(i16 signext %x, i16 signext %y) nounwind {
 }
 
 define signext i8 @func8(i8 signext %x, i8 signext %y) nounwind {
-; CHECK-T1-LABEL: func8:
-; CHECK-T1:       @ %bb.0:
-; CHECK-T1-NEXT:    adds r0, r0, r1
-; CHECK-T1-NEXT:    movs r1, #127
-; CHECK-T1-NEXT:    cmp r0, #127
-; CHECK-T1-NEXT:    blt .LBB3_2
-; CHECK-T1-NEXT:  @ %bb.1:
-; CHECK-T1-NEXT:    {{movs|mov}} r0, r1
-; CHECK-T1-NEXT:  .LBB3_2:
-; CHECK-T1-NEXT:    mvns r1, r1
-; CHECK-T1-NEXT:    cmp r0, r1
-; CHECK-T1-NEXT:    bgt .LBB3_4
-; CHECK-T1-NEXT:  @ %bb.3:
-; CHECK-T1-NEXT:    {{movs|mov}} r0, r1
-; CHECK-T1-NEXT:  .LBB3_4:
-; CHECK-T1-NEXT:    bx lr
+; CHECK-T16-LABEL: func8:
+; CHECK-T16:       @ %bb.0:
+; CHECK-T16-NEXT:    adds r0, r0, r1
+; CHECK-T16-NEXT:    movs r1, #127
+; CHECK-T16-NEXT:    cmp r0, #127
+; CHECK-T16-NEXT:    blt .LBB3_2
+; CHECK-T16-NEXT:  @ %bb.1:
+; CHECK-T16-NEXT:    mov r0, r1
+; CHECK-T16-NEXT:  .LBB3_2:
+; CHECK-T16-NEXT:    mvns r1, r1
+; CHECK-T16-NEXT:    cmp r0, r1
+; CHECK-T16-NEXT:    bgt .LBB3_4
+; CHECK-T16-NEXT:  @ %bb.3:
+; CHECK-T16-NEXT:    mov r0, r1
+; CHECK-T16-NEXT:  .LBB3_4:
+; CHECK-T16-NEXT:    bx lr
 ;
 ; CHECK-T2NODSP-LABEL: func8:
 ; CHECK-T2NODSP:       @ %bb.0:
@@ -266,6 +289,23 @@ define signext i8 @func8(i8 signext %x, i8 signext %y) nounwind {
 ; CHECK-ARMBASEDSP-NEXT:    asr r0, r0, #24
 ; CHECK-ARMBASEDSP-NEXT:    bx lr
 ;
+; CHECK-T15TE-LABEL: func8:
+; CHECK-T15TE:       @ %bb.0:
+; CHECK-T15TE-NEXT:    adds r0, r0, r1
+; CHECK-T15TE-NEXT:    movs r1, #127
+; CHECK-T15TE-NEXT:    cmp r0, #127
+; CHECK-T15TE-NEXT:    blt .LBB3_2
+; CHECK-T15TE-NEXT:  @ %bb.1:
+; CHECK-T15TE-NEXT:    movs r0, r1
+; CHECK-T15TE-NEXT:  .LBB3_2:
+; CHECK-T15TE-NEXT:    mvns r1, r1
+; CHECK-T15TE-NEXT:    cmp r0, r1
+; CHECK-T15TE-NEXT:    bgt .LBB3_4
+; CHECK-T15TE-NEXT:  @ %bb.3:
+; CHECK-T15TE-NEXT:    movs r0, r1
+; CHECK-T15TE-NEXT:  .LBB3_4:
+; CHECK-T15TE-NEXT:    bx lr
+;
 ; CHECK-ARMDSP-LABEL: func8:
 ; CHECK-ARMDSP:       @ %bb.0:
 ; CHECK-ARMDSP-NEXT:    qadd8 r0, r0, r1
@@ -276,22 +316,22 @@ define signext i8 @func8(i8 signext %x, i8 signext %y) nounwind {
 }
 
 define signext i4 @func3(i4 signext %x, i4 signext %y) nounwind {
-; CHECK-T1-LABEL: func3:
-; CHECK-T1:       @ %bb.0:
-; CHECK-T1-NEXT:    adds r0, r0, r1
-; CHECK-T1-NEXT:    movs r1, #7
-; CHECK-T1-NEXT:    cmp r0, #7
-; CHECK-T1-NEXT:    blt .LBB4_2
-; CHECK-T1-NEXT:  @ %bb.1:
-; CHECK-T1-NEXT:    {{movs|mov}} r0, r1
-; CHECK-T1-NEXT:  .LBB4_2:
-; CHECK-T1-NEXT:    mvns r1, r1
-; CHECK-T1-NEXT:    cmp r0, r1
-; CHECK-T1-NEXT:    bgt .LBB4_4
-; CHECK-T1-NEXT:  @ %bb.3:
-; CHECK-T1-NEXT:    {{movs|mov}} r0, r1
-; CHECK-T1-NEXT:  .LBB4_4:
-; CHECK-T1-NEXT:    bx lr
+; CHECK-T16-LABEL: func3:
+; CHECK-T16:       @ %bb.0:
+; CHECK-T16-NEXT:    adds r0, r0, r1
+; CHECK-T16-NEXT:    movs r1, #7
+; CHECK-T16-NEXT:    cmp r0, #7
+; CHECK-T16-NEXT:    blt .LBB4_2
+; CHECK-T16-NEXT:  @ %bb.1:
+; CHECK-T16-NEXT:    mov r0, r1
+; CHECK-T16-NEXT:  .LBB4_2:
+; CHECK-T16-NEXT:    mvns r1, r1
+; CHECK-T16-NEXT:    cmp r0, r1
+; CHECK-T16-NEXT:    bgt .LBB4_4
+; CHECK-T16-NEXT:  @ %bb.3:
+; CHECK-T16-NEXT:    mov r0, r1
+; CHECK-T16-NEXT:  .LBB4_4:
+; CHECK-T16-NEXT:    bx lr
 ;
 ; CHECK-T2NODSP-LABEL: func3:
 ; CHECK-T2NODSP:       @ %bb.0:
@@ -324,6 +364,23 @@ define signext i4 @func3(i4 signext %x, i4 signext %y) nounwind {
 ; CHECK-ARMBASEDSP-NEXT:    asr r0, r0, #28
 ; CHECK-ARMBASEDSP-NEXT:    bx lr
 ;
+; CHECK-T15TE-LABEL: func3:
+; CHECK-T15TE:       @ %bb.0:
+; CHECK-T15TE-NEXT:    adds r0, r0, r1
+; CHECK-T15TE-NEXT:    movs r1, #7
+; CHECK-T15TE-NEXT:    cmp r0, #7
+; CHECK-T15TE-NEXT:    blt .LBB4_2
+; CHECK-T15TE-NEXT:  @ %bb.1:
+; CHECK-T15TE-NEXT:    movs r0, r1
+; CHECK-T15TE-NEXT:  .LBB4_2:
+; CHECK-T15TE-NEXT:    mvns r1, r1
+; CHECK-T15TE-NEXT:    cmp r0, r1
+; CHECK-T15TE-NEXT:    bgt .LBB4_4
+; CHECK-T15TE-NEXT:  @ %bb.3:
+; CHECK-T15TE-NEXT:    movs r0, r1
+; CHECK-T15TE-NEXT:  .LBB4_4:
+; CHECK-T15TE-NEXT:    bx lr
+;
 ; CHECK-ARMDSP-LABEL: func3:
 ; CHECK-ARMDSP:       @ %bb.0:
 ; CHECK-ARMDSP-NEXT:    lsl r0, r0, #28
diff --git a/llvm/test/CodeGen/ARM/select_const.ll b/llvm/test/CodeGen/ARM/select_const.ll
index e12dd02f16c2fa..a22bed4c2c8414 100644
--- a/llvm/test/CodeGen/ARM/select_const.ll
+++ b/llvm/test/CodeGen/ARM/select_const.ll
@@ -665,8 +665,8 @@ define i64 @opaque_constant1(i1 %cond, i64 %x) {
 ; THUMB-NEXT:    movs r7, #1
 ; THUMB-NEXT:    ands r0, r7
 ; THUMB-NEXT:    subs r1, r0, #1
-; THUMB-NEXT:    push {r0}
-; THUMB-NEXT:    pop {r4}
+; THUMB-NEXT:    mov r10, r0
+; THUMB-NEXT:    mov r4, r10
 ; THUMB-NEXT:    sbcs r4, r1
 ; THUMB-NEXT:    cmp r0, #0
 ; THUMB-NEXT:    bne .LBB24_2
@@ -681,8 +681,8 @@ define i64 @opaque_constant1(i1 %cond, i64 %x) {
 ; THUMB-NEXT:    ands r5, r0
 ; THUMB-NEXT:    movs r6, #0
 ; THUMB-NEXT:    subs r0, r5, #1
-; THUMB-NEXT:    push {r4}
-; THUMB-NEXT:    pop {r1}
+; THUMB-NEXT:    mov r10, r4
+; THUMB-NEXT:    mov r1, r10
 ; THUMB-NEXT:    sbcs r1, r6
 ; THUMB-NEXT:    eors r3, r7
 ; THUMB-NEXT:    ldr r6, .LCPI24_0
@@ -786,11 +786,11 @@ define i64 @func(i64 %arg) {
 ; THUMB-NEXT:    push {r4, lr}
 ; THUMB-NEXT:    movs r2, #0
 ; THUMB-NEXT:    adds r3, r0, #1
-; THUMB-NEXT:    push {r1}
-; THUMB-NEXT:    pop {r3}
+; THUMB-NEXT:    mov r10, r1
+; THUMB-NEXT:    mov r3, r10
 ; THUMB-NEXT:    adcs r3, r2
-; THUMB-NEXT:    push {r2}
-; THUMB-NEXT:    pop {r3}
+; THUMB-NEXT:    mov r10, r2
+; THUMB-NEXT:    mov r3, r10
 ; THUMB-NEXT:    adcs r3, r2
 ; THUMB-NEXT:    subs r4, r3, #1
 ; THUMB-NEXT:    adds r0, r0, #1
diff --git a/llvm/test/CodeGen/ARM/wide-compares.ll b/llvm/test/CodeGen/ARM/wide-compares.ll
index 6584f0c7616c52..d5a99bcef9c2be 100644
--- a/llvm/test/CodeGen/ARM/wide-compares.ll
+++ b/llvm/test/CodeGen/ARM/wide-compares.ll
@@ -257,12 +257,12 @@ define {i32, i32} @test_slt_not(i32 %c, i32 %d, i64 %a, i64 %b) {
 ; CHECK-THUMB1-NOMOV-NEXT:    ldr r5, [sp, #16]
 ; CHECK-THUMB1-NOMOV-NEXT:    subs r2, r2, r5
 ; CHECK-THUMB1-NOMOV-NEXT:    sbcs r3, r0
-; CHECK-THUMB1-NOMOV-NEXT:    push {r1}
-; CHECK-THUMB1-NOMOV-NEXT:    pop {r0}
+; CHECK-THUMB1-NOMOV-NEXT:    mov r10, r1
+; CHECK-THUMB1-NOMOV-NEXT:    mov r0, r10
 ; CHECK-THUMB1-NOMOV-NEXT:    blt .LBB3_2
 ; CHECK-THUMB1-NOMOV-NEXT:  @ %bb.1: @ %entry
-; CHECK-THUMB1-NOMOV-NEXT:    push {r4}
-; CHECK-THUMB1-NOMOV-NEXT:    pop {r0}
+; CHECK-THUMB1-NOMOV-NEXT:    mov r10, r4
+; CHECK-THUMB1-NOMOV-NEXT:    mov r0, r10
 ; CHECK-THUMB1-NOMOV-NEXT:  .LBB3_2: @ %entry
 ; CHECK-THUMB1-NOMOV-NEXT:    bge .LBB3_4
 ; CHECK-THUMB1-NOMOV-NEXT:  @ %bb.3: @ %entry
diff --git a/llvm/test/CodeGen/Thumb/pr35836.ll b/llvm/test/CodeGen/Thumb/pr35836.ll
index 96a6fe5d142025..40177bb9a32e01 100644
--- a/llvm/test/CodeGen/Thumb/pr35836.ll
+++ b/llvm/test/CodeGen/Thumb/pr35836.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
 ; RUN: llc < %s | FileCheck %s
 
 target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
@@ -5,6 +6,52 @@ target triple = "thumbv5e-none-linux-gnueabi"
 
 ; Function Attrs: norecurse nounwind optsize
 define void @f(i32,i32,i32,i32,ptr %x4p, ptr %x5p, ptr %x6p) {
+; CHECK-LABEL: f:
+; CHECK:       @ %bb.0: @ %if.end
+; CHECK-NEXT:    .save {r4, r5, r6, r7, lr}
+; CHECK-NEXT:    push {r4, r5, r6, r7, lr}
+; CHECK-NEXT:    .pad #24
+; CHECK-NEXT:    sub sp, #24
+; CHECK-NEXT:    add r4, sp, #8
+; CHECK-NEXT:    stm r4!, {r0, r1, r2, r3} @ 16-byte Folded Spill
+; CHECK-NEXT:    movs r5, #0
+; CHECK-NEXT:    ldr r0, [sp, #52]
+; CHECK-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT:    ldr r0, [sp, #48]
+; CHECK-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-NEXT:    ldr r7, [sp, #44]
+; CHECK-NEXT:  .LBB0_1: @ %while.body
+; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    ldr r0, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT:    ldr r1, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT:    adds r3, r0, r1
+; CHECK-NEXT:    mov r10, r5
+; CHECK-NEXT:    mov r1, r10
+; CHECK-NEXT:    adcs r1, r5
+; CHECK-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT:    adds r2, r0, r2
+; CHECK-NEXT:    mov r10, r5
+; CHECK-NEXT:    mov r4, r10
+; CHECK-NEXT:    adcs r4, r5
+; CHECK-NEXT:    adds r0, r2, r5
+; CHECK-NEXT:    mov r10, r3
+; CHECK-NEXT:    mov r0, r10
+; CHECK-NEXT:    adcs r0, r4
+; CHECK-NEXT:    ldr r6, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT:    str r0, [r6]
+; CHECK-NEXT:    ldr r0, [r7]
+; CHECK-NEXT:    ldr r6, [sp] @ 4-byte Reload
+; CHECK-NEXT:    ldr r6, [r6]
+; CHECK-NEXT:    adds r0, r6, r0
+; CHECK-NEXT:    mov r10, r5
+; CHECK-NEXT:    mov r6, r10
+; CHECK-NEXT:    adcs r6, r5
+; CHECK-NEXT:    adds r2, r2, r5
+; CHECK-NEXT:    adcs r4, r3
+; CHECK-NEXT:    adcs r0, r1
+; CHECK-NEXT:    adcs r6, r5
+; CHECK-NEXT:    b .LBB0_1
 if.end:
   br label %while.body
 
diff --git a/llvm/test/CodeGen/Thumb/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/Thumb/urem-seteq-illegal-types.ll
index aa5deb6542b2b0..2c2191ebf941f7 100644
--- a/llvm/test/CodeGen/Thumb/urem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/Thumb/urem-seteq-illegal-types.ll
@@ -122,8 +122,8 @@ define <3 x i1> @test_urem_vec(<3 x i11> %X) nounwind {
 ; CHECK-NEXT:    movs r3, #1
 ; CHECK-NEXT:    movs r4, #0
 ; CHECK-NEXT:    cmp r0, #170
-; CHECK-NEXT:    push {r3}
-; CHECK-NEXT:    pop {r0}
+; CHECK-NEXT:    mov r10, r3
+; CHECK-NEXT:    mov r0, r10
 ; CHECK-NEXT:    bhi .LBB4_2
 ; CHECK-NEXT:  @ %bb.1:
 ; CHECK-NEXT:    movs r0, r4
@@ -134,8 +134,8 @@ define <3 x i1> @test_urem_vec(<3 x i11> %X) nounwind {
 ; CHECK-NEXT:    movs r1, #73
 ; CHECK-NEXT:    lsls r1, r1, #23
 ; CHECK-NEXT:    cmp r5, r1
-; CHECK-NEXT:    push {r3}
-; CHECK-NEXT:    pop {r1}
+; CHECK-NEXT:    mov r10, r3
+; CHECK-NEXT:    mov r1, r10
 ; CHECK-NEXT:    bhi .LBB4_4
 ; CHECK-NEXT:  @ %bb.3:
 ; CHECK-NEXT:    movs r1, r4



More information about the llvm-commits mailing list