[llvm] [Thumb] Improve THUMB code generation by moving movs around if it avoids emitting an extra instruction (PR #88032)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Apr 8 12:29:15 PDT 2024
https://github.com/AtariDreams updated https://github.com/llvm/llvm-project/pull/88032
>From 02ba709c35d9b4f75c1d7b408850133664c03cac Mon Sep 17 00:00:00 2001
From: Rose <gfunni234 at gmail.com>
Date: Mon, 8 Apr 2024 11:31:07 -0400
Subject: [PATCH] [Thumb] Improve THUMB code generation by moving movs around
if it avoids emitting an extra instruction
When the ARM backend decides it is best to copy a value from one register to another, it does not take Thumb limitations into account. As a result, we have to find a way to perform the transfer AFTER it has already been decided that a mov would work: sometimes this is done with a movs, and other times with two instructions, which is not what the compiler backend intended.
Until we find a better solution, this is unfortunately the best I could do, unless we can find a more proper approach, such as updating the cost model to take into account the possibility of going through the stack or a high register.
---
llvm/lib/Target/ARM/Thumb1InstrInfo.cpp | 67 +++++++
llvm/test/CodeGen/ARM/sadd_sat.ll | 168 ++++++++++++------
llvm/test/CodeGen/ARM/select_const.ll | 9 +-
llvm/test/CodeGen/Thumb/pr35836.ll | 43 +++++
.../CodeGen/Thumb/urem-seteq-illegal-types.ll | 6 +-
5 files changed, 227 insertions(+), 66 deletions(-)
diff --git a/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp b/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
index 5b0b799880a35f..ded78319e838cb 100644
--- a/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -38,6 +38,45 @@ unsigned Thumb1InstrInfo::getUnindexedOpcode(unsigned Opc) const {
return 0;
}
+static bool tryToSinkCSPRDef(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &I,
+ const BitVector &RegUnits, const DebugLoc &DL,
+ MCRegister DestReg, MCRegister SrcReg,
+ bool KillSrc, const TargetRegisterInfo *RegInfo) {
+ // Scan backward from I, within MBB, for a point where the caller can insert a MOVS.
+ // Returns true and moves I to that point on success; returns false and leaves I as is.
+ LiveRegUnits UsedRegs(*RegInfo);
+ // NOTE(review): DL and KillSrc are not referenced anywhere in this body.
+ // Seed the scan with the register units supplied by the caller.
+ UsedRegs.addUnits(RegUnits);
+
+ // NOTE(review): the original comment assumed SrcReg and DestReg are both available
+ // here, yet the loop below only runs while SrcReg is NOT available - confirm intent.
+
+ auto InstUpToI = I;
+
+ auto begin = MBB.begin();
+ // Step backward while CPSR is unavailable, DestReg is available and SrcReg is not.
+ while (InstUpToI != begin && !UsedRegs.available(ARM::CPSR) &&
+ UsedRegs.available(DestReg) && !UsedRegs.available(SrcReg)) {
+ // NOTE(review): isCall() is read from *InstUpToI before stepping; confirm I != MBB.end().
+ // Do not move the copy across a function call.
+ if (InstUpToI->isCall())
+ return false;
+ // Move to the previous instruction and update UsedRegs with it.
+ UsedRegs.stepBackward(*--InstUpToI);
+ }
+
+ // Fail unless CPSR and DestReg are available and SrcReg is not (covers hitting block start).
+ // FIXME: Can we keep going back if there is only one predecessor?
+ if (!UsedRegs.available(ARM::CPSR) || !UsedRegs.available(DestReg) ||
+ UsedRegs.available(SrcReg))
+ return false;
+ // Hand the insertion point back to the caller through the in/out iterator.
+ I = InstUpToI;
+ return true;
+}
+
void Thumb1InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
const DebugLoc &DL, MCRegister DestReg,
@@ -72,6 +111,34 @@ void Thumb1InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
return;
}
+ // Not ideal, but since the solution involves 2 instructions instead of 1,
+ // Which the scheduler did not account for, codegen is not ideal anyway, so
+ // lets see if we can manually sink this copy
+ // FIXME: Shouldn't this be done by the MachineSink pass?
+ // Though the sink pass won't see the two instructions as one copy but two.
+ // Here is the only change we could remedy that.
+
+ // TODO: What if the definition of the last is outside the basic block?
+ // FIXME: For now, we sink only to a successor which has a single
+ // predecessor
+ // so that we can directly sink COPY instructions to the successor without
+ // adding any new block or branch instruction.
+
+ // See if we can find the instruction where CSPR is defined.
+ // Bail if any reg dependencies will be violated
+
+ // InstUpToI is equal to I
+
+ if (tryToSinkCSPRDef(MBB, InstUpToI, UsedRegs.getBitVector(), DL, DestReg,
+ SrcReg, KillSrc, RegInfo)) {
+
+ // We found the place to insert the MOVS
+ BuildMI(MBB, InstUpToI, DL, get(ARM::tMOVSr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc))
+ ->addRegisterDead(ARM::CPSR, RegInfo);
+ return;
+ }
+
// Use high register to move source to destination
// if movs is not an option.
BitVector Allocatable = RegInfo->getAllocatableSet(
diff --git a/llvm/test/CodeGen/ARM/sadd_sat.ll b/llvm/test/CodeGen/ARM/sadd_sat.ll
index 0060b4458081bc..c94c4edd64ef85 100644
--- a/llvm/test/CodeGen/ARM/sadd_sat.ll
+++ b/llvm/test/CodeGen/ARM/sadd_sat.ll
@@ -129,9 +129,8 @@ define i64 @func2(i64 %x, i64 %y) nounwind {
; CHECK-T15TE-NEXT: eors r4, r3
; CHECK-T15TE-NEXT: bics r4, r1
; CHECK-T15TE-NEXT: asrs r1, r3, #31
+; CHECK-T15TE-NEXT: movs r0, r1
; CHECK-T15TE-NEXT: cmp r4, #0
-; CHECK-T15TE-NEXT: mov r12, r1
-; CHECK-T15TE-NEXT: mov r0, r12
; CHECK-T15TE-NEXT: bmi .LBB1_2
; CHECK-T15TE-NEXT: @ %bb.1:
; CHECK-T15TE-NEXT: movs r0, r2
@@ -151,28 +150,28 @@ define i64 @func2(i64 %x, i64 %y) nounwind {
}
define signext i16 @func16(i16 signext %x, i16 signext %y) nounwind {
-; CHECK-T1-LABEL: func16:
-; CHECK-T1: @ %bb.0:
-; CHECK-T1-NEXT: adds r0, r0, r1
-; CHECK-T1-NEXT: ldr r1, .LCPI2_0
-; CHECK-T1-NEXT: cmp r0, r1
-; CHECK-T1-NEXT: blt .LBB2_2
-; CHECK-T1-NEXT: @ %bb.1:
-; CHECK-T1-NEXT: {{movs|mov}} r0, r1
-; CHECK-T1-NEXT: .LBB2_2:
-; CHECK-T1-NEXT: ldr r1, .LCPI2_1
-; CHECK-T1-NEXT: cmp r0, r1
-; CHECK-T1-NEXT: bgt .LBB2_4
-; CHECK-T1-NEXT: @ %bb.3:
-; CHECK-T1-NEXT: {{movs|mov}} r0, r1
-; CHECK-T1-NEXT: .LBB2_4:
-; CHECK-T1-NEXT: bx lr
-; CHECK-T1-NEXT: .p2align 2
-; CHECK-T1-NEXT: @ %bb.5:
-; CHECK-T1-NEXT: .LCPI2_0:
-; CHECK-T1-NEXT: .long 32767 @ 0x7fff
-; CHECK-T1-NEXT: .LCPI2_1:
-; CHECK-T1-NEXT: .long 4294934528 @ 0xffff8000
+; CHECK-T16-LABEL: func16:
+; CHECK-T16: @ %bb.0:
+; CHECK-T16-NEXT: adds r0, r0, r1
+; CHECK-T16-NEXT: ldr r1, .LCPI2_0
+; CHECK-T16-NEXT: cmp r0, r1
+; CHECK-T16-NEXT: blt .LBB2_2
+; CHECK-T16-NEXT: @ %bb.1:
+; CHECK-T16-NEXT: mov r0, r1
+; CHECK-T16-NEXT: .LBB2_2:
+; CHECK-T16-NEXT: ldr r1, .LCPI2_1
+; CHECK-T16-NEXT: cmp r0, r1
+; CHECK-T16-NEXT: bgt .LBB2_4
+; CHECK-T16-NEXT: @ %bb.3:
+; CHECK-T16-NEXT: mov r0, r1
+; CHECK-T16-NEXT: .LBB2_4:
+; CHECK-T16-NEXT: bx lr
+; CHECK-T16-NEXT: .p2align 2
+; CHECK-T16-NEXT: @ %bb.5:
+; CHECK-T16-NEXT: .LCPI2_0:
+; CHECK-T16-NEXT: .long 32767 @ 0x7fff
+; CHECK-T16-NEXT: .LCPI2_1:
+; CHECK-T16-NEXT: .long 4294934528 @ 0xffff8000
;
; CHECK-T2NODSP-LABEL: func16:
; CHECK-T2NODSP: @ %bb.0:
@@ -210,6 +209,29 @@ define signext i16 @func16(i16 signext %x, i16 signext %y) nounwind {
; CHECK-ARMBASEDSP-NEXT: asr r0, r0, #16
; CHECK-ARMBASEDSP-NEXT: bx lr
;
+; CHECK-T15TE-LABEL: func16:
+; CHECK-T15TE: @ %bb.0:
+; CHECK-T15TE-NEXT: adds r0, r0, r1
+; CHECK-T15TE-NEXT: ldr r1, .LCPI2_0
+; CHECK-T15TE-NEXT: cmp r0, r1
+; CHECK-T15TE-NEXT: blt .LBB2_2
+; CHECK-T15TE-NEXT: @ %bb.1:
+; CHECK-T15TE-NEXT: movs r0, r1
+; CHECK-T15TE-NEXT: .LBB2_2:
+; CHECK-T15TE-NEXT: ldr r1, .LCPI2_1
+; CHECK-T15TE-NEXT: cmp r0, r1
+; CHECK-T15TE-NEXT: bgt .LBB2_4
+; CHECK-T15TE-NEXT: @ %bb.3:
+; CHECK-T15TE-NEXT: movs r0, r1
+; CHECK-T15TE-NEXT: .LBB2_4:
+; CHECK-T15TE-NEXT: bx lr
+; CHECK-T15TE-NEXT: .p2align 2
+; CHECK-T15TE-NEXT: @ %bb.5:
+; CHECK-T15TE-NEXT: .LCPI2_0:
+; CHECK-T15TE-NEXT: .long 32767 @ 0x7fff
+; CHECK-T15TE-NEXT: .LCPI2_1:
+; CHECK-T15TE-NEXT: .long 4294934528 @ 0xffff8000
+;
; CHECK-ARMDSP-LABEL: func16:
; CHECK-ARMDSP: @ %bb.0:
; CHECK-ARMDSP-NEXT: qadd16 r0, r0, r1
@@ -220,22 +242,22 @@ define signext i16 @func16(i16 signext %x, i16 signext %y) nounwind {
}
define signext i8 @func8(i8 signext %x, i8 signext %y) nounwind {
-; CHECK-T1-LABEL: func8:
-; CHECK-T1: @ %bb.0:
-; CHECK-T1-NEXT: adds r0, r0, r1
-; CHECK-T1-NEXT: movs r1, #127
-; CHECK-T1-NEXT: cmp r0, #127
-; CHECK-T1-NEXT: blt .LBB3_2
-; CHECK-T1-NEXT: @ %bb.1:
-; CHECK-T1-NEXT: {{movs|mov}} r0, r1
-; CHECK-T1-NEXT: .LBB3_2:
-; CHECK-T1-NEXT: mvns r1, r1
-; CHECK-T1-NEXT: cmp r0, r1
-; CHECK-T1-NEXT: bgt .LBB3_4
-; CHECK-T1-NEXT: @ %bb.3:
-; CHECK-T1-NEXT: {{movs|mov}} r0, r1
-; CHECK-T1-NEXT: .LBB3_4:
-; CHECK-T1-NEXT: bx lr
+; CHECK-T16-LABEL: func8:
+; CHECK-T16: @ %bb.0:
+; CHECK-T16-NEXT: adds r0, r0, r1
+; CHECK-T16-NEXT: movs r1, #127
+; CHECK-T16-NEXT: cmp r0, #127
+; CHECK-T16-NEXT: blt .LBB3_2
+; CHECK-T16-NEXT: @ %bb.1:
+; CHECK-T16-NEXT: mov r0, r1
+; CHECK-T16-NEXT: .LBB3_2:
+; CHECK-T16-NEXT: mvns r1, r1
+; CHECK-T16-NEXT: cmp r0, r1
+; CHECK-T16-NEXT: bgt .LBB3_4
+; CHECK-T16-NEXT: @ %bb.3:
+; CHECK-T16-NEXT: mov r0, r1
+; CHECK-T16-NEXT: .LBB3_4:
+; CHECK-T16-NEXT: bx lr
;
; CHECK-T2NODSP-LABEL: func8:
; CHECK-T2NODSP: @ %bb.0:
@@ -266,6 +288,23 @@ define signext i8 @func8(i8 signext %x, i8 signext %y) nounwind {
; CHECK-ARMBASEDSP-NEXT: asr r0, r0, #24
; CHECK-ARMBASEDSP-NEXT: bx lr
;
+; CHECK-T15TE-LABEL: func8:
+; CHECK-T15TE: @ %bb.0:
+; CHECK-T15TE-NEXT: adds r0, r0, r1
+; CHECK-T15TE-NEXT: movs r1, #127
+; CHECK-T15TE-NEXT: cmp r0, #127
+; CHECK-T15TE-NEXT: blt .LBB3_2
+; CHECK-T15TE-NEXT: @ %bb.1:
+; CHECK-T15TE-NEXT: movs r0, r1
+; CHECK-T15TE-NEXT: .LBB3_2:
+; CHECK-T15TE-NEXT: mvns r1, r1
+; CHECK-T15TE-NEXT: cmp r0, r1
+; CHECK-T15TE-NEXT: bgt .LBB3_4
+; CHECK-T15TE-NEXT: @ %bb.3:
+; CHECK-T15TE-NEXT: movs r0, r1
+; CHECK-T15TE-NEXT: .LBB3_4:
+; CHECK-T15TE-NEXT: bx lr
+;
; CHECK-ARMDSP-LABEL: func8:
; CHECK-ARMDSP: @ %bb.0:
; CHECK-ARMDSP-NEXT: qadd8 r0, r0, r1
@@ -276,22 +315,22 @@ define signext i8 @func8(i8 signext %x, i8 signext %y) nounwind {
}
define signext i4 @func3(i4 signext %x, i4 signext %y) nounwind {
-; CHECK-T1-LABEL: func3:
-; CHECK-T1: @ %bb.0:
-; CHECK-T1-NEXT: adds r0, r0, r1
-; CHECK-T1-NEXT: movs r1, #7
-; CHECK-T1-NEXT: cmp r0, #7
-; CHECK-T1-NEXT: blt .LBB4_2
-; CHECK-T1-NEXT: @ %bb.1:
-; CHECK-T1-NEXT: {{movs|mov}} r0, r1
-; CHECK-T1-NEXT: .LBB4_2:
-; CHECK-T1-NEXT: mvns r1, r1
-; CHECK-T1-NEXT: cmp r0, r1
-; CHECK-T1-NEXT: bgt .LBB4_4
-; CHECK-T1-NEXT: @ %bb.3:
-; CHECK-T1-NEXT: {{movs|mov}} r0, r1
-; CHECK-T1-NEXT: .LBB4_4:
-; CHECK-T1-NEXT: bx lr
+; CHECK-T16-LABEL: func3:
+; CHECK-T16: @ %bb.0:
+; CHECK-T16-NEXT: adds r0, r0, r1
+; CHECK-T16-NEXT: movs r1, #7
+; CHECK-T16-NEXT: cmp r0, #7
+; CHECK-T16-NEXT: blt .LBB4_2
+; CHECK-T16-NEXT: @ %bb.1:
+; CHECK-T16-NEXT: mov r0, r1
+; CHECK-T16-NEXT: .LBB4_2:
+; CHECK-T16-NEXT: mvns r1, r1
+; CHECK-T16-NEXT: cmp r0, r1
+; CHECK-T16-NEXT: bgt .LBB4_4
+; CHECK-T16-NEXT: @ %bb.3:
+; CHECK-T16-NEXT: mov r0, r1
+; CHECK-T16-NEXT: .LBB4_4:
+; CHECK-T16-NEXT: bx lr
;
; CHECK-T2NODSP-LABEL: func3:
; CHECK-T2NODSP: @ %bb.0:
@@ -324,6 +363,23 @@ define signext i4 @func3(i4 signext %x, i4 signext %y) nounwind {
; CHECK-ARMBASEDSP-NEXT: asr r0, r0, #28
; CHECK-ARMBASEDSP-NEXT: bx lr
;
+; CHECK-T15TE-LABEL: func3:
+; CHECK-T15TE: @ %bb.0:
+; CHECK-T15TE-NEXT: adds r0, r0, r1
+; CHECK-T15TE-NEXT: movs r1, #7
+; CHECK-T15TE-NEXT: cmp r0, #7
+; CHECK-T15TE-NEXT: blt .LBB4_2
+; CHECK-T15TE-NEXT: @ %bb.1:
+; CHECK-T15TE-NEXT: movs r0, r1
+; CHECK-T15TE-NEXT: .LBB4_2:
+; CHECK-T15TE-NEXT: mvns r1, r1
+; CHECK-T15TE-NEXT: cmp r0, r1
+; CHECK-T15TE-NEXT: bgt .LBB4_4
+; CHECK-T15TE-NEXT: @ %bb.3:
+; CHECK-T15TE-NEXT: movs r0, r1
+; CHECK-T15TE-NEXT: .LBB4_4:
+; CHECK-T15TE-NEXT: bx lr
+;
; CHECK-ARMDSP-LABEL: func3:
; CHECK-ARMDSP: @ %bb.0:
; CHECK-ARMDSP-NEXT: lsl r0, r0, #28
diff --git a/llvm/test/CodeGen/ARM/select_const.ll b/llvm/test/CodeGen/ARM/select_const.ll
index df95af313eac66..b412acb8e56ec8 100644
--- a/llvm/test/CodeGen/ARM/select_const.ll
+++ b/llvm/test/CodeGen/ARM/select_const.ll
@@ -664,9 +664,8 @@ define i64 @opaque_constant1(i1 %cond, i64 %x) {
; THUMB-NEXT: push {r4, r5, r6, r7, lr}
; THUMB-NEXT: movs r7, #1
; THUMB-NEXT: ands r0, r7
+; THUMB-NEXT: movs r4, r0
; THUMB-NEXT: subs r1, r0, #1
-; THUMB-NEXT: mov r12, r0
-; THUMB-NEXT: mov r4, r12
; THUMB-NEXT: sbcs r4, r1
; THUMB-NEXT: cmp r0, #0
; THUMB-NEXT: bne .LBB24_2
@@ -680,9 +679,8 @@ define i64 @opaque_constant1(i1 %cond, i64 %x) {
; THUMB-NEXT: ldr r0, .LCPI24_0
; THUMB-NEXT: ands r5, r0
; THUMB-NEXT: movs r6, #0
+; THUMB-NEXT: movs r1, r4
; THUMB-NEXT: subs r0, r5, #1
-; THUMB-NEXT: mov r12, r4
-; THUMB-NEXT: mov r1, r12
; THUMB-NEXT: sbcs r1, r6
; THUMB-NEXT: eors r3, r7
; THUMB-NEXT: ldr r6, .LCPI24_0
@@ -785,9 +783,8 @@ define i64 @func(i64 %arg) {
; THUMB-NEXT: .save {r4, lr}
; THUMB-NEXT: push {r4, lr}
; THUMB-NEXT: movs r2, #0
+; THUMB-NEXT: movs r3, r1
; THUMB-NEXT: adds r3, r0, #1
-; THUMB-NEXT: mov r12, r1
-; THUMB-NEXT: mov r3, r12
; THUMB-NEXT: adcs r3, r2
; THUMB-NEXT: mov r12, r2
; THUMB-NEXT: mov r3, r12
diff --git a/llvm/test/CodeGen/Thumb/pr35836.ll b/llvm/test/CodeGen/Thumb/pr35836.ll
index ba33a8184bcc71..9278e61a34d3db 100644
--- a/llvm/test/CodeGen/Thumb/pr35836.ll
+++ b/llvm/test/CodeGen/Thumb/pr35836.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc < %s | FileCheck %s
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
@@ -5,6 +6,48 @@ target triple = "thumbv5e-none-linux-gnueabi"
; Function Attrs: norecurse nounwind optsize
define void @f(i32,i32,i32,i32,ptr %x4p, ptr %x5p, ptr %x6p) {
+; CHECK-LABEL: f:
+; CHECK: @ %bb.0: @ %if.end
+; CHECK-NEXT: .save {r4, r5, r6, r7, lr}
+; CHECK-NEXT: push {r4, r5, r6, r7, lr}
+; CHECK-NEXT: .pad #24
+; CHECK-NEXT: sub sp, #24
+; CHECK-NEXT: add r4, sp, #8
+; CHECK-NEXT: stm r4!, {r0, r1, r2, r3} @ 16-byte Folded Spill
+; CHECK-NEXT: movs r5, #0
+; CHECK-NEXT: ldr r0, [sp, #52]
+; CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT: ldr r0, [sp, #48]
+; CHECK-NEXT: str r0, [sp] @ 4-byte Spill
+; CHECK-NEXT: ldr r7, [sp, #44]
+; CHECK-NEXT: .LBB0_1: @ %while.body
+; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT: movs r1, r5
+; CHECK-NEXT: adds r3, r0, r1
+; CHECK-NEXT: adcs r1, r5
+; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT: movs r4, r5
+; CHECK-NEXT: adds r2, r0, r2
+; CHECK-NEXT: adcs r4, r5
+; CHECK-NEXT: movs r0, r3
+; CHECK-NEXT: adds r0, r2, r5
+; CHECK-NEXT: adcs r0, r4
+; CHECK-NEXT: ldr r6, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT: str r0, [r6]
+; CHECK-NEXT: ldr r0, [r7]
+; CHECK-NEXT: ldr r6, [sp] @ 4-byte Reload
+; CHECK-NEXT: ldr r6, [r6]
+; CHECK-NEXT: movs r6, r5
+; CHECK-NEXT: adds r0, r6, r0
+; CHECK-NEXT: adcs r6, r5
+; CHECK-NEXT: adds r2, r2, r5
+; CHECK-NEXT: adcs r4, r3
+; CHECK-NEXT: adcs r0, r1
+; CHECK-NEXT: adcs r6, r5
+; CHECK-NEXT: b .LBB0_1
if.end:
br label %while.body
diff --git a/llvm/test/CodeGen/Thumb/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/Thumb/urem-seteq-illegal-types.ll
index 61a741445b81cf..c54e4547243eb3 100644
--- a/llvm/test/CodeGen/Thumb/urem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/Thumb/urem-seteq-illegal-types.ll
@@ -121,9 +121,8 @@ define <3 x i1> @test_urem_vec(<3 x i11> %X) nounwind {
; CHECK-NEXT: lsrs r0, r3, #1
; CHECK-NEXT: movs r3, #1
; CHECK-NEXT: movs r4, #0
+; CHECK-NEXT: movs r0, r3
; CHECK-NEXT: cmp r0, #170
-; CHECK-NEXT: mov r12, r3
-; CHECK-NEXT: mov r0, r12
; CHECK-NEXT: bhi .LBB4_2
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: movs r0, r4
@@ -133,9 +132,8 @@ define <3 x i1> @test_urem_vec(<3 x i11> %X) nounwind {
; CHECK-NEXT: muls r5, r1, r5
; CHECK-NEXT: movs r1, #73
; CHECK-NEXT: lsls r1, r1, #23
+; CHECK-NEXT: movs r1, r3
; CHECK-NEXT: cmp r5, r1
-; CHECK-NEXT: mov r12, r3
-; CHECK-NEXT: mov r1, r12
; CHECK-NEXT: bhi .LBB4_4
; CHECK-NEXT: @ %bb.3:
; CHECK-NEXT: movs r1, r4
More information about the llvm-commits
mailing list