[llvm] [Thumb1] Resolve FIXME: Use 'mov hi, $src; mov $dst, hi' (PR #81908)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Mar 3 09:09:41 PST 2024
https://github.com/AtariDreams updated https://github.com/llvm/llvm-project/pull/81908
>From be65e795c08879d5e3470c836aebd11b2c6b5557 Mon Sep 17 00:00:00 2001
From: Rose <83477269+AtariDreams at users.noreply.github.com>
Date: Thu, 15 Feb 2024 14:21:56 -0500
Subject: [PATCH] [Thumb1] Resolve FIXME: Use 'mov hi, $src; mov $dst, hi'
Consider the following:
ldr r0, [r4]
ldr r7, [r0, #4]
cmp r7, r3
bhi .LBB0_6
cmp r0, r2
push {r0}
pop {r4}
bne .LBB0_3
movs r0, r6
pop {r4, r5, r6, r7}
pop {r1}
bx r1
Here is a snippet of the Thumb1 code that clang currently generates for the K&R malloc function.
The push {r0} / pop {r4} pair is effectively a copy of r0 into r4 done through the stack.
movs r4, r0 would destroy the flags set by cmp right above.
In this case the compiler has only one alternative to the push/pop pair:
transferring the value through a high register.
However, LLVM does not seem to consider this a valid approach, even though clobbering a free high register costs nothing.
This patch addresses the FIXME so that the compiler can do that whenever a high register such as r10, r11, or r12 is available.
---
llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 7 ++-
llvm/lib/Target/ARM/Thumb1InstrInfo.cpp | 58 ++++++++++++++++---
llvm/test/CodeGen/ARM/sadd_sat.ll | 4 +-
llvm/test/CodeGen/ARM/select_const.ll | 16 ++---
llvm/test/CodeGen/ARM/wide-compares.ll | 8 +--
llvm/test/CodeGen/Thumb/pr35836.ll | 12 ++--
.../CodeGen/Thumb/urem-seteq-illegal-types.ll | 8 +--
7 files changed, 79 insertions(+), 34 deletions(-)
diff --git a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 6121055eb02176..ed1eeb7844c6c4 100644
--- a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -635,9 +635,10 @@ MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(
// For Thumb1 targets, it might be necessary to clobber the CPSR to merge.
// Compute liveness information for that register to make the decision.
- bool SafeToClobberCPSR = !isThumb1 ||
- (MBB.computeRegisterLiveness(TRI, ARM::CPSR, InsertBefore, 20) ==
- MachineBasicBlock::LQR_Dead);
+ bool SafeToClobberCPSR =
+ !isThumb1 ||
+ (MBB.computeRegisterLiveness(TRI, ARM::CPSR, InsertBefore, MBB.size()) ==
+ MachineBasicBlock::LQR_Dead);
bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.
diff --git a/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp b/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
index 85eabdb17ad190..5c5f79f39d8867 100644
--- a/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -12,6 +12,7 @@
#include "Thumb1InstrInfo.h"
#include "ARMSubtarget.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
@@ -47,24 +48,67 @@ void Thumb1InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
assert(ARM::GPRRegClass.contains(DestReg, SrcReg) &&
"Thumb1 can only copy GPR registers");
- if (st.hasV6Ops() || ARM::hGPRRegClass.contains(SrcReg)
- || !ARM::tGPRRegClass.contains(DestReg))
+ if (st.hasV6Ops() || ARM::hGPRRegClass.contains(SrcReg) ||
+ !ARM::tGPRRegClass.contains(DestReg))
BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc))
.add(predOps(ARMCC::AL));
else {
- // FIXME: Can also use 'mov hi, $src; mov $dst, hi',
- // with hi as either r10 or r11.
-
const TargetRegisterInfo *RegInfo = st.getRegisterInfo();
- if (MBB.computeRegisterLiveness(RegInfo, ARM::CPSR, I)
- == MachineBasicBlock::LQR_Dead) {
+
+ LivePhysRegs UsedRegs(*RegInfo);
+ UsedRegs.addLiveOuts(MBB);
+
+ const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
+ for (unsigned i = 0; CSRegs[i]; ++i)
+ UsedRegs.addReg(CSRegs[i]);
+
+ auto InstUpToI = MBB.end();
+ while (InstUpToI != I)
+ // The pre-decrement is on purpose here.
+ // We want to have the liveness right before I.
+ UsedRegs.stepBackward(*--InstUpToI);
+
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ if (UsedRegs.available(MRI, ARM::CPSR)) {
BuildMI(MBB, I, DL, get(ARM::tMOVSr), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc))
->addRegisterDead(ARM::CPSR, RegInfo);
return;
}
+ // Look for a temporary register to use.
+ BitVector GPRsNoLRSP = RegInfo->getAllocatableSet(
+ MF, RegInfo->getRegClass(ARM::hGPRRegClassID));
+
+ // Avoid messing with these, just to be safe
+ // NOTE: lr is caller-save, so we can use it
+ GPRsNoLRSP.reset(ARM::SP);
+ GPRsNoLRSP.reset(ARM::PC);
+
+ Register Reg = ARM::NoRegister;
+
+ // Find the first high-register that is available
+ for (auto RegT : GPRsNoLRSP.set_bits()) {
+ if (UsedRegs.available(MRI, RegT)) {
+ Reg = RegT;
+ break;
+ }
+ }
+
+ // Can also use 'mov hi, $src; mov $dst, hi',
+ // with hi as any of the high registers available
+ if (Reg) {
+ // Use high register to move source to destination
+ BuildMI(MBB, I, DL, get(ARM::tMOVr), Reg)
+ .addReg(SrcReg, getKillRegState(KillSrc))
+ .add(predOps(ARMCC::AL));
+ BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg)
+ .addReg(Reg, RegState::Kill)
+ .add(predOps(ARMCC::AL));
+ return;
+ }
+
// 'MOV lo, lo' is unpredictable on < v6, so use the stack to do it
BuildMI(MBB, I, DL, get(ARM::tPUSH))
.add(predOps(ARMCC::AL))
diff --git a/llvm/test/CodeGen/ARM/sadd_sat.ll b/llvm/test/CodeGen/ARM/sadd_sat.ll
index 1632c4e86c7629..0060b4458081bc 100644
--- a/llvm/test/CodeGen/ARM/sadd_sat.ll
+++ b/llvm/test/CodeGen/ARM/sadd_sat.ll
@@ -130,8 +130,8 @@ define i64 @func2(i64 %x, i64 %y) nounwind {
; CHECK-T15TE-NEXT: bics r4, r1
; CHECK-T15TE-NEXT: asrs r1, r3, #31
; CHECK-T15TE-NEXT: cmp r4, #0
-; CHECK-T15TE-NEXT: push {r1}
-; CHECK-T15TE-NEXT: pop {r0}
+; CHECK-T15TE-NEXT: mov r12, r1
+; CHECK-T15TE-NEXT: mov r0, r12
; CHECK-T15TE-NEXT: bmi .LBB1_2
; CHECK-T15TE-NEXT: @ %bb.1:
; CHECK-T15TE-NEXT: movs r0, r2
diff --git a/llvm/test/CodeGen/ARM/select_const.ll b/llvm/test/CodeGen/ARM/select_const.ll
index e12dd02f16c2fa..dc69c5889ad52f 100644
--- a/llvm/test/CodeGen/ARM/select_const.ll
+++ b/llvm/test/CodeGen/ARM/select_const.ll
@@ -665,8 +665,8 @@ define i64 @opaque_constant1(i1 %cond, i64 %x) {
; THUMB-NEXT: movs r7, #1
; THUMB-NEXT: ands r0, r7
; THUMB-NEXT: subs r1, r0, #1
-; THUMB-NEXT: push {r0}
-; THUMB-NEXT: pop {r4}
+; THUMB-NEXT: mov r12, r0
+; THUMB-NEXT: mov r4, r12
; THUMB-NEXT: sbcs r4, r1
; THUMB-NEXT: cmp r0, #0
; THUMB-NEXT: bne .LBB24_2
@@ -681,8 +681,8 @@ define i64 @opaque_constant1(i1 %cond, i64 %x) {
; THUMB-NEXT: ands r5, r0
; THUMB-NEXT: movs r6, #0
; THUMB-NEXT: subs r0, r5, #1
-; THUMB-NEXT: push {r4}
-; THUMB-NEXT: pop {r1}
+; THUMB-NEXT: mov r12, r4
+; THUMB-NEXT: mov r1, r12
; THUMB-NEXT: sbcs r1, r6
; THUMB-NEXT: eors r3, r7
; THUMB-NEXT: ldr r6, .LCPI24_0
@@ -786,11 +786,11 @@ define i64 @func(i64 %arg) {
; THUMB-NEXT: push {r4, lr}
; THUMB-NEXT: movs r2, #0
; THUMB-NEXT: adds r3, r0, #1
-; THUMB-NEXT: push {r1}
-; THUMB-NEXT: pop {r3}
+; THUMB-NEXT: mov lr, r1
+; THUMB-NEXT: mov r3, lr
; THUMB-NEXT: adcs r3, r2
-; THUMB-NEXT: push {r2}
-; THUMB-NEXT: pop {r3}
+; THUMB-NEXT: mov lr, r2
+; THUMB-NEXT: mov r3, lr
; THUMB-NEXT: adcs r3, r2
; THUMB-NEXT: subs r4, r3, #1
; THUMB-NEXT: adds r0, r0, #1
diff --git a/llvm/test/CodeGen/ARM/wide-compares.ll b/llvm/test/CodeGen/ARM/wide-compares.ll
index 6584f0c7616c52..09e3592b6d420e 100644
--- a/llvm/test/CodeGen/ARM/wide-compares.ll
+++ b/llvm/test/CodeGen/ARM/wide-compares.ll
@@ -257,12 +257,12 @@ define {i32, i32} @test_slt_not(i32 %c, i32 %d, i64 %a, i64 %b) {
; CHECK-THUMB1-NOMOV-NEXT: ldr r5, [sp, #16]
; CHECK-THUMB1-NOMOV-NEXT: subs r2, r2, r5
; CHECK-THUMB1-NOMOV-NEXT: sbcs r3, r0
-; CHECK-THUMB1-NOMOV-NEXT: push {r1}
-; CHECK-THUMB1-NOMOV-NEXT: pop {r0}
+; CHECK-THUMB1-NOMOV-NEXT: mov r12, r1
+; CHECK-THUMB1-NOMOV-NEXT: mov r0, r12
; CHECK-THUMB1-NOMOV-NEXT: blt .LBB3_2
; CHECK-THUMB1-NOMOV-NEXT: @ %bb.1: @ %entry
-; CHECK-THUMB1-NOMOV-NEXT: push {r4}
-; CHECK-THUMB1-NOMOV-NEXT: pop {r0}
+; CHECK-THUMB1-NOMOV-NEXT: mov r12, r4
+; CHECK-THUMB1-NOMOV-NEXT: mov r0, r12
; CHECK-THUMB1-NOMOV-NEXT: .LBB3_2: @ %entry
; CHECK-THUMB1-NOMOV-NEXT: bge .LBB3_4
; CHECK-THUMB1-NOMOV-NEXT: @ %bb.3: @ %entry
diff --git a/llvm/test/CodeGen/Thumb/pr35836.ll b/llvm/test/CodeGen/Thumb/pr35836.ll
index 96a6fe5d142025..ba33a8184bcc71 100644
--- a/llvm/test/CodeGen/Thumb/pr35836.ll
+++ b/llvm/test/CodeGen/Thumb/pr35836.ll
@@ -35,18 +35,18 @@ while.body:
br label %while.body
}
; CHECK: adds r3, r0, r1
-; CHECK: push {r5}
-; CHECK: pop {r1}
+; CHECK: mov r12, r5
+; CHECK: mov r1, r12
; CHECK: adcs r1, r5
; CHECK: ldr r0, [sp, #12] @ 4-byte Reload
; CHECK: ldr r2, [sp, #8] @ 4-byte Reload
; CHECK: adds r2, r0, r2
-; CHECK: push {r5}
-; CHECK: pop {r4}
+; CHECK: mov r12, r5
+; CHECK: mov r4, r12
; CHECK: adcs r4, r5
; CHECK: adds r0, r2, r5
-; CHECK: push {r3}
-; CHECK: pop {r0}
+; CHECK: mov r12, r3
+; CHECK: mov r0, r12
; CHECK: adcs r0, r4
; CHECK: ldr r6, [sp, #4] @ 4-byte Reload
; CHECK: str r0, [r6]
diff --git a/llvm/test/CodeGen/Thumb/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/Thumb/urem-seteq-illegal-types.ll
index aa5deb6542b2b0..61a741445b81cf 100644
--- a/llvm/test/CodeGen/Thumb/urem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/Thumb/urem-seteq-illegal-types.ll
@@ -122,8 +122,8 @@ define <3 x i1> @test_urem_vec(<3 x i11> %X) nounwind {
; CHECK-NEXT: movs r3, #1
; CHECK-NEXT: movs r4, #0
; CHECK-NEXT: cmp r0, #170
-; CHECK-NEXT: push {r3}
-; CHECK-NEXT: pop {r0}
+; CHECK-NEXT: mov r12, r3
+; CHECK-NEXT: mov r0, r12
; CHECK-NEXT: bhi .LBB4_2
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: movs r0, r4
@@ -134,8 +134,8 @@ define <3 x i1> @test_urem_vec(<3 x i11> %X) nounwind {
; CHECK-NEXT: movs r1, #73
; CHECK-NEXT: lsls r1, r1, #23
; CHECK-NEXT: cmp r5, r1
-; CHECK-NEXT: push {r3}
-; CHECK-NEXT: pop {r1}
+; CHECK-NEXT: mov r12, r3
+; CHECK-NEXT: mov r1, r12
; CHECK-NEXT: bhi .LBB4_4
; CHECK-NEXT: @ %bb.3:
; CHECK-NEXT: movs r1, r4
More information about the llvm-commits
mailing list