[llvm] 3b34b06 - [ARM] Expand the range of allowed post-incs in load/store optimizer
David Green via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 18 06:59:41 PST 2021
Author: David Green
Date: 2021-02-18T14:59:02Z
New Revision: 3b34b06fc5908b4f7dc720c0655d5756bd8e2a28
URL: https://github.com/llvm/llvm-project/commit/3b34b06fc5908b4f7dc720c0655d5756bd8e2a28
DIFF: https://github.com/llvm/llvm-project/commit/3b34b06fc5908b4f7dc720c0655d5756bd8e2a28.diff
LOG: [ARM] Expand the range of allowed post-incs in load/store optimizer
Currently the load/store optimizer will only fold in increments of the
same size as the load/store. This patch expands that to any legal
immediate for the post-inc instruction.
Differential Revision: https://reviews.llvm.org/D95885
Added:
Modified:
llvm/lib/Target/ARM/ARMBaseInstrInfo.h
llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
llvm/test/CodeGen/ARM/arm-shrink-wrapping.ll
llvm/test/CodeGen/Thumb2/mve-float32regloops.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
index 1b843c428130..672163f569a2 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -886,10 +886,13 @@ inline bool isLegalAddressImm(unsigned Opcode, int Imm,
return std::abs(Imm) < (((1 << 7) * 2) - 1) && Imm % 2 == 0;
case ARMII::AddrModeT2_i7s4:
return std::abs(Imm) < (((1 << 7) * 4) - 1) && Imm % 4 == 0;
+ case ARMII::AddrMode2:
case ARMII::AddrModeT2_i8:
return std::abs(Imm) < (((1 << 8) * 1) - 1);
case ARMII::AddrModeT2_i12:
return Imm >= 0 && Imm < (((1 << 12) * 1) - 1);
+ case ARMII::AddrModeT2_i8s4:
+ return std::abs(Imm) < (((1 << 8) * 4) - 1) && Imm % 4 == 0;
default:
llvm_unreachable("Unhandled Addressing mode");
}
diff --git a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 5fe61809f31b..bf6f7752b40e 100644
--- a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -1502,12 +1502,16 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
} else {
MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset, TRI);
- if (Offset == Bytes) {
- NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
- } else if (!isAM5 && Offset == -Bytes) {
- NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
- } else
+ if (MergeInstr == MBB.end())
return false;
+
+ NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
+ if ((isAM5 && Offset != Bytes) ||
+ (!isAM5 && !isLegalAddressImm(NewOpc, Offset, TII))) {
+ NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
+ if (isAM5 || !isLegalAddressImm(NewOpc, Offset, TII))
+ return false;
+ }
}
LLVM_DEBUG(dbgs() << " Erasing old increment: " << *MergeInstr);
MBB.erase(MergeInstr);
@@ -1546,7 +1550,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
(void)MIB;
LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB);
} else {
- int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
+ int Imm = ARM_AM::getAM2Opc(AddSub, abs(Offset), ARM_AM::no_shift);
auto MIB =
BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
.addReg(Base, RegState::Define)
@@ -1576,7 +1580,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
// the vestigial zero-reg offset register. When that's fixed, this clause
// can be removed entirely.
if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
- int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
+ int Imm = ARM_AM::getAM2Opc(AddSub, abs(Offset), ARM_AM::no_shift);
// STR_PRE, STR_POST
auto MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
.addReg(MO.getReg(), getKillRegState(MO.isKill()))
@@ -1629,13 +1633,14 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const {
MachineBasicBlock::iterator MergeInstr = findIncDecBefore(MBBI, Base, Pred,
PredReg, Offset);
unsigned NewOpc;
- if (Offset == 8 || Offset == -8) {
+ if (Offset != 0) {
NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_PRE : ARM::t2STRD_PRE;
} else {
MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset, TRI);
- if (Offset == 8 || Offset == -8) {
- NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_POST : ARM::t2STRD_POST;
- } else
+ if (MergeInstr == MBB.end())
+ return false;
+ NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_POST : ARM::t2STRD_POST;
+ if (!isLegalAddressImm(NewOpc, Offset, TII))
return false;
}
LLVM_DEBUG(dbgs() << " Erasing old increment: " << *MergeInstr);
diff --git a/llvm/test/CodeGen/ARM/arm-shrink-wrapping.ll b/llvm/test/CodeGen/ARM/arm-shrink-wrapping.ll
index b5c63af5a348..ac0e5b76b6b0 100644
--- a/llvm/test/CodeGen/ARM/arm-shrink-wrapping.ll
+++ b/llvm/test/CodeGen/ARM/arm-shrink-wrapping.ll
@@ -1190,11 +1190,10 @@ define i32 @callVariadicFunc(i32 %cond, i32 %N) "frame-pointer"="all" {
; THUMB-ENABLE-NEXT: @ %bb.1: @ %if.then
; THUMB-ENABLE-NEXT: push {r7, lr}
; THUMB-ENABLE-NEXT: mov r7, sp
-; THUMB-ENABLE-NEXT: sub sp, #12
+; THUMB-ENABLE-NEXT: strd r1, r1, [sp, #-12]!
; THUMB-ENABLE-NEXT: mov r0, r1
; THUMB-ENABLE-NEXT: mov r2, r1
; THUMB-ENABLE-NEXT: mov r3, r1
-; THUMB-ENABLE-NEXT: strd r1, r1, [sp]
; THUMB-ENABLE-NEXT: str r1, [sp, #8]
; THUMB-ENABLE-NEXT: bl _someVariadicFunc
; THUMB-ENABLE-NEXT: lsls r0, r0, #3
diff --git a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll
index 7e4603e4b4c6..44a152b32c0b 100644
--- a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll
@@ -1715,7 +1715,7 @@ define arm_aapcs_vfpcc void @arm_biquad_cascade_df1_f32(%struct.arm_biquad_casd_
; CHECK-NEXT: vmov r3, s10
; CHECK-NEXT: vldrw.u32 q3, [r11, #48]
; CHECK-NEXT: vfma.f32 q1, q0, r3
-; CHECK-NEXT: ldr r3, [r1]
+; CHECK-NEXT: ldr r3, [r1], #16
; CHECK-NEXT: vfma.f32 q1, q7, r6
; CHECK-NEXT: vldrw.u32 q6, [r11, #64]
; CHECK-NEXT: vfma.f32 q1, q3, r3
@@ -1725,7 +1725,6 @@ define arm_aapcs_vfpcc void @arm_biquad_cascade_df1_f32(%struct.arm_biquad_casd_
; CHECK-NEXT: vfma.f32 q1, q5, r0
; CHECK-NEXT: vldrw.u32 q0, [sp, #64] @ 16-byte Reload
; CHECK-NEXT: vfma.f32 q1, q4, r7
-; CHECK-NEXT: adds r1, #16
; CHECK-NEXT: vfma.f32 q1, q0, r9
; CHECK-NEXT: vmov.f32 s2, s8
; CHECK-NEXT: vstrb.8 q1, [r5], #16
More information about the llvm-commits mailing list