[llvm] fd69df6 - [ARM] Distribute post-inc for Thumb2 sign/zero extending loads/stores
David Green via llvm-commits
llvm-commits at lists.llvm.org
Sat Aug 1 06:01:32 PDT 2020
Author: David Green
Date: 2020-08-01T14:01:18+01:00
New Revision: fd69df62ed1091f25ba4749cc5152e9ce2fe3af4
URL: https://github.com/llvm/llvm-project/commit/fd69df62ed1091f25ba4749cc5152e9ce2fe3af4
DIFF: https://github.com/llvm/llvm-project/commit/fd69df62ed1091f25ba4749cc5152e9ce2fe3af4.diff
LOG: [ARM] Distribute post-inc for Thumb2 sign/zero extending loads/stores
This adds sign/zero extending scalar loads/stores to the MVE
instructions added in D77813, allowing us to create more post-inc
instructions. These are comparatively simple compared to LDR/STR (which
may be better turned into an LDRD/LDM), but still require some additions
over MVE instructions. Because there are i12 and i8 variants of the
offset loads/stores dealing with different signs, we may need to convert
an i12 address to an i8 negative instruction. t2LDRBi12 can also be
shrunk to a tLDRBi under the right conditions, so we need to be careful
with codesize too.
Differential Revision: https://reviews.llvm.org/D78625
Added:
Modified:
llvm/lib/Target/ARM/ARMBaseInstrInfo.h
llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll
llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll
llvm/test/CodeGen/Thumb2/mve-float16regloops.ll
llvm/test/CodeGen/Thumb2/mve-postinc-distribute.ll
llvm/test/CodeGen/Thumb2/postinc-distribute.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
index 1a75b011ca59..f71445cf59c3 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -829,6 +829,10 @@ inline bool isLegalAddressImm(unsigned Opcode, int Imm,
return std::abs(Imm) < (((1 << 7) * 2) - 1) && Imm % 2 == 0;
case ARMII::AddrModeT2_i7s4:
return std::abs(Imm) < (((1 << 7) * 4) - 1) && Imm % 4 == 0;
+ case ARMII::AddrModeT2_i8:
+ return std::abs(Imm) < (((1 << 8) * 1) - 1);
+ case ARMII::AddrModeT2_i12:
+ return Imm >= 0 && Imm < (((1 << 12) * 1) - 1);
default:
llvm_unreachable("Unhandled Addressing mode");
}
diff --git a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index a84d23d3bb96..09bb3b3c6f72 100644
--- a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -1382,9 +1382,27 @@ static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
case ARM::t2LDRi8:
case ARM::t2LDRi12:
return ARM::t2LDR_POST;
+ case ARM::t2LDRBi8:
+ case ARM::t2LDRBi12:
+ return ARM::t2LDRB_POST;
+ case ARM::t2LDRSBi8:
+ case ARM::t2LDRSBi12:
+ return ARM::t2LDRSB_POST;
+ case ARM::t2LDRHi8:
+ case ARM::t2LDRHi12:
+ return ARM::t2LDRH_POST;
+ case ARM::t2LDRSHi8:
+ case ARM::t2LDRSHi12:
+ return ARM::t2LDRSH_POST;
case ARM::t2STRi8:
case ARM::t2STRi12:
return ARM::t2STR_POST;
+ case ARM::t2STRBi8:
+ case ARM::t2STRBi12:
+ return ARM::t2STRB_POST;
+ case ARM::t2STRHi8:
+ case ARM::t2STRHi12:
+ return ARM::t2STRH_POST;
case ARM::MVE_VLDRBS16:
return ARM::MVE_VLDRBS16_post;
@@ -2539,11 +2557,94 @@ static int getBaseOperandIndex(MachineInstr &MI) {
case ARM::MVE_VSTRBU8:
case ARM::MVE_VSTRHU16:
case ARM::MVE_VSTRWU32:
+ case ARM::t2LDRHi8:
+ case ARM::t2LDRHi12:
+ case ARM::t2LDRSHi8:
+ case ARM::t2LDRSHi12:
+ case ARM::t2LDRBi8:
+ case ARM::t2LDRBi12:
+ case ARM::t2LDRSBi8:
+ case ARM::t2LDRSBi12:
+ case ARM::t2STRBi8:
+ case ARM::t2STRBi12:
+ case ARM::t2STRHi8:
+ case ARM::t2STRHi12:
return 1;
}
return -1;
}
+// Given a memory access Opcode, check that the given Imm would be a valid Offset
+// for this instruction (same as isLegalAddressImm), or if the instruction
+// could be easily converted to one where that was valid. For example converting
+// t2LDRi12 to t2LDRi8 for negative offsets. Works in conjunction with
+// AdjustBaseAndOffset below.
+static bool isLegalOrConvertableAddressImm(unsigned Opcode, int Imm,
+ const TargetInstrInfo *TII,
+ int &CodesizeEstimate) {
+ if (isLegalAddressImm(Opcode, Imm, TII))
+ return true;
+
+ // We can convert AddrModeT2_i12 to AddrModeT2_i8.
+ const MCInstrDesc &Desc = TII->get(Opcode);
+ unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
+ switch (AddrMode) {
+ case ARMII::AddrModeT2_i12:
+ CodesizeEstimate += 1;
+ return std::abs(Imm) < (((1 << 8) * 1) - 1);
+ }
+ return false;
+}
+
+// Given an MI adjust its address BaseReg to use NewBaseReg and address offset
+// by -Offset. This can either happen in-place or be a replacement as MI is
+// converted to another instruction type.
+static void AdjustBaseAndOffset(MachineInstr *MI, Register NewBaseReg,
+ int Offset, const TargetInstrInfo *TII) {
+ unsigned BaseOp = getBaseOperandIndex(*MI);
+ MI->getOperand(BaseOp).setReg(NewBaseReg);
+ int OldOffset = MI->getOperand(BaseOp + 1).getImm();
+ if (isLegalAddressImm(MI->getOpcode(), OldOffset - Offset, TII))
+ MI->getOperand(BaseOp + 1).setImm(OldOffset - Offset);
+ else {
+ unsigned ConvOpcode;
+ switch (MI->getOpcode()) {
+ case ARM::t2LDRHi12:
+ ConvOpcode = ARM::t2LDRHi8;
+ break;
+ case ARM::t2LDRSHi12:
+ ConvOpcode = ARM::t2LDRSHi8;
+ break;
+ case ARM::t2LDRBi12:
+ ConvOpcode = ARM::t2LDRBi8;
+ break;
+ case ARM::t2LDRSBi12:
+ ConvOpcode = ARM::t2LDRSBi8;
+ break;
+ case ARM::t2STRHi12:
+ ConvOpcode = ARM::t2STRHi8;
+ break;
+ case ARM::t2STRBi12:
+ ConvOpcode = ARM::t2STRBi8;
+ break;
+ default:
+ llvm_unreachable("Unhandled convertable opcode");
+ }
+ assert(isLegalAddressImm(ConvOpcode, OldOffset - Offset, TII) &&
+ "Illegal Address Immediate after convert!");
+
+ const MCInstrDesc &MCID = TII->get(ConvOpcode);
+ BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)
+ .add(MI->getOperand(0))
+ .add(MI->getOperand(1))
+ .addImm(OldOffset - Offset)
+ .add(MI->getOperand(3))
+ .add(MI->getOperand(4))
+ .cloneMemRefs(*MI);
+ MI->eraseFromParent();
+ }
+}
+
static MachineInstr *createPostIncLoadStore(MachineInstr *MI, int Offset,
Register NewReg,
const TargetInstrInfo *TII,
@@ -2562,14 +2663,43 @@ static MachineInstr *createPostIncLoadStore(MachineInstr *MI, int Offset,
TRC = TII->getRegClass(MCID, 2, TRI, *MF);
MRI.constrainRegClass(MI->getOperand(1).getReg(), TRC);
- return BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)
- .addReg(NewReg, RegState::Define)
- .add(MI->getOperand(0))
- .add(MI->getOperand(1))
- .addImm(Offset)
- .add(MI->getOperand(3))
- .add(MI->getOperand(4))
- .cloneMemRefs(*MI);
+ unsigned AddrMode = (MCID.TSFlags & ARMII::AddrModeMask);
+ switch (AddrMode) {
+ case ARMII::AddrModeT2_i7:
+ case ARMII::AddrModeT2_i7s2:
+ case ARMII::AddrModeT2_i7s4:
+ // Any MVE load/store
+ return BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)
+ .addReg(NewReg, RegState::Define)
+ .add(MI->getOperand(0))
+ .add(MI->getOperand(1))
+ .addImm(Offset)
+ .add(MI->getOperand(3))
+ .add(MI->getOperand(4))
+ .cloneMemRefs(*MI);
+ case ARMII::AddrModeT2_i8:
+ if (MI->mayLoad()) {
+ return BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)
+ .add(MI->getOperand(0))
+ .addReg(NewReg, RegState::Define)
+ .add(MI->getOperand(1))
+ .addImm(Offset)
+ .add(MI->getOperand(3))
+ .add(MI->getOperand(4))
+ .cloneMemRefs(*MI);
+ } else {
+ return BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)
+ .addReg(NewReg, RegState::Define)
+ .add(MI->getOperand(0))
+ .add(MI->getOperand(1))
+ .addImm(Offset)
+ .add(MI->getOperand(3))
+ .add(MI->getOperand(4))
+ .cloneMemRefs(*MI);
+ }
+ default:
+ llvm_unreachable("Unhandled createPostIncLoadStore");
+ }
}
// Given a Base Register, optimise the load/store uses to attempt to create more
@@ -2589,7 +2719,7 @@ bool ARMPreAllocLoadStoreOpt::DistributeIncrements(Register Base) {
// An increment that can be folded in
MachineInstr *Increment = nullptr;
// Other accesses after BaseAccess that will need to be updated to use the
- // postinc value
+ // postinc value.
SmallPtrSet<MachineInstr *, 8> OtherAccesses;
for (auto &Use : MRI->use_nodbg_instructions(Base)) {
if (!Increment && getAddSubImmediate(Use) != 0) {
@@ -2643,14 +2773,20 @@ bool ARMPreAllocLoadStoreOpt::DistributeIncrements(Register Base) {
// other offsets after the BaseAccess. We rely on either
// dominates(BaseAccess, OtherAccess) or dominates(OtherAccess, BaseAccess)
// to keep things simple.
+ // This also adds a simple codesize metric, to detect if an instruction (like
+ // t2LDRBi12) which can often be shrunk to a thumb1 instruction (tLDRBi)
+ // cannot because it is converted to something else (t2LDRBi8). We start this
+ // at -1 for the gain from removing the increment.
SmallPtrSet<MachineInstr *, 4> SuccessorAccesses;
+ int CodesizeEstimate = -1;
for (auto *Use : OtherAccesses) {
if (DT->dominates(BaseAccess, Use)) {
SuccessorAccesses.insert(Use);
unsigned BaseOp = getBaseOperandIndex(*Use);
- if (!isLegalAddressImm(
- Use->getOpcode(),
- Use->getOperand(BaseOp + 1).getImm() - IncrementOffset, TII)) {
+ if (!isLegalOrConvertableAddressImm(Use->getOpcode(),
+ Use->getOperand(BaseOp + 1).getImm() -
+ IncrementOffset,
+ TII, CodesizeEstimate)) {
LLVM_DEBUG(dbgs() << " Illegal addressing mode immediate on use\n");
return false;
}
@@ -2660,6 +2796,10 @@ bool ARMPreAllocLoadStoreOpt::DistributeIncrements(Register Base) {
return false;
}
}
+ if (STI->hasMinSize() && CodesizeEstimate > 0) {
+ LLVM_DEBUG(dbgs() << " Expected to grow instructions under minsize\n");
+ return false;
+ }
// Replace BaseAccess with a post inc
LLVM_DEBUG(dbgs() << "Changing: "; BaseAccess->dump());
@@ -2674,10 +2814,7 @@ bool ARMPreAllocLoadStoreOpt::DistributeIncrements(Register Base) {
for (auto *Use : SuccessorAccesses) {
LLVM_DEBUG(dbgs() << "Changing: "; Use->dump());
- unsigned BaseOp = getBaseOperandIndex(*Use);
- Use->getOperand(BaseOp).setReg(NewBaseReg);
- int OldOffset = Use->getOperand(BaseOp + 1).getImm();
- Use->getOperand(BaseOp + 1).setImm(OldOffset - IncrementOffset);
+ AdjustBaseAndOffset(Use, NewBaseReg, IncrementOffset, TII);
LLVM_DEBUG(dbgs() << " To : "; Use->dump());
}
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll
index e69610f9df2a..261222f60f17 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll
@@ -1798,20 +1798,20 @@ define arm_aapcs_vfpcc float @half_short_mac(half* nocapture readonly %a, i16* n
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldrsh.w r4, [r3, #2]
; CHECK-NEXT: vldr.16 s2, [r2, #2]
-; CHECK-NEXT: ldrsh r5, [r3, #-2]
; CHECK-NEXT: add.w r12, r12, #4
; CHECK-NEXT: vmov s4, r4
-; CHECK-NEXT: ldrsh.w r4, [r3]
+; CHECK-NEXT: ldrsh r4, [r3], #8
; CHECK-NEXT: vcvt.f16.s32 s4, s4
-; CHECK-NEXT: vmov s8, r5
+; CHECK-NEXT: ldrsh r5, [r3, #-10]
; CHECK-NEXT: vmul.f16 s2, s2, s4
-; CHECK-NEXT: vldr.16 s4, [r2]
; CHECK-NEXT: vmov s6, r4
-; CHECK-NEXT: ldrsh r4, [r3, #-4]
+; CHECK-NEXT: vldr.16 s4, [r2]
; CHECK-NEXT: vcvt.f16.s32 s6, s6
-; CHECK-NEXT: vcvt.f16.s32 s8, s8
+; CHECK-NEXT: ldrsh r4, [r3, #-12]
; CHECK-NEXT: vmul.f16 s4, s4, s6
+; CHECK-NEXT: vmov s8, r5
; CHECK-NEXT: vldr.16 s6, [r2, #-2]
+; CHECK-NEXT: vcvt.f16.s32 s8, s8
; CHECK-NEXT: vmov s10, r4
; CHECK-NEXT: vcvtb.f32.f16 s4, s4
; CHECK-NEXT: vmul.f16 s6, s6, s8
@@ -1821,9 +1821,8 @@ define arm_aapcs_vfpcc float @half_short_mac(half* nocapture readonly %a, i16* n
; CHECK-NEXT: vmul.f16 s8, s8, s10
; CHECK-NEXT: vcvtb.f32.f16 s2, s2
; CHECK-NEXT: vcvtb.f32.f16 s8, s8
-; CHECK-NEXT: adds r3, #8
-; CHECK-NEXT: vadd.f32 s0, s0, s8
; CHECK-NEXT: adds r2, #8
+; CHECK-NEXT: vadd.f32 s0, s0, s8
; CHECK-NEXT: vadd.f32 s0, s0, s6
; CHECK-NEXT: vadd.f32 s0, s0, s4
; CHECK-NEXT: vadd.f32 s0, s0, s2
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll
index 428c703dd341..69039f9a4eaa 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll
@@ -437,17 +437,15 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_char(i8* nocapture readonly
; CHECK-NEXT: smlabb r7, r7, r8, r2
; CHECK-NEXT: str r7, [r4, #-8]
; CHECK-NEXT: ldrb r8, [r5, #-2]
-; CHECK-NEXT: ldrb r7, [r6]
+; CHECK-NEXT: ldrb r7, [r6], #4
; CHECK-NEXT: smlabb r7, r7, r8, r2
; CHECK-NEXT: str r7, [r4, #-4]
; CHECK-NEXT: ldrb r8, [r5, #-1]
-; CHECK-NEXT: ldrb r7, [r6, #1]
+; CHECK-NEXT: ldrb r7, [r6, #-3]
; CHECK-NEXT: smlabb r7, r7, r8, r2
; CHECK-NEXT: str r7, [r4]
-; CHECK-NEXT: ldrb.w r8, [r5]
-; CHECK-NEXT: adds r5, #4
-; CHECK-NEXT: ldrb r7, [r6, #2]
-; CHECK-NEXT: adds r6, #4
+; CHECK-NEXT: ldrb r8, [r5], #4
+; CHECK-NEXT: ldrb r7, [r6, #-2]
; CHECK-NEXT: smlabb r7, r7, r8, r2
; CHECK-NEXT: str r7, [r4, #4]
; CHECK-NEXT: adds r4, #16
@@ -740,17 +738,15 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_uchar(i8* nocapture readonl
; CHECK-NEXT: smlabb r7, r7, r8, r2
; CHECK-NEXT: str r7, [r4, #-8]
; CHECK-NEXT: ldrb r8, [r5, #-2]
-; CHECK-NEXT: ldrb r7, [r6]
+; CHECK-NEXT: ldrb r7, [r6], #4
; CHECK-NEXT: smlabb r7, r7, r8, r2
; CHECK-NEXT: str r7, [r4, #-4]
; CHECK-NEXT: ldrb r8, [r5, #-1]
-; CHECK-NEXT: ldrb r7, [r6, #1]
+; CHECK-NEXT: ldrb r7, [r6, #-3]
; CHECK-NEXT: smlabb r7, r7, r8, r2
; CHECK-NEXT: str r7, [r4]
-; CHECK-NEXT: ldrb.w r8, [r5]
-; CHECK-NEXT: adds r5, #4
-; CHECK-NEXT: ldrb r7, [r6, #2]
-; CHECK-NEXT: adds r6, #4
+; CHECK-NEXT: ldrb r8, [r5], #4
+; CHECK-NEXT: ldrb r7, [r6, #-2]
; CHECK-NEXT: smlabb r7, r7, r8, r2
; CHECK-NEXT: str r7, [r4, #4]
; CHECK-NEXT: adds r4, #16
diff --git a/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll b/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll
index e8ab7792b6df..1a5356138813 100644
--- a/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll
@@ -1147,31 +1147,30 @@ define void @fir(%struct.arm_fir_instance_f32* nocapture readonly %S, half* noca
; CHECK-NEXT: .LBB16_6: @ %for.body
; CHECK-NEXT: @ Parent Loop BB16_4 Depth=1
; CHECK-NEXT: @ => This Inner Loop Header: Depth=2
-; CHECK-NEXT: ldrh r0, [r6]
+; CHECK-NEXT: ldrh r0, [r6], #16
; CHECK-NEXT: vldrw.u32 q1, [r5]
; CHECK-NEXT: adds r1, r5, #2
; CHECK-NEXT: vfma.f16 q0, q1, r0
; CHECK-NEXT: vldrw.u32 q1, [r1]
-; CHECK-NEXT: ldrh r0, [r6, #2]
+; CHECK-NEXT: ldrh r0, [r6, #-14]
; CHECK-NEXT: adds r1, r5, #6
; CHECK-NEXT: vfma.f16 q0, q1, r0
-; CHECK-NEXT: ldrh r0, [r6, #4]
+; CHECK-NEXT: ldrh r0, [r6, #-12]
; CHECK-NEXT: vldrw.u32 q1, [r5, #4]
; CHECK-NEXT: vfma.f16 q0, q1, r0
; CHECK-NEXT: vldrw.u32 q1, [r1]
-; CHECK-NEXT: ldrh r0, [r6, #6]
+; CHECK-NEXT: ldrh r0, [r6, #-10]
; CHECK-NEXT: add.w r1, r5, #10
; CHECK-NEXT: vfma.f16 q0, q1, r0
-; CHECK-NEXT: ldrh r0, [r6, #8]
+; CHECK-NEXT: ldrh r0, [r6, #-8]
; CHECK-NEXT: vldrw.u32 q1, [r5, #8]
; CHECK-NEXT: vfma.f16 q0, q1, r0
; CHECK-NEXT: vldrw.u32 q1, [r1]
-; CHECK-NEXT: ldrh r0, [r6, #10]
-; CHECK-NEXT: ldrh r1, [r6, #14]
+; CHECK-NEXT: ldrh r0, [r6, #-6]
+; CHECK-NEXT: ldrh r1, [r6, #-2]
; CHECK-NEXT: vfma.f16 q0, q1, r0
-; CHECK-NEXT: ldrh r0, [r6, #12]
+; CHECK-NEXT: ldrh r0, [r6, #-4]
; CHECK-NEXT: vldrw.u32 q1, [r5, #12]
-; CHECK-NEXT: adds r6, #16
; CHECK-NEXT: vfma.f16 q0, q1, r0
; CHECK-NEXT: add.w r0, r5, #14
; CHECK-NEXT: vldrw.u32 q1, [r0]
diff --git a/llvm/test/CodeGen/Thumb2/mve-postinc-distribute.ll b/llvm/test/CodeGen/Thumb2/mve-postinc-distribute.ll
index fe9e7d197ca2..2db5bf59ecfa 100644
--- a/llvm/test/CodeGen/Thumb2/mve-postinc-distribute.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-postinc-distribute.ll
@@ -106,14 +106,12 @@ define void @arm_cmplx_dot_prod_q15(i16* nocapture readonly %pSrcA, i16* nocaptu
; CHECK-NEXT: wls lr, lr, .LBB1_7
; CHECK-NEXT: .LBB1_5: @ %while.body11
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldrsh.w r5, [r0, #2]
-; CHECK-NEXT: ldrsh.w r6, [r1]
-; CHECK-NEXT: ldrsh.w r9, [r0]
-; CHECK-NEXT: adds r0, #4
-; CHECK-NEXT: ldrsh.w r2, [r1, #2]
-; CHECK-NEXT: adds r1, #4
-; CHECK-NEXT: smlalbb r4, r11, r6, r5
+; CHECK-NEXT: ldrsh r9, [r0], #4
+; CHECK-NEXT: ldrsh r6, [r1], #4
+; CHECK-NEXT: ldrsh r5, [r0, #-2]
+; CHECK-NEXT: ldrsh r2, [r1, #-2]
; CHECK-NEXT: smlalbb r12, r7, r6, r9
+; CHECK-NEXT: smlalbb r4, r11, r6, r5
; CHECK-NEXT: muls r5, r2, r5
; CHECK-NEXT: smlalbb r4, r11, r2, r9
; CHECK-NEXT: subs.w r12, r12, r5
diff --git a/llvm/test/CodeGen/Thumb2/postinc-distribute.mir b/llvm/test/CodeGen/Thumb2/postinc-distribute.mir
index af39cced110a..d08ac4754c70 100644
--- a/llvm/test/CodeGen/Thumb2/postinc-distribute.mir
+++ b/llvm/test/CodeGen/Thumb2/postinc-distribute.mir
@@ -12,6 +12,8 @@
define i32* @t2STRBi12(i32* %x, i32 %y) { unreachable }
define i32* @storedadd(i32* %x, i32 %y) { unreachable }
+ define i32* @minsize2(i32* %x, i32 %y) minsize optsize { unreachable }
+ define i32* @minsize3(i32* %x, i32 %y) minsize optsize { unreachable }
...
---
@@ -57,9 +59,8 @@ body: |
; CHECK-LABEL: name: t2LDRHi12
; CHECK: liveins: $r0
; CHECK: [[COPY:%[0-9]+]]:gprnopc = COPY $r0
- ; CHECK: [[t2LDRHi12_:%[0-9]+]]:rgpr = t2LDRHi12 [[COPY]], 0, 14 /* CC::al */, $noreg :: (load 4)
- ; CHECK: [[t2ADDri:%[0-9]+]]:rgpr = nuw t2ADDri [[COPY]], 32, 14 /* CC::al */, $noreg, $noreg
- ; CHECK: $r0 = COPY [[t2ADDri]]
+ ; CHECK: [[t2LDRH_POST:%[0-9]+]]:rgpr, [[t2LDRH_POST1:%[0-9]+]]:rgpr = t2LDRH_POST [[COPY]], 32, 14 /* CC::al */, $noreg :: (load 4)
+ ; CHECK: $r0 = COPY [[t2LDRH_POST1]]
; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
%0:gprnopc = COPY $r0
%1:rgpr = t2LDRHi12 %0, 0, 14, $noreg :: (load 4, align 4)
@@ -84,9 +85,8 @@ body: |
; CHECK-LABEL: name: t2LDRSHi12
; CHECK: liveins: $r0
; CHECK: [[COPY:%[0-9]+]]:gprnopc = COPY $r0
- ; CHECK: [[t2LDRSHi12_:%[0-9]+]]:rgpr = t2LDRSHi12 [[COPY]], 0, 14 /* CC::al */, $noreg :: (load 4)
- ; CHECK: [[t2ADDri:%[0-9]+]]:rgpr = nuw t2ADDri [[COPY]], 32, 14 /* CC::al */, $noreg, $noreg
- ; CHECK: $r0 = COPY [[t2ADDri]]
+ ; CHECK: [[t2LDRSH_POST:%[0-9]+]]:rgpr, [[t2LDRSH_POST1:%[0-9]+]]:rgpr = t2LDRSH_POST [[COPY]], 32, 14 /* CC::al */, $noreg :: (load 4)
+ ; CHECK: $r0 = COPY [[t2LDRSH_POST1]]
; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
%0:gprnopc = COPY $r0
%1:rgpr = t2LDRSHi12 %0, 0, 14, $noreg :: (load 4, align 4)
@@ -111,9 +111,8 @@ body: |
; CHECK-LABEL: name: t2LDRBi12
; CHECK: liveins: $r0
; CHECK: [[COPY:%[0-9]+]]:gprnopc = COPY $r0
- ; CHECK: [[t2LDRBi12_:%[0-9]+]]:rgpr = t2LDRBi12 [[COPY]], 0, 14 /* CC::al */, $noreg :: (load 4)
- ; CHECK: [[t2ADDri:%[0-9]+]]:rgpr = nuw t2ADDri [[COPY]], 32, 14 /* CC::al */, $noreg, $noreg
- ; CHECK: $r0 = COPY [[t2ADDri]]
+ ; CHECK: [[t2LDRB_POST:%[0-9]+]]:rgpr, [[t2LDRB_POST1:%[0-9]+]]:rgpr = t2LDRB_POST [[COPY]], 32, 14 /* CC::al */, $noreg :: (load 4)
+ ; CHECK: $r0 = COPY [[t2LDRB_POST1]]
; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
%0:gprnopc = COPY $r0
%1:rgpr = t2LDRBi12 %0, 0, 14, $noreg :: (load 4, align 4)
@@ -138,9 +137,8 @@ body: |
; CHECK-LABEL: name: t2LDRSBi12
; CHECK: liveins: $r0
; CHECK: [[COPY:%[0-9]+]]:gprnopc = COPY $r0
- ; CHECK: [[t2LDRSBi12_:%[0-9]+]]:rgpr = t2LDRSBi12 [[COPY]], 0, 14 /* CC::al */, $noreg :: (load 4)
- ; CHECK: [[t2ADDri:%[0-9]+]]:rgpr = nuw t2ADDri [[COPY]], 32, 14 /* CC::al */, $noreg, $noreg
- ; CHECK: $r0 = COPY [[t2ADDri]]
+ ; CHECK: [[t2LDRSB_POST:%[0-9]+]]:rgpr, [[t2LDRSB_POST1:%[0-9]+]]:rgpr = t2LDRSB_POST [[COPY]], 32, 14 /* CC::al */, $noreg :: (load 4)
+ ; CHECK: $r0 = COPY [[t2LDRSB_POST1]]
; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
%0:gprnopc = COPY $r0
%1:rgpr = t2LDRSBi12 %0, 0, 14, $noreg :: (load 4, align 4)
@@ -197,9 +195,8 @@ body: |
; CHECK: liveins: $r0, $r1
; CHECK: [[COPY:%[0-9]+]]:gprnopc = COPY $r0
; CHECK: [[COPY1:%[0-9]+]]:rgpr = COPY $r1
- ; CHECK: t2STRHi12 [[COPY1]], [[COPY]], 0, 14 /* CC::al */, $noreg :: (store 4)
- ; CHECK: [[t2ADDri:%[0-9]+]]:rgpr = nuw t2ADDri [[COPY]], 32, 14 /* CC::al */, $noreg, $noreg
- ; CHECK: $r0 = COPY [[t2ADDri]]
+ ; CHECK: early-clobber %2:rgpr = t2STRH_POST [[COPY1]], [[COPY]], 32, 14 /* CC::al */, $noreg :: (store 4)
+ ; CHECK: $r0 = COPY %2
; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
%0:gprnopc = COPY $r0
%1:rgpr = COPY $r1
@@ -227,9 +224,8 @@ body: |
; CHECK: liveins: $r0, $r1
; CHECK: [[COPY:%[0-9]+]]:gprnopc = COPY $r0
; CHECK: [[COPY1:%[0-9]+]]:rgpr = COPY $r1
- ; CHECK: t2STRBi12 [[COPY1]], [[COPY]], 0, 14 /* CC::al */, $noreg :: (store 4)
- ; CHECK: [[t2ADDri:%[0-9]+]]:rgpr = nuw t2ADDri [[COPY]], 32, 14 /* CC::al */, $noreg, $noreg
- ; CHECK: $r0 = COPY [[t2ADDri]]
+ ; CHECK: early-clobber %2:rgpr = t2STRB_POST [[COPY1]], [[COPY]], 32, 14 /* CC::al */, $noreg :: (store 4)
+ ; CHECK: $r0 = COPY %2
; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
%0:gprnopc = COPY $r0
%1:rgpr = COPY $r1
@@ -265,3 +261,65 @@ body: |
tBX_RET 14, $noreg, implicit $r0
...
+---
+name: minsize2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gprnopc, preferred-register: '' }
+ - { id: 1, class: rgpr, preferred-register: '' }
+ - { id: 2, class: rgpr, preferred-register: '' }
+ - { id: 3, class: rgpr, preferred-register: '' }
+liveins:
+ - { reg: '$r0', virtual-reg: '%0' }
+body: |
+ bb.0:
+ liveins: $r0
+
+ ; CHECK-LABEL: name: minsize2
+ ; CHECK: liveins: $r0
+ ; CHECK: [[COPY:%[0-9]+]]:gprnopc = COPY $r0
+ ; CHECK: [[t2LDRB_POST:%[0-9]+]]:rgpr, [[t2LDRB_POST1:%[0-9]+]]:rgpr = t2LDRB_POST [[COPY]], 32, 14 /* CC::al */, $noreg :: (load 4)
+ ; CHECK: [[t2LDRBi8_:%[0-9]+]]:rgpr = t2LDRBi8 [[t2LDRB_POST1]], -30, 14 /* CC::al */, $noreg :: (load 4)
+ ; CHECK: $r0 = COPY [[t2LDRB_POST1]]
+ ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
+ %0:gprnopc = COPY $r0
+ %1:rgpr = t2LDRBi12 %0, 0, 14, $noreg :: (load 4, align 4)
+ %3:rgpr = t2LDRBi12 %0, 2, 14, $noreg :: (load 4, align 4)
+ %2:rgpr = nuw t2ADDri %0, 32, 14, $noreg, $noreg
+ $r0 = COPY %2
+ tBX_RET 14, $noreg, implicit $r0
+
+...
+---
+name: minsize3
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gprnopc, preferred-register: '' }
+ - { id: 1, class: rgpr, preferred-register: '' }
+ - { id: 2, class: rgpr, preferred-register: '' }
+ - { id: 3, class: rgpr, preferred-register: '' }
+ - { id: 4, class: rgpr, preferred-register: '' }
+liveins:
+ - { reg: '$r0', virtual-reg: '%0' }
+body: |
+ bb.0:
+ liveins: $r0
+
+ ; CHECK-LABEL: name: minsize3
+ ; CHECK: liveins: $r0
+ ; CHECK: [[COPY:%[0-9]+]]:gprnopc = COPY $r0
+ ; CHECK: [[t2LDRBi12_:%[0-9]+]]:rgpr = t2LDRBi12 [[COPY]], 0, 14 /* CC::al */, $noreg :: (load 4)
+ ; CHECK: [[t2LDRBi12_1:%[0-9]+]]:rgpr = t2LDRBi12 [[COPY]], 2, 14 /* CC::al */, $noreg :: (load 4)
+ ; CHECK: [[t2LDRBi12_2:%[0-9]+]]:rgpr = t2LDRBi12 [[COPY]], 4, 14 /* CC::al */, $noreg :: (load 4)
+ ; CHECK: [[t2ADDri:%[0-9]+]]:rgpr = nuw t2ADDri [[COPY]], 32, 14 /* CC::al */, $noreg, $noreg
+ ; CHECK: $r0 = COPY [[t2ADDri]]
+ ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
+ %0:gprnopc = COPY $r0
+ %1:rgpr = t2LDRBi12 %0, 0, 14, $noreg :: (load 4, align 4)
+ %3:rgpr = t2LDRBi12 %0, 2, 14, $noreg :: (load 4, align 4)
+ %4:rgpr = t2LDRBi12 %0, 4, 14, $noreg :: (load 4, align 4)
+ %2:rgpr = nuw t2ADDri %0, 32, 14, $noreg, $noreg
+ $r0 = COPY %2
+ tBX_RET 14, $noreg, implicit $r0
+
+...
More information about the llvm-commits
mailing list