[llvm] 8232497 - [ARM][THUMB2] Allow emitting T3 types of add and sub
Diogo Sampaio via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 30 03:04:05 PST 2019
Author: Diogo Sampaio
Date: 2019-12-30T11:03:58Z
New Revision: 8232497c313e28ed277b28733301f2020f225e69
URL: https://github.com/llvm/llvm-project/commit/8232497c313e28ed277b28733301f2020f225e69
DIFF: https://github.com/llvm/llvm-project/commit/8232497c313e28ed277b28733301f2020f225e69.diff
LOG: [ARM][THUMB2] Allow emitting T3 types of add and sub
Summary:
This patch allows to emit thumb2 add and sub
instructions with 12 bit immediates in the
emitT2RegPlusImmediate function.
- Splitting parts of the D70680
Reviewers: eli.friedman, olista01, efriedma
Reviewed By: efriedma
Subscribers: efriedma, kristof.beyls, hiraditya, dmgreen, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D71361
Added:
llvm/test/CodeGen/MIR/ARM/thumb2-sub-sp-t3.mir
Modified:
llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
llvm/test/CodeGen/Thumb2/large-call.ll
llvm/test/CodeGen/Thumb2/mve-stacksplot.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
index 4a459d2c2fb1..dbb1b27a67af 100644
--- a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -303,50 +303,41 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
continue;
}
+ assert((DestReg != ARM::SP || BaseReg == ARM::SP) &&
+ "Writing to SP, from other register.");
+
+ // Try to use T1, as it smaller
+ if ((DestReg == ARM::SP) && (ThisVal < ((1 << 7) - 1) * 4)) {
+ assert((ThisVal & 3) == 0 && "Stack update is not multiple of 4?");
+ Opc = isSub ? ARM::tSUBspi : ARM::tADDspi;
+ BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
+ .addReg(BaseReg)
+ .addImm(ThisVal / 4)
+ .setMIFlags(MIFlags)
+ .add(predOps(ARMCC::AL));
+ break;
+ }
bool HasCCOut = true;
- if (BaseReg == ARM::SP) {
- // sub sp, sp, #imm7
- if (DestReg == ARM::SP && (ThisVal < ((1 << 7)-1) * 4)) {
- assert((ThisVal & 3) == 0 && "Stack update is not multiple of 4?");
- Opc = isSub ? ARM::tSUBspi : ARM::tADDspi;
- BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
- .addReg(BaseReg)
- .addImm(ThisVal / 4)
- .setMIFlags(MIFlags)
- .add(predOps(ARMCC::AL));
- NumBytes = 0;
- continue;
- }
-
- // sub rd, sp, so_imm
- Opc = isSub ? ARM::t2SUBri : ARM::t2ADDri;
- if (ARM_AM::getT2SOImmVal(NumBytes) != -1) {
- NumBytes = 0;
- } else {
- // FIXME: Move this to ARMAddressingModes.h?
- unsigned RotAmt = countLeadingZeros(ThisVal);
- ThisVal = ThisVal & ARM_AM::rotr32(0xff000000U, RotAmt);
- NumBytes &= ~ThisVal;
- assert(ARM_AM::getT2SOImmVal(ThisVal) != -1 &&
- "Bit extraction didn't work?");
- }
+ int ImmIsT2SO = ARM_AM::getT2SOImmVal(ThisVal);
+
+ Opc = isSub ? ARM::t2SUBri : ARM::t2ADDri;
+ // Prefer T2: sub rd, rn, so_imm | sub sp, sp, so_imm
+ if (ImmIsT2SO != -1) {
+ NumBytes = 0;
+ } else if (ThisVal < 4096) {
+ // Prefer T3 if can make it in a single go: subw rd, rn, imm12 | subw sp,
+ // sp, imm12
+ Opc = isSub ? ARM::t2SUBri12 : ARM::t2ADDri12;
+ HasCCOut = false;
+ NumBytes = 0;
} else {
- assert(DestReg != ARM::SP && BaseReg != ARM::SP);
- Opc = isSub ? ARM::t2SUBri : ARM::t2ADDri;
- if (ARM_AM::getT2SOImmVal(NumBytes) != -1) {
- NumBytes = 0;
- } else if (ThisVal < 4096) {
- Opc = isSub ? ARM::t2SUBri12 : ARM::t2ADDri12;
- HasCCOut = false;
- NumBytes = 0;
- } else {
- // FIXME: Move this to ARMAddressingModes.h?
- unsigned RotAmt = countLeadingZeros(ThisVal);
- ThisVal = ThisVal & ARM_AM::rotr32(0xff000000U, RotAmt);
- NumBytes &= ~ThisVal;
- assert(ARM_AM::getT2SOImmVal(ThisVal) != -1 &&
- "Bit extraction didn't work?");
- }
+ // Use one T2 instruction to reduce NumBytes
+ // FIXME: Move this to ARMAddressingModes.h?
+ unsigned RotAmt = countLeadingZeros(ThisVal);
+ ThisVal = ThisVal & ARM_AM::rotr32(0xff000000U, RotAmt);
+ NumBytes &= ~ThisVal;
+ assert(ARM_AM::getT2SOImmVal(ThisVal) != -1 &&
+ "Bit extraction didn't work?");
}
// Build the new ADD / SUB.
diff --git a/llvm/test/CodeGen/MIR/ARM/thumb2-sub-sp-t3.mir b/llvm/test/CodeGen/MIR/ARM/thumb2-sub-sp-t3.mir
new file mode 100644
index 000000000000..d793bfc21f45
--- /dev/null
+++ b/llvm/test/CodeGen/MIR/ARM/thumb2-sub-sp-t3.mir
@@ -0,0 +1,88 @@
+--- |
+ ; RUN: llc --run-pass=prologepilog -o - %s | FileCheck %s
+ ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_register $r7
+ ; CHECK-NEXT: $sp = frame-setup t2SUBri12 killed $sp, 4008, 14, $noreg
+
+ target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
+ target triple = "thumbv7-none-none-eabi"
+ define void @foo() #0 {
+ entry:
+ %v = alloca [4000 x i8], align 1
+ %s = alloca i8*, align 4
+ %0 = bitcast [4000 x i8]* %v to i8*
+ store i8* %0, i8** %s, align 4
+ %1 = load i8*, i8** %s, align 4
+ call void @bar(i8* %1)
+ ret void
+ }
+ declare void @bar(i8*) #1
+ ; Function Attrs: nounwind
+ declare void @llvm.stackprotector(i8*, i8**) #2
+
+ attributes #0 = { noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+armv7-a,+d32,+dsp,+fp64,+fpregs,+neon,+strict-align,+thumb-mode,+vfp2,+vfp2sp,+vfp3,+vfp3d16,+vfp3d16sp,+vfp3sp" "unsafe-fp-math"="false" "use-soft-float"="false" }
+ attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+armv7-a,+d32,+dsp,+fp64,+fpregs,+neon,+strict-align,+thumb-mode,+vfp2,+vfp2sp,+vfp3,+vfp3d16,+vfp3d16sp,+vfp3sp" "unsafe-fp-math"="false" "use-soft-float"="false" }
+ attributes #2 = { nounwind }
+
+ !llvm.module.flags = !{!0, !1}
+ !llvm.ident = !{!2}
+
+ !0 = !{i32 1, !"wchar_size", i32 4}
+ !1 = !{i32 1, !"min_enum_size", i32 4}
+ !2 = !{!"clang version 10.0.0 (git at github.com:llvm/llvm-project.git ee219345881bdf2c144d40731f055e7b36bc8bce)"}
+
+...
+---
+name: foo
+alignment: 2
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+failedISel: false
+tracksRegLiveness: true
+hasWinCFI: false
+registers: []
+liveins: []
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 4
+ adjustsStack: true
+ hasCalls: true
+ stackProtector: ''
+ maxCallFrameSize: 0
+ cvBytesOfCalleeSavedRegisters: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+ localFrameSize: 4004
+ savePoint: ''
+ restorePoint: ''
+fixedStack: []
+stack:
+ - { id: 0, name: v, type: default, offset: 0, size: 4000, alignment: 1,
+ stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+ local-offset: -4000, debug-info-variable: '', debug-info-expression: '',
+ debug-info-location: '' }
+ - { id: 1, name: s, type: default, offset: 0, size: 4, alignment: 4,
+ stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+ local-offset: -4004, debug-info-variable: '', debug-info-expression: '',
+ debug-info-location: '' }
+callSites: []
+constants: []
+machineFunctionInfo: {}
+body: |
+ bb.0.entry:
+ renamable $r0 = t2ADDri %stack.0.v, 0, 14, $noreg, $noreg
+ t2STRi12 killed renamable $r0, %stack.1.s, 0, 14, $noreg :: (store 4 into %ir.s)
+ renamable $r0 = t2LDRi12 %stack.1.s, 0, 14, $noreg :: (dereferenceable load 4 from %ir.s)
+ ADJCALLSTACKDOWN 0, 0, 14, $noreg, implicit-def dead $sp, implicit $sp
+ tBL 14, $noreg, @bar, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit $r0, implicit-def $sp
+ ADJCALLSTACKUP 0, 0, 14, $noreg, implicit-def dead $sp, implicit $sp
+ tBX_RET 14, $noreg
+
+...
diff --git a/llvm/test/CodeGen/Thumb2/large-call.ll b/llvm/test/CodeGen/Thumb2/large-call.ll
index f6a5a60ba3c2..9ec622ef6fbb 100644
--- a/llvm/test/CodeGen/Thumb2/large-call.ll
+++ b/llvm/test/CodeGen/Thumb2/large-call.ll
@@ -9,7 +9,7 @@ target triple = "thumbv7-apple-ios0.0.0"
; CHECK: main
; CHECK: vmov.f64
; Adjust SP for the large call
-; CHECK: sub sp,
+; CHECK: subw sp, sp, #3720
; Store to call frame + #8
; CHECK: vstr{{.*\[}}sp, #8]
; Don't clobber that store until the call.
diff --git a/llvm/test/CodeGen/Thumb2/mve-stacksplot.mir b/llvm/test/CodeGen/Thumb2/mve-stacksplot.mir
index dee020aa0e71..ff1229d93ed0 100644
--- a/llvm/test/CodeGen/Thumb2/mve-stacksplot.mir
+++ b/llvm/test/CodeGen/Thumb2/mve-stacksplot.mir
@@ -118,8 +118,7 @@ body: |
; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $r6, -28
; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $r5, -32
; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $r4, -36
- ; CHECK-NEXT: $sp = frame-setup t2SUBri killed $sp, 1216, 14, $noreg, $noreg
- ; CHECK-NEXT: $sp = frame-setup tSUBspi $sp, 1, 14, $noreg
+ ; CHECK-NEXT: $sp = frame-setup t2SUBri12 killed $sp, 1220, 14, $noreg
; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 1256
; CHECK-NEXT: $r0 = IMPLICIT_DEF
; CHECK-NEXT: $r1 = IMPLICIT_DEF
More information about the llvm-commits
mailing list