[llvm] 8232497 - [ARM][THUMB2] Allow emitting T3 types of add and sub

Mon Dec 30 03:04:05 PST 2019

Author: Diogo Sampaio
Date: 2019-12-30T11:03:58Z
New Revision: 8232497c313e28ed277b28733301f2020f225e69

URL: https://github.com/llvm/llvm-project/commit/8232497c313e28ed277b28733301f2020f225e69
DIFF: https://github.com/llvm/llvm-project/commit/8232497c313e28ed277b28733301f2020f225e69.diff

LOG: [ARM][THUMB2] Allow emitting T3 types of add and sub

Summary:
This patch allows the emitT2RegPlusImmediate function
to emit Thumb2 add and sub instructions with 12-bit
immediates.
- Split out from D70680

Reviewers: eli.friedman, olista01, efriedma

Reviewed By: efriedma

Subscribers: efriedma, kristof.beyls, hiraditya, dmgreen, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D71361

Added: 
    llvm/test/CodeGen/MIR/ARM/thumb2-sub-sp-t3.mir

Modified: 
    llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
    llvm/test/CodeGen/Thumb2/large-call.ll
    llvm/test/CodeGen/Thumb2/mve-stacksplot.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
index 4a459d2c2fb1..dbb1b27a67af 100644

--- a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -303,50 +303,41 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
       continue;
     }
 
+    assert((DestReg != ARM::SP || BaseReg == ARM::SP) &&
+           "Writing to SP, from other register.");
+
+    // Try to use T1, as it is smaller
+    if ((DestReg == ARM::SP) && (ThisVal < ((1 << 7) - 1) * 4)) {
+      assert((ThisVal & 3) == 0 && "Stack update is not multiple of 4?");
+      Opc = isSub ? ARM::tSUBspi : ARM::tADDspi;
+      BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
+          .addReg(BaseReg)
+          .addImm(ThisVal / 4)
+          .setMIFlags(MIFlags)
+          .add(predOps(ARMCC::AL));
+      break;
+    }
     bool HasCCOut = true;
-    if (BaseReg == ARM::SP) {
-      // sub sp, sp, #imm7
-      if (DestReg == ARM::SP && (ThisVal < ((1 << 7)-1) * 4)) {
-        assert((ThisVal & 3) == 0 && "Stack update is not multiple of 4?");
-        Opc = isSub ? ARM::tSUBspi : ARM::tADDspi;
-        BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
-            .addReg(BaseReg)
-            .addImm(ThisVal / 4)
-            .setMIFlags(MIFlags)
-            .add(predOps(ARMCC::AL));
-        NumBytes = 0;
-        continue;
-      }
-
-      // sub rd, sp, so_imm
-      Opc = isSub ? ARM::t2SUBri : ARM::t2ADDri;
-      if (ARM_AM::getT2SOImmVal(NumBytes) != -1) {
-        NumBytes = 0;
-      } else {
-        // FIXME: Move this to ARMAddressingModes.h?
-        unsigned RotAmt = countLeadingZeros(ThisVal);
-        ThisVal = ThisVal & ARM_AM::rotr32(0xff000000U, RotAmt);
-        NumBytes &= ~ThisVal;
-        assert(ARM_AM::getT2SOImmVal(ThisVal) != -1 &&
-               "Bit extraction didn't work?");
-      }
+    int ImmIsT2SO = ARM_AM::getT2SOImmVal(ThisVal);
+
+    Opc = isSub ? ARM::t2SUBri : ARM::t2ADDri;
+    // Prefer T2: sub rd, rn, so_imm | sub sp, sp, so_imm
+    if (ImmIsT2SO != -1) {
+      NumBytes = 0;
+    } else if (ThisVal < 4096) {
+      // Prefer T3 if it can be done in a single go:
+      // subw rd, rn, imm12 | subw sp, sp, imm12
+      Opc = isSub ? ARM::t2SUBri12 : ARM::t2ADDri12;
+      HasCCOut = false;
+      NumBytes = 0;
     } else {
-      assert(DestReg != ARM::SP && BaseReg != ARM::SP);
-      Opc = isSub ? ARM::t2SUBri : ARM::t2ADDri;
-      if (ARM_AM::getT2SOImmVal(NumBytes) != -1) {
-        NumBytes = 0;
-      } else if (ThisVal < 4096) {
-        Opc = isSub ? ARM::t2SUBri12 : ARM::t2ADDri12;
-        HasCCOut = false;
-        NumBytes = 0;
-      } else {
-        // FIXME: Move this to ARMAddressingModes.h?
-        unsigned RotAmt = countLeadingZeros(ThisVal);
-        ThisVal = ThisVal & ARM_AM::rotr32(0xff000000U, RotAmt);
-        NumBytes &= ~ThisVal;
-        assert(ARM_AM::getT2SOImmVal(ThisVal) != -1 &&
-               "Bit extraction didn't work?");
-      }
+      // Use one T2 instruction to reduce NumBytes
+      // FIXME: Move this to ARMAddressingModes.h?
+      unsigned RotAmt = countLeadingZeros(ThisVal);
+      ThisVal = ThisVal & ARM_AM::rotr32(0xff000000U, RotAmt);
+      NumBytes &= ~ThisVal;
+      assert(ARM_AM::getT2SOImmVal(ThisVal) != -1 &&
+             "Bit extraction didn't work?");
     }
 
     // Build the new ADD / SUB.

diff  --git a/llvm/test/CodeGen/MIR/ARM/thumb2-sub-sp-t3.mir b/llvm/test/CodeGen/MIR/ARM/thumb2-sub-sp-t3.mir
new file mode 100644
index 000000000000..d793bfc21f45
--- /dev/null
+++ b/llvm/test/CodeGen/MIR/ARM/thumb2-sub-sp-t3.mir
@@ -0,0 +1,88 @@
+--- |
+  ; RUN: llc --run-pass=prologepilog -o - %s | FileCheck %s
+  ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_register $r7
+  ; CHECK-NEXT: $sp = frame-setup t2SUBri12 killed $sp, 4008, 14, $noreg
+
+  target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
+  target triple = "thumbv7-none-none-eabi"
+  define void @foo() #0 {
+  entry:
+    %v = alloca [4000 x i8], align 1
+    %s = alloca i8*, align 4
+    %0 = bitcast [4000 x i8]* %v to i8*
+    store i8* %0, i8** %s, align 4
+    %1 = load i8*, i8** %s, align 4
+    call void @bar(i8* %1)
+    ret void
+  }
+  declare void @bar(i8*) #1
+  ; Function Attrs: nounwind
+  declare void @llvm.stackprotector(i8*, i8**) #2
+
+  attributes #0 = { noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+armv7-a,+d32,+dsp,+fp64,+fpregs,+neon,+strict-align,+thumb-mode,+vfp2,+vfp2sp,+vfp3,+vfp3d16,+vfp3d16sp,+vfp3sp" "unsafe-fp-math"="false" "use-soft-float"="false" }
+  attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+armv7-a,+d32,+dsp,+fp64,+fpregs,+neon,+strict-align,+thumb-mode,+vfp2,+vfp2sp,+vfp3,+vfp3d16,+vfp3d16sp,+vfp3sp" "unsafe-fp-math"="false" "use-soft-float"="false" }
+  attributes #2 = { nounwind }
+
+  !llvm.module.flags = !{!0, !1}
+  !llvm.ident = !{!2}
+
+  !0 = !{i32 1, !"wchar_size", i32 4}
+  !1 = !{i32 1, !"min_enum_size", i32 4}
+  !2 = !{!"clang version 10.0.0 (git at github.com:llvm/llvm-project.git ee219345881bdf2c144d40731f055e7b36bc8bce)"}
+
+...
+---
+name:            foo
+alignment:       2
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: true
+hasWinCFI:       false
+registers:       []
+liveins:         []
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    4
+  adjustsStack:    true
+  hasCalls:        true
+  stackProtector:  ''
+  maxCallFrameSize: 0
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  localFrameSize:  4004
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:      []
+stack:
+  - { id: 0, name: v, type: default, offset: 0, size: 4000, alignment: 1,
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+      local-offset: -4000, debug-info-variable: '', debug-info-expression: '',
+      debug-info-location: '' }
+  - { id: 1, name: s, type: default, offset: 0, size: 4, alignment: 4,
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+      local-offset: -4004, debug-info-variable: '', debug-info-expression: '',
+      debug-info-location: '' }
+callSites:       []
+constants:       []
+machineFunctionInfo: {}
+body:             |
+  bb.0.entry:
+    renamable $r0 = t2ADDri %stack.0.v, 0, 14, $noreg, $noreg
+    t2STRi12 killed renamable $r0, %stack.1.s, 0, 14, $noreg :: (store 4 into %ir.s)
+    renamable $r0 = t2LDRi12 %stack.1.s, 0, 14, $noreg :: (dereferenceable load 4 from %ir.s)
+    ADJCALLSTACKDOWN 0, 0, 14, $noreg, implicit-def dead $sp, implicit $sp
+    tBL 14, $noreg, @bar, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit $r0, implicit-def $sp
+    ADJCALLSTACKUP 0, 0, 14, $noreg, implicit-def dead $sp, implicit $sp
+    tBX_RET 14, $noreg
+
+...

diff  --git a/llvm/test/CodeGen/Thumb2/large-call.ll b/llvm/test/CodeGen/Thumb2/large-call.ll
index f6a5a60ba3c2..9ec622ef6fbb 100644
--- a/llvm/test/CodeGen/Thumb2/large-call.ll
+++ b/llvm/test/CodeGen/Thumb2/large-call.ll
@@ -9,7 +9,7 @@ target triple = "thumbv7-apple-ios0.0.0"
 ; CHECK: main
 ; CHECK: vmov.f64
 ; Adjust SP for the large call
-; CHECK: sub sp,
+; CHECK: subw sp, sp, #3720
 ; Store to call frame + #8
 ; CHECK: vstr{{.*\[}}sp, #8]
 ; Don't clobber that store until the call.

diff  --git a/llvm/test/CodeGen/Thumb2/mve-stacksplot.mir b/llvm/test/CodeGen/Thumb2/mve-stacksplot.mir
index dee020aa0e71..ff1229d93ed0 100644
--- a/llvm/test/CodeGen/Thumb2/mve-stacksplot.mir
+++ b/llvm/test/CodeGen/Thumb2/mve-stacksplot.mir
@@ -118,8 +118,7 @@ body: |
     ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $r6, -28
     ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $r5, -32
     ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $r4, -36
-    ; CHECK-NEXT: $sp = frame-setup t2SUBri killed $sp, 1216, 14, $noreg, $noreg
-    ; CHECK-NEXT: $sp = frame-setup tSUBspi $sp, 1, 14, $noreg
+    ; CHECK-NEXT: $sp = frame-setup t2SUBri12 killed $sp, 1220, 14, $noreg
     ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 1256
     ; CHECK-NEXT: $r0 = IMPLICIT_DEF
     ; CHECK-NEXT: $r1 = IMPLICIT_DEF