[llvm] 8211cfb - [ARM] Don't shrink STM if it would cause an unknown base register store

John Brawn via llvm-commits llvm-commits at lists.llvm.org
Wed Apr 22 06:51:16 PDT 2020


Author: John Brawn
Date: 2020-04-22T14:50:42+01:00
New Revision: 8211cfb7c8bd1bedd5b3ed936d5b8f784c6bfd21

URL: https://github.com/llvm/llvm-project/commit/8211cfb7c8bd1bedd5b3ed936d5b8f784c6bfd21
DIFF: https://github.com/llvm/llvm-project/commit/8211cfb7c8bd1bedd5b3ed936d5b8f784c6bfd21.diff

LOG: [ARM] Don't shrink STM if it would cause an unknown base register store

If a 16-bit thumb STM with writeback stores the base register but it isn't the
first register in the list, then an unknown value is stored. The load/store
optimizer knows this and generates a 32-bit STM without writeback instead, but
thumb2 size reduction converts it into a 16-bit STM. Fix this by having thumb2
size reduction notice such STMs and leave them as they are.

Differential Revision: https://reviews.llvm.org/D78493

Added: 
    

Modified: 
    llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
    llvm/test/CodeGen/Thumb/stm-deprecated.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp b/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
index d3e22b75c3ee..ae661594bdc9 100644
--- a/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -516,13 +516,23 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
     isLdStMul = true;
     break;
   }
-  case ARM::t2STMIA:
-    // If the base register is killed, we don't care what its value is after the
-    // instruction, so we can use an updating STMIA.
+  case ARM::t2STMIA: {
+    // t2STMIA is reduced to tSTMIA_UPD which has writeback. We can only do this
+    // if the base register is killed, as then it doesn't matter what its value
+    // is after the instruction.
     if (!MI->getOperand(0).isKill())
       return false;
 
+    // If the base register is in the register list and isn't the lowest
+    // numbered register (i.e. it's in operand 4 onwards) then with writeback
+    // the stored value is unknown, so we can't convert to tSTMIA_UPD.
+    Register BaseReg = MI->getOperand(0).getReg();
+    for (unsigned i = 4; i < MI->getNumOperands(); ++i)
+      if (MI->getOperand(i).getReg() == BaseReg)
+        return false;
+
     break;
+  }
   case ARM::t2LDMIA_RET: {
     Register BaseReg = MI->getOperand(1).getReg();
     if (BaseReg != ARM::SP)

diff  --git a/llvm/test/CodeGen/Thumb/stm-deprecated.ll b/llvm/test/CodeGen/Thumb/stm-deprecated.ll
index ffe2c0afd921..a3fd7562b9cf 100644
--- a/llvm/test/CodeGen/Thumb/stm-deprecated.ll
+++ b/llvm/test/CodeGen/Thumb/stm-deprecated.ll
@@ -1,19 +1,81 @@
-; RUN: llc -mtriple=thumbv6m-eabi -verify-machineinstrs %s -o - | FileCheck %s
-; RUN: llc -mtriple=thumbv5e-linux-gnueabi -verify-machineinstrs %s -o - | FileCheck %s
+; RUN: llc -mtriple=thumbv6m-eabi -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-T1
+; RUN: llc -mtriple=thumbv5e-linux-gnueabi -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-T1
+; RUN: llc -mtriple=thumbv7m -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-T2
+; RUN: llc -mtriple=thumbv7a -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-T2
 
-%0 = type { %0*, %0*, i32 }
+%struct1 = type { %struct1*, %struct1*, i32 }
+%struct2 = type { i32, i32, i32* }
 
- at x1 = external global %0, align 4
- at x2 = external global %0, align 4
+ at x1 = external global %struct1, align 4
+ at x2 = external global %struct1, align 4
 
-; CHECK: str r0, [r1]
-; CHECK-NEXT: str r1, [r1, #4]
+declare void @fn1(i32, i32)
+declare void @fn2(%struct2*)
+
+; CHECK-LABEL: test1:
+; CHECK-T1: str r0, [r1]
+; CHECK-T1-NEXT: str r1, [r1, #4]
+; CHECK-T2: strd r0, r1, [r1]
 ; CHECK-NOT: stm
+define void @test1(i32 %unused, %struct1* %x) {
+  %first = getelementptr inbounds %struct1, %struct1* %x, i32 0, i32 0
+  %second = getelementptr inbounds %struct1, %struct1* %x, i32 0, i32 1
+  store %struct1* @x1, %struct1** %first
+  store %struct1* %x, %struct1** %second
+  ret void
+}
+
+; CHECK-LABEL: test2:
+; CHECK-T1: str r0, [r2]
+; CHECK-T1-NEXT: str r1, [r2, #4]
+; CHECK-T1-NEXT: str r2, [r2, #8]
+; CHECK-T2: stm.w r2, {r0, r1, r2}
+; CHECK-NOT: stm r2!, {r0, r1, r2}
+define i32 @test2(i32 %a, i32 %b, %struct2* %p) {
+entry:
+  %p1 = getelementptr inbounds %struct2, %struct2* %p, i32 0, i32 0
+  %p2 = getelementptr inbounds %struct2, %struct2* %p, i32 0, i32 1
+  %p3 = getelementptr inbounds %struct2, %struct2* %p, i32 0, i32 2
+  store i32 %a, i32* %p1, align 4
+  store i32 %b, i32* %p2, align 4
+  store i32* %p1, i32** %p3, align 4
+  call void @fn1(i32 %a, i32 %b)
+  ret i32 0
+}
+
+; CHECK-LABEL: test3:
+; CHECK-T1: str r0, [r2]
+; CHECK-T1-NEXT: str r1, [r2, #4]
+; CHECK-T1-NEXT: str r2, [r2, #8]
+; CHECK-T2: stm.w r2, {r0, r1, r2}
+; CHECK-NOT: stm r2!, {r0, r1, r2}
+define i32 @test3(i32 %a, i32 %b, %struct2* %p) {
+entry:
+  %p1 = getelementptr inbounds %struct2, %struct2* %p, i32 0, i32 0
+  %p2 = getelementptr inbounds %struct2, %struct2* %p, i32 0, i32 1
+  %p3 = getelementptr inbounds %struct2, %struct2* %p, i32 0, i32 2
+  store i32 %a, i32* %p1, align 4
+  store i32 %b, i32* %p2, align 4
+  store i32* %p1, i32** %p3, align 4
+  %p4 = getelementptr inbounds %struct2, %struct2* %p, i32 1
+  call void @fn2(%struct2* %p4)
+  ret i32 0
+}
 
-define void @foo(i32 %unused, %0* %x) {
-  %first = getelementptr inbounds %0, %0* %x, i32 0, i32 0
-  %second = getelementptr inbounds %0, %0* %x, i32 0, i32 1
-  store %0* @x1, %0** %first
-  store %0* %x, %0** %second
-  unreachable
+; FIXME: We should be using stm in both thumb1 and thumb2
+; CHECK-LABEL: test4:
+; CHECK-T1: str r0, [r0]
+; CHECK-T1-NEXT: str r1, [r0, #4]
+; CHECK-T1-NEXT: str r2, [r0, #8]
+; CHECK-T2: stm r0!, {r0, r1, r2}
+define i32 @test4(%struct1* %p, %struct1* %q, i32 %a) {
+entry:
+  %p1 = getelementptr inbounds %struct1, %struct1* %p, i32 0, i32 0
+  %p2 = getelementptr inbounds %struct1, %struct1* %p, i32 0, i32 1
+  %p3 = getelementptr inbounds %struct1, %struct1* %p, i32 0, i32 2
+  store %struct1* %p, %struct1** %p1, align 4
+  store %struct1* %q, %struct1** %p2, align 4
+  store i32 %a, i32* %p3, align 4
+  call void @fn1(i32 %a, i32 %a)
+  ret i32 0
 }


        


More information about the llvm-commits mailing list