[llvm] r365749 - [ARM][LowOverheadLoops] Correct offset checking

Sam Parker via llvm-commits llvm-commits at lists.llvm.org
Thu Jul 11 02:56:16 PDT 2019


Author: sam_parker
Date: Thu Jul 11 02:56:15 2019
New Revision: 365749

URL: http://llvm.org/viewvc/llvm-project?rev=365749&view=rev
Log:
[ARM][LowOverheadLoops] Correct offset checking
    
This patch addresses a couple of problems:
1) The maximum supported offset of LE is -4094.
2) The offset of WLS also needs to be checked; it uses a
   maximum positive offset of 4094.
    
The use of BasicBlockUtils has been changed because the block offsets
weren't being initialised. In addition, isBBInRange accepts both
positive and negative offsets, so the branch direction is now checked
explicitly.
    
ARMISelLowering has been tweaked because the test case presented
another pattern that we weren't supporting.

Added:
    llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/end-positive-offset.mir
    llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/while-negative-offset.mir
Modified:
    llvm/trunk/lib/Target/ARM/ARMBasicBlockInfo.h
    llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
    llvm/trunk/lib/Target/ARM/ARMLowOverheadLoops.cpp
    llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/size-limit.mir

Modified: llvm/trunk/lib/Target/ARM/ARMBasicBlockInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMBasicBlockInfo.h?rev=365749&r1=365748&r2=365749&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMBasicBlockInfo.h (original)
+++ llvm/trunk/lib/Target/ARM/ARMBasicBlockInfo.h Thu Jul 11 02:56:15 2019
@@ -132,6 +132,10 @@ public:
 
   unsigned getOffsetOf(MachineInstr *MI) const;
 
+  unsigned getOffsetOf(MachineBasicBlock *MBB) const {
+    return BBInfo[MBB->getNumber()].Offset;
+  }
+
   void adjustBBOffsetsAfter(MachineBasicBlock *MBB);
 
   void adjustBBSize(MachineBasicBlock *MBB, int Size) {

Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=365749&r1=365748&r2=365749&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Thu Jul 11 02:56:15 2019
@@ -12989,22 +12989,26 @@ static SDValue PerformHWLoopCombine(SDNo
                                     const ARMSubtarget *ST) {
   // Look for (brcond (xor test.set.loop.iterations, -1)
   SDValue CC = N->getOperand(1);
+  unsigned Opc = CC->getOpcode();
+  SDValue Int;
 
-  if (CC->getOpcode() != ISD::XOR && CC->getOpcode() != ISD::SETCC)
-    return SDValue();
+  if ((Opc == ISD::XOR || Opc == ISD::SETCC) &&
+      (CC->getOperand(0)->getOpcode() == ISD::INTRINSIC_W_CHAIN)) {
+
+    assert((isa<ConstantSDNode>(CC->getOperand(1)) &&
+            cast<ConstantSDNode>(CC->getOperand(1))->isOne()) &&
+            "Expected to compare against 1");
 
-  if (CC->getOperand(0)->getOpcode() != ISD::INTRINSIC_W_CHAIN)
+    Int = CC->getOperand(0);
+  } else if (CC->getOpcode() == ISD::INTRINSIC_W_CHAIN)
+    Int = CC;
+  else 
     return SDValue();
 
-  SDValue Int = CC->getOperand(0);
   unsigned IntOp = cast<ConstantSDNode>(Int.getOperand(1))->getZExtValue();
   if (IntOp != Intrinsic::test_set_loop_iterations)
     return SDValue();
 
-  assert((isa<ConstantSDNode>(CC->getOperand(1)) &&
-          cast<ConstantSDNode>(CC->getOperand(1))->isOne()) &&
-          "Expected to compare against 1");
-
   SDLoc dl(Int);
   SDValue Chain = N->getOperand(0);
   SDValue Elements = Int.getOperand(2);

Modified: llvm/trunk/lib/Target/ARM/ARMLowOverheadLoops.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMLowOverheadLoops.cpp?rev=365749&r1=365748&r2=365749&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMLowOverheadLoops.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMLowOverheadLoops.cpp Thu Jul 11 02:56:15 2019
@@ -91,6 +91,7 @@ bool ARMLowOverheadLoops::runOnMachineFu
     MF.getSubtarget().getInstrInfo());
   BBUtils = std::unique_ptr<ARMBasicBlockUtils>(new ARMBasicBlockUtils(MF));
   BBUtils->computeAllBlockSizes();
+  BBUtils->adjustBBOffsetsAfter(&MF.front());
 
   bool Changed = false;
   for (auto ML : MLI) {
@@ -200,9 +201,18 @@ bool ARMLowOverheadLoops::ProcessLoop(Ma
       End->getOperand(1).getMBB() != ML->getHeader())
     report_fatal_error("Expected LoopEnd to target Loop Header");
 
-  // The LE instructions has 12-bits for the label offset.
-  if (!BBUtils->isBBInRange(End, ML->getHeader(), 4096)) {
-    LLVM_DEBUG(dbgs() << "ARM Loops: Too large for a low-overhead loop!\n");
+  // The WLS and LE instructions have 12-bits for the label offset. WLS
+  // requires a positive offset, while LE uses negative.
+  if (BBUtils->getOffsetOf(End) < BBUtils->getOffsetOf(ML->getHeader()) ||
+      !BBUtils->isBBInRange(End, ML->getHeader(), 4094)) {
+    LLVM_DEBUG(dbgs() << "ARM Loops: LE offset is out-of-range\n");
+    Revert = true;
+  }
+  if (Start->getOpcode() == ARM::t2WhileLoopStart &&
+      (BBUtils->getOffsetOf(Start) >
+       BBUtils->getOffsetOf(Start->getOperand(1).getMBB()) ||
+       !BBUtils->isBBInRange(Start, Start->getOperand(1).getMBB(), 4094))) {
+    LLVM_DEBUG(dbgs() << "ARM Loops: WLS offset is out-of-range!\n");
     Revert = true;
   }
 

Added: llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/end-positive-offset.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/end-positive-offset.mir?rev=365749&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/end-positive-offset.mir (added)
+++ llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/end-positive-offset.mir Thu Jul 11 02:56:15 2019
@@ -0,0 +1,224 @@
+# RUN: llc -run-pass=arm-low-overhead-loops %s -verify-machineinstrs -o - | FileCheck %s
+
+# CHECK-NOT: DoLoopStart
+# CHECK-NOT: DLS
+# CHECK: bb.2.for.body:
+# CHECK:   t2CMPri $lr, 0, 14, $cpsr, implicit-def $cpsr
+# CHECK:   t2Bcc %bb.4, 1, $cpsr
+# CHECK:   tB %bb.3, 14, $noreg
+# CHECK: bb.3.for.cond.cleanup:
+# CHECK: bb.4.for.header:
+
+--- |
+  target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
+  target triple = "thumbv8.1m.main-unknown-unknown"
+  
+  define void @size_limit(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) #0 {
+  entry:
+    call void @llvm.set.loop.iterations.i32(i32 %N)
+    br label %for.body.preheader
+  
+  for.body.preheader:                               ; preds = %entry
+    %scevgep = getelementptr i32, i32* %a, i32 -1
+    %scevgep4 = getelementptr i32, i32* %c, i32 -1
+    %scevgep8 = getelementptr i32, i32* %b, i32 -1
+    br label %for.header
+  
+  for.body:                                         ; preds = %for.header
+    %scevgep11 = getelementptr i32, i32* %lsr.iv9, i32 1
+    %ld1 = load i32, i32* %scevgep11, align 4
+    %scevgep7 = getelementptr i32, i32* %lsr.iv5, i32 1
+    %ld2 = load i32, i32* %scevgep7, align 4
+    %mul = mul nsw i32 %ld2, %ld1
+    %scevgep3 = getelementptr i32, i32* %lsr.iv1, i32 1
+    store i32 %mul, i32* %scevgep3, align 4
+    %scevgep2 = getelementptr i32, i32* %lsr.iv1, i32 1
+    %scevgep6 = getelementptr i32, i32* %lsr.iv5, i32 1
+    %scevgep10 = getelementptr i32, i32* %lsr.iv9, i32 1
+    %count.next = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %count, i32 1)
+    %cmp = icmp ne i32 %count.next, 0
+    br i1 %cmp, label %for.cond.cleanup, label %for.header
+  
+  for.cond.cleanup:                                 ; preds = %for.body
+    ret void
+  
+  for.header:                                       ; preds = %for.body, %for.body.preheader
+    %lsr.iv9 = phi i32* [ %scevgep8, %for.body.preheader ], [ %scevgep10, %for.body ]
+    %lsr.iv5 = phi i32* [ %scevgep4, %for.body.preheader ], [ %scevgep6, %for.body ]
+    %lsr.iv1 = phi i32* [ %scevgep, %for.body.preheader ], [ %scevgep2, %for.body ]
+    %count = phi i32 [ %N, %for.body.preheader ], [ %count.next, %for.body ]
+    br label %for.body
+  }
+  
+  ; Function Attrs: nounwind
+  declare i32 @llvm.arm.space(i32, i32) #1
+  
+  ; Function Attrs: noduplicate nounwind
+  declare void @llvm.set.loop.iterations.i32(i32) #2
+  
+  ; Function Attrs: noduplicate nounwind
+  declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #2
+  
+  ; Function Attrs: nounwind
+  declare void @llvm.stackprotector(i8*, i8**) #3
+  
+  attributes #0 = { "target-features"="+lob" }
+  attributes #1 = { nounwind "target-features"="+lob" }
+  attributes #2 = { noduplicate nounwind "target-features"="+lob" }
+  attributes #3 = { nounwind }
+
+...
+---
+name:            size_limit
+alignment:       1
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: false
+hasWinCFI:       false
+registers:       []
+liveins:         
+  - { reg: '$r0', virtual-reg: '' }
+  - { reg: '$r1', virtual-reg: '' }
+  - { reg: '$r2', virtual-reg: '' }
+  - { reg: '$r3', virtual-reg: '' }
+frameInfo:       
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       56
+  offsetAdjustment: 0
+  maxAlignment:    4
+  adjustsStack:    false
+  hasCalls:        false
+  stackProtector:  ''
+  maxCallFrameSize: 0
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  localFrameSize:  0
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:      []
+stack:           
+  - { id: 0, name: '', type: spill-slot, offset: -12, size: 4, alignment: 4, 
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 1, name: '', type: spill-slot, offset: -16, size: 4, alignment: 4, 
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 2, name: '', type: spill-slot, offset: -20, size: 4, alignment: 4, 
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 3, name: '', type: spill-slot, offset: -24, size: 4, alignment: 4, 
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 4, name: '', type: spill-slot, offset: -28, size: 4, alignment: 4, 
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 5, name: '', type: spill-slot, offset: -32, size: 4, alignment: 4, 
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 6, name: '', type: spill-slot, offset: -36, size: 4, alignment: 4, 
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 7, name: '', type: spill-slot, offset: -40, size: 4, alignment: 4, 
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 8, name: '', type: spill-slot, offset: -44, size: 4, alignment: 4, 
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 9, name: '', type: spill-slot, offset: -48, size: 4, alignment: 4, 
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 10, name: '', type: spill-slot, offset: -52, size: 4, alignment: 4, 
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 11, name: '', type: spill-slot, offset: -56, size: 4, alignment: 4, 
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 12, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, 
+      stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 13, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, 
+      stack-id: default, callee-saved-register: '$r4', callee-saved-restored: true, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+callSites:       []
+constants:       []
+machineFunctionInfo: {}
+body:             |
+  bb.0.entry:
+    successors: %bb.1(0x80000000)
+  
+    frame-setup tPUSH 14, $noreg, killed $r4, killed $lr, implicit-def $sp, implicit $sp
+    frame-setup CFI_INSTRUCTION def_cfa_offset 8
+    frame-setup CFI_INSTRUCTION offset $lr, -4
+    frame-setup CFI_INSTRUCTION offset $r4, -8
+    $sp = frame-setup tSUBspi $sp, 12, 14, $noreg
+    frame-setup CFI_INSTRUCTION def_cfa_offset 56
+    t2DoLoopStart renamable $r3
+    tSTRspi killed $r3, $sp, 11, 14, $noreg :: (store 4 into %stack.0)
+    tSTRspi killed $r2, $sp, 10, 14, $noreg :: (store 4 into %stack.1)
+    tSTRspi killed $r1, $sp, 9, 14, $noreg :: (store 4 into %stack.2)
+    tSTRspi killed $r0, $sp, 8, 14, $noreg :: (store 4 into %stack.3)
+    tB %bb.1, 14, $noreg
+  
+  bb.1.for.body.preheader:
+    successors: %bb.4(0x80000000)
+  
+    $r0 = tLDRspi $sp, 8, 14, $noreg :: (load 4 from %stack.3)
+    renamable $r1, dead $cpsr = tSUBi3 killed renamable $r0, 4, 14, $noreg
+    $r2 = tLDRspi $sp, 10, 14, $noreg :: (load 4 from %stack.1)
+    renamable $r3, dead $cpsr = tSUBi3 killed renamable $r2, 4, 14, $noreg
+    $r12 = t2LDRi12 $sp, 36, 14, $noreg :: (load 4 from %stack.2)
+    renamable $lr = t2SUBri killed renamable $r12, 4, 14, $noreg, $noreg
+    $r4 = tLDRspi $sp, 11, 14, $noreg :: (load 4 from %stack.0)
+    t2STRi12 killed $lr, $sp, 28, 14, $noreg :: (store 4 into %stack.4)
+    tSTRspi killed $r3, $sp, 6, 14, $noreg :: (store 4 into %stack.5)
+    tSTRspi killed $r1, $sp, 5, 14, $noreg :: (store 4 into %stack.6)
+    tSTRspi killed $r4, $sp, 4, 14, $noreg :: (store 4 into %stack.7)
+    tB %bb.4, 14, $noreg
+  
+  bb.2.for.body:
+    successors: %bb.3(0x40000000), %bb.4(0x40000000)
+  
+    $r0 = tLDRspi $sp, 3, 14, $noreg :: (load 4 from %stack.8)
+    renamable $r1, renamable $r0 = t2LDR_PRE renamable $r0, 4, 14, $noreg :: (load 4 from %ir.scevgep11)
+    $r2 = tLDRspi $sp, 2, 14, $noreg :: (load 4 from %stack.9)
+    renamable $r3, renamable $r2 = t2LDR_PRE renamable $r2, 4, 14, $noreg :: (load 4 from %ir.scevgep7)
+    renamable $r1, dead $cpsr = nsw tMUL killed renamable $r3, killed renamable $r1, 14, $noreg
+    $r3 = tLDRspi $sp, 1, 14, $noreg :: (load 4 from %stack.10)
+    early-clobber renamable $r3 = t2STR_PRE killed renamable $r1, renamable $r3, 4, 14, $noreg :: (store 4 into %ir.scevgep3)
+    $r1 = tLDRspi $sp, 0, 14, $noreg :: (load 4 from %stack.11)
+    $lr = tMOVr killed $r1, 14, $noreg
+    renamable $lr = t2LoopDec killed renamable $lr, 1
+    $r12 = tMOVr $lr, 14, $noreg
+    tSTRspi killed $r0, $sp, 7, 14, $noreg :: (store 4 into %stack.4)
+    tSTRspi killed $r2, $sp, 6, 14, $noreg :: (store 4 into %stack.5)
+    tSTRspi killed $r3, $sp, 5, 14, $noreg :: (store 4 into %stack.6)
+    t2STRi12 killed $r12, $sp, 16, 14, $noreg :: (store 4 into %stack.7)
+    t2LoopEnd killed renamable $lr, %bb.4
+    tB %bb.3, 14, $noreg
+  
+  bb.3.for.cond.cleanup:
+    $sp = tADDspi $sp, 12, 14, $noreg
+    tPOP_RET 14, $noreg, def $r4, def $pc
+  
+  bb.4.for.header:
+    successors: %bb.2(0x80000000)
+  
+    $r0 = tLDRspi $sp, 4, 14, $noreg :: (load 4 from %stack.7)
+    $r1 = tLDRspi $sp, 5, 14, $noreg :: (load 4 from %stack.6)
+    $r2 = tLDRspi $sp, 6, 14, $noreg :: (load 4 from %stack.5)
+    $r3 = tLDRspi $sp, 7, 14, $noreg :: (load 4 from %stack.4)
+    tSTRspi killed $r0, $sp, 0, 14, $noreg :: (store 4 into %stack.11)
+    tSTRspi killed $r1, $sp, 1, 14, $noreg :: (store 4 into %stack.10)
+    tSTRspi killed $r2, $sp, 2, 14, $noreg :: (store 4 into %stack.9)
+    tSTRspi killed $r3, $sp, 3, 14, $noreg :: (store 4 into %stack.8)
+    tB %bb.2, 14, $noreg
+
+...

Modified: llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/size-limit.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/size-limit.mir?rev=365749&r1=365748&r2=365749&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/size-limit.mir (original)
+++ llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/size-limit.mir Thu Jul 11 02:56:15 2019
@@ -29,7 +29,7 @@
     %lsr.iv5 = phi i32* [ %scevgep4, %for.body.preheader ], [ %scevgep6, %for.body ]
     %lsr.iv1 = phi i32* [ %scevgep, %for.body.preheader ], [ %scevgep2, %for.body ]
     %0 = phi i32 [ %N, %for.body.preheader ], [ %3, %for.body ]
-    %size = call i32 @llvm.arm.space(i32 4072, i32 undef)
+    %size = call i32 @llvm.arm.space(i32 4070, i32 undef)
     %scevgep11 = getelementptr i32, i32* %lsr.iv9, i32 1
     %1 = load i32, i32* %scevgep11, align 4, !tbaa !3
     %scevgep7 = getelementptr i32, i32* %lsr.iv5, i32 1
@@ -140,7 +140,7 @@ body:             |
   bb.1.for.body:
     successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   
-    dead renamable $r3 = SPACE 4072, undef renamable $r0
+    dead renamable $r3 = SPACE 4070, undef renamable $r0
     renamable $r12, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14, $noreg :: (load 4 from %ir.scevgep11, !tbaa !3)
     renamable $r3, renamable $r2 = t2LDR_PRE killed renamable $r2, 4, 14, $noreg :: (load 4 from %ir.scevgep7, !tbaa !3)
     renamable $r3 = nsw t2MUL killed renamable $r3, killed renamable $r12, 14, $noreg

Added: llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/while-negative-offset.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/while-negative-offset.mir?rev=365749&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/while-negative-offset.mir (added)
+++ llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/while-negative-offset.mir Thu Jul 11 02:56:15 2019
@@ -0,0 +1,199 @@
+# RUN: llc -run-pass=arm-low-overhead-loops -o - -verify-machineinstrs %s | FileCheck %s
+# CHECK-NOT: WLS
+# CHECK-NOT: WhileLoopStart
+
+--- |
+  source_filename = "while-size-limit.ll"
+  target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
+  target triple = "thumbv8.1m.main"
+  
+  define void @size_limit(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) #0 {
+  entry:
+    br label %while
+  
+  for.cond.cleanup:                                 ; preds = %while, %for.body
+    ret void
+  
+  for.body.preheader:                               ; preds = %while
+    %scevgep = getelementptr i32, i32* %a, i32 -1
+    %scevgep4 = getelementptr i32, i32* %c, i32 -1
+    %scevgep8 = getelementptr i32, i32* %b, i32 -1
+    br label %for.body
+  
+  for.body:                                         ; preds = %for.body, %for.body.preheader
+    %lsr.iv9 = phi i32* [ %scevgep8, %for.body.preheader ], [ %scevgep10, %for.body ]
+    %lsr.iv5 = phi i32* [ %scevgep4, %for.body.preheader ], [ %scevgep6, %for.body ]
+    %lsr.iv1 = phi i32* [ %scevgep, %for.body.preheader ], [ %scevgep2, %for.body ]
+    %0 = phi i32 [ %N, %for.body.preheader ], [ %3, %for.body ]
+    %scevgep11 = getelementptr i32, i32* %lsr.iv9, i32 1
+    %1 = load i32, i32* %scevgep11, align 4
+    %scevgep7 = getelementptr i32, i32* %lsr.iv5, i32 1
+    %2 = load i32, i32* %scevgep7, align 4
+    %mul = mul nsw i32 %2, %1
+    %scevgep3 = getelementptr i32, i32* %lsr.iv1, i32 1
+    store i32 %mul, i32* %scevgep3, align 4
+    %scevgep2 = getelementptr i32, i32* %lsr.iv1, i32 1
+    %scevgep6 = getelementptr i32, i32* %lsr.iv5, i32 1
+    %scevgep10 = getelementptr i32, i32* %lsr.iv9, i32 1
+    %3 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1)
+    %4 = icmp ne i32 %3, 0
+    br i1 %4, label %for.body, label %for.cond.cleanup
+  
+  while:                                            ; preds = %entry
+    %cmp8 = call i1 @llvm.test.set.loop.iterations.i32(i32 %N)
+    br i1 %cmp8, label %for.body.preheader, label %for.cond.cleanup
+  }
+  
+  ; Function Attrs: nounwind
+  declare i32 @llvm.arm.space(i32, i32) #1
+  
+  ; Function Attrs: noduplicate nounwind
+  declare i1 @llvm.test.set.loop.iterations.i32(i32) #2
+  
+  ; Function Attrs: noduplicate nounwind
+  declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #2
+  
+  ; Function Attrs: nounwind
+  declare void @llvm.stackprotector(i8*, i8**) #3
+  
+  attributes #0 = { "target-features"="+lob" }
+  attributes #1 = { nounwind "target-features"="+lob" }
+  attributes #2 = { noduplicate nounwind "target-features"="+lob" }
+  attributes #3 = { nounwind }
+
+...
+---
+name:            size_limit
+alignment:       1
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: false
+hasWinCFI:       false
+registers:       []
+liveins:         
+  - { reg: '$r0', virtual-reg: '' }
+  - { reg: '$r1', virtual-reg: '' }
+  - { reg: '$r2', virtual-reg: '' }
+  - { reg: '$r3', virtual-reg: '' }
+frameInfo:       
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       40
+  offsetAdjustment: 0
+  maxAlignment:    4
+  adjustsStack:    false
+  hasCalls:        false
+  stackProtector:  ''
+  maxCallFrameSize: 0
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  localFrameSize:  0
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:      []
+stack:           
+  - { id: 0, name: '', type: spill-slot, offset: -12, size: 4, alignment: 4, 
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 1, name: '', type: spill-slot, offset: -16, size: 4, alignment: 4, 
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 2, name: '', type: spill-slot, offset: -20, size: 4, alignment: 4, 
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 3, name: '', type: spill-slot, offset: -24, size: 4, alignment: 4, 
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 4, name: '', type: spill-slot, offset: -28, size: 4, alignment: 4, 
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 5, name: '', type: spill-slot, offset: -32, size: 4, alignment: 4, 
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 6, name: '', type: spill-slot, offset: -36, size: 4, alignment: 4, 
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 7, name: '', type: spill-slot, offset: -40, size: 4, alignment: 4, 
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 8, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, 
+      stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 9, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, 
+      stack-id: default, callee-saved-register: '$r4', callee-saved-restored: true, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+callSites:       []
+constants:       []
+machineFunctionInfo: {}
+body:             |
+  bb.0.entry:
+    successors: %bb.4(0x80000000)
+  
+    frame-setup tPUSH 14, $noreg, killed $r4, killed $lr, implicit-def $sp, implicit $sp
+    frame-setup CFI_INSTRUCTION def_cfa_offset 8
+    frame-setup CFI_INSTRUCTION offset $lr, -4
+    frame-setup CFI_INSTRUCTION offset $r4, -8
+    $sp = frame-setup tSUBspi $sp, 8, 14, $noreg
+    frame-setup CFI_INSTRUCTION def_cfa_offset 40
+    tSTRspi killed $r3, $sp, 7, 14, $noreg :: (store 4 into %stack.0)
+    tSTRspi killed $r2, $sp, 6, 14, $noreg :: (store 4 into %stack.1)
+    tSTRspi killed $r1, $sp, 5, 14, $noreg :: (store 4 into %stack.2)
+    tSTRspi killed $r0, $sp, 4, 14, $noreg :: (store 4 into %stack.3)
+    tB %bb.4, 14, $noreg
+  
+  bb.1.for.cond.cleanup:
+    $sp = tADDspi $sp, 8, 14, $noreg
+    tPOP_RET 14, $noreg, def $r4, def $pc
+  
+  bb.2.for.body.preheader:
+    successors: %bb.3(0x80000000)
+  
+    $r0 = tLDRspi $sp, 4, 14, $noreg :: (load 4 from %stack.3)
+    renamable $r1, dead $cpsr = tSUBi3 killed renamable $r0, 4, 14, $noreg
+    $r2 = tLDRspi $sp, 6, 14, $noreg :: (load 4 from %stack.1)
+    renamable $r3, dead $cpsr = tSUBi3 killed renamable $r2, 4, 14, $noreg
+    $r12 = t2LDRi12 $sp, 20, 14, $noreg :: (load 4 from %stack.2)
+    renamable $lr = t2SUBri killed renamable $r12, 4, 14, $noreg, $noreg
+    $r4 = tLDRspi $sp, 7, 14, $noreg :: (load 4 from %stack.0)
+    t2STRi12 killed $lr, $sp, 12, 14, $noreg :: (store 4 into %stack.4)
+    tSTRspi killed $r3, $sp, 2, 14, $noreg :: (store 4 into %stack.5)
+    tSTRspi killed $r1, $sp, 1, 14, $noreg :: (store 4 into %stack.6)
+    tSTRspi killed $r4, $sp, 0, 14, $noreg :: (store 4 into %stack.7)
+    tB %bb.3, 14, $noreg
+  
+  bb.3.for.body:
+    successors: %bb.3(0x40000000), %bb.1(0x40000000)
+  
+    $r0 = tLDRspi $sp, 0, 14, $noreg :: (load 4 from %stack.7)
+    $r1 = tLDRspi $sp, 1, 14, $noreg :: (load 4 from %stack.6)
+    $r2 = tLDRspi $sp, 2, 14, $noreg :: (load 4 from %stack.5)
+    $r3 = tLDRspi $sp, 3, 14, $noreg :: (load 4 from %stack.4)
+    renamable $r12, renamable $r3 = t2LDR_PRE renamable $r3, 4, 14, $noreg :: (load 4 from %ir.scevgep11)
+    renamable $lr, renamable $r2 = t2LDR_PRE renamable $r2, 4, 14, $noreg :: (load 4 from %ir.scevgep7)
+    renamable $r12 = nsw t2MUL killed renamable $lr, killed renamable $r12, 14, $noreg
+    early-clobber renamable $r1 = t2STR_PRE killed renamable $r12, renamable $r1, 4, 14, $noreg :: (store 4 into %ir.scevgep3)
+    $lr = tMOVr killed $r0, 14, $noreg
+    renamable $lr = t2LoopDec killed renamable $lr, 1
+    $r0 = tMOVr $lr, 14, $noreg
+    tSTRspi killed $r0, $sp, 0, 14, $noreg :: (store 4 into %stack.7)
+    tSTRspi killed $r1, $sp, 1, 14, $noreg :: (store 4 into %stack.6)
+    tSTRspi killed $r2, $sp, 2, 14, $noreg :: (store 4 into %stack.5)
+    tSTRspi killed $r3, $sp, 3, 14, $noreg :: (store 4 into %stack.4)
+    t2LoopEnd killed renamable $lr, %bb.3
+    tB %bb.1, 14, $noreg
+  
+  bb.4.while:
+    successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  
+    $r0 = tLDRspi $sp, 7, 14, $noreg :: (load 4 from %stack.0)
+    t2WhileLoopStart killed renamable $r0, %bb.2
+    tB %bb.1, 14, $noreg
+
+...




More information about the llvm-commits mailing list