[PATCH] D100094: [ARM] This patch adds some simplifications to ARMBlockPlacement Pass.

Dave Green via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Mon Apr 26 15:42:35 PDT 2021


dmgreen added a comment.

When comparing moving Pred before Exit with moving Exit after Pred, is either approach expected to be more or less efficient than the other? Or are they both just a bit inefficient?

Please add this test case:

  # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
  # RUN: llc -run-pass arm-block-placement %s -o - | FileCheck %s
  
  --- |
    target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
    target triple = "thumbv8.1m.main-arm-none-eabi"
  
    ; Reduced IR for the test. @c allocates the i16 slot %g and, when %d == 0
    ; and %e == 8, can run two counted loops in sequence (while.body, then
    ; while.body3) that each scatter-store into %g; otherwise entry branches
    ; straight to sw.epilog. The function always returns undef.
    define i32 @c(i32 %d, i32 %e, i32 %f) #0 {
    entry:
      %g = alloca i16, align 2
      %0 = bitcast i16* %g to i8*
      call void @llvm.lifetime.start.p0i8(i64 2, i8* nonnull %0) #5
      ; Both comparisons must hold to reach the loops.
      %cond = icmp eq i32 %e, 8
      %cmp = icmp eq i32 %d, 0
      %or.cond = and i1 %cmp, %cond
      br i1 %or.cond, label %while.cond.preheader, label %sw.epilog
  
    ; Guard for the first loop: the intrinsic is given %f and returns an
    ; { i32, i1 } pair. The i1 (%2) picks between entering the loop and
    ; skipping to sw.epilog; the i32 (%3) seeds the counter phi %8.
    ; %tobool.not16 has no users in this function.
    while.cond.preheader:                             ; preds = %entry
      %tobool.not16 = icmp eq i32 %f, 0
      %1 = call { i32, i1 } @llvm.test.start.loop.iterations.i32(i32 %f)
      %2 = extractvalue { i32, i1 } %1, 1
      %3 = extractvalue { i32, i1 } %1, 0
      br i1 %2, label %while.body.preheader, label %sw.epilog
  
    while.body.preheader:                             ; preds = %while.cond.preheader
      br label %while.body
  
    ; Guard for the second loop, reached from the exit of while.body. Same
    ; shape as the first guard: %6 selects loop vs. sw.epilog, %7 seeds the
    ; counter phi %14. %4 has no users in this function.
    while.cond1.preheader:                            ; preds = %while.body
      %4 = icmp eq i32 %f, 0
      %5 = call { i32, i1 } @llvm.test.start.loop.iterations.i32(i32 %f)
      %6 = extractvalue { i32, i1 } %5, 1
      %7 = extractvalue { i32, i1 } %5, 0
      br i1 %6, label %while.body3.preheader, label %sw.epilog
  
    while.body3.preheader:                            ; preds = %while.cond1.preheader
      br label %while.body3
  
    ; First loop. %h.017 is undef on entry and afterwards takes the i32 result
    ; of viwdup (%10); the vector result (%11) is scatter-stored relative to %g.
    ; %8 is the remaining-iteration counter: it is decremented by 1 each trip
    ; and the loop repeats while the decremented value is non-zero.
    while.body:                                       ; preds = %while.body.preheader, %while.body
      %h.017 = phi i32 [ %10, %while.body ], [ undef, %while.body.preheader ]
      %8 = phi i32 [ %3, %while.body.preheader ], [ %12, %while.body ]
      %9 = call { <8 x i16>, i32 } @llvm.arm.mve.viwdup.v8i16(i32 %h.017, i32 0, i32 1)
      %10 = extractvalue { <8 x i16>, i32 } %9, 1
      %11 = extractvalue { <8 x i16>, i32 } %9, 0
      call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v8i16.v8i16(i16* nonnull %g, <8 x i16> %11, <8 x i16> undef, i32 16, i32 1)
      %12 = call i32 @llvm.loop.decrement.reg.i32(i32 %8, i32 1)
      %13 = icmp ne i32 %12, 0
      br i1 %13, label %while.body, label %while.cond1.preheader
  
    ; Second loop. %.pn carries the whole viwdup aggregate: %9 from the first
    ; loop on entry, %15 on later iterations; its i32 member (%h.121) feeds the
    ; next viwdup, whose last operand is 2 here rather than 1. %14 is the
    ; counter, handled the same way as %8 above. The exit goes to sw.epilog.
    while.body3:                                      ; preds = %while.body3.preheader, %while.body3
      %.pn = phi { <8 x i16>, i32 } [ %15, %while.body3 ], [ %9, %while.body3.preheader ]
      %14 = phi i32 [ %7, %while.body3.preheader ], [ %17, %while.body3 ]
      %h.121 = extractvalue { <8 x i16>, i32 } %.pn, 1
      %15 = call { <8 x i16>, i32 } @llvm.arm.mve.viwdup.v8i16(i32 %h.121, i32 0, i32 2)
      %16 = extractvalue { <8 x i16>, i32 } %15, 0
      call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v8i16.v8i16(i16* nonnull %g, <8 x i16> %16, <8 x i16> undef, i32 16, i32 1)
      %17 = call i32 @llvm.loop.decrement.reg.i32(i32 %14, i32 1)
      %18 = icmp ne i32 %17, 0
      br i1 %18, label %while.body3, label %sw.epilog
  
    ; Common exit: reached from entry, from both loop guards, and from the
    ; second loop. Ends the lifetime of %g and returns undef.
    sw.epilog:                                        ; preds = %while.body3, %while.cond1.preheader, %while.cond.preheader, %entry
      %19 = bitcast i16* %g to i8*
      call void @llvm.lifetime.end.p0i8(i64 2, i8* nonnull %19) #5
      ret i32 undef
    }
  
    declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1
    declare { <8 x i16>, i32 } @llvm.arm.mve.viwdup.v8i16(i32, i32, i32) #2
    declare void @llvm.arm.mve.vstr.scatter.offset.p0i16.v8i16.v8i16(i16*, <8 x i16>, <8 x i16>, i32, i32) #3
    declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1
    declare { i32, i1 } @llvm.test.start.loop.iterations.i32(i32) #4
    declare i32 @llvm.loop.decrement.reg.i32(i32, i32) #4
  
    attributes #0 = { "target-features"="+armv8.1-m.main,+hwdiv,+mve.fp,+ras,+thumb-mode" }
  
  ...
  ---
  name:            c
  alignment:       2
  tracksRegLiveness: true
  liveins:
    - { reg: '$r0', virtual-reg: '' }
    - { reg: '$r1', virtual-reg: '' }
    - { reg: '$r2', virtual-reg: '' }
  stack:
    - { id: 0, name: g, type: default, offset: -10, size: 2, alignment: 2,
        stack-id: default, callee-saved-register: '', callee-saved-restored: true,
        local-offset: -2, debug-info-variable: '', debug-info-expression: '',
        debug-info-location: '' }
    - { id: 1, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4,
        stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false,
        debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
    - { id: 2, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4,
        stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true,
        debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
  body:             |
    ; CHECK-LABEL: name: c
    ; CHECK: bb.0.entry:
    ; CHECK:   successors: %bb.1(0x55555555), %bb.2(0x2aaaaaab)
    ; CHECK:   liveins: $r0, $r1, $r2, $r7, $lr
    ; CHECK:   frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp
    ; CHECK:   frame-setup CFI_INSTRUCTION def_cfa_offset 8
    ; CHECK:   frame-setup CFI_INSTRUCTION offset $lr, -4
    ; CHECK:   frame-setup CFI_INSTRUCTION offset $r7, -8
    ; CHECK:   $sp = frame-setup tSUBspi $sp, 1, 14 /* CC::al */, $noreg
    ; CHECK:   frame-setup CFI_INSTRUCTION def_cfa_offset 12
    ; CHECK:   tCMPi8 killed renamable $r0, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr
    ; CHECK:   t2IT 0, 8, implicit-def $itstate
    ; CHECK:   tCMPi8 killed renamable $r1, 8, 0 /* CC::eq */, killed $cpsr, implicit-def $cpsr, implicit killed $itstate
    ; CHECK:   t2Bcc %bb.2, 0 /* CC::eq */, killed $cpsr
    ; CHECK:   tB %bb.1, 14 /* CC::al */, $noreg
    ; CHECK: bb.5.while.cond1.preheader:
    ; CHECK:   successors: %bb.6(0x40000000), %bb.1(0x40000000)
    ; CHECK:   liveins: $r0, $r1, $r2
    ; CHECK:   renamable $lr = t2WhileLoopStartLR killed renamable $r2, %bb.1, implicit-def dead $cpsr
    ; CHECK:   t2B %bb.6, 14 /* CC::al */, $noreg
    ; CHECK: bb.2.while.cond.preheader:
    ; CHECK:   successors: %bb.3(0x40000000), %bb.1(0x40000000)
    ; CHECK:   liveins: $r2
    ; CHECK:   renamable $lr = t2WhileLoopStartLR renamable $r2, %bb.1, implicit-def dead $cpsr
    ; CHECK:   t2B %bb.3, 14 /* CC::al */, $noreg
    ; CHECK: bb.1.sw.epilog:
    ; CHECK:   $sp = frame-destroy tADDspi $sp, 1, 14 /* CC::al */, $noreg
    ; CHECK:   frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc, implicit undef $r0
    ; CHECK: bb.3.while.body.preheader:
    ; CHECK:   successors: %bb.4(0x80000000)
    ; CHECK:   liveins: $lr, $r2
    ; CHECK:   renamable $r3, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg
    ; CHECK:   renamable $r1 = t2ADDri $sp, 2, 14 /* CC::al */, $noreg, $noreg
    ; CHECK:   renamable $r0 = IMPLICIT_DEF
    ; CHECK: bb.4.while.body (align 4):
    ; CHECK:   successors: %bb.4(0x7c000000), %bb.5(0x04000000)
    ; CHECK:   liveins: $lr, $r0, $r1, $r2, $r3
    ; CHECK:   renamable $q0, renamable $r0 = MVE_VIWDUPu16 killed renamable $r0, renamable $r3, 1, 0, $noreg, undef renamable $q0
    ; CHECK:   MVE_VSTRH16_rq undef renamable $q0, renamable $r1, killed renamable $q0, 0, $noreg
    ; CHECK:   renamable $lr = t2LoopEndDec killed renamable $lr, %bb.4, implicit-def dead $cpsr
    ; CHECK:   t2B %bb.5, 14 /* CC::al */, $noreg
    ; CHECK: bb.6.while.body3.preheader:
    ; CHECK:   successors: %bb.7(0x80000000)
    ; CHECK:   liveins: $lr, $r0, $r1
    ; CHECK:   renamable $r3, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg
    ; CHECK: bb.7.while.body3 (align 4):
    ; CHECK:   successors: %bb.7(0x7c000000), %bb.1(0x04000000)
    ; CHECK:   liveins: $lr, $r0, $r1, $r3
    ; CHECK:   renamable $q0, renamable $r0 = MVE_VIWDUPu16 killed renamable $r0, renamable $r3, 2, 0, $noreg, undef renamable $q0
    ; CHECK:   MVE_VSTRH16_rq undef renamable $q0, renamable $r1, killed renamable $q0, 0, $noreg
    ; CHECK:   renamable $lr = t2LoopEndDec killed renamable $lr, %bb.7, implicit-def dead $cpsr
    ; CHECK:   t2B %bb.1, 14 /* CC::al */, $noreg
  
    ; Input block layout for the pass. bb.7.sw.epilog sits directly after
    ; bb.0.entry, i.e. ahead of bb.1 and bb.2, whose t2WhileLoopStartLR
    ; instructions both name %bb.7 as their target. In the expected output
    ; above, sw.epilog instead appears after both while.cond preheaders.
    ; NOTE(review): presumably the backwards WLS target is the condition
    ; arm-block-placement is meant to repair here -- confirm against the pass.
    bb.0.entry:
      ; The two weights below sum to more than 0x80000000; the expected output
      ; for this block lists 0x55555555 and 0x2aaaaaab instead.
      successors: %bb.7(0x80000000), %bb.1(0x40000000)
      liveins: $r0, $r1, $r2, $r7, $lr
  
      frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp
      frame-setup CFI_INSTRUCTION def_cfa_offset 8
      frame-setup CFI_INSTRUCTION offset $lr, -4
      frame-setup CFI_INSTRUCTION offset $r7, -8
      $sp = frame-setup tSUBspi $sp, 1, 14 /* CC::al */, $noreg
      frame-setup CFI_INSTRUCTION def_cfa_offset 12
      ; r0 is compared with 0, then (under the IT block, on eq) r1 with 8.
      tCMPi8 killed renamable $r0, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr
      t2IT 0, 8, implicit-def $itstate
      tCMPi8 killed renamable $r1, 8, 0 /* CC::eq */, killed $cpsr, implicit-def $cpsr, implicit killed $itstate
      ; Only the eq branch to bb.1 is explicit; the other successor, bb.7, is
      ; the next block in layout.
      t2Bcc %bb.1, 0 /* CC::eq */, killed $cpsr
  
    ; Function exit: releases the stack adjustment and returns via tPOP_RET.
    bb.7.sw.epilog:
      $sp = frame-destroy tADDspi $sp, 1, 14 /* CC::al */, $noreg
      frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc, implicit undef $r0
  
    ; Guard for the first loop: the loop-start on r2 names %bb.7, which lies
    ; earlier in this layout; the explicit t2B goes on to bb.3.
    bb.1.while.cond.preheader:
      successors: %bb.3(0x40000000), %bb.7(0x40000000)
      liveins: $r2
  
      renamable $lr = t2WhileLoopStartLR renamable $r2, %bb.7, implicit-def dead $cpsr
      t2B %bb.3, 14 /* CC::al */, $noreg
  
    ; Sets up r3, r1 (sp + 2) and r0 for the first loop. There is no terminator;
    ; the sole successor bb.4 is the next block in layout.
    bb.3.while.body.preheader:
      successors: %bb.4(0x80000000)
      liveins: $lr, $r2
  
      renamable $r3, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg
      renamable $r1 = t2ADDri $sp, 2, 14 /* CC::al */, $noreg, $noreg
      renamable $r0 = IMPLICIT_DEF
  
    ; First loop body: t2LoopEndDec updates lr and targets this same block;
    ; the t2B leaves for bb.2.
    bb.4.while.body (align 4):
      successors: %bb.4(0x7c000000), %bb.2(0x04000000)
      liveins: $lr, $r0, $r1, $r2, $r3
  
      renamable $q0, renamable $r0 = MVE_VIWDUPu16 killed renamable $r0, renamable $r3, 1, 0, $noreg, undef renamable $q0
      MVE_VSTRH16_rq undef renamable $q0, renamable $r1, killed renamable $q0, 0, $noreg
      renamable $lr = t2LoopEndDec killed renamable $lr, %bb.4, implicit-def dead $cpsr
      t2B %bb.2, 14 /* CC::al */, $noreg
  
    ; Guard for the second loop: again the loop-start names %bb.7, which lies
    ; earlier in this layout; r2 is killed here.
    bb.2.while.cond1.preheader:
      successors: %bb.5(0x40000000), %bb.7(0x40000000)
      liveins: $r0, $r1, $r2
  
      renamable $lr = t2WhileLoopStartLR killed renamable $r2, %bb.7, implicit-def dead $cpsr
      t2B %bb.5, 14 /* CC::al */, $noreg
  
    ; Re-materialises r3 = 0 for the second loop; no terminator, with the sole
    ; successor bb.6 next in layout.
    bb.5.while.body3.preheader:
      successors: %bb.6(0x80000000)
      liveins: $lr, $r0, $r1
  
      renamable $r3, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg
  
    ; Second loop body: same shape as bb.4 but with 2 in place of 1 in the
    ; MVE_VIWDUPu16 operands; the exit branch goes to bb.7.
    bb.6.while.body3 (align 4):
      successors: %bb.6(0x7c000000), %bb.7(0x04000000)
      liveins: $lr, $r0, $r1, $r3
  
      renamable $q0, renamable $r0 = MVE_VIWDUPu16 killed renamable $r0, renamable $r3, 2, 0, $noreg, undef renamable $q0
      MVE_VSTRH16_rq undef renamable $q0, renamable $r1, killed renamable $q0, 0, $noreg
      renamable $lr = t2LoopEndDec killed renamable $lr, %bb.6, implicit-def dead $cpsr
      t2B %bb.7, 14 /* CC::al */, $noreg
  
  ...



================
Comment at: llvm/lib/Target/ARM/ARMBlockPlacement.cpp:228
 
-  BBUtils->adjustBBOffsetsAfter(After);
+  BBUtils->adjustBBOffsetsAfter(BeforePrev);
 }
----------------
I think it's worth just renumbering all nodes and recalculating all offsets.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D100094/new/

https://reviews.llvm.org/D100094



More information about the llvm-commits mailing list