[PATCH] D100094: [ARM] This patch adds some simplifications to ARMBlockPlacement Pass.
Dave Green via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Mon Apr 26 15:42:35 PDT 2021
dmgreen added a comment.
When comparing moving Pred before Exit with moving Exit after Pred, is either one expected to be more or less efficient than the other? Or are they both just a bit inefficient?
Please add this test case:
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -run-pass arm-block-placement %s -o - | FileCheck %s
# The command above runs only the arm-block-placement pass over this file and
# pipes the resulting MIR to FileCheck, which matches it against the
# expectations embedded in the machine function body.
--- |
; Embedded LLVM IR module. The machine function in the second YAML document
; refers back to it by name (function @c, IR block names, the alloca %g).
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
target triple = "thumbv8.1m.main-arm-none-eabi"
; IR side of the test: two loops executed back to back, each guarded by a call
; to @llvm.test.start.loop.iterations and each stepping its counter with
; @llvm.loop.decrement.reg. Every exit path ends up in %sw.epilog.
define i32 @c(i32 %d, i32 %e, i32 %f) #0 {
entry:
%g = alloca i16, align 2
%0 = bitcast i16* %g to i8*
call void @llvm.lifetime.start.p0i8(i64 2, i8* nonnull %0) #5
; The loops are only entered when %d == 0 and %e == 8; otherwise control goes
; straight to %sw.epilog.
%cond = icmp eq i32 %e, 8
%cmp = icmp eq i32 %d, 0
%or.cond = and i1 %cmp, %cond
br i1 %or.cond, label %while.cond.preheader, label %sw.epilog
; Guard for the first loop: the i1 result of the intrinsic selects between
; entering the loop and skipping to %sw.epilog; the i32 result (%3) seeds the
; loop counter phi in %while.body.
while.cond.preheader: ; preds = %entry
; NOTE(review): %tobool.not16 is not used anywhere in this function.
%tobool.not16 = icmp eq i32 %f, 0
%1 = call { i32, i1 } @llvm.test.start.loop.iterations.i32(i32 %f)
%2 = extractvalue { i32, i1 } %1, 1
%3 = extractvalue { i32, i1 } %1, 0
br i1 %2, label %while.body.preheader, label %sw.epilog
while.body.preheader: ; preds = %while.cond.preheader
br label %while.body
; Guard for the second loop, same shape as the first; it is reached only from
; the exit edge of %while.body.
while.cond1.preheader: ; preds = %while.body
; NOTE(review): %4 is not used anywhere in this function.
%4 = icmp eq i32 %f, 0
%5 = call { i32, i1 } @llvm.test.start.loop.iterations.i32(i32 %f)
%6 = extractvalue { i32, i1 } %5, 1
%7 = extractvalue { i32, i1 } %5, 0
br i1 %6, label %while.body3.preheader, label %sw.epilog
while.body3.preheader: ; preds = %while.cond1.preheader
br label %while.body3
; First loop body: one viwdup call (last operand 1) whose vector result is
; scatter-stored through %g and whose i32 result feeds the next iteration via
; %h.017. The counter %8 is decremented by 1 and the loop repeats while the
; result is non-zero.
while.body: ; preds = %while.body.preheader, %while.body
%h.017 = phi i32 [ %10, %while.body ], [ undef, %while.body.preheader ]
%8 = phi i32 [ %3, %while.body.preheader ], [ %12, %while.body ]
%9 = call { <8 x i16>, i32 } @llvm.arm.mve.viwdup.v8i16(i32 %h.017, i32 0, i32 1)
%10 = extractvalue { <8 x i16>, i32 } %9, 1
%11 = extractvalue { <8 x i16>, i32 } %9, 0
call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v8i16.v8i16(i16* nonnull %g, <8 x i16> %11, <8 x i16> undef, i32 16, i32 1)
%12 = call i32 @llvm.loop.decrement.reg.i32(i32 %8, i32 1)
%13 = icmp ne i32 %12, 0
br i1 %13, label %while.body, label %while.cond1.preheader
; Second loop body: same structure with viwdup's last operand set to 2. The phi
; %.pn starts from %9, the last viwdup result of the first loop, so the i32
; element carries over between the two loops.
while.body3: ; preds = %while.body3.preheader, %while.body3
%.pn = phi { <8 x i16>, i32 } [ %15, %while.body3 ], [ %9, %while.body3.preheader ]
%14 = phi i32 [ %7, %while.body3.preheader ], [ %17, %while.body3 ]
%h.121 = extractvalue { <8 x i16>, i32 } %.pn, 1
%15 = call { <8 x i16>, i32 } @llvm.arm.mve.viwdup.v8i16(i32 %h.121, i32 0, i32 2)
%16 = extractvalue { <8 x i16>, i32 } %15, 0
call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v8i16.v8i16(i16* nonnull %g, <8 x i16> %16, <8 x i16> undef, i32 16, i32 1)
%17 = call i32 @llvm.loop.decrement.reg.i32(i32 %14, i32 1)
%18 = icmp ne i32 %17, 0
br i1 %18, label %while.body3, label %sw.epilog
; Common exit: reached from %entry, from both loop guards and from the second
; loop. Ends the lifetime of %g and returns undef.
sw.epilog: ; preds = %while.body3, %while.cond1.preheader, %while.cond.preheader, %entry
%19 = bitcast i16* %g to i8*
call void @llvm.lifetime.end.p0i8(i64 2, i8* nonnull %19) #5
ret i32 undef
}
; Declarations for the intrinsics called from @c.
; NOTE(review): attribute groups #1-#5 are referenced here and in @c, but only
; #0 is defined in the visible text - confirm the IR parser accepts that, or
; drop the unused references.
declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1
declare { <8 x i16>, i32 } @llvm.arm.mve.viwdup.v8i16(i32, i32, i32) #2
declare void @llvm.arm.mve.vstr.scatter.offset.p0i16.v8i16.v8i16(i16*, <8 x i16>, <8 x i16>, i32, i32) #3
declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1
declare { i32, i1 } @llvm.test.start.loop.iterations.i32(i32) #4
declare i32 @llvm.loop.decrement.reg.i32(i32, i32) #4
; Target features applied to @c (the only function carrying #0).
attributes #0 = { "target-features"="+armv8.1-m.main,+hwdiv,+mve.fp,+ras,+thumb-mode" }
...
---
# Machine function for @c. In the input layout below, bb.7.sw.epilog is placed
# directly after bb.0.entry, i.e. ahead of bb.1 and bb.2, whose
# t2WhileLoopStartLR instructions both name %bb.7 as their target.
name: c
alignment: 2
tracksRegLiveness: true
liveins:
- { reg: '$r0', virtual-reg: '' }
- { reg: '$r1', virtual-reg: '' }
- { reg: '$r2', virtual-reg: '' }
# Stack objects: id 0 is the IR alloca %g (2 bytes); ids 1 and 2 are the
# callee-saved spill slots for $lr and $r7.
stack:
- { id: 0, name: g, type: default, offset: -10, size: 2, alignment: 2,
stack-id: default, callee-saved-register: '', callee-saved-restored: true,
local-offset: -2, debug-info-variable: '', debug-info-expression: '',
debug-info-location: '' }
- { id: 1, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4,
stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- { id: 2, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4,
stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
body: |
; Expected output of the pass (autogenerated FileCheck lines). The blocks are
; renumbered and appear in the order entry, while.cond1.preheader,
; while.cond.preheader, sw.epilog, while.body.preheader, while.body,
; while.body3.preheader, while.body3. Both blocks that hold a
; t2WhileLoopStartLR now come before sw.epilog (%bb.1), and bb.0 ends with an
; explicit tB to %bb.1 that the input does not have.
; NOTE(review): presumably this is the point of the test - the loop-start
; target must not sit before the instruction that names it; confirm against
; the pass.
; CHECK-LABEL: name: c
; CHECK: bb.0.entry:
; CHECK: successors: %bb.1(0x55555555), %bb.2(0x2aaaaaab)
; CHECK: liveins: $r0, $r1, $r2, $r7, $lr
; CHECK: frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp
; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8
; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4
; CHECK: frame-setup CFI_INSTRUCTION offset $r7, -8
; CHECK: $sp = frame-setup tSUBspi $sp, 1, 14 /* CC::al */, $noreg
; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 12
; CHECK: tCMPi8 killed renamable $r0, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr
; CHECK: t2IT 0, 8, implicit-def $itstate
; CHECK: tCMPi8 killed renamable $r1, 8, 0 /* CC::eq */, killed $cpsr, implicit-def $cpsr, implicit killed $itstate
; CHECK: t2Bcc %bb.2, 0 /* CC::eq */, killed $cpsr
; CHECK: tB %bb.1, 14 /* CC::al */, $noreg
; CHECK: bb.5.while.cond1.preheader:
; CHECK: successors: %bb.6(0x40000000), %bb.1(0x40000000)
; CHECK: liveins: $r0, $r1, $r2
; CHECK: renamable $lr = t2WhileLoopStartLR killed renamable $r2, %bb.1, implicit-def dead $cpsr
; CHECK: t2B %bb.6, 14 /* CC::al */, $noreg
; CHECK: bb.2.while.cond.preheader:
; CHECK: successors: %bb.3(0x40000000), %bb.1(0x40000000)
; CHECK: liveins: $r2
; CHECK: renamable $lr = t2WhileLoopStartLR renamable $r2, %bb.1, implicit-def dead $cpsr
; CHECK: t2B %bb.3, 14 /* CC::al */, $noreg
; CHECK: bb.1.sw.epilog:
; CHECK: $sp = frame-destroy tADDspi $sp, 1, 14 /* CC::al */, $noreg
; CHECK: frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc, implicit undef $r0
; CHECK: bb.3.while.body.preheader:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: liveins: $lr, $r2
; CHECK: renamable $r3, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg
; CHECK: renamable $r1 = t2ADDri $sp, 2, 14 /* CC::al */, $noreg, $noreg
; CHECK: renamable $r0 = IMPLICIT_DEF
; CHECK: bb.4.while.body (align 4):
; CHECK: successors: %bb.4(0x7c000000), %bb.5(0x04000000)
; CHECK: liveins: $lr, $r0, $r1, $r2, $r3
; CHECK: renamable $q0, renamable $r0 = MVE_VIWDUPu16 killed renamable $r0, renamable $r3, 1, 0, $noreg, undef renamable $q0
; CHECK: MVE_VSTRH16_rq undef renamable $q0, renamable $r1, killed renamable $q0, 0, $noreg
; CHECK: renamable $lr = t2LoopEndDec killed renamable $lr, %bb.4, implicit-def dead $cpsr
; CHECK: t2B %bb.5, 14 /* CC::al */, $noreg
; CHECK: bb.6.while.body3.preheader:
; CHECK: successors: %bb.7(0x80000000)
; CHECK: liveins: $lr, $r0, $r1
; CHECK: renamable $r3, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg
; CHECK: bb.7.while.body3 (align 4):
; CHECK: successors: %bb.7(0x7c000000), %bb.1(0x04000000)
; CHECK: liveins: $lr, $r0, $r1, $r3
; CHECK: renamable $q0, renamable $r0 = MVE_VIWDUPu16 killed renamable $r0, renamable $r3, 2, 0, $noreg, undef renamable $q0
; CHECK: MVE_VSTRH16_rq undef renamable $q0, renamable $r1, killed renamable $q0, 0, $noreg
; CHECK: renamable $lr = t2LoopEndDec killed renamable $lr, %bb.7, implicit-def dead $cpsr
; CHECK: t2B %bb.1, 14 /* CC::al */, $noreg
; Input machine basic blocks, in their original layout order.
; Entry: sets up the frame, tests $r0 against 0 and (under the IT block) $r1
; against 8, then branches to %bb.1 on eq; there is no explicit branch to
; %bb.7, which is simply the next block in the layout.
bb.0.entry:
successors: %bb.7(0x80000000), %bb.1(0x40000000)
liveins: $r0, $r1, $r2, $r7, $lr
frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp
frame-setup CFI_INSTRUCTION def_cfa_offset 8
frame-setup CFI_INSTRUCTION offset $lr, -4
frame-setup CFI_INSTRUCTION offset $r7, -8
$sp = frame-setup tSUBspi $sp, 1, 14 /* CC::al */, $noreg
frame-setup CFI_INSTRUCTION def_cfa_offset 12
tCMPi8 killed renamable $r0, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr
t2IT 0, 8, implicit-def $itstate
tCMPi8 killed renamable $r1, 8, 0 /* CC::eq */, killed $cpsr, implicit-def $cpsr, implicit killed $itstate
t2Bcc %bb.1, 0 /* CC::eq */, killed $cpsr
; Common exit block: undoes the stack adjustment and pops into $pc.
bb.7.sw.epilog:
$sp = frame-destroy tADDspi $sp, 1, 14 /* CC::al */, $noreg
frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc, implicit undef $r0
; Guard for the first loop: t2WhileLoopStartLR on $r2 names %bb.7, which sits
; earlier in this layout.
bb.1.while.cond.preheader:
successors: %bb.3(0x40000000), %bb.7(0x40000000)
liveins: $r2
renamable $lr = t2WhileLoopStartLR renamable $r2, %bb.7, implicit-def dead $cpsr
t2B %bb.3, 14 /* CC::al */, $noreg
; Preheader of the first loop: sets $r3 to 0 and $r1 to $sp + 2; $r0 is left
; undefined (IMPLICIT_DEF).
bb.3.while.body.preheader:
successors: %bb.4(0x80000000)
liveins: $lr, $r2
renamable $r3, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg
renamable $r1 = t2ADDri $sp, 2, 14 /* CC::al */, $noreg, $noreg
renamable $r0 = IMPLICIT_DEF
; First loop body: loops back to itself via t2LoopEndDec on $lr, otherwise
; branches on to %bb.2.
bb.4.while.body (align 4):
successors: %bb.4(0x7c000000), %bb.2(0x04000000)
liveins: $lr, $r0, $r1, $r2, $r3
renamable $q0, renamable $r0 = MVE_VIWDUPu16 killed renamable $r0, renamable $r3, 1, 0, $noreg, undef renamable $q0
MVE_VSTRH16_rq undef renamable $q0, renamable $r1, killed renamable $q0, 0, $noreg
renamable $lr = t2LoopEndDec killed renamable $lr, %bb.4, implicit-def dead $cpsr
t2B %bb.2, 14 /* CC::al */, $noreg
; Guard for the second loop: again names %bb.7, which sits earlier in this
; layout.
bb.2.while.cond1.preheader:
successors: %bb.5(0x40000000), %bb.7(0x40000000)
liveins: $r0, $r1, $r2
renamable $lr = t2WhileLoopStartLR killed renamable $r2, %bb.7, implicit-def dead $cpsr
t2B %bb.5, 14 /* CC::al */, $noreg
; Preheader of the second loop: sets $r3 to 0; $r0 and $r1 are live-in.
bb.5.while.body3.preheader:
successors: %bb.6(0x80000000)
liveins: $lr, $r0, $r1
renamable $r3, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg
; Second loop body: same shape as the first with immediate 2 on MVE_VIWDUPu16;
; exits to %bb.7.
bb.6.while.body3 (align 4):
successors: %bb.6(0x7c000000), %bb.7(0x04000000)
liveins: $lr, $r0, $r1, $r3
renamable $q0, renamable $r0 = MVE_VIWDUPu16 killed renamable $r0, renamable $r3, 2, 0, $noreg, undef renamable $q0
MVE_VSTRH16_rq undef renamable $q0, renamable $r1, killed renamable $q0, 0, $noreg
renamable $lr = t2LoopEndDec killed renamable $lr, %bb.6, implicit-def dead $cpsr
t2B %bb.7, 14 /* CC::al */, $noreg
...
================
Comment at: llvm/lib/Target/ARM/ARMBlockPlacement.cpp:228
- BBUtils->adjustBBOffsetsAfter(After);
+ BBUtils->adjustBBOffsetsAfter(BeforePrev);
}
----------------
I think it's worth just renumbering all nodes and recalculating all offsets.
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D100094/new/
https://reviews.llvm.org/D100094
More information about the llvm-commits
mailing list