[llvm-branch-commits] [llvm] 1454724 - [ARM] Align blocks that are not fallthough targets
David Green via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Sat Jan 16 14:24:27 PST 2021
Author: David Green
Date: 2021-01-16T22:19:35Z
New Revision: 145472421535c71a9ea60af7e5d012ab69dc85ff
URL: https://github.com/llvm/llvm-project/commit/145472421535c71a9ea60af7e5d012ab69dc85ff
DIFF: https://github.com/llvm/llvm-project/commit/145472421535c71a9ea60af7e5d012ab69dc85ff.diff
LOG: [ARM] Align blocks that are not fallthough targets
If the previous block in a function does not fallthough, adding nop's to
align it will never be executed. This means we can freely (except for
codesize) align more branches. This happens in constantislandspass (as
it cannot happen later) and only happens at aggressive optimization
levels as it does increase codesize.
Differential Revision: https://reviews.llvm.org/D94394
Added:
Modified:
llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
llvm/test/CodeGen/ARM/ParallelDSP/multi-use-loads.ll
llvm/test/CodeGen/Thumb2/aligned-nonfallthrough.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
index e89eb0fb4502..630490f6f914 100644
--- a/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -338,6 +338,32 @@ LLVM_DUMP_METHOD void ARMConstantIslands::dumpBBs() {
}
#endif
+// Align blocks where the previous block does not fall through. This may add
+// extra NOP's but they will not be executed. It uses the PrefLoopAlignment as a
+// measure of how much to align, and only runs at CodeGenOpt::Aggressive.
+static bool AlignBlocks(MachineFunction *MF) {
+ if (MF->getTarget().getOptLevel() != CodeGenOpt::Aggressive ||
+ MF->getFunction().hasOptSize())
+ return false;
+
+ auto *TLI = MF->getSubtarget().getTargetLowering();
+ const Align Alignment = TLI->getPrefLoopAlignment();
+ if (Alignment < 4)
+ return false;
+
+ bool Changed = false;
+ bool PrevCanFallthough = true;
+ for (auto &MBB : *MF) {
+ if (!PrevCanFallthough) {
+ Changed = true;
+ MBB.setAlignment(Alignment);
+ }
+ PrevCanFallthough = MBB.canFallThrough();
+ }
+
+ return Changed;
+}
+
bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
MF = &mf;
MCP = mf.getConstantPool();
@@ -380,6 +406,9 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
MF->RenumberBlocks();
}
+ // Align any non-fallthrough blocks
+ MadeChange |= AlignBlocks(MF);
+
// Perform the initial placement of the constant pool entries. To start with,
// we put them all at the end of the function.
std::vector<MachineInstr*> CPEMIs;
diff --git a/llvm/test/CodeGen/ARM/ParallelDSP/multi-use-loads.ll b/llvm/test/CodeGen/ARM/ParallelDSP/multi-use-loads.ll
index b949934e51df..ea7a83b96b46 100644
--- a/llvm/test/CodeGen/ARM/ParallelDSP/multi-use-loads.ll
+++ b/llvm/test/CodeGen/ARM/ParallelDSP/multi-use-loads.ll
@@ -26,6 +26,7 @@ define i32 @add_user(i32 %arg, i32* nocapture readnone %arg1, i16* nocapture rea
; CHECK-LE-NEXT: @ %bb.3: @ %for.cond.cleanup
; CHECK-LE-NEXT: add.w r0, r12, r1
; CHECK-LE-NEXT: pop {r4, pc}
+; CHECK-LE-NEXT: .p2align 2
; CHECK-LE-NEXT: .LBB0_4:
; CHECK-LE-NEXT: mov.w r12, #0
; CHECK-LE-NEXT: movs r1, #0
@@ -58,6 +59,7 @@ define i32 @add_user(i32 %arg, i32* nocapture readnone %arg1, i16* nocapture rea
; CHECK-BE-NEXT: @ %bb.3: @ %for.cond.cleanup
; CHECK-BE-NEXT: add.w r0, r12, r1
; CHECK-BE-NEXT: pop {r4, r5, r6, pc}
+; CHECK-BE-NEXT: .p2align 2
; CHECK-BE-NEXT: .LBB0_4:
; CHECK-BE-NEXT: mov.w r12, #0
; CHECK-BE-NEXT: movs r1, #0
@@ -129,6 +131,7 @@ define i32 @mul_bottom_user(i32 %arg, i32* nocapture readnone %arg1, i16* nocapt
; CHECK-LE-NEXT: @ %bb.3: @ %for.cond.cleanup
; CHECK-LE-NEXT: add.w r0, r12, r1
; CHECK-LE-NEXT: pop {r4, r5, r7, pc}
+; CHECK-LE-NEXT: .p2align 2
; CHECK-LE-NEXT: .LBB1_4:
; CHECK-LE-NEXT: mov.w r12, #0
; CHECK-LE-NEXT: movs r1, #0
@@ -161,6 +164,7 @@ define i32 @mul_bottom_user(i32 %arg, i32* nocapture readnone %arg1, i16* nocapt
; CHECK-BE-NEXT: @ %bb.3: @ %for.cond.cleanup
; CHECK-BE-NEXT: add.w r0, r12, r1
; CHECK-BE-NEXT: pop {r4, r5, r6, pc}
+; CHECK-BE-NEXT: .p2align 2
; CHECK-BE-NEXT: .LBB1_4:
; CHECK-BE-NEXT: mov.w r12, #0
; CHECK-BE-NEXT: movs r1, #0
@@ -232,6 +236,7 @@ define i32 @mul_top_user(i32 %arg, i32* nocapture readnone %arg1, i16* nocapture
; CHECK-LE-NEXT: @ %bb.3: @ %for.cond.cleanup
; CHECK-LE-NEXT: add.w r0, r12, r1
; CHECK-LE-NEXT: pop {r4, pc}
+; CHECK-LE-NEXT: .p2align 2
; CHECK-LE-NEXT: .LBB2_4:
; CHECK-LE-NEXT: mov.w r12, #0
; CHECK-LE-NEXT: movs r1, #0
@@ -264,6 +269,7 @@ define i32 @mul_top_user(i32 %arg, i32* nocapture readnone %arg1, i16* nocapture
; CHECK-BE-NEXT: @ %bb.3: @ %for.cond.cleanup
; CHECK-BE-NEXT: add.w r0, r12, r1
; CHECK-BE-NEXT: pop {r4, r5, r6, pc}
+; CHECK-BE-NEXT: .p2align 2
; CHECK-BE-NEXT: .LBB2_4:
; CHECK-BE-NEXT: mov.w r12, #0
; CHECK-BE-NEXT: movs r1, #0
@@ -335,6 +341,7 @@ define i32 @and_user(i32 %arg, i32* nocapture readnone %arg1, i16* nocapture rea
; CHECK-LE-NEXT: @ %bb.3: @ %for.cond.cleanup
; CHECK-LE-NEXT: add.w r0, r12, r1
; CHECK-LE-NEXT: pop {r4, pc}
+; CHECK-LE-NEXT: .p2align 2
; CHECK-LE-NEXT: .LBB3_4:
; CHECK-LE-NEXT: mov.w r12, #0
; CHECK-LE-NEXT: movs r1, #0
@@ -367,6 +374,7 @@ define i32 @and_user(i32 %arg, i32* nocapture readnone %arg1, i16* nocapture rea
; CHECK-BE-NEXT: @ %bb.3: @ %for.cond.cleanup
; CHECK-BE-NEXT: add.w r0, r12, r1
; CHECK-BE-NEXT: pop {r4, r5, r6, pc}
+; CHECK-BE-NEXT: .p2align 2
; CHECK-BE-NEXT: .LBB3_4:
; CHECK-BE-NEXT: mov.w r12, #0
; CHECK-BE-NEXT: movs r1, #0
@@ -440,6 +448,7 @@ define i32 @multi_uses(i32 %arg, i32* nocapture readnone %arg1, i16* nocapture r
; CHECK-LE-NEXT: @ %bb.3: @ %for.cond.cleanup
; CHECK-LE-NEXT: add.w r0, lr, r12
; CHECK-LE-NEXT: pop {r4, pc}
+; CHECK-LE-NEXT: .p2align 2
; CHECK-LE-NEXT: .LBB4_4:
; CHECK-LE-NEXT: mov.w lr, #0
; CHECK-LE-NEXT: mov.w r12, #0
@@ -474,6 +483,7 @@ define i32 @multi_uses(i32 %arg, i32* nocapture readnone %arg1, i16* nocapture r
; CHECK-BE-NEXT: @ %bb.3: @ %for.cond.cleanup
; CHECK-BE-NEXT: add.w r0, r12, lr
; CHECK-BE-NEXT: pop {r4, r5, r6, pc}
+; CHECK-BE-NEXT: .p2align 2
; CHECK-BE-NEXT: .LBB4_4:
; CHECK-BE-NEXT: mov.w r12, #0
; CHECK-BE-NEXT: mov.w lr, #0
diff --git a/llvm/test/CodeGen/Thumb2/aligned-nonfallthrough.ll b/llvm/test/CodeGen/Thumb2/aligned-nonfallthrough.ll
index 90bf4df53f30..043268d769d1 100644
--- a/llvm/test/CodeGen/Thumb2/aligned-nonfallthrough.ll
+++ b/llvm/test/CodeGen/Thumb2/aligned-nonfallthrough.ll
@@ -57,6 +57,7 @@ define i64 @loopif(i32* nocapture readonly %x, i32 %y, i32 %n) {
; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup
; CHECK-NEXT: mov r1, r3
; CHECK-NEXT: pop {r7, pc}
+; CHECK-NEXT: .p2align 2
; CHECK-NEXT: .LBB1_4:
; CHECK-NEXT: movs r0, #0
; CHECK-NEXT: movs r3, #0
More information about the llvm-branch-commits
mailing list