[llvm] [MachineBlockPlacement][X86] Use max of MDAlign and TLIAlign to align Loops. (PR #71026)
Freddy Ye via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 1 23:36:50 PDT 2023
https://github.com/FreddyLeaf updated https://github.com/llvm/llvm-project/pull/71026
>From 82e7581e957503b6dac934e039648efbe67052b8 Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye at intel.com>
Date: Thu, 2 Nov 2023 13:59:47 +0800
Subject: [PATCH] [MachineBlockPlacement][X86] Use max of MDAlign and TLIAlign
to align Loops.
This patch adds backend support for consuming a new loop metadata node:
!1 = !{!"llvm.loop.align", i32 64}
which is generated from clang's new loop attribute:
[[clang::code_align()]]
clang patch: #70762
---
llvm/include/llvm/CodeGen/MachineLoopInfo.h | 11 +-
llvm/lib/CodeGen/MachineBlockPlacement.cpp | 30 +++++-
llvm/lib/CodeGen/MachineLoopInfo.cpp | 50 +++++++++-
llvm/test/CodeGen/X86/code-align-loops.ll | 105 ++++++++++++++++++++
4 files changed, 190 insertions(+), 6 deletions(-)
create mode 100644 llvm/test/CodeGen/X86/code-align-loops.ll
diff --git a/llvm/include/llvm/CodeGen/MachineLoopInfo.h b/llvm/include/llvm/CodeGen/MachineLoopInfo.h
index cf8d1f17bde7687..f2ab162705830a4 100644
--- a/llvm/include/llvm/CodeGen/MachineLoopInfo.h
+++ b/llvm/include/llvm/CodeGen/MachineLoopInfo.h
@@ -31,6 +31,7 @@
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/IR/CFG.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/Support/GenericLoopInfo.h"
@@ -57,7 +58,7 @@ class MachineLoop : public LoopBase<MachineBasicBlock, MachineLoop> {
/// loop test. This will return the latch block if it's one of the exiting
/// blocks. Otherwise, return the exiting block. Return 'null' when
/// multiple exiting blocks are present.
- MachineBasicBlock *findLoopControlBlock();
+ MachineBasicBlock *findLoopControlBlock() const;
/// Return the debug location of the start of this loop.
/// This looks for a BB terminating instruction with a known debug
@@ -66,6 +67,14 @@ class MachineLoop : public LoopBase<MachineBasicBlock, MachineLoop> {
/// it returns an unknown location.
DebugLoc getStartLoc() const;
+ /// \brief Find the llvm.loop metadata for this loop.
+ /// If each branch to the header of this loop contains the same llvm.loop
+ /// metadata, then this metadata node is returned. Otherwise, if any
+ /// latch instruction does not contain the llvm.loop metadata or
+ /// multiple latch instructions contain different llvm.loop metadata nodes,
+ /// then null is returned.
+ MDNode *getLoopID() const;
+
/// Returns true if the instruction is loop invariant.
/// I.e., all virtual register operands are defined outside of the loop,
/// physical registers aren't accessed explicitly, and there are no side
diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index f783eeca047433a..11b35d6f0a9632e 100644
--- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -2919,8 +2919,30 @@ void MachineBlockPlacement::alignBlocks() {
if (!L)
continue;
- const Align Align = TLI->getPrefLoopAlignment(L);
- if (Align == 1)
+ const Align TLIAlign = TLI->getPrefLoopAlignment(L);
+ unsigned MDAlign = 1;
+ MDNode *LoopID = L->getLoopID();
+ if (LoopID) {
+ for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) {
+ MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
+ if (MD == nullptr)
+ continue;
+ MDString *S = dyn_cast<MDString>(MD->getOperand(0));
+ if (S == nullptr)
+ continue;
+ if (S->getString() == "llvm.loop.align") {
+ assert(MD->getNumOperands() == 2 &&
+ "per-loop align metadata should have two operands.");
+ MDAlign =
+ mdconst::extract<ConstantInt>(MD->getOperand(1))->getZExtValue();
+ assert(MDAlign >= 1 && "per-loop align value must be positive.");
+ }
+ }
+ }
+
+ // Use max of the TLIAlign and MDAlign
+ const Align LoopAlign = std::max(TLIAlign, Align(MDAlign));
+ if (LoopAlign == 1)
continue; // Don't care about loop alignment.
// If the block is cold relative to the function entry don't waste space
@@ -2959,7 +2981,7 @@ void MachineBlockPlacement::alignBlocks() {
// Force alignment if all the predecessors are jumps. We already checked
// that the block isn't cold above.
if (!LayoutPred->isSuccessor(ChainBB)) {
- ChainBB->setAlignment(Align);
+ ChainBB->setAlignment(LoopAlign);
DetermineMaxAlignmentPadding();
continue;
}
@@ -2972,7 +2994,7 @@ void MachineBlockPlacement::alignBlocks() {
MBPI->getEdgeProbability(LayoutPred, ChainBB);
BlockFrequency LayoutEdgeFreq = MBFI->getBlockFreq(LayoutPred) * LayoutProb;
if (LayoutEdgeFreq <= (Freq * ColdProb)) {
- ChainBB->setAlignment(Align);
+ ChainBB->setAlignment(LoopAlign);
DetermineMaxAlignmentPadding();
}
}
diff --git a/llvm/lib/CodeGen/MachineLoopInfo.cpp b/llvm/lib/CodeGen/MachineLoopInfo.cpp
index 37a0ff3d71c87e8..75875142ac49d68 100644
--- a/llvm/lib/CodeGen/MachineLoopInfo.cpp
+++ b/llvm/lib/CodeGen/MachineLoopInfo.cpp
@@ -88,7 +88,7 @@ MachineBasicBlock *MachineLoop::getBottomBlock() {
return BotMBB;
}
-MachineBasicBlock *MachineLoop::findLoopControlBlock() {
+MachineBasicBlock *MachineLoop::findLoopControlBlock() const {
if (MachineBasicBlock *Latch = getLoopLatch()) {
if (isLoopExiting(Latch))
return Latch;
@@ -151,6 +151,54 @@ MachineLoopInfo::findLoopPreheader(MachineLoop *L, bool SpeculativePreheader,
return Preheader;
}
+// Return the llvm.loop node shared by every IR branch back to the header, or
+// null if a back-edge lacks it, back-edges disagree, or the node is malformed
+// (a loop ID must reference itself in operand 0).
+MDNode *MachineLoop::getLoopID() const {
+  MDNode *LoopID = nullptr;
+  if (auto *MBB = findLoopControlBlock()) {
+    // The control block is the latch when it exits the loop, otherwise the
+    // sole exiting block; read the loop ID from its IR terminator.
+    const auto *BB = MBB->getBasicBlock();
+    if (!BB)
+      return nullptr;
+    if (const auto *TI = BB->getTerminator())
+      LoopID = TI->getMetadata(LLVMContext::MD_loop);
+  } else if (auto *MBB = getHeader()) {
+    // No single control block: inspect every block of the loop and require
+    // all IR terminators that branch to the header to agree on the loop ID.
+    if (const auto *H = MBB->getBasicBlock()) {
+      for (auto *LoopMBB : this->blocks()) {
+        const auto *BB = LoopMBB->getBasicBlock();
+        if (!BB)
+          return nullptr;
+        const auto *TI = BB->getTerminator();
+        if (!TI)
+          return nullptr;
+        // Only latches (blocks branching to the header) constrain the loop
+        // ID; bailing out on other blocks would reject almost every loop.
+        bool IsLatch = false;
+        for (const auto *S : successors(TI))
+          IsLatch |= (S == H);
+        if (!IsLatch)
+          continue;
+        MDNode *MD = TI->getMetadata(LLVMContext::MD_loop);
+        if (!MD)
+          return nullptr;
+        if (!LoopID)
+          LoopID = MD;
+        else if (MD != LoopID)
+          return nullptr;
+      }
+    }
+  }
+  if (LoopID &&
+      (LoopID->getNumOperands() == 0 || LoopID->getOperand(0) != LoopID)) {
+    LoopID = nullptr;
+  }
+  return LoopID;
+}
+
bool MachineLoop::isLoopInvariant(MachineInstr &I) const {
MachineFunction *MF = I.getParent()->getParent();
MachineRegisterInfo *MRI = &MF->getRegInfo();
diff --git a/llvm/test/CodeGen/X86/code-align-loops.ll b/llvm/test/CodeGen/X86/code-align-loops.ll
new file mode 100644
index 000000000000000..3fc810ca55d3a00
--- /dev/null
+++ b/llvm/test/CodeGen/X86/code-align-loops.ll
@@ -0,0 +1,105 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-pc-linux-gnu | FileCheck %s -check-prefixes=CHECK,ALIGN
+; RUN: llc < %s -mtriple=x86_64-pc-linux-gnu -align-loops=32 | FileCheck %s -check-prefixes=CHECK,ALIGN32
+; RUN: llc < %s -mtriple=x86_64-pc-linux-gnu -align-loops=256 | FileCheck %s -check-prefixes=CHECK,ALIGN256
+
+; This test is to check if .p2align can be correctly generated by considering
+; 1. -align-loops=N from llc option
+; 2. loop metadata node !{!"llvm.loop.align", i32 64}
+; The test IR is generated from below simple C file:
+; $ clang -S -emit-llvm loop.c
+; $ cat loop.c
+; void bar();
+; void var();
+; void foo(int a) {
+; for (int i = 0; i < a; ++i)
+; bar();
+; for (int i = 0; i < a; ++i)
+; var();
+; }
+; The difference between test1 and test2 is that test2 only sets loop metadata on the second loop.
+
+; CHECK-LABEL: test1:
+; ALIGN: .p2align 6, 0x90
+; ALIGN-NEXT: .LBB0_2: # %for.body
+; ALIGN: .p2align 9, 0x90
+; ALIGN-NEXT: .LBB0_3: # %for.body
+
+; ALIGN32: .p2align 6, 0x90
+; ALIGN32-NEXT: .LBB0_2: # %for.body
+; ALIGN32: .p2align 9, 0x90
+; ALIGN32-NEXT: .LBB0_3: # %for.body
+
+; ALIGN256: .p2align 8, 0x90
+; ALIGN256-NEXT: .LBB0_2: # %for.body
+; ALIGN256: .p2align 9, 0x90
+; ALIGN256-NEXT: .LBB0_3: # %for.body
+
+define void @test1(i32 %a) nounwind { ; both loops carry llvm.loop.align metadata
+entry:
+ %cmp12 = icmp sgt i32 %a, 0
+ br i1 %cmp12, label %for.body, label %for.cond.cleanup4
+
+for.body: ; preds = %entry, %for.body
+ %i.013 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+ tail call void (...) @bar()
+ %inc = add nuw nsw i32 %i.013, 1
+ %exitcond.not = icmp eq i32 %inc, %a
+ br i1 %exitcond.not, label %for.body5, label %for.body, !llvm.loop !0 ; first loop's back-edge: loop ID !0 (llvm.loop.align 64)
+
+for.cond.cleanup4: ; preds = %for.body5, %entry
+ ret void
+
+for.body5: ; preds = %for.body, %for.body5
+ %i1.015 = phi i32 [ %inc7, %for.body5 ], [ 0, %for.body ]
+ tail call void (...) @var()
+ %inc7 = add nuw nsw i32 %i1.015, 1
+ %exitcond16.not = icmp eq i32 %inc7, %a
+ br i1 %exitcond16.not, label %for.cond.cleanup4, label %for.body5, !llvm.loop !2 ; second loop's back-edge: loop ID !2 (llvm.loop.align 512)
+}
+
+; CHECK-LABEL: test2:
+; ALIGN: .p2align 4, 0x90
+; ALIGN-NEXT: .LBB1_2: # %for.body
+; ALIGN: .p2align 9, 0x90
+; ALIGN-NEXT: .LBB1_3: # %for.body
+
+; ALIGN32: .p2align 5, 0x90
+; ALIGN32-NEXT: .LBB1_2: # %for.body
+; ALIGN32: .p2align 9, 0x90
+; ALIGN32-NEXT: .LBB1_3: # %for.body
+
+; ALIGN256: .p2align 8, 0x90
+; ALIGN256-NEXT: .LBB1_2: # %for.body
+; ALIGN256: .p2align 9, 0x90
+; ALIGN256-NEXT: .LBB1_3: # %for.body
+define void @test2(i32 %a) nounwind { ; only the second loop carries llvm.loop.align metadata
+entry:
+ %cmp12 = icmp sgt i32 %a, 0
+ br i1 %cmp12, label %for.body, label %for.cond.cleanup4
+
+for.body: ; preds = %entry, %for.body
+ %i.013 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+ tail call void (...) @bar()
+ %inc = add nuw nsw i32 %i.013, 1
+ %exitcond.not = icmp eq i32 %inc, %a
+ br i1 %exitcond.not, label %for.body5, label %for.body ; first loop's back-edge: no llvm.loop metadata attached
+
+for.cond.cleanup4: ; preds = %for.body5, %entry
+ ret void
+
+for.body5: ; preds = %for.body, %for.body5
+ %i1.015 = phi i32 [ %inc7, %for.body5 ], [ 0, %for.body ]
+ tail call void (...) @var()
+ %inc7 = add nuw nsw i32 %i1.015, 1
+ %exitcond16.not = icmp eq i32 %inc7, %a
+ br i1 %exitcond16.not, label %for.cond.cleanup4, label %for.body5, !llvm.loop !2 ; second loop's back-edge: loop ID !2 (llvm.loop.align 512)
+}
+
+declare void @bar(...)
+declare void @var(...)
+
+!0 = distinct !{!0, !1} ; loop ID node: operand 0 refers back to the node itself
+!1 = !{!"llvm.loop.align", i32 64} ; alignment request used by the first loop of test1
+!2 = distinct !{!2, !3} ; loop ID node: operand 0 refers back to the node itself
+!3 = !{!"llvm.loop.align", i32 512} ; alignment request used by the second loop of test1 and test2
More information about the llvm-commits
mailing list