[llvm] [MachineBlockPlacement][X86] Use max of MDAlign and TLIAlign to align Loops. (PR #71026)

via llvm-commits llvm-commits at lists.llvm.org
Wed Nov 1 23:31:23 PDT 2023


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-x86

Author: Freddy Ye (FreddyLeaf)

<details>
<summary>Changes</summary>

This patch adds backend support for consuming a new loop metadata node:
!1 = !{!"llvm.loop.align", i32 64}
which is generated from clang's new loop attribute:
[[clang::code_align()]]
clang patch: #<!-- -->70762


---
Full diff: https://github.com/llvm/llvm-project/pull/71026.diff


4 Files Affected:

- (modified) llvm/include/llvm/CodeGen/MachineLoopInfo.h (+10-1) 
- (modified) llvm/lib/CodeGen/MachineBlockPlacement.cpp (+26-4) 
- (modified) llvm/lib/CodeGen/MachineLoopInfo.cpp (+49-1) 
- (added) llvm/test/CodeGen/X86/code-align-loops.ll (+105) 


``````````diff
diff --git a/llvm/include/llvm/CodeGen/MachineLoopInfo.h b/llvm/include/llvm/CodeGen/MachineLoopInfo.h
index cf8d1f17bde7687..f2ab162705830a4 100644
--- a/llvm/include/llvm/CodeGen/MachineLoopInfo.h
+++ b/llvm/include/llvm/CodeGen/MachineLoopInfo.h
@@ -31,6 +31,7 @@
 
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/IR/CFG.h"
 #include "llvm/IR/DebugLoc.h"
 #include "llvm/Support/GenericLoopInfo.h"
 
@@ -57,7 +58,7 @@ class MachineLoop : public LoopBase<MachineBasicBlock, MachineLoop> {
   /// loop test. This will return the latch block if it's one of the exiting
   /// blocks. Otherwise, return the exiting block. Return 'null' when
   /// multiple exiting blocks are present.
-  MachineBasicBlock *findLoopControlBlock();
+  MachineBasicBlock *findLoopControlBlock() const;
 
   /// Return the debug location of the start of this loop.
   /// This looks for a BB terminating instruction with a known debug
@@ -66,6 +67,14 @@ class MachineLoop : public LoopBase<MachineBasicBlock, MachineLoop> {
   /// it returns an unknown location.
   DebugLoc getStartLoc() const;
 
+  /// \brief Find the llvm.loop metadata for this loop.
+  /// If each branch to the header of this loop contains the same llvm.loop
+  /// metadata, then this metadata node is returned. Otherwise, if any
+  /// latch instruction does not contain the llvm.loop metadata or
+  /// multiple latch instructions contain different llvm.loop metadata nodes,
+  /// then null is returned.
+  MDNode *getLoopID() const;
+
   /// Returns true if the instruction is loop invariant.
   /// I.e., all virtual register operands are defined outside of the loop,
   /// physical registers aren't accessed explicitly, and there are no side
diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index f783eeca047433a..11b35d6f0a9632e 100644
--- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -2919,8 +2919,30 @@ void MachineBlockPlacement::alignBlocks() {
     if (!L)
       continue;
 
-    const Align Align = TLI->getPrefLoopAlignment(L);
-    if (Align == 1)
+    const Align TLIAlign = TLI->getPrefLoopAlignment(L);
+    unsigned MDAlign = 1;
+    MDNode *LoopID = L->getLoopID();
+    if (LoopID) {
+      for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) {
+        MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
+        if (MD == nullptr)
+          continue;
+        MDString *S = dyn_cast<MDString>(MD->getOperand(0));
+        if (S == nullptr)
+          continue;
+        if (S->getString() == "llvm.loop.align") {
+          assert(MD->getNumOperands() == 2 &&
+                 "per-loop align metadata should have two operands.");
+          MDAlign =
+              mdconst::extract<ConstantInt>(MD->getOperand(1))->getZExtValue();
+          assert(MDAlign >= 1 && "per-loop align value must be positive.");
+        }
+      }
+    }
+
+    // Use max of the TLIAlign and MDAlign
+    const Align LoopAlign = std::max(TLIAlign, Align(MDAlign));
+    if (LoopAlign == 1)
       continue; // Don't care about loop alignment.
 
     // If the block is cold relative to the function entry don't waste space
@@ -2959,7 +2981,7 @@ void MachineBlockPlacement::alignBlocks() {
     // Force alignment if all the predecessors are jumps. We already checked
     // that the block isn't cold above.
     if (!LayoutPred->isSuccessor(ChainBB)) {
-      ChainBB->setAlignment(Align);
+      ChainBB->setAlignment(LoopAlign);
       DetermineMaxAlignmentPadding();
       continue;
     }
@@ -2972,7 +2994,7 @@ void MachineBlockPlacement::alignBlocks() {
         MBPI->getEdgeProbability(LayoutPred, ChainBB);
     BlockFrequency LayoutEdgeFreq = MBFI->getBlockFreq(LayoutPred) * LayoutProb;
     if (LayoutEdgeFreq <= (Freq * ColdProb)) {
-      ChainBB->setAlignment(Align);
+      ChainBB->setAlignment(LoopAlign);
       DetermineMaxAlignmentPadding();
     }
   }
diff --git a/llvm/lib/CodeGen/MachineLoopInfo.cpp b/llvm/lib/CodeGen/MachineLoopInfo.cpp
index 37a0ff3d71c87e8..75875142ac49d68 100644
--- a/llvm/lib/CodeGen/MachineLoopInfo.cpp
+++ b/llvm/lib/CodeGen/MachineLoopInfo.cpp
@@ -88,7 +88,7 @@ MachineBasicBlock *MachineLoop::getBottomBlock() {
   return BotMBB;
 }
 
-MachineBasicBlock *MachineLoop::findLoopControlBlock() {
+MachineBasicBlock *MachineLoop::findLoopControlBlock() const {
   if (MachineBasicBlock *Latch = getLoopLatch()) {
     if (isLoopExiting(Latch))
       return Latch;
@@ -151,6 +151,54 @@ MachineLoopInfo::findLoopPreheader(MachineLoop *L, bool SpeculativePreheader,
   return Preheader;
 }
 
+MDNode *MachineLoop::getLoopID() const {
+  MDNode *LoopID = nullptr;
+  if (auto *MBB = findLoopControlBlock()) {
+    // If there is a single latch block, then the metadata
+    // node is attached to its terminating instruction.
+    const auto *BB = MBB->getBasicBlock();
+    if (!BB)
+      return nullptr;
+    if (const auto *TI = BB->getTerminator())
+      LoopID = TI->getMetadata(LLVMContext::MD_loop);
+  } else if (auto *MBB = getHeader()) {
+    // There seem to be multiple latch blocks, so we have to
+    // visit all predecessors of the loop header and check
+    // their terminating instructions for the metadata.
+    if (const auto *H = MBB->getBasicBlock()) {
+      // Walk over all blocks in the loop.
+      for (auto *MBB : this->blocks()) {
+        const auto *BB = MBB->getBasicBlock();
+        if (!BB)
+          return nullptr;
+        const auto *TI = BB->getTerminator();
+        if (!TI)
+          return nullptr;
+        MDNode *MD = nullptr;
+        // Check if this terminating instruction jumps to the loop header.
+        for (const auto *S : successors(TI)) {
+          if (S == H) {
+            // This is a jump to the header - gather the metadata from it.
+            MD = TI->getMetadata(LLVMContext::MD_loop);
+            break;
+          }
+        }
+        if (!MD)
+          return nullptr;
+        if (!LoopID)
+          LoopID = MD;
+        else if (MD != LoopID)
+          return nullptr;
+      }
+    }
+  }
+  if (LoopID &&
+      (LoopID->getNumOperands() == 0 || LoopID->getOperand(0) != LoopID)) {
+    LoopID = nullptr;
+  }
+  return LoopID;
+}
+
 bool MachineLoop::isLoopInvariant(MachineInstr &I) const {
   MachineFunction *MF = I.getParent()->getParent();
   MachineRegisterInfo *MRI = &MF->getRegInfo();
diff --git a/llvm/test/CodeGen/X86/code-align-loops.ll b/llvm/test/CodeGen/X86/code-align-loops.ll
new file mode 100644
index 000000000000000..ce5522f9740e7eb
--- /dev/null
+++ b/llvm/test/CodeGen/X86/code-align-loops.ll
@@ -0,0 +1,105 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-pc-linux-gnu | FileCheck %s -check-prefixes=CHECK,ALIGN
+; RUN: llc < %s -mtriple=x86_64-pc-linux-gnu -align-loops=32 | FileCheck %s -check-prefixes=CHECK,ALIGN32
+; RUN: llc < %s -mtriple=x86_64-pc-linux-gnu -align-loops=256 | FileCheck %s -check-prefixes=CHECK,ALIGN256
+
+; This test checks that .p2align is generated correctly when considering both
+; 1. -align-loops=N from llc option
+; 2. loop metadata node !{!"llvm.loop.align", i32 64}
+; The test IR is generated from below simple C file:
+; $ clang -S -emit-llvm loop.c
+; $ cat loop.c
+; void bar();
+; void var();
+; void foo(int a) {
+;   for (int i = 0; i < a; ++i)
+;     bar();
+;   for (int i = 0; i < a; ++i)
+;     var();
+; }
+; The difference between test1 and test2 is that test2 only attaches loop metadata to the second loop.
+
+; CHECK-LABEL: test1:
+; ALIGN: .p2align 6, 0x90
+; ALIGN-NEXT: .LBB0_2: # %for.body
+; ALIGN: .p2align 9, 0x90
+; ALIGN-NEXT: .LBB0_3: # %for.body
+
+; ALIGN32: .p2align 6, 0x90
+; ALIGN32-NEXT: .LBB0_2: # %for.body
+; ALIGN32: .p2align 9, 0x90
+; ALIGN32-NEXT: .LBB0_3: # %for.body
+
+; ALIGN256: .p2align 8, 0x90
+; ALIGN256-NEXT: .LBB0_2: # %for.body
+; ALIGN256: .p2align 9, 0x90
+; ALIGN256-NEXT: .LBB0_3: # %for.body
+
+define void @test1(i32 %a) nounwind {
+entry:
+  %cmp12 = icmp sgt i32 %a, 0
+  br i1 %cmp12, label %for.body, label %for.cond.cleanup4
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.013 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+  tail call void (...) @bar()
+  %inc = add nuw nsw i32 %i.013, 1
+  %exitcond.not = icmp eq i32 %inc, %a
+  br i1 %exitcond.not, label %for.body5, label %for.body, !llvm.loop !0
+
+for.cond.cleanup4:                                ; preds = %for.body5, %entry
+  ret void
+
+for.body5:                                        ; preds = %for.body, %for.body5
+  %i1.015 = phi i32 [ %inc7, %for.body5 ], [ 0, %for.body ]
+  tail call void (...) @var()
+  %inc7 = add nuw nsw i32 %i1.015, 1
+  %exitcond16.not = icmp eq i32 %inc7, %a
+  br i1 %exitcond16.not, label %for.cond.cleanup4, label %for.body5, !llvm.loop !2
+}
+
+; CHECK-LABEL: test2:
+; ALIGN: .p2align 4, 0x90
+; ALIGN-NEXT: .LBB1_2: # %for.body
+; ALIGN: .p2align 9, 0x90
+; ALIGN-NEXT: .LBB1_3: # %for.body
+
+; ALIGN32: .p2align 5, 0x90
+; ALIGN32-NEXT: .LBB1_2: # %for.body
+; ALIGN32: .p2align 9, 0x90
+; ALIGN32-NEXT: .LBB1_3: # %for.body
+
+; ALIGN256: .p2align 8, 0x90
+; ALIGN256-NEXT: .LBB1_2: # %for.body
+; ALIGN256: .p2align 9, 0x90
+; ALIGN256-NEXT: .LBB1_3: # %for.body
+define void @test2(i32 %a) nounwind {
+entry:
+  %cmp12 = icmp sgt i32 %a, 0
+  br i1 %cmp12, label %for.body, label %for.cond.cleanup4
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.013 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+  tail call void (...) @bar()
+  %inc = add nuw nsw i32 %i.013, 1
+  %exitcond.not = icmp eq i32 %inc, %a
+  br i1 %exitcond.not, label %for.body5, label %for.body
+
+for.cond.cleanup4:                                ; preds = %for.body5, %entry
+  ret void
+
+for.body5:                                        ; preds = %for.body, %for.body5
+  %i1.015 = phi i32 [ %inc7, %for.body5 ], [ 0, %for.body ]
+  tail call void (...) @var()
+  %inc7 = add nuw nsw i32 %i1.015, 1
+  %exitcond16.not = icmp eq i32 %inc7, %a
+  br i1 %exitcond16.not, label %for.cond.cleanup4, label %for.body5, !llvm.loop !2
+}
+
+declare void @bar(...)
+declare void @var(...)
+
+!0 = distinct !{!0, !1}
+!1 = !{!"llvm.loop.align", i32 64}
+!2 = distinct !{!2, !3}
+!3 = !{!"llvm.loop.align", i32 512}

``````````

</details>


https://github.com/llvm/llvm-project/pull/71026


More information about the llvm-commits mailing list