[llvm] [X86][NewPM] Port x86-fast-pre-tile-config (PR #174323)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Jan 4 01:56:22 PST 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-x86
Author: Aiden Grossman (boomanaiden154)
<details>
<summary>Changes</summary>
A standard new-pass-manager port: rename the legacy pass to `X86FastPreTileConfigLegacy`, refactor the logic into a shared `X86FastPreTileConfigImpl` class, register the new `X86FastPreTileConfigPass` (as `x86-fast-pre-tile-config`) with the new pass manager, and update the MIR tests with `-passes=` RUN lines for coverage.
---
Full diff: https://github.com/llvm/llvm-project/pull/174323.diff
11 Files Affected:
- (modified) llvm/lib/Target/X86/X86.h (+10-2)
- (modified) llvm/lib/Target/X86/X86FastPreTileConfig.cpp (+47-22)
- (modified) llvm/lib/Target/X86/X86PassRegistry.def (+1-1)
- (modified) llvm/lib/Target/X86/X86TargetMachine.cpp (+2-2)
- (modified) llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi.mir (+2-1)
- (modified) llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi2.mir (+2-1)
- (modified) llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi4.mir (+2-1)
- (modified) llvm/test/CodeGen/X86/AMX/amx-fastconfig-spill.mir (+2-1)
- (modified) llvm/test/CodeGen/X86/AMX/amx-fastconfig.mir (+2-1)
- (modified) llvm/test/CodeGen/X86/AMX/amx-fastpreconfig.mir (+2-1)
- (modified) llvm/test/CodeGen/X86/AMX/amx-sink-config-after-calls.mir (+1-1)
``````````diff
diff --git a/llvm/lib/Target/X86/X86.h b/llvm/lib/Target/X86/X86.h
index f81bc5bba9c28..f2f827765c201 100644
--- a/llvm/lib/Target/X86/X86.h
+++ b/llvm/lib/Target/X86/X86.h
@@ -17,6 +17,7 @@
#include "llvm/CodeGen/MachineFunctionAnalysisManager.h"
#include "llvm/IR/Analysis.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/PassInfo.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Target/TargetMachine.h"
@@ -130,7 +131,14 @@ FunctionPass *createX86DynAllocaExpanderLegacyPass();
FunctionPass *createX86TileConfigPass();
/// Return a pass that preconfig the tile registers before fast reg allocation.
-FunctionPass *createX86FastPreTileConfigPass();
+class X86FastPreTileConfigPass
+ : public PassInfoMixin<X86FastPreTileConfigPass> {
+public:
+ PreservedAnalyses run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &MFAM);
+};
+
+FunctionPass *createX86FastPreTileConfigLegacyPass();
/// Return a pass that config the tile registers after fast reg allocation.
FunctionPass *createX86FastTileConfigPass();
@@ -305,7 +313,7 @@ void initializeX86DynAllocaExpanderLegacyPass(PassRegistry &);
void initializeX86ExecutionDomainFixPass(PassRegistry &);
void initializeX86ExpandPseudoLegacyPass(PassRegistry &);
void initializeX86FPStackifierLegacyPass(PassRegistry &);
-void initializeX86FastPreTileConfigPass(PassRegistry &);
+void initializeX86FastPreTileConfigLegacyPass(PassRegistry &);
void initializeX86FastTileConfigPass(PassRegistry &);
void initializeX86FixupSetCCPassPass(PassRegistry &);
void initializeX86FlagsCopyLoweringLegacyPass(PassRegistry &);
diff --git a/llvm/lib/Target/X86/X86FastPreTileConfig.cpp b/llvm/lib/Target/X86/X86FastPreTileConfig.cpp
index 25799f4ac0ea0..9efe335666ac2 100644
--- a/llvm/lib/Target/X86/X86FastPreTileConfig.cpp
+++ b/llvm/lib/Target/X86/X86FastPreTileConfig.cpp
@@ -23,24 +23,32 @@
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionAnalysisManager.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachinePassManager.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/IR/Analysis.h"
#include "llvm/Support/Debug.h"
using namespace llvm;
-#define DEBUG_TYPE "fastpretileconfig"
+#define DEBUG_TYPE "x86-fast-pre-tile-config"
STATISTIC(NumStores, "Number of stores added");
STATISTIC(NumLoads, "Number of loads added");
namespace {
-class X86FastPreTileConfig : public MachineFunctionPass {
+class X86FastPreTileConfigImpl {
+public:
+ X86FastPreTileConfigImpl() : StackSlotForVirtReg(-1) {}
+ bool runOnMachineFunction(MachineFunction &MF);
+
+private:
MachineFunction *MF = nullptr;
const X86Subtarget *ST = nullptr;
const TargetInstrInfo *TII = nullptr;
@@ -74,9 +82,11 @@ class X86FastPreTileConfig : public MachineFunctionPass {
void convertPHI(MachineBasicBlock *MBB, MachineInstr &PHI);
void convertPHIs(MachineBasicBlock &MBB);
bool configBasicBlock(MachineBasicBlock &MBB);
+};
+class X86FastPreTileConfigLegacy : public MachineFunctionPass {
public:
- X86FastPreTileConfig() : MachineFunctionPass(ID), StackSlotForVirtReg(-1) {}
+ X86FastPreTileConfigLegacy() : MachineFunctionPass(ID) {}
/// Return the pass name.
StringRef getPassName() const override {
@@ -91,11 +101,11 @@ class X86FastPreTileConfig : public MachineFunctionPass {
} // end anonymous namespace
-char X86FastPreTileConfig::ID = 0;
+char X86FastPreTileConfigLegacy::ID = 0;
-INITIALIZE_PASS_BEGIN(X86FastPreTileConfig, DEBUG_TYPE,
+INITIALIZE_PASS_BEGIN(X86FastPreTileConfigLegacy, DEBUG_TYPE,
"Fast Tile Register Preconfigure", false, false)
-INITIALIZE_PASS_END(X86FastPreTileConfig, DEBUG_TYPE,
+INITIALIZE_PASS_END(X86FastPreTileConfigLegacy, DEBUG_TYPE,
"Fast Tile Register Preconfigure", false, false)
static bool dominates(MachineBasicBlock &MBB,
@@ -114,7 +124,7 @@ static bool dominates(MachineBasicBlock &MBB,
/// This allocates space for the specified virtual register to be held on the
/// stack.
-int X86FastPreTileConfig::getStackSpaceFor(Register VirtReg) {
+int X86FastPreTileConfigImpl::getStackSpaceFor(Register VirtReg) {
// Find the location Reg would belong...
int SS = StackSlotForVirtReg[VirtReg];
// Already has space allocated?
@@ -135,7 +145,8 @@ int X86FastPreTileConfig::getStackSpaceFor(Register VirtReg) {
/// Returns false if \p VirtReg is known to not live out of the current config.
/// If \p VirtReg live out of the current MBB, it must live out of the current
/// config
-bool X86FastPreTileConfig::mayLiveOut(Register VirtReg, MachineInstr *CfgMI) {
+bool X86FastPreTileConfigImpl::mayLiveOut(Register VirtReg,
+ MachineInstr *CfgMI) {
if (MayLiveAcrossBlocks.test(VirtReg.virtRegIndex()))
return true;
@@ -159,7 +170,7 @@ bool X86FastPreTileConfig::mayLiveOut(Register VirtReg, MachineInstr *CfgMI) {
return false;
}
-void X86FastPreTileConfig::InitializeTileConfigStackSpace() {
+void X86FastPreTileConfigImpl::InitializeTileConfigStackSpace() {
MachineBasicBlock &MBB = MF->front();
MachineInstr *MI = &*MBB.getFirstNonPHI();
DebugLoc DL;
@@ -197,8 +208,8 @@ void X86FastPreTileConfig::InitializeTileConfigStackSpace() {
/// Insert spill instruction for \p AssignedReg before \p Before.
/// TODO: Update DBG_VALUEs with \p VirtReg operands with the stack slot.
-void X86FastPreTileConfig::spill(MachineBasicBlock::iterator Before,
- Register VirtReg, bool Kill) {
+void X86FastPreTileConfigImpl::spill(MachineBasicBlock::iterator Before,
+ Register VirtReg, bool Kill) {
LLVM_DEBUG(dbgs() << "Spilling " << printReg(VirtReg, TRI) << " \n");
int FI = getStackSpaceFor(VirtReg);
LLVM_DEBUG(dbgs() << " to stack slot #" << FI << '\n');
@@ -213,9 +224,9 @@ void X86FastPreTileConfig::spill(MachineBasicBlock::iterator Before,
}
/// Insert reload instruction for \p PhysReg before \p Before.
-void X86FastPreTileConfig::reload(MachineBasicBlock::iterator UseMI,
- Register OrigReg, MachineOperand *RowMO,
- MachineOperand *ColMO) {
+void X86FastPreTileConfigImpl::reload(MachineBasicBlock::iterator UseMI,
+ Register OrigReg, MachineOperand *RowMO,
+ MachineOperand *ColMO) {
int FI = getStackSpaceFor(OrigReg);
const TargetRegisterClass &RC = *MRI->getRegClass(OrigReg);
Register TileReg;
@@ -321,8 +332,8 @@ static ShapeT getShape(MachineRegisterInfo *MRI, Register TileReg) {
// t = tileload row, col, s
// The new instruction is inserted at the end of the phi node. The order
// of the original phi node is not ensured.
-void X86FastPreTileConfig::convertPHI(MachineBasicBlock *MBB,
- MachineInstr &PHI) {
+void X86FastPreTileConfigImpl::convertPHI(MachineBasicBlock *MBB,
+ MachineInstr &PHI) {
// 1. Create instruction to get stack slot address of each incoming block.
// 2. Create PHI node for the stack address.
// 3. Create PHI node for shape. If one of the incoming shape is immediate
@@ -432,7 +443,7 @@ static bool isTileRegDef(MachineRegisterInfo *MRI, MachineInstr &MI) {
return false;
}
-void X86FastPreTileConfig::canonicalizePHIs(MachineBasicBlock &MBB) {
+void X86FastPreTileConfigImpl::canonicalizePHIs(MachineBasicBlock &MBB) {
SmallVector<MachineInstr *, 8> PHIs;
for (MachineInstr &MI : MBB) {
@@ -487,7 +498,7 @@ void X86FastPreTileConfig::canonicalizePHIs(MachineBasicBlock &MBB) {
}
}
-void X86FastPreTileConfig::convertPHIs(MachineBasicBlock &MBB) {
+void X86FastPreTileConfigImpl::convertPHIs(MachineBasicBlock &MBB) {
SmallVector<MachineInstr *, 8> PHIs;
for (MachineInstr &MI : MBB) {
if (!MI.isPHI())
@@ -505,7 +516,7 @@ void X86FastPreTileConfig::convertPHIs(MachineBasicBlock &MBB) {
// PreTileConfig should configure the tile registers based on basic
// block.
-bool X86FastPreTileConfig::configBasicBlock(MachineBasicBlock &MBB) {
+bool X86FastPreTileConfigImpl::configBasicBlock(MachineBasicBlock &MBB) {
this->MBB = &MBB;
bool Change = false;
MachineInstr *LastShapeMI = nullptr;
@@ -663,7 +674,7 @@ bool X86FastPreTileConfig::configBasicBlock(MachineBasicBlock &MBB) {
return Change;
}
-bool X86FastPreTileConfig::runOnMachineFunction(MachineFunction &MFunc) {
+bool X86FastPreTileConfigImpl::runOnMachineFunction(MachineFunction &MFunc) {
X86FI = MFunc.getInfo<X86MachineFunctionInfo>();
// Early exit in the common case of non-AMX code.
if (X86FI->getAMXProgModel() != AMXProgModelEnum::ManagedRA)
@@ -708,6 +719,20 @@ bool X86FastPreTileConfig::runOnMachineFunction(MachineFunction &MFunc) {
return Change;
}
-FunctionPass *llvm::createX86FastPreTileConfigPass() {
- return new X86FastPreTileConfig();
+FunctionPass *llvm::createX86FastPreTileConfigLegacyPass() {
+ return new X86FastPreTileConfigLegacy();
+}
+
+bool X86FastPreTileConfigLegacy::runOnMachineFunction(MachineFunction &MF) {
+ X86FastPreTileConfigImpl Impl;
+ return Impl.runOnMachineFunction(MF);
+}
+
+PreservedAnalyses
+X86FastPreTileConfigPass::run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &MFAM) {
+ X86FastPreTileConfigImpl Impl;
+ bool Changed = Impl.runOnMachineFunction(MF);
+ return Changed ? getMachineFunctionPassPreservedAnalyses()
+ : PreservedAnalyses::all();
}
diff --git a/llvm/lib/Target/X86/X86PassRegistry.def b/llvm/lib/Target/X86/X86PassRegistry.def
index 2e825a5ceed55..64a1b1cdb4465 100644
--- a/llvm/lib/Target/X86/X86PassRegistry.def
+++ b/llvm/lib/Target/X86/X86PassRegistry.def
@@ -37,6 +37,7 @@ MACHINE_FUNCTION_PASS("x86-compress-evex", X86CompressEVEXPass())
MACHINE_FUNCTION_PASS("x86-domain-reassignment", X86DomainReassignmentPass())
MACHINE_FUNCTION_PASS("x86-dyn-alloca-expander", X86DynAllocaExpanderPass())
MACHINE_FUNCTION_PASS("x86-expand-pseudo", X86ExpandPseudoPass())
+MACHINE_FUNCTION_PASS("x86-fast-pre-tile-config", X86FastPreTileConfigPass())
MACHINE_FUNCTION_PASS("x86-fixup-leas", X86FixupLEAsPass())
MACHINE_FUNCTION_PASS("x86-flags-copy-lowering", X86FlagsCopyLoweringPass())
MACHINE_FUNCTION_PASS("x86-fp-stackifier", X86FPStackifierPass())
@@ -48,7 +49,6 @@ MACHINE_FUNCTION_PASS("x86-optimize-leas", X86OptimizeLEAsPass())
#define DUMMY_MACHINE_FUNCTION_PASS(NAME, PASS_NAME)
#endif
DUMMY_MACHINE_FUNCTION_PASS("x86-execution-domain-fix", X86ExecutionDomainFix())
-DUMMY_MACHINE_FUNCTION_PASS("fastpretileconfig", X86FastPreTileConfig())
DUMMY_MACHINE_FUNCTION_PASS("fasttileconfig", X86FastTileConfig())
DUMMY_MACHINE_FUNCTION_PASS("x86-fixup-bw-inst", FixupBWInstPass())
DUMMY_MACHINE_FUNCTION_PASS("x86-fixup-inst-tuning", X86FixupInstTuningPass())
diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp
index 827e8c2a0a209..09e1acb4c7a2f 100644
--- a/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -81,7 +81,7 @@ extern "C" LLVM_C_ABI void LLVMInitializeX86Target() {
initializeX86CallFrameOptimizationLegacyPass(PR);
initializeX86CmovConversionLegacyPass(PR);
initializeX86TileConfigPass(PR);
- initializeX86FastPreTileConfigPass(PR);
+ initializeX86FastPreTileConfigLegacyPass(PR);
initializeX86FastTileConfigPass(PR);
initializeKCFIPass(PR);
initializeX86LowerTileCopyPass(PR);
@@ -529,7 +529,7 @@ void X86PassConfig::addPreRegAlloc() {
if (getOptLevel() != CodeGenOptLevel::None)
addPass(createX86PreTileConfigPass());
else
- addPass(createX86FastPreTileConfigPass());
+ addPass(createX86FastPreTileConfigLegacyPass());
}
void X86PassConfig::addMachineSSAOptimization() {
diff --git a/llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi.mir b/llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi.mir
index eef1f43b278d9..c4f0c669a6639 100644
--- a/llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi.mir
+++ b/llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi.mir
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=x86_64-- -mattr=+amx-tile -run-pass=fastpretileconfig -o - %s | FileCheck %s
+# RUN: llc -mtriple=x86_64-- -mattr=+amx-tile -run-pass=x86-fast-pre-tile-config -o - %s | FileCheck %s
+# RUN: llc -mtriple=x86_64-- -mattr=+amx-tile -passes=x86-fast-pre-tile-config -o - %s | FileCheck %s
#
# This case test tile phi is nested accessed, but the its def block is
# not visited yet.
diff --git a/llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi2.mir b/llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi2.mir
index 5843366baab6d..c7b272ab83762 100644
--- a/llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi2.mir
+++ b/llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi2.mir
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=x86_64-- -mattr=+amx-tile -run-pass=fastpretileconfig -o - %s | FileCheck %s
+# RUN: llc -mtriple=x86_64-- -mattr=+amx-tile -run-pass=x86-fast-pre-tile-config -o - %s | FileCheck %s
+# RUN: llc -mtriple=x86_64-- -mattr=+amx-tile -passes=x86-fast-pre-tile-config -o - %s | FileCheck %s
#
# bb.0
# def %0
diff --git a/llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi4.mir b/llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi4.mir
index 4eb8b95085189..f0c7565164408 100644
--- a/llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi4.mir
+++ b/llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi4.mir
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=x86_64-- -mattr=+amx-tile -run-pass=fastpretileconfig -o - %s | FileCheck %s
+# RUN: llc -mtriple=x86_64-- -mattr=+amx-tile -run-pass=x86-fast-pre-tile-config -o - %s | FileCheck %s
+# RUN: llc -mtriple=x86_64-- -mattr=+amx-tile -passes=x86-fast-pre-tile-config -o - %s | FileCheck %s
#
# bb.0
# def %0
diff --git a/llvm/test/CodeGen/X86/AMX/amx-fastconfig-spill.mir b/llvm/test/CodeGen/X86/AMX/amx-fastconfig-spill.mir
index 1ed4328bf132a..f4870fc318d06 100644
--- a/llvm/test/CodeGen/X86/AMX/amx-fastconfig-spill.mir
+++ b/llvm/test/CodeGen/X86/AMX/amx-fastconfig-spill.mir
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=x86_64-- -mattr=+amx-int8,avx512f -run-pass=fastpretileconfig -o - %s | FileCheck %s
+# RUN: llc -mtriple=x86_64-- -mattr=+amx-int8,avx512f -run-pass=x86-fast-pre-tile-config -o - %s | FileCheck %s
+# RUN: llc -mtriple=x86_64-- -mattr=+amx-int8,avx512f -passes=x86-fast-pre-tile-config -o - %s | FileCheck %s
# Test spill/reload across basic block.
diff --git a/llvm/test/CodeGen/X86/AMX/amx-fastconfig.mir b/llvm/test/CodeGen/X86/AMX/amx-fastconfig.mir
index 561ba6f2f4970..8e0d320879344 100644
--- a/llvm/test/CodeGen/X86/AMX/amx-fastconfig.mir
+++ b/llvm/test/CodeGen/X86/AMX/amx-fastconfig.mir
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=x86_64-- -run-pass=fastpretileconfig -o - %s | FileCheck %s
+# RUN: llc -mtriple=x86_64-- -run-pass=x86-fast-pre-tile-config -o - %s | FileCheck %s
+# RUN: llc -mtriple=x86_64-- -passes=x86-fast-pre-tile-config -o - %s | FileCheck %s
--- |
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
diff --git a/llvm/test/CodeGen/X86/AMX/amx-fastpreconfig.mir b/llvm/test/CodeGen/X86/AMX/amx-fastpreconfig.mir
index 0d56feac62681..59678661c9409 100644
--- a/llvm/test/CodeGen/X86/AMX/amx-fastpreconfig.mir
+++ b/llvm/test/CodeGen/X86/AMX/amx-fastpreconfig.mir
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=x86_64-- -mattr=+amx-int8,avx512f -run-pass=fastpretileconfig -o - %s | FileCheck %s
+# RUN: llc -mtriple=x86_64-- -mattr=+amx-int8,avx512f -run-pass=x86-fast-pre-tile-config -o - %s | FileCheck %s
+# RUN: llc -mtriple=x86_64-- -mattr=+amx-int8,avx512f -passes=x86-fast-pre-tile-config -o - %s | FileCheck %s
# Test the case which has TILELOADD being mixed in pseudo AMX instruction
...
diff --git a/llvm/test/CodeGen/X86/AMX/amx-sink-config-after-calls.mir b/llvm/test/CodeGen/X86/AMX/amx-sink-config-after-calls.mir
index 82049dce8a45c..4eea98838910e 100644
--- a/llvm/test/CodeGen/X86/AMX/amx-sink-config-after-calls.mir
+++ b/llvm/test/CodeGen/X86/AMX/amx-sink-config-after-calls.mir
@@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
-# RUN: llc -mtriple=x86_64-- -mattr=+amx-int8,avx512f -run-pass="fastpretileconfig,regallocfast,fasttileconfig" -verify-machineinstrs -o - %s | FileCheck %s
+# RUN: llc -mtriple=x86_64-- -mattr=+amx-int8,avx512f -run-pass="x86-fast-pre-tile-config,regallocfast,fasttileconfig" -verify-machineinstrs -o - %s | FileCheck %s
# Test to verify that ldtilecfg instructions are sinked closer to tile defining
# instructions after a call. This ensures call does not overwrite values in
``````````
</details>
https://github.com/llvm/llvm-project/pull/174323
More information about the llvm-commits
mailing list