[llvm] f80b273 - [X86][NewPM] Port X86LowerAMXType to NewPM
via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 27 06:06:30 PDT 2025
Author: Aiden Grossman
Date: 2025-10-27T06:06:25-07:00
New Revision: f80b27349d4db84351c29595598d07e765516f1a
URL: https://github.com/llvm/llvm-project/commit/f80b27349d4db84351c29595598d07e765516f1a
DIFF: https://github.com/llvm/llvm-project/commit/f80b27349d4db84351c29595598d07e765516f1a.diff
LOG: [X86][NewPM] Port X86LowerAMXType to NewPM
To enable the eventual migration of everything to the NewPM.
Reviewers: RKSimon, phoebewang, paperchalice, arsenm, topperc
Reviewed By: arsenm
Pull Request: https://github.com/llvm/llvm-project/pull/165084
Added:
Modified:
llvm/lib/Target/X86/X86.h
llvm/lib/Target/X86/X86CodeGenPassBuilder.cpp
llvm/lib/Target/X86/X86LowerAMXType.cpp
llvm/lib/Target/X86/X86PassRegistry.def
llvm/lib/Target/X86/X86TargetMachine.cpp
llvm/test/CodeGen/X86/AMX/amx-combine-undef.ll
llvm/test/CodeGen/X86/AMX/amx-combine.ll
llvm/test/CodeGen/X86/AMX/amx-configO2toO0-lower.ll
llvm/test/CodeGen/X86/AMX/amx-type.ll
llvm/test/CodeGen/X86/AMX/lat-combine-amx-bitcast.ll
llvm/test/CodeGen/X86/AMX/lat-transform-amx-bitcast.ll
llvm/test/CodeGen/X86/amx_tile_pair_lower_type_O0.ll
llvm/test/CodeGen/X86/amx_tile_pair_lower_type_O2.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86.h b/llvm/lib/Target/X86/X86.h
index 706ab2b62bc1b..51b540a7a51d0 100644
--- a/llvm/lib/Target/X86/X86.h
+++ b/llvm/lib/Target/X86/X86.h
@@ -14,7 +14,10 @@
#ifndef LLVM_LIB_TARGET_X86_X86_H
#define LLVM_LIB_TARGET_X86_X86_H
+#include "llvm/IR/Analysis.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/Support/CodeGen.h"
+#include "llvm/Target/TargetMachine.h"
namespace llvm {
@@ -162,7 +165,17 @@ FunctionPass *createX86WinEHUnwindV2Pass();
/// The pass transforms load/store <256 x i32> to AMX load/store intrinsics
/// or split the data to two <128 x i32>.
-FunctionPass *createX86LowerAMXTypePass();
+class X86LowerAMXTypePass : public PassInfoMixin<X86LowerAMXTypePass> {
+private:
+ const TargetMachine *TM;
+
+public:
+ X86LowerAMXTypePass(const TargetMachine *TM) : TM(TM) {}
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);
+ static bool isRequired() { return true; }
+};
+
+FunctionPass *createX86LowerAMXTypeLegacyPass();
/// The pass transforms amx intrinsics to scalar operation if the function has
/// optnone attribute or it is O0.
diff --git a/llvm/lib/Target/X86/X86CodeGenPassBuilder.cpp b/llvm/lib/Target/X86/X86CodeGenPassBuilder.cpp
index d979517e12af6..2c0443da673a8 100644
--- a/llvm/lib/Target/X86/X86CodeGenPassBuilder.cpp
+++ b/llvm/lib/Target/X86/X86CodeGenPassBuilder.cpp
@@ -10,6 +10,7 @@
/// TODO: Port CodeGen passes to new pass manager.
//===----------------------------------------------------------------------===//
+#include "X86.h"
#include "X86ISelDAGToDAG.h"
#include "X86TargetMachine.h"
diff --git a/llvm/lib/Target/X86/X86LowerAMXType.cpp b/llvm/lib/Target/X86/X86LowerAMXType.cpp
index 0ba71ada8638e..8ffd454f4f73e 100644
--- a/llvm/lib/Target/X86/X86LowerAMXType.cpp
+++ b/llvm/lib/Target/X86/X86LowerAMXType.cpp
@@ -46,12 +46,14 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/Analysis.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsX86.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
@@ -64,7 +66,7 @@
using namespace llvm;
using namespace PatternMatch;
-#define DEBUG_TYPE "lower-amx-type"
+#define DEBUG_TYPE "x86-lower-amx-type"
static bool isAMXCast(Instruction *II) {
return match(II,
@@ -137,7 +139,7 @@ static Instruction *getFirstNonAllocaInTheEntryBlock(Function &F) {
class ShapeCalculator {
private:
- TargetMachine *TM = nullptr;
+ const TargetMachine *TM = nullptr;
// In AMX intrinsics we let Shape = {Row, Col}, but the
// RealCol = Col / ElementSize. We may use the RealCol
@@ -145,7 +147,7 @@ class ShapeCalculator {
std::map<Value *, Value *> Col2Row, Row2Col;
public:
- ShapeCalculator(TargetMachine *TargetM) : TM(TargetM) {}
+ ShapeCalculator(const TargetMachine *TargetM) : TM(TargetM) {}
std::pair<Value *, Value *> getShape(IntrinsicInst *II, unsigned OpNo);
std::pair<Value *, Value *> getShape(PHINode *Phi);
Value *getRowFromCol(Instruction *II, Value *V, unsigned Granularity);
@@ -1432,8 +1434,58 @@ bool X86LowerAMXCast::transformAllAMXCast() {
return Change;
}
+bool lowerAmxType(Function &F, const TargetMachine *TM,
+ TargetLibraryInfo *TLI) {
+ // Performance optimization: most code doesn't use AMX, so return early if
+ // there are no instructions that produce AMX values. This is sufficient, as
+ // AMX arguments and constants are not allowed -- so any producer of an AMX
+ // value must be an instruction.
+ // TODO: find a cheaper way for this, without looking at all instructions.
+ if (!containsAMXCode(F))
+ return false;
+
+ bool C = false;
+ ShapeCalculator SC(TM);
+ X86LowerAMXCast LAC(F, &SC);
+ C |= LAC.combineAMXcast(TLI);
+ // There might be remaining AMXcast after combineAMXcast and they should be
+ // handled elegantly.
+ C |= LAC.transformAllAMXCast();
+
+ X86LowerAMXType LAT(F, &SC);
+ C |= LAT.visit();
+
+ // Prepare for fast register allocation at O0.
+ // Todo: May better check the volatile model of AMX code, not just
+ // by checking Attribute::OptimizeNone and CodeGenOptLevel::None.
+ if (TM->getOptLevel() == CodeGenOptLevel::None) {
+ // If Front End not use O0 but the Mid/Back end use O0, (e.g.
+ // "Clang -O2 -S -emit-llvm t.c" + "llc t.ll") we should make
+ // sure the amx data is volatile, that is necessary for AMX fast
+ // register allocation.
+ if (!F.hasFnAttribute(Attribute::OptimizeNone)) {
+ X86VolatileTileData VTD(F);
+ C = VTD.volatileTileData() || C;
+ }
+ }
+
+ return C;
+}
+
} // anonymous namespace
+PreservedAnalyses X86LowerAMXTypePass::run(Function &F,
+ FunctionAnalysisManager &FAM) {
+ TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
+ bool Changed = lowerAmxType(F, TM, &TLI);
+ if (!Changed)
+ return PreservedAnalyses::all();
+
+ PreservedAnalyses PA = PreservedAnalyses::none();
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
+}
+
namespace {
class X86LowerAMXTypeLegacyPass : public FunctionPass {
@@ -1443,44 +1495,10 @@ class X86LowerAMXTypeLegacyPass : public FunctionPass {
X86LowerAMXTypeLegacyPass() : FunctionPass(ID) {}
bool runOnFunction(Function &F) override {
- // Performance optimization: most code doesn't use AMX, so return early if
- // there are no instructions that produce AMX values. This is sufficient, as
- // AMX arguments and constants are not allowed -- so any producer of an AMX
- // value must be an instruction.
- // TODO: find a cheaper way for this, without looking at all instructions.
- if (!containsAMXCode(F))
- return false;
-
- bool C = false;
TargetMachine *TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
TargetLibraryInfo *TLI =
&getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
-
- ShapeCalculator SC(TM);
- X86LowerAMXCast LAC(F, &SC);
- C |= LAC.combineAMXcast(TLI);
- // There might be remaining AMXcast after combineAMXcast and they should be
- // handled elegantly.
- C |= LAC.transformAllAMXCast();
-
- X86LowerAMXType LAT(F, &SC);
- C |= LAT.visit();
-
- // Prepare for fast register allocation at O0.
- // Todo: May better check the volatile model of AMX code, not just
- // by checking Attribute::OptimizeNone and CodeGenOptLevel::None.
- if (TM->getOptLevel() == CodeGenOptLevel::None) {
- // If Front End not use O0 but the Mid/Back end use O0, (e.g.
- // "Clang -O2 -S -emit-llvm t.c" + "llc t.ll") we should make
- // sure the amx data is volatile, that is nessary for AMX fast
- // register allocation.
- if (!F.hasFnAttribute(Attribute::OptimizeNone)) {
- X86VolatileTileData VTD(F);
- C = VTD.volatileTileData() || C;
- }
- }
-
- return C;
+ return lowerAmxType(F, TM, TLI);
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -1501,6 +1519,6 @@ INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(X86LowerAMXTypeLegacyPass, DEBUG_TYPE, PassName, false,
false)
-FunctionPass *llvm::createX86LowerAMXTypePass() {
+FunctionPass *llvm::createX86LowerAMXTypeLegacyPass() {
return new X86LowerAMXTypeLegacyPass();
}
diff --git a/llvm/lib/Target/X86/X86PassRegistry.def b/llvm/lib/Target/X86/X86PassRegistry.def
index 3f2a4331c41f2..fc25d55d3059a 100644
--- a/llvm/lib/Target/X86/X86PassRegistry.def
+++ b/llvm/lib/Target/X86/X86PassRegistry.def
@@ -12,11 +12,16 @@
// NOTE: NO INCLUDE GUARD DESIRED!
+#ifndef FUNCTION_PASS
+#define FUNCTION_PASS(NAME, CREATE_PASS)
+#endif
+FUNCTION_PASS("x86-lower-amx-type", X86LowerAMXTypePass(this))
+#undef FUNCTION_PASS
+
#ifndef DUMMY_FUNCTION_PASS
#define DUMMY_FUNCTION_PASS(NAME, CREATE_PASS)
#endif
DUMMY_FUNCTION_PASS("lower-amx-intrinsics", X86LowerAMXIntrinsics(*this))
-DUMMY_FUNCTION_PASS("lower-amx-type", X86LowerAMXTypePass(*this))
DUMMY_FUNCTION_PASS("x86-partial-reduction", X86PartialReduction())
DUMMY_FUNCTION_PASS("x86-winehstate", WinEHStatePass())
#undef DUMMY_FUNCTION_PASS
diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp
index 8dd6f3d97ccea..9a76abcd351bf 100644
--- a/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -423,7 +423,7 @@ void X86PassConfig::addIRPasses() {
// We add both pass anyway and when these two passes run, we skip the pass
// based on the option level and option attribute.
addPass(createX86LowerAMXIntrinsicsPass());
- addPass(createX86LowerAMXTypePass());
+ addPass(createX86LowerAMXTypeLegacyPass());
TargetPassConfig::addIRPasses();
diff --git a/llvm/test/CodeGen/X86/AMX/amx-combine-undef.ll b/llvm/test/CodeGen/X86/AMX/amx-combine-undef.ll
index faa119cd037f1..5f0682abbea12 100644
--- a/llvm/test/CodeGen/X86/AMX/amx-combine-undef.ll
+++ b/llvm/test/CodeGen/X86/AMX/amx-combine-undef.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -lower-amx-type %s -S | FileCheck %s
+; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -x86-lower-amx-type %s -S | FileCheck %s
+; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -passes=x86-lower-amx-type %s -S | FileCheck %s
define void @undef_2phi(ptr%buf) {
; CHECK-LABEL: @undef_2phi(
diff --git a/llvm/test/CodeGen/X86/AMX/amx-combine.ll b/llvm/test/CodeGen/X86/AMX/amx-combine.ll
index 07f489c633c55..72e072dd15761 100644
--- a/llvm/test/CodeGen/X86/AMX/amx-combine.ll
+++ b/llvm/test/CodeGen/X86/AMX/amx-combine.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -lower-amx-type %s -S | FileCheck %s
+; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -x86-lower-amx-type %s -S | FileCheck %s
+; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -passes=x86-lower-amx-type %s -S | FileCheck %s
define void @combine_store(ptr%p) {
; CHECK-LABEL: @combine_store(
diff --git a/llvm/test/CodeGen/X86/AMX/amx-configO2toO0-lower.ll b/llvm/test/CodeGen/X86/AMX/amx-configO2toO0-lower.ll
index 6c536f11d4bb1..4ac406c1603ee 100644
--- a/llvm/test/CodeGen/X86/AMX/amx-configO2toO0-lower.ll
+++ b/llvm/test/CodeGen/X86/AMX/amx-configO2toO0-lower.ll
@@ -1,4 +1,5 @@
-; RUN: opt < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-int8 -mattr=+avx512f -lower-amx-type -S | FileCheck %s
+; RUN: opt < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-int8 -mattr=+avx512f -x86-lower-amx-type -S | FileCheck %s
+; RUN: opt < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-int8 -mattr=+avx512f -passes=x86-lower-amx-type -S | FileCheck %s
@buf = dso_local global [1024 x i8] zeroinitializer, align 16
@buf2 = dso_local global [1024 x i8] zeroinitializer, align 16
diff --git a/llvm/test/CodeGen/X86/AMX/amx-type.ll b/llvm/test/CodeGen/X86/AMX/amx-type.ll
index 1d9af2b13cdfd..294195a6541bf 100644
--- a/llvm/test/CodeGen/X86/AMX/amx-type.ll
+++ b/llvm/test/CodeGen/X86/AMX/amx-type.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -lower-amx-type %s -S | FileCheck %s
+; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -x86-lower-amx-type %s -S | FileCheck %s
+; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -passes=x86-lower-amx-type %s -S | FileCheck %s
%struct.__tile_str = type { i16, i16, <256 x i32> }
diff --git a/llvm/test/CodeGen/X86/AMX/lat-combine-amx-bitcast.ll b/llvm/test/CodeGen/X86/AMX/lat-combine-amx-bitcast.ll
index b70668f7a3dea..cdce783d0a237 100644
--- a/llvm/test/CodeGen/X86/AMX/lat-combine-amx-bitcast.ll
+++ b/llvm/test/CodeGen/X86/AMX/lat-combine-amx-bitcast.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -lower-amx-type %s -S | FileCheck %s
+; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -x86-lower-amx-type %s -S | FileCheck %s
+; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -passes=x86-lower-amx-type %s -S | FileCheck %s
define void @combine_amx_cast_inside_bb() {
; CHECK-LABEL: @combine_amx_cast_inside_bb(
diff --git a/llvm/test/CodeGen/X86/AMX/lat-transform-amx-bitcast.ll b/llvm/test/CodeGen/X86/AMX/lat-transform-amx-bitcast.ll
index 3a5b424540ff1..0b419bb8573d5 100644
--- a/llvm/test/CodeGen/X86/AMX/lat-transform-amx-bitcast.ll
+++ b/llvm/test/CodeGen/X86/AMX/lat-transform-amx-bitcast.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -lower-amx-type %s -S | FileCheck %s
+; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -x86-lower-amx-type %s -S | FileCheck %s
+; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -passes=x86-lower-amx-type %s -S | FileCheck %s
%struct.__tile_str = type { i16, i16, <256 x i32> }
diff --git a/llvm/test/CodeGen/X86/amx_tile_pair_lower_type_O0.ll b/llvm/test/CodeGen/X86/amx_tile_pair_lower_type_O0.ll
index 52641c65c90e9..3549875e858a9 100644
--- a/llvm/test/CodeGen/X86/amx_tile_pair_lower_type_O0.ll
+++ b/llvm/test/CodeGen/X86/amx_tile_pair_lower_type_O0.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
- ; RUN: opt --codegen-opt-level=0 -mtriple=x86_64 -lower-amx-type %s -S | FileCheck %s
+ ; RUN: opt --codegen-opt-level=0 -mtriple=x86_64 -x86-lower-amx-type %s -S | FileCheck %s
+ ; RUN: opt --codegen-opt-level=0 -mtriple=x86_64 -passes=x86-lower-amx-type %s -S | FileCheck %s
@buf = dso_local global [2048 x i8] zeroinitializer, align 16
diff --git a/llvm/test/CodeGen/X86/amx_tile_pair_lower_type_O2.ll b/llvm/test/CodeGen/X86/amx_tile_pair_lower_type_O2.ll
index 346d46b6b16c2..96966264e0515 100644
--- a/llvm/test/CodeGen/X86/amx_tile_pair_lower_type_O2.ll
+++ b/llvm/test/CodeGen/X86/amx_tile_pair_lower_type_O2.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -lower-amx-type %s -S | FileCheck %s
+; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -x86-lower-amx-type %s -S | FileCheck %s
+; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -passes=x86-lower-amx-type %s -S | FileCheck %s
@buf = dso_local global [2048 x i8] zeroinitializer, align 16
@buf2 = dso_local global [2048 x i8] zeroinitializer, align 16
More information about the llvm-commits
mailing list