[llvm] [X86][AMX] remove related code of X86PreAMXConfigPass (PR #69569)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 19 00:43:09 PDT 2023
https://github.com/yubingex007-a11y created https://github.com/llvm/llvm-project/pull/69569
This patch removes the X86PreAMXConfigPass (the IR pass that inserted tile-config intrinsics for AMX fast register allocation at O0), along with its pass registration, CMake and GN build entries, the legacy-PM pinning entry in opt, and the amx-configO2toO0-precfg.ll test.
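The deleted pass ran only at O0, preparing for AMX fast register allocation: for each "key" AMX area (tile loads feeding a compute intrinsic whose result is tile-stored) it allocated a 64-byte config buffer, zero-initialized it, wrote the palette byte and each tile's row/column shape, and then emitted ldtilecfg in front of the area. A minimal sketch of the IR it used to produce, adapted from the pass's own header comment and the deleted test (%cfgmem, %row, %col and the single-tile shape are illustrative):

  %cfgmem = alloca <16 x i32>, align 4                          ; tile-config memory
  store <16 x i32> zeroinitializer, ptr %cfgmem                 ; zero-init the config
  %palette = getelementptr i8, ptr %cfgmem, i64 0
  store i8 1, ptr %palette                                      ; palette = 1
  %amx.tmm.0.shape.row = getelementptr i8, ptr %cfgmem, i64 48  ; row of tmm0: i8 at offset 48 + i
  %amx.tmm.0.shape.col = getelementptr i8, ptr %cfgmem, i64 16  ; col of tmm0: i16 at offset 16 + 2*i
  %row.i8 = trunc i16 %row to i8
  store i8 %row.i8, ptr %amx.tmm.0.shape.row, align 1
  store i16 %col, ptr %amx.tmm.0.shape.col, align 2
  call void @llvm.x86.ldtilecfg.internal(ptr %cfgmem)           ; configure tiles for the area below
  %t = call x86_amx @llvm.x86.tileloadd64.internal(i16 %row, i16 %col, ptr @buf, i64 32)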
From de699709ee73acf9f04a02471255bffdde9bd0cf Mon Sep 17 00:00:00 2001
From: Bing1 Yu <bing1.yu at intel.com>
Date: Thu, 19 Oct 2023 15:42:02 +0800
Subject: [PATCH] [X86][AMX] remove related code of X86PreAMXConfigPass
---
clang/docs/tools/clang-formatted-files.txt | 1 -
llvm/include/llvm/CodeGen/Passes.h | 3 -
llvm/lib/Target/X86/CMakeLists.txt | 1 -
llvm/lib/Target/X86/X86.h | 1 -
llvm/lib/Target/X86/X86PreAMXConfig.cpp | 415 ------------------
llvm/lib/Target/X86/X86TargetMachine.cpp | 1 -
.../X86/AMX/amx-configO2toO0-precfg.ll | 178 --------
llvm/tools/opt/opt.cpp | 1 -
.../gn/secondary/llvm/lib/Target/X86/BUILD.gn | 1 -
9 files changed, 602 deletions(-)
delete mode 100644 llvm/lib/Target/X86/X86PreAMXConfig.cpp
delete mode 100644 llvm/test/CodeGen/X86/AMX/amx-configO2toO0-precfg.ll
diff --git a/clang/docs/tools/clang-formatted-files.txt b/clang/docs/tools/clang-formatted-files.txt
index 16f84727117e28d..48cd800bffd0046 100644
--- a/clang/docs/tools/clang-formatted-files.txt
+++ b/clang/docs/tools/clang-formatted-files.txt
@@ -6813,7 +6813,6 @@ llvm/lib/Target/X86/X86LoadValueInjectionRetHardening.cpp
llvm/lib/Target/X86/X86LowerAMXIntrinsics.cpp
llvm/lib/Target/X86/X86LowerAMXType.cpp
llvm/lib/Target/X86/X86LowerTileCopy.cpp
-llvm/lib/Target/X86/X86PreAMXConfig.cpp
llvm/lib/Target/X86/X86PreTileConfig.cpp
llvm/lib/Target/X86/X86RegisterBankInfo.h
llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp
diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h
index 598c0b838c1b97d..8d14eef949e91b4 100644
--- a/llvm/include/llvm/CodeGen/Passes.h
+++ b/llvm/include/llvm/CodeGen/Passes.h
@@ -584,9 +584,6 @@ namespace llvm {
/// or split the data to two <128 x i32>.
FunctionPass *createX86LowerAMXTypePass();
- /// The pass insert tile config intrinsics for AMX fast register allocation.
- FunctionPass *createX86PreAMXConfigPass();
-
/// The pass transforms amx intrinsics to scalar operation if the function has
/// optnone attribute or it is O0.
FunctionPass *createX86LowerAMXIntrinsicsPass();
diff --git a/llvm/lib/Target/X86/CMakeLists.txt b/llvm/lib/Target/X86/CMakeLists.txt
index c387d59ea981a52..0b7a98ad6341dde 100644
--- a/llvm/lib/Target/X86/CMakeLists.txt
+++ b/llvm/lib/Target/X86/CMakeLists.txt
@@ -33,7 +33,6 @@ set(sources
X86DiscriminateMemOps.cpp
X86LowerTileCopy.cpp
X86LowerAMXType.cpp
- X86PreAMXConfig.cpp
X86LowerAMXIntrinsics.cpp
X86TileConfig.cpp
X86FastPreTileConfig.cpp
diff --git a/llvm/lib/Target/X86/X86.h b/llvm/lib/Target/X86/X86.h
index 3c5ca0788498032..485afbc1dfbc241 100644
--- a/llvm/lib/Target/X86/X86.h
+++ b/llvm/lib/Target/X86/X86.h
@@ -194,7 +194,6 @@ void initializeX86LowerAMXTypeLegacyPassPass(PassRegistry &);
void initializeX86LowerTileCopyPass(PassRegistry &);
void initializeX86OptimizeLEAPassPass(PassRegistry &);
void initializeX86PartialReductionPass(PassRegistry &);
-void initializeX86PreAMXConfigPassPass(PassRegistry &);
void initializeX86PreTileConfigPass(PassRegistry &);
void initializeX86ReturnThunksPass(PassRegistry &);
void initializeX86SpeculativeExecutionSideEffectSuppressionPass(PassRegistry &);
diff --git a/llvm/lib/Target/X86/X86PreAMXConfig.cpp b/llvm/lib/Target/X86/X86PreAMXConfig.cpp
deleted file mode 100644
index 7872a64061d438c..000000000000000
--- a/llvm/lib/Target/X86/X86PreAMXConfig.cpp
+++ /dev/null
@@ -1,415 +0,0 @@
-//===- Target/X86/X86PreAMXConfig.cpp - ------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-/// Insert tilecfg for each area of key AMX intrinsic.
-/// All the key AMX intrinsic's tile operand must come from tileload. And the
-/// def tile of key AMX intrinsic must be tilestored.
-/// take tdpbssd for example:
-/// --------------------------------------------------------------------------
-/// %t1 = call x86_amx @llvm.x86.tileloadd64.internal(...) key
-/// %t2 = call x86_amx @llvm.x86.tileloadd64.internal(...) |
-/// %t3 = call x86_amx @llvm.x86.tileloadd64.internal(...) amx
-/// %td = tail call x86_amx @llvm.x86.tdpbssd.internal(t1, t2, t3) |
-/// call void @llvm.x86.tilestored64.internal(... td) area
-/// --------------------------------------------------------------------------
-/// This pass will insert tilecfg before every key-amx-area, some like:
-/// --------------------------------------------------------------------------
-/// %cfgmem = alloca <16 x i32>, align 4 * allocate mem
-/// store <16 x i32> zeroinitializer, <16 x i32>* %cfgmem * zero init
-/// ...
-/// ... pre-config shape of %t1 *
-/// store volatile i8 %m, i8* %amx.tmm.0.shape.row, align 1 *
-/// store volatile i16 %k, i16* %amx.tmm.0.shape.col, align 2 * pre-config
-/// ... *
-/// ... pre-config shape of %t2 * shapes
-/// store volatile i8 %k, i8* %amx.tmm.1.shape.row, align 1 *
-/// store volatile i16 %n, i16* %amx.tmm.1.shape.col, align 2 *
-/// ...
-/// call void @llvm.x86.ldtilecfg(i8* %cfgmem) * tile config
-//
-//===----------------------------------------------------------------------===//
-//
-#include "X86.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/TargetPassConfig.h"
-#include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/IntrinsicsX86.h"
-#include "llvm/IR/PatternMatch.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetMachine.h"
-
-using namespace llvm;
-using namespace PatternMatch;
-
-#define DEBUG_TYPE "pre-amx-config"
-
-static bool isAMXIntrinsic(IntrinsicInst *II) {
- for (Value *Operand : II->operands())
- if (Operand->getType()->isX86_AMXTy())
- return true;
- return II->getType()->isX86_AMXTy();
-}
-
-static bool isTileLoad(IntrinsicInst *II) {
- return II->getIntrinsicID() == Intrinsic::x86_tileloadd64_internal ||
- II->getIntrinsicID() == Intrinsic::x86_tileloaddt164_internal;
-}
-
-static bool isTileStore(IntrinsicInst *II) {
- return II->getIntrinsicID() == Intrinsic::x86_tilestored64_internal;
-}
-
-#ifndef NDEBUG
-static bool onlyTileDef(IntrinsicInst *II) {
- for (Value *Operand : II->operands())
- if (Operand->getType()->isX86_AMXTy())
- return false;
- return II->getType()->isX86_AMXTy();
-}
-
-static bool brokenVolatile(Instruction *I) {
- // Todo: it is weak to identify a normal call here.
- if ((isa<CallInst>(I) && !isa<IntrinsicInst>(I)) || I->isTerminator())
- return true;
- return false;
-}
-#endif
-
-namespace {
-class X86PreAMXConfig {
- using PosAndShapesMap = MapVector<Instruction *, SmallVector<Value *, 8>>;
-
- Function &F;
-
-public:
- X86PreAMXConfig(Function &Func) : F(Func) {}
- bool preTileConfig();
- void addTileConfig(Instruction *ModelStart, SmallVector<Value *, 8> &Shapes);
- bool findConfigShapes(PosAndShapesMap &PosAndShapes);
- bool getKeyAMXShapes(IntrinsicInst *KeyAMX, SmallVector<Value *, 8> &Shapes);
- void preWriteTileCfg(Value *I8Ptr, IRBuilderBase &Builder,
- SmallVector<Value *, 8> &Shapes);
- BasicBlock::iterator
- getShapesAndConfigPosEnd(BasicBlock::iterator Iter,
- SmallVector<Value *, 8> &Shapes);
- bool checkVolatileModel(SmallSet<Value *, 4> &Loads, IntrinsicInst *Store,
- IntrinsicInst *KeyAMX);
-};
-
-// Orderly write the shapes in tilecfg's mem. This maybe not right.
-// Because the first shape may not corresponding to the first tmm register,
-// so we need to handle at at X86FastTileConfig::materializeTileCfg()
-// after register allocation.
-// For example:
-// --------------------------------------------------------------------------
-// zeroinitialize tilecfg's mem (of ldtilecfg)
-// --------------------------------------------------------------------------
-// ... pre-config shape of %t1 *
-// %amx.tmm.0.shape.row = getelementptr i8, i8* %mem, i64 48 *
-// %amx.tmm.0.shape.col = getelementptr i16, i16* %mem, i64 16 *
-// store volatile i8 %m, i8* %amx.tmm.0.shape.row, align 1 *
-// store volatile i16 %k, i16* %amx.tmm.0.shape.col, align 2 * pre-config
-// ... *
-// ... pre-config shape of %t2 *
-// %amx.tmm.1.shape.row = getelementptr i8, i8* %mem, i64 49 *
-// %amx.tmm.1.shape.col = getelementptr i16, i16* %mem, i64 18 *
-// store volatile i8 %k, i8* %amx.tmm.1.shape.row, align 1 * shapes
-// store volatile i16 %n, i16* %amx.tmm.1.shape.col, align 2 *
-// ... *
-// ... pre-config shape of %t3 * of
-// %amx.tmm.2.shape.row = getelementptr i8, i8* %mem, i64 50 *
-// %amx.tmm.2.shape.col = getelementptr i16, i16* %mem, i64 20 *
-// store volatile i8 %m, i8* %amx.tmm.2.shape.row, align 1 *
-// store volatile i16 %n, i16* %amx.tmm.2.shape.col, align 2 *
-// ... * tiles
-// ... pre-config shape of %td *
-// %amx.tmm.3.shape.row = getelementptr i8, i8* %mem, i64 51 *
-// %amx.tmm.3.shape.col = getelementptr i16, i16* %mem, i64 22 *
-// store volatile i8 %m, i8* %amx.tmm.3.shape.row, align 1 *
-// store volatile i16 %n, i16* %amx.tmm.3.shape.col, align 2 *
-// --------------------------------------------------------------------------
-// call void @llvm.x86.ldtilecfg(i8* %mem) * tile config
-// --------------------------------------------------------------------------
-// %t1 = call x86_amx @llvm.x86.tileloadd64.internal(m, k, ...) key
-// %t2 = call x86_amx @llvm.x86.tileloadd64.internal(k, n, ...)
-// %t3 = call x86_amx @llvm.x86.tileloadd64.internal(m, n, ...) amx
-// %td = tail call x86_amx @llvm.x86.tdpbssd.internal(m, n, k, t1, t2, t3)
-// call void @llvm.x86.tilestored64.internal(... td) area
-// --------------------------------------------------------------------------
-void X86PreAMXConfig::preWriteTileCfg(Value *I8Ptr, IRBuilderBase &Builder,
- SmallVector<Value *, 8> &Shapes) {
- LLVMContext &Ctx = Builder.getContext();
- Type *I8Ty = Type::getInt8Ty(Ctx);
- Type *I16Ty = Type::getInt16Ty(Ctx);
-
- // TODO: Currently we defaultly set Palette = 1, it may be assigned to
- // other value in the future.
- Value *PaletteOffset = ConstantInt::get(Type::getInt64Ty(Ctx), 0);
- Value *PaletteValue = ConstantInt::get(Type::getInt8Ty(Ctx), 1);
- Value *PalettePos = Builder.CreateGEP(I8Ty, I8Ptr, PaletteOffset);
- Builder.CreateStore(PaletteValue, PalettePos);
-
- for (int I = 0, E = Shapes.size() / 2; I < E; I++) {
- Value *RowOffset = ConstantInt::get(Type::getInt64Ty(Ctx), 48 + I);
- Value *ColOffset = ConstantInt::get(Type::getInt64Ty(Ctx), 16 + I * 2);
- const std::string ShapeName = "amx.tmm." + itostr(I);
- Value *RowPos = Builder.CreateGEP(I8Ty, I8Ptr, RowOffset,
- ShapeName + ".shape.row");
- Value *ColPos = Builder.CreateGEP(I8Ty, I8Ptr, ColOffset);
- ColPos = Builder.CreateBitCast(ColPos, PointerType::get(I16Ty, 0),
- ShapeName + ".shape.col");
- Value *Row = Shapes[I * 2];
- Value *Col = Shapes[I * 2 + 1];
- Row = Builder.CreateTrunc(Row, I8Ty);
- Builder.CreateStore(Row, RowPos);
- Builder.CreateStore(Col, ColPos);
- }
-}
-
-void X86PreAMXConfig::addTileConfig(Instruction *ModelStart,
- SmallVector<Value *, 8> &Shapes) {
- Module *M = F.getParent();
- IRBuilder<> Builder(ModelStart);
- const DataLayout &DL = M->getDataLayout();
- unsigned AddrSpace = DL.getAllocaAddrSpace();
- LLVMContext &Ctx = Builder.getContext();
- Type *V512Ty = VectorType::get(Builder.getInt32Ty(), 16, false);
- Align Alignment = DL.getPrefTypeAlign(Type::getInt32Ty(Ctx));
-
- AllocaInst *Addr =
- new AllocaInst(V512Ty, AddrSpace, "", &F.getEntryBlock().front());
- Addr->setAlignment(Alignment);
- Value *I8Ptr = Builder.CreateBitCast(Addr, Builder.getInt8PtrTy());
-
- Builder.CreateAlignedStore(Constant::getNullValue(V512Ty), Addr, Alignment);
-
- preWriteTileCfg(I8Ptr, Builder, Shapes);
-
- Builder.CreateIntrinsic(Intrinsic::x86_ldtilecfg_internal, std::nullopt,
- {I8Ptr});
-}
-
-// Todo: We may need to handle "more than one store" case in the future.
-bool X86PreAMXConfig::checkVolatileModel(SmallSet<Value *, 4> &Loads,
- IntrinsicInst *Store,
- IntrinsicInst *KeyAMX) {
- Value *ST = Store->getOperand(4);
-
- // Only has tileload and tilestore.
- if (!KeyAMX)
- return (Loads.size() == 1) && Loads.contains(ST);
-
- // All Loads should be operands of KeyAMX.
- // All tile operands of KeyAMX should come from Loads.
- for (Value *Op : KeyAMX->operands()) {
- if (Op->getType()->isX86_AMXTy())
- if (!Loads.erase(Op))
- return false;
- }
-
- // The def of KeyAMX should be stored into mem.
- // Todo: is it key amx can be no def?
- return Loads.empty() && (ST == cast<Value>(KeyAMX));
-}
-
-bool X86PreAMXConfig::getKeyAMXShapes(IntrinsicInst *KeyAMX,
- SmallVector<Value *, 8> &Shapes) {
- for (unsigned I = 0; I < KeyAMX->getNumOperands(); I++) {
- Value *Op = KeyAMX->getOperand(I);
- if (!Op->getType()->isX86_AMXTy())
- continue;
- IntrinsicInst *TileDef = dyn_cast<IntrinsicInst>(Op);
- assert((TileDef && isTileLoad(TileDef)) &&
- "All KeyAMX's tile definiation should comes from TileLoad!");
- Shapes.push_back(TileDef->getOperand(0));
- Shapes.push_back(TileDef->getOperand(1));
- }
- if (!isTileStore(KeyAMX)) {
- Shapes.push_back(KeyAMX->getOperand(0));
- Shapes.push_back(KeyAMX->getOperand(1));
- }
- return Shapes.size() != 0;
-}
-
-// Collect the shapes and skip the area of current key amx intrinsic.
-//
-// For example:
-// ...
-// --------------------------------------------------------------------------
-// %t1 = call x86_amx @llvm.x86.tileloadd64.internal(m, k, ...) record (m,k)
-// %t2 = call x86_amx @llvm.x86.tileloadd64.internal(k, n, ...) record (m,k)
-// %t3 = call x86_amx @llvm.x86.tileloadd64.internal(m, n, ...) record (m,k)
-// %td = call x86_amx @llvm.x86.tdpbssd.internal(...t1, t2, t3)
-// call void @llvm.x86.tilestored64.internal(m, n,... td) <--PosEnd record (m,k)
-// --------------------------------------------------------------------------
-BasicBlock::iterator
-X86PreAMXConfig::getShapesAndConfigPosEnd(BasicBlock::iterator Iter,
- SmallVector<Value *, 8> &Shapes) {
- IntrinsicInst *KeyAMX = nullptr;
- BasicBlock *BB = Iter->getParent();
- BasicBlock::iterator PosEnd = BB->end();
- SmallSet<Value *, 4> Loads;
-
- // See TileStore as "Config Position End" and check volatile model.
- for (auto I = Iter, E = BB->end(); I != E; ++I) {
- assert(!brokenVolatile(&*I) && "Not reach tile store!");
- IntrinsicInst *II = dyn_cast<IntrinsicInst>(&*I);
- if (!II || !isAMXIntrinsic(II))
- continue;
-
- if (isTileLoad(II)) {
- Loads.insert(II);
- } else if (isTileStore(II)) {
- if (!checkVolatileModel(Loads, II, KeyAMX))
- report_fatal_error("Not Volatile AMX Model!");
- PosEnd = I;
- break;
- } else {
- assert(!KeyAMX && "Too many key amx intrinsic!");
- KeyAMX = II;
- }
- }
- assert(PosEnd != BB->end() && "Not find TileStore!");
-
- // See KeyAMX as TileStore if only TileLoad and TileStore.
- if (!KeyAMX)
- KeyAMX = dyn_cast<IntrinsicInst>(&*PosEnd);
-
- // Get Shapes in order.
- assert(Shapes.empty() && "Shapes should be clean.");
- getKeyAMXShapes(KeyAMX, Shapes);
-
- return PosEnd;
-}
-
-// Record a key amx area's shapes with its position.
-// Use the first tileload as its position.
-// For example:
-// ...
-// --------------------------------------------------------------------------
-// %t1 = call x86_amx @llvm.x86.tileloadd64.internal(m, k, ...) <-- pos
-// %t2 = call x86_amx @llvm.x86.tileloadd64.internal(k, n, ...) /
-// %t3 = call x86_amx @llvm.x86.tileloadd64.internal(m, n, ...) shapes:
-// %td = call x86_amx @llvm.x86.tdpbssd.internal(...t1, t2, t3) (m,k)(k,n)
-// call void @llvm.x86.tilestored64.internal(m, n,... td) (m,n)(m,n)
-// --------------------------------------------------------------------------
-bool X86PreAMXConfig::findConfigShapes(PosAndShapesMap &PosAndShapes) {
- bool Find = false;
- for (BasicBlock &BB : F) {
- for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I) {
- IntrinsicInst *II = dyn_cast<IntrinsicInst>(&*I);
- if (!II)
- continue;
- if (!isAMXIntrinsic(II))
- continue;
- assert(onlyTileDef(II) && "Not volatile model for AMX at O0!");
-
- I = getShapesAndConfigPosEnd(I, PosAndShapes[&*I]);
- Find = true;
- }
- }
- return Find;
-}
-
-// Insert ldtilecfg and preconfig the shapes for each area of key AMX intrinsic.
-// e.g. (key amx = tdpbssd)
-// --------------------------------------------------------------------------
-// %cfgmem = alloca <16 x i32>, align 4 * allocate mem
-// store <16 x i32> zeroinitializer, <16 x i32>* %cfgmem * zero init
-// ...
-// ... pre-config shape of %t1 *
-// store volatile i8 %m, i8* %amx.tmm.0.shape.row, align 1 *
-// store volatile i16 %k, i16* %amx.tmm.0.shape.col, align 2 * pre-config
-// ... *
-// ... pre-config shape of %t2 *
-// store volatile i8 %k, i8* %amx.tmm.1.shape.row, align 1 * shapes
-// store volatile i16 %n, i16* %amx.tmm.1.shape.col, align 2 *
-// ... *
-// ... pre-config shape of %t3 * of
-// store volatile i8 %m, i8* %amx.tmm.2.shape.row, align 1 *
-// store volatile i16 %n, i16* %amx.tmm.2.shape.col, align 2 *
-// ... * tiles
-// ... pre-config shape of %td *
-// store volatile i8 %m, i8* %amx.tmm.3.shape.row, align 1 *
-// store volatile i16 %n, i16* %amx.tmm.3.shape.col, align 2 *
-//
-// call void @llvm.x86.ldtilecfg(i8* %cfgmem) * pre-config
-// --------------------------------------------------------------------------
-// %t1 = call x86_amx @llvm.x86.tileloadd64.internal(m, k, ...) key
-// %t2 = call x86_amx @llvm.x86.tileloadd64.internal(k, n, ...)
-// %t3 = call x86_amx @llvm.x86.tileloadd64.internal(m, n, ...) amx
-// %td = tail call x86_amx @llvm.x86.tdpbssd.internal(m, n, k, t1, t2, t3)
-// call void @llvm.x86.tilestored64.internal(... td) area
-// --------------------------------------------------------------------------
-bool X86PreAMXConfig::preTileConfig() {
- PosAndShapesMap PosAndShapes;
- bool NeedCfg = findConfigShapes(PosAndShapes);
- if (!NeedCfg)
- return false;
- for (auto &IPAndShapes : PosAndShapes)
- addTileConfig(IPAndShapes.first, IPAndShapes.second);
-
- return true;
-}
-} // anonymous namespace
-
-namespace {
-
-class X86PreAMXConfigPass : public FunctionPass {
-public:
- static char ID;
-
- X86PreAMXConfigPass() : FunctionPass(ID) {
- initializeX86PreAMXConfigPassPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &F) override {
- TargetMachine *TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
- bool C = false;
-
- // Prepare for fast register allocation at O0.
- if (TM->getOptLevel() == CodeGenOptLevel::None) {
-
- // We pre-config each key AMX intrinsic at O0.
- // In theory, one tile config can cover several AMX intrinsics, but
- // it is very diffcult to classify the tile shapes at O0. So here we
- // let thing be easy, pre-config every key AMX intrinsic.
- X86PreAMXConfig PCFG(F);
- C = PCFG.preTileConfig();
- }
-
- return C;
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- AU.addRequired<TargetPassConfig>();
- }
-};
-
-} // anonymous namespace
-
-static const char PassName[] = "Pre AMX Tile Config";
-char X86PreAMXConfigPass::ID = 0;
-INITIALIZE_PASS_BEGIN(X86PreAMXConfigPass, DEBUG_TYPE, PassName, false, false)
-INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
-INITIALIZE_PASS_END(X86PreAMXConfigPass, DEBUG_TYPE, PassName, false, false)
-
-FunctionPass *llvm::createX86PreAMXConfigPass() {
- return new X86PreAMXConfigPass();
-}
diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp
index 82c15c916c51fc6..5668b514d6dec07 100644
--- a/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -71,7 +71,6 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86Target() {
PassRegistry &PR = *PassRegistry::getPassRegistry();
initializeX86LowerAMXIntrinsicsLegacyPassPass(PR);
initializeX86LowerAMXTypeLegacyPassPass(PR);
- initializeX86PreAMXConfigPassPass(PR);
initializeX86PreTileConfigPass(PR);
initializeGlobalISel(PR);
initializeWinEHStatePassPass(PR);
diff --git a/llvm/test/CodeGen/X86/AMX/amx-configO2toO0-precfg.ll b/llvm/test/CodeGen/X86/AMX/amx-configO2toO0-precfg.ll
deleted file mode 100644
index 82b9746c41933d7..000000000000000
--- a/llvm/test/CodeGen/X86/AMX/amx-configO2toO0-precfg.ll
+++ /dev/null
@@ -1,178 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-int8 -mattr=+avx512f -pre-amx-config -S | FileCheck %s
-
-@buf = dso_local global [1024 x i8] zeroinitializer, align 16
-@buf2 = dso_local global [1024 x i8] zeroinitializer, align 16
-
-define dso_local void @test_api(i32 %cond, i16 signext %row, i16 signext %col) local_unnamed_addr {
-; CHECK-LABEL: @test_api(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = alloca <16 x i32>, align 4
-; CHECK-NEXT: [[TMP1:%.*]] = alloca <16 x i32>, align 4
-; CHECK-NEXT: [[TMP2:%.*]] = alloca <16 x i32>, align 4
-; CHECK-NEXT: [[TMP3:%.*]] = alloca <16 x i32>, align 4
-; CHECK-NEXT: [[TMP4:%.*]] = alloca <16 x i32>, align 4
-; CHECK-NEXT: [[TMP5:%.*]] = alloca <16 x i32>, align 4
-; CHECK-NEXT: [[TMP6:%.*]] = alloca <16 x i32>, align 4
-; CHECK-NEXT: [[TMP7:%.*]] = alloca <16 x i32>, align 4
-; CHECK-NEXT: [[I:%.*]] = alloca <256 x i32>, align 1024
-; CHECK-NEXT: [[I2:%.*]] = alloca <256 x i32>, align 1024
-; CHECK-NEXT: [[I4:%.*]] = alloca <256 x i32>, align 1024
-; CHECK-NEXT: [[I6:%.*]] = alloca <256 x i32>, align 1024
-; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[COND:%.*]], 0
-; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_ELSE:%.*]], label [[IF_THEN:%.*]]
-; CHECK: if.then:
-; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr [[TMP7]], align 4
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[TMP7]], i64 0
-; CHECK-NEXT: store i8 1, ptr [[TMP8]], align 1
-; CHECK-NEXT: [[AMX_TMM_0_SHAPE_ROW:%.*]] = getelementptr i8, ptr [[TMP7]], i64 48
-; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP7]], i64 16
-; CHECK-NEXT: [[TMP10:%.*]] = trunc i16 [[ROW:%.*]] to i8
-; CHECK-NEXT: store i8 [[TMP10]], ptr [[AMX_TMM_0_SHAPE_ROW]], align 1
-; CHECK-NEXT: store i16 8, ptr [[TMP9]], align 2
-; CHECK-NEXT: call void @llvm.x86.ldtilecfg.internal(ptr [[TMP7]])
-; CHECK-NEXT: [[I8:%.*]] = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 [[ROW]], i16 8, ptr @buf, i64 32)
-; CHECK-NEXT: call void @llvm.x86.tilestored64.internal(i16 [[ROW]], i16 8, ptr [[I4]], i64 64, x86_amx [[I8]])
-; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr [[TMP6]], align 4
-; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[TMP6]], i64 0
-; CHECK-NEXT: store i8 1, ptr [[TMP11]], align 1
-; CHECK-NEXT: [[AMX_TMM_0_SHAPE_ROW1:%.*]] = getelementptr i8, ptr [[TMP6]], i64 48
-; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[TMP6]], i64 16
-; CHECK-NEXT: store i8 8, ptr [[AMX_TMM_0_SHAPE_ROW1]], align 1
-; CHECK-NEXT: store i16 [[COL:%.*]], ptr [[TMP12]], align 2
-; CHECK-NEXT: call void @llvm.x86.ldtilecfg.internal(ptr [[TMP6]])
-; CHECK-NEXT: [[I9:%.*]] = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 8, i16 [[COL]], ptr @buf, i64 32)
-; CHECK-NEXT: call void @llvm.x86.tilestored64.internal(i16 8, i16 [[COL]], ptr [[I2]], i64 64, x86_amx [[I9]])
-; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr [[TMP5]], align 4
-; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[TMP5]], i64 0
-; CHECK-NEXT: store i8 1, ptr [[TMP13]], align 1
-; CHECK-NEXT: [[AMX_TMM_0_SHAPE_ROW2:%.*]] = getelementptr i8, ptr [[TMP5]], i64 48
-; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[TMP5]], i64 16
-; CHECK-NEXT: [[TMP15:%.*]] = trunc i16 [[ROW]] to i8
-; CHECK-NEXT: store i8 [[TMP15]], ptr [[AMX_TMM_0_SHAPE_ROW2]], align 1
-; CHECK-NEXT: store i16 [[COL]], ptr [[TMP14]], align 2
-; CHECK-NEXT: call void @llvm.x86.ldtilecfg.internal(ptr [[TMP5]])
-; CHECK-NEXT: [[I10:%.*]] = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 [[ROW]], i16 [[COL]], ptr @buf, i64 32)
-; CHECK-NEXT: call void @llvm.x86.tilestored64.internal(i16 [[ROW]], i16 [[COL]], ptr [[I]], i64 64, x86_amx [[I10]])
-; CHECK-NEXT: br label [[IF_END:%.*]]
-; CHECK: if.else:
-; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr [[TMP4]], align 4
-; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[TMP4]], i64 0
-; CHECK-NEXT: store i8 1, ptr [[TMP16]], align 1
-; CHECK-NEXT: [[AMX_TMM_0_SHAPE_ROW3:%.*]] = getelementptr i8, ptr [[TMP4]], i64 48
-; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[TMP4]], i64 16
-; CHECK-NEXT: [[TMP18:%.*]] = trunc i16 [[ROW]] to i8
-; CHECK-NEXT: store i8 [[TMP18]], ptr [[AMX_TMM_0_SHAPE_ROW3]], align 1
-; CHECK-NEXT: store i16 8, ptr [[TMP17]], align 2
-; CHECK-NEXT: call void @llvm.x86.ldtilecfg.internal(ptr [[TMP4]])
-; CHECK-NEXT: [[I11:%.*]] = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 [[ROW]], i16 8, ptr @buf2, i64 32)
-; CHECK-NEXT: call void @llvm.x86.tilestored64.internal(i16 [[ROW]], i16 8, ptr [[I4]], i64 64, x86_amx [[I11]])
-; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr [[TMP3]], align 4
-; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[TMP3]], i64 0
-; CHECK-NEXT: store i8 1, ptr [[TMP19]], align 1
-; CHECK-NEXT: [[AMX_TMM_0_SHAPE_ROW4:%.*]] = getelementptr i8, ptr [[TMP3]], i64 48
-; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[TMP3]], i64 16
-; CHECK-NEXT: store i8 8, ptr [[AMX_TMM_0_SHAPE_ROW4]], align 1
-; CHECK-NEXT: store i16 [[COL]], ptr [[TMP20]], align 2
-; CHECK-NEXT: call void @llvm.x86.ldtilecfg.internal(ptr [[TMP3]])
-; CHECK-NEXT: [[I12:%.*]] = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 8, i16 [[COL]], ptr @buf2, i64 32)
-; CHECK-NEXT: call void @llvm.x86.tilestored64.internal(i16 8, i16 [[COL]], ptr [[I2]], i64 64, x86_amx [[I12]])
-; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr [[TMP2]], align 4
-; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[TMP2]], i64 0
-; CHECK-NEXT: store i8 1, ptr [[TMP21]], align 1
-; CHECK-NEXT: [[AMX_TMM_0_SHAPE_ROW5:%.*]] = getelementptr i8, ptr [[TMP2]], i64 48
-; CHECK-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[TMP2]], i64 16
-; CHECK-NEXT: [[TMP23:%.*]] = trunc i16 [[ROW]] to i8
-; CHECK-NEXT: store i8 [[TMP23]], ptr [[AMX_TMM_0_SHAPE_ROW5]], align 1
-; CHECK-NEXT: store i16 [[COL]], ptr [[TMP22]], align 2
-; CHECK-NEXT: call void @llvm.x86.ldtilecfg.internal(ptr [[TMP2]])
-; CHECK-NEXT: [[I13:%.*]] = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 [[ROW]], i16 [[COL]], ptr @buf2, i64 32)
-; CHECK-NEXT: call void @llvm.x86.tilestored64.internal(i16 [[ROW]], i16 [[COL]], ptr [[I]], i64 64, x86_amx [[I13]])
-; CHECK-NEXT: br label [[IF_END]]
-; CHECK: if.end:
-; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr [[TMP1]], align 4
-; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[TMP1]], i64 0
-; CHECK-NEXT: store i8 1, ptr [[TMP24]], align 1
-; CHECK-NEXT: [[AMX_TMM_0_SHAPE_ROW6:%.*]] = getelementptr i8, ptr [[TMP1]], i64 48
-; CHECK-NEXT: [[TMP25:%.*]] = getelementptr i8, ptr [[TMP1]], i64 16
-; CHECK-NEXT: [[TMP26:%.*]] = trunc i16 [[ROW]] to i8
-; CHECK-NEXT: store i8 [[TMP26]], ptr [[AMX_TMM_0_SHAPE_ROW6]], align 1
-; CHECK-NEXT: store i16 [[COL]], ptr [[TMP25]], align 2
-; CHECK-NEXT: [[AMX_TMM_1_SHAPE_ROW:%.*]] = getelementptr i8, ptr [[TMP1]], i64 49
-; CHECK-NEXT: [[TMP27:%.*]] = getelementptr i8, ptr [[TMP1]], i64 18
-; CHECK-NEXT: [[TMP28:%.*]] = trunc i16 [[ROW]] to i8
-; CHECK-NEXT: store i8 [[TMP28]], ptr [[AMX_TMM_1_SHAPE_ROW]], align 1
-; CHECK-NEXT: store i16 8, ptr [[TMP27]], align 2
-; CHECK-NEXT: [[AMX_TMM_2_SHAPE_ROW:%.*]] = getelementptr i8, ptr [[TMP1]], i64 50
-; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[TMP1]], i64 20
-; CHECK-NEXT: store i8 8, ptr [[AMX_TMM_2_SHAPE_ROW]], align 1
-; CHECK-NEXT: store i16 [[COL]], ptr [[TMP29]], align 2
-; CHECK-NEXT: [[AMX_TMM_3_SHAPE_ROW:%.*]] = getelementptr i8, ptr [[TMP1]], i64 51
-; CHECK-NEXT: [[TMP30:%.*]] = getelementptr i8, ptr [[TMP1]], i64 22
-; CHECK-NEXT: [[TMP31:%.*]] = trunc i16 [[ROW]] to i8
-; CHECK-NEXT: store i8 [[TMP31]], ptr [[AMX_TMM_3_SHAPE_ROW]], align 1
-; CHECK-NEXT: store i16 [[COL]], ptr [[TMP30]], align 2
-; CHECK-NEXT: call void @llvm.x86.ldtilecfg.internal(ptr [[TMP1]])
-; CHECK-NEXT: [[I14:%.*]] = call x86_amx @llvm.x86.tileloadd64.internal(i16 [[ROW]], i16 8, ptr [[I4]], i64 64)
-; CHECK-NEXT: [[I15:%.*]] = call x86_amx @llvm.x86.tileloadd64.internal(i16 8, i16 [[COL]], ptr [[I2]], i64 64)
-; CHECK-NEXT: [[I16:%.*]] = call x86_amx @llvm.x86.tileloadd64.internal(i16 [[ROW]], i16 [[COL]], ptr [[I]], i64 64)
-; CHECK-NEXT: [[I17:%.*]] = tail call x86_amx @llvm.x86.tdpbssd.internal(i16 [[ROW]], i16 [[COL]], i16 8, x86_amx [[I16]], x86_amx [[I14]], x86_amx [[I15]])
-; CHECK-NEXT: call void @llvm.x86.tilestored64.internal(i16 [[ROW]], i16 [[COL]], ptr [[I6]], i64 64, x86_amx [[I17]])
-; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr [[TMP0]], align 4
-; CHECK-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr [[TMP0]], i64 0
-; CHECK-NEXT: store i8 1, ptr [[TMP32]], align 1
-; CHECK-NEXT: [[AMX_TMM_0_SHAPE_ROW7:%.*]] = getelementptr i8, ptr [[TMP0]], i64 48
-; CHECK-NEXT: [[TMP33:%.*]] = getelementptr i8, ptr [[TMP0]], i64 16
-; CHECK-NEXT: [[TMP34:%.*]] = trunc i16 [[ROW]] to i8
-; CHECK-NEXT: store i8 [[TMP34]], ptr [[AMX_TMM_0_SHAPE_ROW7]], align 1
-; CHECK-NEXT: store i16 [[COL]], ptr [[TMP33]], align 2
-; CHECK-NEXT: call void @llvm.x86.ldtilecfg.internal(ptr [[TMP0]])
-; CHECK-NEXT: [[I18:%.*]] = call x86_amx @llvm.x86.tileloadd64.internal(i16 [[ROW]], i16 [[COL]], ptr [[I6]], i64 64)
-; CHECK-NEXT: tail call void @llvm.x86.tilestored64.internal(i16 [[ROW]], i16 [[COL]], ptr @buf, i64 32, x86_amx [[I18]])
-; CHECK-NEXT: ret void
-;
-entry:
- %i = alloca <256 x i32>, align 1024
- %i2 = alloca <256 x i32>, align 1024
- %i4 = alloca <256 x i32>, align 1024
- %i6 = alloca <256 x i32>, align 1024
- %tobool.not = icmp eq i32 %cond, 0
- br i1 %tobool.not, label %if.else, label %if.then
-
-if.then: ; preds = %entry
- %i8 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 %row, i16 8, ptr @buf, i64 32)
- call void @llvm.x86.tilestored64.internal(i16 %row, i16 8, ptr %i4, i64 64, x86_amx %i8)
- %i9 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 8, i16 %col, ptr @buf, i64 32)
- call void @llvm.x86.tilestored64.internal(i16 8, i16 %col, ptr %i2, i64 64, x86_amx %i9)
- %i10 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 %row, i16 %col, ptr @buf, i64 32)
- call void @llvm.x86.tilestored64.internal(i16 %row, i16 %col, ptr %i, i64 64, x86_amx %i10)
- br label %if.end
-
-if.else: ; preds = %entry
- %i11 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 %row, i16 8, ptr @buf2, i64 32)
- call void @llvm.x86.tilestored64.internal(i16 %row, i16 8, ptr %i4, i64 64, x86_amx %i11)
- %i12 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 8, i16 %col, ptr @buf2, i64 32)
- call void @llvm.x86.tilestored64.internal(i16 8, i16 %col, ptr %i2, i64 64, x86_amx %i12)
- %i13 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 %row, i16 %col, ptr @buf2, i64 32)
- call void @llvm.x86.tilestored64.internal(i16 %row, i16 %col, ptr %i, i64 64, x86_amx %i13)
- br label %if.end
-
-if.end: ; preds = %if.else, %if.then
- %i14 = call x86_amx @llvm.x86.tileloadd64.internal(i16 %row, i16 8, ptr %i4, i64 64)
- %i15 = call x86_amx @llvm.x86.tileloadd64.internal(i16 8, i16 %col, ptr %i2, i64 64)
- %i16 = call x86_amx @llvm.x86.tileloadd64.internal(i16 %row, i16 %col, ptr %i, i64 64)
- %i17 = tail call x86_amx @llvm.x86.tdpbssd.internal(i16 %row, i16 %col, i16 8, x86_amx %i16, x86_amx %i14, x86_amx %i15)
- call void @llvm.x86.tilestored64.internal(i16 %row, i16 %col, ptr %i6, i64 64, x86_amx %i17)
- %i18 = call x86_amx @llvm.x86.tileloadd64.internal(i16 %row, i16 %col, ptr %i6, i64 64)
- tail call void @llvm.x86.tilestored64.internal(i16 %row, i16 %col, ptr @buf, i64 32, x86_amx %i18)
- ret void
-}
-
-; Function Attrs: nounwind
-declare x86_amx @llvm.x86.tileloadd64.internal(i16, i16, ptr, i64)
-
-; Function Attrs: nounwind
-declare x86_amx @llvm.x86.tdpbssd.internal(i16, i16, i16, x86_amx, x86_amx, x86_amx)
-
-; Function Attrs: nounwind
-declare void @llvm.x86.tilestored64.internal(i16, i16, ptr, i64, x86_amx)
diff --git a/llvm/tools/opt/opt.cpp b/llvm/tools/opt/opt.cpp
index ed9c10d971218f1..52c8c17ea4b46de 100644
--- a/llvm/tools/opt/opt.cpp
+++ b/llvm/tools/opt/opt.cpp
@@ -351,7 +351,6 @@ static bool shouldPinPassToLegacyPM(StringRef Pass) {
"expandmemcmp",
"loop-reduce",
"lower-amx-type",
- "pre-amx-config",
"lower-amx-intrinsics",
"polyhedral-info",
"print-polyhedral-info",
diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/X86/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/X86/BUILD.gn
index 921cac1a6fb49f1..e8aa57fc9bc1d44 100644
--- a/llvm/utils/gn/secondary/llvm/lib/Target/X86/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/Target/X86/BUILD.gn
@@ -122,7 +122,6 @@ static_library("LLVMX86CodeGen") {
"X86OptimizeLEAs.cpp",
"X86PadShortFunction.cpp",
"X86PartialReduction.cpp",
- "X86PreAMXConfig.cpp",
"X86PreTileConfig.cpp",
"X86RegisterInfo.cpp",
"X86ReturnThunks.cpp",