[PATCH] D91927: [X86] Add x86_amx type for intel AMX.
LuoYuanke via Phabricator via cfe-commits
cfe-commits at lists.llvm.org
Tue Nov 24 21:39:03 PST 2020
LuoYuanke marked an inline comment as done.
LuoYuanke added inline comments.
================
Comment at: llvm/lib/IR/DataLayout.cpp:819
+ case Type::X86_AMXTyID:
+ return Align(64);
default:
----------------
pengfei wrote:
> Should be 512 bits?
Yes. It is 512. Thanks.
================
Comment at: llvm/lib/Target/X86/X86LowerAMXType.cpp:72
LLVMContext &Ctx = Builder.getContext();
- Type *Ty = LD->getType();
- EVT VT = EVT::getEVT(Ty);
- EVT HalfVT = VT.getHalfNumVectorElementsVT(Ctx);
- Type *HalfTy = HalfVT.getTypeForEVT(Ctx);
-
- Value *Ptr = LD->getPointerOperand();
- PointerType *HalfPtrTy = HalfTy->getPointerTo(LD->getPointerAddressSpace());
- Value *HalfPtr = Builder.CreateBitCast(Ptr, HalfPtrTy);
- // The HW require the alignment for AMX tile is 64, but front-end generate
- // code for the vector alignment which is the vector size.
- uint64_t HalfTySize = HalfTy->getPrimitiveSizeInBits().getFixedSize() / 8;
- Align Alignment = std::min(LD->getAlign(), Align(HalfTySize));
- auto *Lo =
- Builder.CreateAlignedLoad(HalfTy, HalfPtr, Alignment, LD->isVolatile());
-
- HalfPtr = Builder.CreateGEP(HalfTy, HalfPtr, Builder.getInt32(1));
- auto *Hi =
- Builder.CreateAlignedLoad(HalfTy, HalfPtr, Alignment, LD->isVolatile());
-
- LoadMap[Inst] = std::make_pair(Lo, Hi);
-}
-
-bool X86LowerAMXType::visitLD() {
- if (LDSet.empty())
- return false;
- for (auto &Inst : LDSet) {
- int Count = 0;
- Value *NewInst = nullptr;
- // The user should be all AMX intrinsics or all LLVM instruction.
- // Don't support it is used by both AMX intrinsics and LLVM instructions.
- for (auto I = Inst->use_begin(), E = Inst->use_end(); I != E;) {
- Use &U = *I++;
- const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U.getUser());
- if (!II) {
- Count++;
- continue;
- }
- if (NewInst)
- continue;
- Value *Row, *Col;
- switch (II->getIntrinsicID()) {
- default:
- report_fatal_error("Non-AMX intrinsic use tile type.");
- break;
- case Intrinsic::x86_tdpbssd_internal: {
- unsigned OpNo = U.getOperandNo();
- switch (OpNo) {
- case 3:
- Row = II->getArgOperand(0);
- Col = II->getArgOperand(1);
- break;
- case 4:
- Row = II->getArgOperand(0);
- Col = II->getArgOperand(2);
- break;
- case 5:
- Row = II->getArgOperand(2);
- Col = II->getArgOperand(1);
- break;
- }
- break;
- }
- case Intrinsic::x86_tilestored64_internal: {
- Row = II->getArgOperand(0);
- Col = II->getArgOperand(1);
- break;
- }
- }
- assert(Count == 0 && "Can NOT mix amx intrinsic and LLVM instruction");
- // FIXME: The shape def should be ahead of load.
- IRBuilder<> Builder(Inst);
- LLVMContext &Ctx = Builder.getContext();
- // Use the maximun column as stride.
- Value *Stride = Builder.getInt64(64);
- Value *I8Ptr =
- Builder.CreateBitCast(Inst->getOperand(0), Type::getInt8PtrTy(Ctx));
- std::array<Value *, 4> Args = {Row, Col, I8Ptr, Stride};
-
- NewInst = Builder.CreateIntrinsic(Intrinsic::x86_tileloadd64_internal,
- None, Args);
-
- Inst->replaceAllUsesWith(NewInst);
- }
- if (!NewInst)
- splitLD(Inst);
+ AllocaInst *AllocaAddr = CreateAllocaInst(Builder, Bitcast->getParent());
+ Value *I8Ptr =
----------------
craig.topper wrote:
> Shouldn't this be in the function's entry block?
Yes, it is in the function's entry block. That is done at line 48 of CreateAllocaInst(). CreateAllocaInst() is actually copied from your code. :)
================
Comment at: llvm/lib/Target/X86/X86LowerAMXType.cpp:79
+ // -->
+ // %addr = alloca <256 x i32>, align 1024
+ // store <256 x i32> %src, <256 x i32>* %addr, align 1024
----------------
pengfei wrote:
> Why the alignment not be 64?
1024 is conservative, because a vector requires its alignment to be the vector size. Here we generate a vector <256 x i32> load/store.
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D91927/new/
https://reviews.llvm.org/D91927
More information about the cfe-commits
mailing list