[llvm-branch-commits] [llvm] 08665b1 - Support tilezero intrinsic and c interface for AMX.
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Wed Dec 30 21:40:02 PST 2020
Author: Luo, Yuanke
Date: 2020-12-31T13:24:57+08:00
New Revision: 08665b180568c82a1b2b8bd38a1e5769a862c2a9
URL: https://github.com/llvm/llvm-project/commit/08665b180568c82a1b2b8bd38a1e5769a862c2a9
DIFF: https://github.com/llvm/llvm-project/commit/08665b180568c82a1b2b8bd38a1e5769a862c2a9.diff
LOG: Support tilezero intrinsic and c interface for AMX.
Differential Revision: https://reviews.llvm.org/D92837
Added:
llvm/test/CodeGen/X86/AMX/amx-tile-basic.ll
Modified:
clang/include/clang/Basic/BuiltinsX86_64.def
clang/lib/Headers/amxintrin.h
clang/test/CodeGen/X86/amx_api.c
llvm/include/llvm/IR/IntrinsicsX86.td
llvm/lib/Target/X86/X86ExpandPseudo.cpp
llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
llvm/lib/Target/X86/X86InstrAMX.td
llvm/lib/Target/X86/X86PreTileConfig.cpp
llvm/lib/Target/X86/X86RegisterInfo.cpp
Removed:
################################################################################
diff --git a/clang/include/clang/Basic/BuiltinsX86_64.def b/clang/include/clang/Basic/BuiltinsX86_64.def
index 98327ade17e8..974ba35b3233 100644
--- a/clang/include/clang/Basic/BuiltinsX86_64.def
+++ b/clang/include/clang/Basic/BuiltinsX86_64.def
@@ -104,6 +104,7 @@ TARGET_BUILTIN(__builtin_ia32_senduipi, "vUWi", "n", "uintr")
TARGET_BUILTIN(__builtin_ia32_tileloadd64_internal, "V256iUsUsvC*z", "n", "amx-tile")
TARGET_BUILTIN(__builtin_ia32_tdpbssd_internal, "V256iUsUsUsV256iV256iV256i", "n", "amx-int8")
TARGET_BUILTIN(__builtin_ia32_tilestored64_internal, "vUsUsv*zV256i", "n", "amx-tile")
+TARGET_BUILTIN(__builtin_ia32_tilezero_internal, "V256iUsUs", "n", "amx-tile")
// AMX
TARGET_BUILTIN(__builtin_ia32_tile_loadconfig, "vvC*", "n", "amx-tile")
TARGET_BUILTIN(__builtin_ia32_tile_storeconfig, "vvC*", "n", "amx-tile")
diff --git a/clang/lib/Headers/amxintrin.h b/clang/lib/Headers/amxintrin.h
index 03a468ef15b1..901488a17e8c 100644
--- a/clang/lib/Headers/amxintrin.h
+++ b/clang/lib/Headers/amxintrin.h
@@ -251,7 +251,7 @@ typedef struct __tile1024i_str {
_tile1024i tile;
} __tile1024i;
-__DEFAULT_FN_ATTRS_INT8
+__DEFAULT_FN_ATTRS_TILE
static void __tile_loadd(__tile1024i *dst, const void *base,
__SIZE_TYPE__ stride) {
dst->tile = _tile_loadd_internal(dst->row, dst->col, base, stride);
@@ -264,10 +264,15 @@ static void __tile_dpbsud(__tile1024i *dst, __tile1024i src1,
src1.tile, src2.tile);
}
-__DEFAULT_FN_ATTRS_INT8
+__DEFAULT_FN_ATTRS_TILE
static void __tile_stored(void *base, __SIZE_TYPE__ stride, __tile1024i src) {
_tile_stored_internal(src.row, src.col, base, stride, src.tile);
}
+__DEFAULT_FN_ATTRS_TILE
+static void __tile_zero(__tile1024i *dst) {
+ dst->tile = __builtin_ia32_tilezero_internal(dst->row, dst->col);
+}
+
#endif /* __x86_64__ */
#endif /* __AMXINTRIN_H */
diff --git a/clang/test/CodeGen/X86/amx_api.c b/clang/test/CodeGen/X86/amx_api.c
index 52eb9542228d..55290f3fa6fb 100644
--- a/clang/test/CodeGen/X86/amx_api.c
+++ b/clang/test/CodeGen/X86/amx_api.c
@@ -52,3 +52,10 @@ void test_tile_stored(__tile1024i c) {
//CHECK-NEXT: call void @llvm.x86.tilestored64.internal
__tile_stored(buf, STRIDE, c);
}
+
+void test_tile_zero(__tile1024i c) {
+ //CHECK-LABEL: @test_tile_zero
+ //CHECK: call x86_amx @llvm.x86.tilezero.internal
+ //CHECK-NEXT bitcast x86_amx {{%.*}} to <256 x i32>
+ __tile_zero(&c);
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td
index dba44523e153..68b076c594be 100644
--- a/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -5057,6 +5057,10 @@ let TargetPrefix = "x86" in {
GCCBuiltin<"__builtin_ia32_tilestored64_internal">,
Intrinsic<[], [llvm_i16_ty, llvm_i16_ty, llvm_ptr_ty,
llvm_i64_ty, llvm_x86amx_ty], []>;
+ def int_x86_tilezero_internal :
+ GCCBuiltin<"__builtin_ia32_tilezero_internal">,
+ Intrinsic<[llvm_x86amx_ty], [llvm_i16_ty, llvm_i16_ty],
+ []>;
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/X86/X86ExpandPseudo.cpp b/llvm/lib/Target/X86/X86ExpandPseudo.cpp
index a2fe09aecc49..15af0fb2e888 100644
--- a/llvm/lib/Target/X86/X86ExpandPseudo.cpp
+++ b/llvm/lib/Target/X86/X86ExpandPseudo.cpp
@@ -494,6 +494,12 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
MI.setDesc(TII->get(X86::TILESTORED));
return true;
}
+ case X86::PTILEZEROV: {
+ for (int i = 3; i > 0; --i) // Remove row, col, $tmmcfg
+ MI.RemoveOperand(i);
+ MI.setDesc(TII->get(X86::TILEZERO));
+ return true;
+ }
}
llvm_unreachable("Previous switch has a fallthrough?");
}
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 883b6bfc145d..a96f73df855d 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -4641,6 +4641,18 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
ReplaceNode(Node, CNode);
return;
}
+ case Intrinsic::x86_tilezero_internal: {
+ if (!Subtarget->hasAMXTILE())
+ break;
+ unsigned Opc = X86::PTILEZEROV;
+ SDValue Chain = Node->getOperand(0);
+ SDValue CFG = CurDAG->getRegister(0, MVT::Untyped);
+ SDValue Ops[] = {Node->getOperand(2), Node->getOperand(3), CFG, Chain};
+ MachineSDNode *CNode =
+ CurDAG->getMachineNode(Opc, dl, {MVT::x86amx, MVT::Other}, Ops);
+ ReplaceNode(Node, CNode);
+ return;
+ }
}
break;
}
diff --git a/llvm/lib/Target/X86/X86InstrAMX.td b/llvm/lib/Target/X86/X86InstrAMX.td
index e7346261b40c..e4f3290cab9f 100644
--- a/llvm/lib/Target/X86/X86InstrAMX.td
+++ b/llvm/lib/Target/X86/X86InstrAMX.td
@@ -62,6 +62,9 @@ let Predicates = [HasAMXTILE, In64BitMode] in {
def PTILESTOREDV : PseudoI<(outs), (ins GR16:$src1,
GR16:$src2, opaquemem:$src3,
TILE:$src4, TILECFG:$cfg), []>;
+ def PTILEZEROV : PseudoI<(outs TILE: $dst), (ins GR16:$src1,
+ GR16:$src2,
+ TILECFG:$cfg), []>;
let usesCustomInserter = 1 in {
// Pseudo instructions, using immediates instead of tile registers.
diff --git a/llvm/lib/Target/X86/X86PreTileConfig.cpp b/llvm/lib/Target/X86/X86PreTileConfig.cpp
index 50719744f238..05ee6c6c8384 100644
--- a/llvm/lib/Target/X86/X86PreTileConfig.cpp
+++ b/llvm/lib/Target/X86/X86PreTileConfig.cpp
@@ -132,6 +132,7 @@ static ShapeT getShape(const MachineInstr &MI, MachineRegisterInfo *MRI) {
llvm_unreachable("Unexpected machine instruction on tile");
case X86::PTILELOADDV:
case X86::PTDPBSSDV:
+ case X86::PTILEZEROV:
MachineOperand &MO1 = const_cast<MachineOperand &>(MI.getOperand(1));
MachineOperand &MO2 = const_cast<MachineOperand &>(MI.getOperand(2));
ShapeT Shape(&MO1, &MO2, MRI);
@@ -230,6 +231,7 @@ static void addTileCFGUse(MachineFunction &MF, Register CFG) {
case X86::PTILELOADDV:
case X86::PTILESTOREDV:
case X86::PTDPBSSDV:
+ case X86::PTILEZEROV:
unsigned NumOperands = MI.getNumOperands();
MI.RemoveOperand(NumOperands - 1);
MI.addOperand(MF, MachineOperand::CreateReg(CFG, false));
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp
index 81571decae2d..d90b4e7bdc7e 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -873,6 +873,7 @@ static ShapeT getTileShape(Register VirtReg, VirtRegMap *VRM,
// We only collect the tile shape that is defined.
case X86::PTILELOADDV:
case X86::PTDPBSSDV:
+ case X86::PTILEZEROV:
MachineOperand &MO1 = MI->getOperand(1);
MachineOperand &MO2 = MI->getOperand(2);
ShapeT Shape(&MO1, &MO2, MRI);
diff --git a/llvm/test/CodeGen/X86/AMX/amx-tile-basic.ll b/llvm/test/CodeGen/X86/AMX/amx-tile-basic.ll
new file mode 100644
index 000000000000..501bde029dc1
--- /dev/null
+++ b/llvm/test/CodeGen/X86/AMX/amx-tile-basic.ll
@@ -0,0 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-tile -mattr=+avx512f -verify-machineinstrs | FileCheck %s
+
+define void @test_amx(i8* %pointer, i8* %base, i64 %stride) {
+; CHECK-LABEL: test_amx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; CHECK-NEXT: vmovdqu64 %zmm0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movb $1, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movb $8, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movw $8, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movb $8, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movw $8, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movb $8, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movw $8, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: ldtilecfg -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movw $8, %ax
+; CHECK-NEXT: tilezero %tmm0
+; CHECK-NEXT: tileloadd (%rsi,%rdx), %tmm1
+; CHECK-NEXT: tileloadd (%rsi,%rdx), %tmm2
+; CHECK-NEXT: tdpbssd %tmm2, %tmm1, %tmm0
+; CHECK-NEXT: tilestored %tmm0, (%rdi,%rdx)
+; CHECK-NEXT: tilerelease
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+ %c = call x86_amx @llvm.x86.tilezero.internal(i16 8, i16 8)
+ %a = call x86_amx @llvm.x86.tileloadd64.internal(i16 8, i16 8, i8* %base, i64 %stride)
+ %b = call x86_amx @llvm.x86.tileloadd64.internal(i16 8, i16 8, i8* %base, i64 %stride)
+ %d = call x86_amx @llvm.x86.tdpbssd.internal(i16 8, i16 8, i16 8, x86_amx %c, x86_amx %a, x86_amx %b)
+ call void @llvm.x86.tilestored64.internal(i16 8, i16 8, i8* %pointer, i64 %stride, x86_amx %d)
+
+ ret void
+}
+
+declare x86_amx @llvm.x86.tilezero.internal(i16, i16)
+declare x86_amx @llvm.x86.tileloadd64.internal(i16, i16, i8*, i64)
+declare x86_amx @llvm.x86.tdpbssd.internal(i16, i16, i16, x86_amx, x86_amx, x86_amx)
+declare void @llvm.x86.tilestored64.internal(i16, i16, i8*, i64, x86_amx)
More information about the llvm-branch-commits
mailing list