[llvm-branch-commits] [llvm] 08665b1 - Support tilezero intrinsic and c interface for AMX.

via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Wed Dec 30 21:40:02 PST 2020


Author: Luo, Yuanke
Date: 2020-12-31T13:24:57+08:00
New Revision: 08665b180568c82a1b2b8bd38a1e5769a862c2a9

URL: https://github.com/llvm/llvm-project/commit/08665b180568c82a1b2b8bd38a1e5769a862c2a9
DIFF: https://github.com/llvm/llvm-project/commit/08665b180568c82a1b2b8bd38a1e5769a862c2a9.diff

LOG: Support tilezero intrinsic and c interface for AMX.

Differential Revision: https://reviews.llvm.org/D92837

Added: 
    llvm/test/CodeGen/X86/AMX/amx-tile-basic.ll

Modified: 
    clang/include/clang/Basic/BuiltinsX86_64.def
    clang/lib/Headers/amxintrin.h
    clang/test/CodeGen/X86/amx_api.c
    llvm/include/llvm/IR/IntrinsicsX86.td
    llvm/lib/Target/X86/X86ExpandPseudo.cpp
    llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
    llvm/lib/Target/X86/X86InstrAMX.td
    llvm/lib/Target/X86/X86PreTileConfig.cpp
    llvm/lib/Target/X86/X86RegisterInfo.cpp

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/BuiltinsX86_64.def b/clang/include/clang/Basic/BuiltinsX86_64.def
index 98327ade17e8..974ba35b3233 100644
--- a/clang/include/clang/Basic/BuiltinsX86_64.def
+++ b/clang/include/clang/Basic/BuiltinsX86_64.def
@@ -104,6 +104,7 @@ TARGET_BUILTIN(__builtin_ia32_senduipi, "vUWi", "n", "uintr")
 TARGET_BUILTIN(__builtin_ia32_tileloadd64_internal, "V256iUsUsvC*z", "n", "amx-tile")
 TARGET_BUILTIN(__builtin_ia32_tdpbssd_internal, "V256iUsUsUsV256iV256iV256i", "n", "amx-int8")
 TARGET_BUILTIN(__builtin_ia32_tilestored64_internal, "vUsUsv*zV256i", "n", "amx-tile")
+TARGET_BUILTIN(__builtin_ia32_tilezero_internal, "V256iUsUs", "n", "amx-tile")
 // AMX
 TARGET_BUILTIN(__builtin_ia32_tile_loadconfig, "vvC*", "n", "amx-tile")
 TARGET_BUILTIN(__builtin_ia32_tile_storeconfig, "vvC*", "n", "amx-tile")

diff  --git a/clang/lib/Headers/amxintrin.h b/clang/lib/Headers/amxintrin.h
index 03a468ef15b1..901488a17e8c 100644
--- a/clang/lib/Headers/amxintrin.h
+++ b/clang/lib/Headers/amxintrin.h
@@ -251,7 +251,7 @@ typedef struct __tile1024i_str {
   _tile1024i tile;
 } __tile1024i;
 
-__DEFAULT_FN_ATTRS_INT8
+__DEFAULT_FN_ATTRS_TILE
 static void __tile_loadd(__tile1024i *dst, const void *base,
                          __SIZE_TYPE__ stride) {
   dst->tile = _tile_loadd_internal(dst->row, dst->col, base, stride);
@@ -264,10 +264,15 @@ static void __tile_dpbsud(__tile1024i *dst, __tile1024i src1,
                                     src1.tile, src2.tile);
 }
 
-__DEFAULT_FN_ATTRS_INT8
+__DEFAULT_FN_ATTRS_TILE
 static void __tile_stored(void *base, __SIZE_TYPE__ stride, __tile1024i src) {
   _tile_stored_internal(src.row, src.col, base, stride, src.tile);
 }
 
+__DEFAULT_FN_ATTRS_TILE
+static void __tile_zero(__tile1024i *dst) {
+  dst->tile = __builtin_ia32_tilezero_internal(dst->row, dst->col);
+}
+
 #endif /* __x86_64__ */
 #endif /* __AMXINTRIN_H */

diff  --git a/clang/test/CodeGen/X86/amx_api.c b/clang/test/CodeGen/X86/amx_api.c
index 52eb9542228d..55290f3fa6fb 100644
--- a/clang/test/CodeGen/X86/amx_api.c
+++ b/clang/test/CodeGen/X86/amx_api.c
@@ -52,3 +52,10 @@ void test_tile_stored(__tile1024i c) {
   //CHECK-NEXT: call void @llvm.x86.tilestored64.internal
   __tile_stored(buf, STRIDE, c);
 }
+
+void test_tile_zero(__tile1024i c) {
+  //CHECK-LABEL: @test_tile_zero
+  //CHECK: call x86_amx @llvm.x86.tilezero.internal
+  //CHECK-NEXT: bitcast x86_amx {{%.*}} to <256 x i32>
+  __tile_zero(&c);
+}

diff  --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td
index dba44523e153..68b076c594be 100644
--- a/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -5057,6 +5057,10 @@ let TargetPrefix = "x86" in {
               GCCBuiltin<"__builtin_ia32_tilestored64_internal">,
               Intrinsic<[], [llvm_i16_ty, llvm_i16_ty, llvm_ptr_ty,
                              llvm_i64_ty, llvm_x86amx_ty], []>;
+  def int_x86_tilezero_internal :
+              GCCBuiltin<"__builtin_ia32_tilezero_internal">,
+              Intrinsic<[llvm_x86amx_ty], [llvm_i16_ty, llvm_i16_ty],
+                        []>;
 }
 
 //===----------------------------------------------------------------------===//

diff  --git a/llvm/lib/Target/X86/X86ExpandPseudo.cpp b/llvm/lib/Target/X86/X86ExpandPseudo.cpp
index a2fe09aecc49..15af0fb2e888 100644
--- a/llvm/lib/Target/X86/X86ExpandPseudo.cpp
+++ b/llvm/lib/Target/X86/X86ExpandPseudo.cpp
@@ -494,6 +494,12 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
     MI.setDesc(TII->get(X86::TILESTORED));
     return true;
   }
+  case X86::PTILEZEROV: {
+    for (int i = 3; i > 0; --i) // Remove row, col, $tmmcfg
+      MI.RemoveOperand(i);
+    MI.setDesc(TII->get(X86::TILEZERO));
+    return true;
+  }
   }
   llvm_unreachable("Previous switch has a fallthrough?");
 }

diff  --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 883b6bfc145d..a96f73df855d 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -4641,6 +4641,18 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
       ReplaceNode(Node, CNode);
       return;
     }
+    case Intrinsic::x86_tilezero_internal: {
+      if (!Subtarget->hasAMXTILE())
+        break;
+      unsigned Opc = X86::PTILEZEROV;
+      SDValue Chain = Node->getOperand(0);
+      SDValue CFG = CurDAG->getRegister(0, MVT::Untyped);
+      SDValue Ops[] = {Node->getOperand(2), Node->getOperand(3), CFG, Chain};
+      MachineSDNode *CNode =
+          CurDAG->getMachineNode(Opc, dl, {MVT::x86amx, MVT::Other}, Ops);
+      ReplaceNode(Node, CNode);
+      return;
+    }
     }
     break;
   }

diff  --git a/llvm/lib/Target/X86/X86InstrAMX.td b/llvm/lib/Target/X86/X86InstrAMX.td
index e7346261b40c..e4f3290cab9f 100644
--- a/llvm/lib/Target/X86/X86InstrAMX.td
+++ b/llvm/lib/Target/X86/X86InstrAMX.td
@@ -62,6 +62,9 @@ let Predicates = [HasAMXTILE, In64BitMode] in {
     def PTILESTOREDV : PseudoI<(outs), (ins GR16:$src1,
                                             GR16:$src2, opaquemem:$src3,
                                             TILE:$src4, TILECFG:$cfg), []>;
+    def PTILEZEROV : PseudoI<(outs TILE: $dst), (ins GR16:$src1,
+                                                     GR16:$src2,
+                                                     TILECFG:$cfg), []>;
 
     let usesCustomInserter = 1 in {
       // Pseudo instructions, using immediates instead of tile registers.

diff  --git a/llvm/lib/Target/X86/X86PreTileConfig.cpp b/llvm/lib/Target/X86/X86PreTileConfig.cpp
index 50719744f238..05ee6c6c8384 100644
--- a/llvm/lib/Target/X86/X86PreTileConfig.cpp
+++ b/llvm/lib/Target/X86/X86PreTileConfig.cpp
@@ -132,6 +132,7 @@ static ShapeT getShape(const MachineInstr &MI, MachineRegisterInfo *MRI) {
     llvm_unreachable("Unexpected machine instruction on tile");
   case X86::PTILELOADDV:
   case X86::PTDPBSSDV:
+  case X86::PTILEZEROV:
     MachineOperand &MO1 = const_cast<MachineOperand &>(MI.getOperand(1));
     MachineOperand &MO2 = const_cast<MachineOperand &>(MI.getOperand(2));
     ShapeT Shape(&MO1, &MO2, MRI);
@@ -230,6 +231,7 @@ static void addTileCFGUse(MachineFunction &MF, Register CFG) {
       case X86::PTILELOADDV:
       case X86::PTILESTOREDV:
       case X86::PTDPBSSDV:
+      case X86::PTILEZEROV:
         unsigned NumOperands = MI.getNumOperands();
         MI.RemoveOperand(NumOperands - 1);
         MI.addOperand(MF, MachineOperand::CreateReg(CFG, false));

diff  --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp
index 81571decae2d..d90b4e7bdc7e 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -873,6 +873,7 @@ static ShapeT getTileShape(Register VirtReg, VirtRegMap *VRM,
   // We only collect the tile shape that is defined.
   case X86::PTILELOADDV:
   case X86::PTDPBSSDV:
+  case X86::PTILEZEROV:
     MachineOperand &MO1 = MI->getOperand(1);
     MachineOperand &MO2 = MI->getOperand(2);
     ShapeT Shape(&MO1, &MO2, MRI);

diff  --git a/llvm/test/CodeGen/X86/AMX/amx-tile-basic.ll b/llvm/test/CodeGen/X86/AMX/amx-tile-basic.ll
new file mode 100644
index 000000000000..501bde029dc1
--- /dev/null
+++ b/llvm/test/CodeGen/X86/AMX/amx-tile-basic.ll
@@ -0,0 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-tile -mattr=+avx512f -verify-machineinstrs | FileCheck %s
+
+define void @test_amx(i8* %pointer, i8* %base, i64 %stride) {
+; CHECK-LABEL: test_amx:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpxord %zmm0, %zmm0, %zmm0
+; CHECK-NEXT:    vmovdqu64 %zmm0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movb $1, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movb $8, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movw $8, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movb $8, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movw $8, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movb $8, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movw $8, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    ldtilecfg -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movw $8, %ax
+; CHECK-NEXT:    tilezero %tmm0
+; CHECK-NEXT:    tileloadd (%rsi,%rdx), %tmm1
+; CHECK-NEXT:    tileloadd (%rsi,%rdx), %tmm2
+; CHECK-NEXT:    tdpbssd %tmm2, %tmm1, %tmm0
+; CHECK-NEXT:    tilestored %tmm0, (%rdi,%rdx)
+; CHECK-NEXT:    tilerelease
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %c = call x86_amx @llvm.x86.tilezero.internal(i16 8, i16 8)
+  %a = call x86_amx @llvm.x86.tileloadd64.internal(i16 8, i16 8, i8* %base, i64 %stride)
+  %b = call x86_amx @llvm.x86.tileloadd64.internal(i16 8, i16 8, i8* %base, i64 %stride)
+  %d = call x86_amx @llvm.x86.tdpbssd.internal(i16 8, i16 8, i16 8, x86_amx %c, x86_amx %a, x86_amx %b)
+  call void @llvm.x86.tilestored64.internal(i16 8, i16 8, i8* %pointer, i64 %stride, x86_amx %d)
+
+  ret void
+}
+
+declare x86_amx @llvm.x86.tilezero.internal(i16, i16)
+declare x86_amx @llvm.x86.tileloadd64.internal(i16, i16, i8*, i64)
+declare x86_amx @llvm.x86.tdpbssd.internal(i16, i16, i16, x86_amx, x86_amx, x86_amx)
+declare void @llvm.x86.tilestored64.internal(i16, i16, i8*, i64, x86_amx)


        


More information about the llvm-branch-commits mailing list