[llvm] c4dba47 - [X86][AMX] Don't emit tilerelease for old AMX intrinsic.

via llvm-commits llvm-commits at lists.llvm.org
Wed Nov 17 17:28:43 PST 2021


Author: Luo, Yuanke
Date: 2021-11-18T09:28:32+08:00
New Revision: c4dba47196c280dcf3763ccb6133f3dec5285e78

URL: https://github.com/llvm/llvm-project/commit/c4dba47196c280dcf3763ccb6133f3dec5285e78
DIFF: https://github.com/llvm/llvm-project/commit/c4dba47196c280dcf3763ccb6133f3dec5285e78.diff

LOG: [X86][AMX] Don't emit tilerelease for old AMX intrinsic.

We should avoid mixing old AMX intrinsics with new AMX intrinsics. For
old AMX intrinsics, the user is responsible for invoking tile release.
This patch checks whether any tile config was generated by the compiler.
If so, it emits the tilerelease instruction; otherwise it does not emit
the instruction.

Differential Revision: https://reviews.llvm.org/D114066

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86FastTileConfig.cpp
    llvm/lib/Target/X86/X86FrameLowering.cpp
    llvm/lib/Target/X86/X86MachineFunctionInfo.h
    llvm/lib/Target/X86/X86PreTileConfig.cpp
    llvm/test/CodeGen/X86/AMX/amx-bf16-intrinsics.ll
    llvm/test/CodeGen/X86/AMX/amx-int8-intrinsics.ll
    llvm/test/CodeGen/X86/AMX/amx-tile-intrinsics.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86FastTileConfig.cpp b/llvm/lib/Target/X86/X86FastTileConfig.cpp
index 7031bd40215d8..87c04a07cd13e 100644
--- a/llvm/lib/Target/X86/X86FastTileConfig.cpp
+++ b/llvm/lib/Target/X86/X86FastTileConfig.cpp
@@ -44,6 +44,7 @@ class X86FastTileConfig : public MachineFunctionPass {
   const TargetRegisterInfo *TRI = nullptr;
   const TargetInstrInfo *TII = nullptr;
   MachineRegisterInfo *MRI = nullptr;
+  X86MachineFunctionInfo *X86FI = nullptr;
 
   MachineInstr *getTileConfigPoint();
   void tileConfig();
@@ -289,6 +290,8 @@ bool X86FastTileConfig::fastTileConfig() {
     if (!CFGs.empty())
       Changed = true;
   }
+  if (Changed)
+    X86FI->setHasVirtualTileReg(true);
   return Changed;
 }
 
@@ -298,6 +301,7 @@ bool X86FastTileConfig::runOnMachineFunction(MachineFunction &MFunc) {
   ST = &MFunc.getSubtarget<X86Subtarget>();
   TRI = ST->getRegisterInfo();
   TII = MFunc.getSubtarget().getInstrInfo();
+  X86FI = MFunc.getInfo<X86MachineFunctionInfo>();
 
   return fastTileConfig();
 }

diff  --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
index d84cbac9dc4b0..bd780273509f8 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -2219,13 +2219,8 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
   }
 
   // Emit tilerelease for AMX kernel.
-  const MachineRegisterInfo &MRI = MF.getRegInfo();
-  const TargetRegisterClass *RC = TRI->getRegClass(X86::TILERegClassID);
-  for (unsigned I = 0; I < RC->getNumRegs(); I++)
-    if (!MRI.reg_nodbg_empty(X86::TMM0 + I)) {
-      BuildMI(MBB, Terminator, DL, TII.get(X86::TILERELEASE));
-      break;
-    }
+  if (X86FI->hasVirtualTileReg())
+    BuildMI(MBB, Terminator, DL, TII.get(X86::TILERELEASE));
 }
 
 StackOffset X86FrameLowering::getFrameIndexReference(const MachineFunction &MF,

diff  --git a/llvm/lib/Target/X86/X86MachineFunctionInfo.h b/llvm/lib/Target/X86/X86MachineFunctionInfo.h
index 7f3c55f317c76..99d1a97380dd9 100644
--- a/llvm/lib/Target/X86/X86MachineFunctionInfo.h
+++ b/llvm/lib/Target/X86/X86MachineFunctionInfo.h
@@ -113,6 +113,10 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
   /// other tools to detect the extended record.
   bool HasSwiftAsyncContext = false;
 
+  /// True if this function has tile virtual register. This is used to
+  /// determine if we should insert tilerelease in frame lowering.
+  bool HasVirtualTileReg = false;
+
   Optional<int> SwiftAsyncContextFrameIdx;
 
   ValueMap<const Value *, size_t> PreallocatedIds;
@@ -207,6 +211,9 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
   bool hasSwiftAsyncContext() const { return HasSwiftAsyncContext; }
   void setHasSwiftAsyncContext(bool v) { HasSwiftAsyncContext = v; }
 
+  bool hasVirtualTileReg() const { return HasVirtualTileReg; }
+  void setHasVirtualTileReg(bool v) { HasVirtualTileReg = v; }
+
   Optional<int> getSwiftAsyncContextFrameIdx() const {
     return SwiftAsyncContextFrameIdx;
   }

diff  --git a/llvm/lib/Target/X86/X86PreTileConfig.cpp b/llvm/lib/Target/X86/X86PreTileConfig.cpp
index 53aa8f99ff1e0..5d21f8666ec6f 100644
--- a/llvm/lib/Target/X86/X86PreTileConfig.cpp
+++ b/llvm/lib/Target/X86/X86PreTileConfig.cpp
@@ -25,6 +25,7 @@
 
 #include "X86.h"
 #include "X86InstrBuilder.h"
+#include "X86MachineFunctionInfo.h"
 #include "X86RegisterInfo.h"
 #include "X86Subtarget.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
@@ -235,6 +236,7 @@ bool X86PreTileConfig::runOnMachineFunction(MachineFunction &MF) {
   const TargetInstrInfo *TII = ST.getInstrInfo();
   const TargetRegisterInfo *TRI = ST.getRegisterInfo();
   const TargetRegisterClass *RC = TRI->getRegClass(X86::TILERegClassID);
+  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
 
   BitVector AMXRegs(TRI->getNumRegs());
   for (unsigned I = 0; I < RC->getNumRegs(); I++)
@@ -294,6 +296,7 @@ bool X86PreTileConfig::runOnMachineFunction(MachineFunction &MF) {
   // There's no AMX instruction if we didn't find a tile config live in point.
   if (CfgNeedInsert.empty())
     return false;
+  X86FI->setHasVirtualTileReg(true);
 
   // Avoid to insert ldtilecfg before any shape defs.
   SmallVector<MachineBasicBlock *, 8> WorkList;

diff  --git a/llvm/test/CodeGen/X86/AMX/amx-bf16-intrinsics.ll b/llvm/test/CodeGen/X86/AMX/amx-bf16-intrinsics.ll
index a59b05669e616..a415d9c152422 100644
--- a/llvm/test/CodeGen/X86/AMX/amx-bf16-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/AMX/amx-bf16-intrinsics.ll
@@ -5,7 +5,6 @@ define void @test_amx() {
 ; CHECK-LABEL: test_amx:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    tdpbf16ps %tmm7, %tmm4, %tmm3
-; CHECK-NEXT:    tilerelease
 ; CHECK-NEXT:    retq
   call void @llvm.x86.tdpbf16ps(i8 3, i8 4, i8 7)
   ret void

diff  --git a/llvm/test/CodeGen/X86/AMX/amx-int8-intrinsics.ll b/llvm/test/CodeGen/X86/AMX/amx-int8-intrinsics.ll
index fa811f02bd9af..2bbf4d9edb91a 100644
--- a/llvm/test/CodeGen/X86/AMX/amx-int8-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/AMX/amx-int8-intrinsics.ll
@@ -8,7 +8,6 @@ define void @test_amx() {
 ; CHECK-NEXT:    tdpbsud %tmm7, %tmm4, %tmm3
 ; CHECK-NEXT:    tdpbusd %tmm7, %tmm0, %tmm3
 ; CHECK-NEXT:    tdpbuud %tmm1, %tmm4, %tmm3
-; CHECK-NEXT:    tilerelease
 ; CHECK-NEXT:    retq
   call void @llvm.x86.tdpbssd(i8 3, i8 4, i8 7)
 

diff  --git a/llvm/test/CodeGen/X86/AMX/amx-tile-intrinsics.ll b/llvm/test/CodeGen/X86/AMX/amx-tile-intrinsics.ll
index 9792f8b19af45..4d469c23328e4 100644
--- a/llvm/test/CodeGen/X86/AMX/amx-tile-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/AMX/amx-tile-intrinsics.ll
@@ -11,7 +11,6 @@ define void @test_amx(i8* %pointer, i8* %base, i64 %stride) {
 ; CHECK-NEXT:    tileloadd (%rsi,%rdx), %tmm3
 ; CHECK-NEXT:    tileloaddt1 (%rsi,%rdx), %tmm3
 ; CHECK-NEXT:    tilestored %tmm3, (%rsi,%rdx)
-; CHECK-NEXT:    tilerelease
 ; CHECK-NEXT:    retq
   call void @llvm.x86.ldtilecfg(i8* %pointer)
 


        


More information about the llvm-commits mailing list