[llvm] aaaf9ce - [X86][AMX] Replace LDTILECFG with PLDTILECFGV on auto-config.

via llvm-commits llvm-commits at lists.llvm.org
Fri May 27 01:38:48 PDT 2022


Author: Luo, Yuanke
Date: 2022-05-27T16:38:35+08:00
New Revision: aaaf9cede774a3a82770e88a8096b8c605bdb2c0

URL: https://github.com/llvm/llvm-project/commit/aaaf9cede774a3a82770e88a8096b8c605bdb2c0
DIFF: https://github.com/llvm/llvm-project/commit/aaaf9cede774a3a82770e88a8096b8c605bdb2c0.diff

LOG: [X86][AMX] Replace LDTILECFG with PLDTILECFGV on auto-config.

There is an intrinsic `@llvm.x86.ldtilecfg` which is lowered to LDTILECFG.
This intrinsic is open for users to configure tile registers by
themselves. There is a chance that `@llvm.x86.ldtilecfg` would be mixed
with the new AMX intrinsics, which depend on the compiler to configure tile
registers. A separate pseudo instruction, PLDTILECFGV, avoids
unexpected behaviour when `@llvm.x86.ldtilecfg` is mixed with the new AMX
intrinsics. Though users should not mix the two programming models, the
compiler should avoid a crash or UB when they are mixed.

Differential Revision: https://reviews.llvm.org/D126519

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86FastPreTileConfig.cpp
    llvm/lib/Target/X86/X86FastTileConfig.cpp
    llvm/lib/Target/X86/X86InstrAMX.td
    llvm/lib/Target/X86/X86InstrInfo.cpp
    llvm/lib/Target/X86/X86PreTileConfig.cpp
    llvm/lib/Target/X86/X86TileConfig.cpp
    llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi.mir
    llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi2.mir
    llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi4.mir
    llvm/test/CodeGen/X86/AMX/amx-fastconfig-spill.mir
    llvm/test/CodeGen/X86/AMX/amx-fastconfig.mir
    llvm/test/CodeGen/X86/AMX/amx-fastpreconfig.mir
    llvm/test/CodeGen/X86/AMX/amx-tile-intrinsics.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86FastPreTileConfig.cpp b/llvm/lib/Target/X86/X86FastPreTileConfig.cpp
index da10964d51ecb..754baf2973d5e 100644
--- a/llvm/lib/Target/X86/X86FastPreTileConfig.cpp
+++ b/llvm/lib/Target/X86/X86FastPreTileConfig.cpp
@@ -515,7 +515,7 @@ bool X86FastPreTileConfig::configBasicBlock(MachineBasicBlock &MBB) {
       CfgSS = MFI->CreateStackObject(ST->getTileConfigSize(),
                                      ST->getTileConfigAlignment(), false);
     LastTileCfg = addFrameReference(
-        BuildMI(MBB, Before, DebugLoc(), TII->get(X86::LDTILECFG)), CfgSS);
+        BuildMI(MBB, Before, DebugLoc(), TII->get(X86::PLDTILECFGV)), CfgSS);
     LastShapeMI = nullptr;
     Change = true;
   };

diff  --git a/llvm/lib/Target/X86/X86FastTileConfig.cpp b/llvm/lib/Target/X86/X86FastTileConfig.cpp
index 2949bd048ee00..2a20cd13791de 100644
--- a/llvm/lib/Target/X86/X86FastTileConfig.cpp
+++ b/llvm/lib/Target/X86/X86FastTileConfig.cpp
@@ -110,15 +110,15 @@ bool X86FastTileConfig::configBasicBlock(MachineBasicBlock &MBB) {
   bool Change = false;
   SmallVector<std::pair<unsigned, ShapeT>, 6> ShapeInfos;
   for (MachineInstr &MI : reverse(MBB)) {
-    if (!isTileDef(MRI, MI) && MI.getOpcode() != X86::LDTILECFG)
+    if (!isTileDef(MRI, MI) && MI.getOpcode() != X86::PLDTILECFGV)
       continue;
     // AMX instructions that define tile register.
-    if (MI.getOpcode() != X86::LDTILECFG) {
+    if (MI.getOpcode() != X86::PLDTILECFGV) {
       MachineOperand &Row = MI.getOperand(1);
       MachineOperand &Col = MI.getOperand(2);
       unsigned TMMIdx = MI.getOperand(0).getReg() - X86::TMM0;
       ShapeInfos.push_back({TMMIdx, ShapeT(&Row, &Col)});
-    } else { // LDTILECFG
+    } else { // PLDTILECFGV
       // Rewrite the shape information to memory. Stack slot should have
       // been initialized to zero in pre config.
       int SS = MI.getOperand(0).getIndex(); // tile config stack slot.

diff  --git a/llvm/lib/Target/X86/X86InstrAMX.td b/llvm/lib/Target/X86/X86InstrAMX.td
index df8c096204e05..5da06bc87b060 100644
--- a/llvm/lib/Target/X86/X86InstrAMX.td
+++ b/llvm/lib/Target/X86/X86InstrAMX.td
@@ -48,9 +48,9 @@ let Predicates = [HasAMXTILE, In64BitMode] in {
                      VEX, T8XD;
 
     // Pseduo instruction for RA.
-    let isPseudo = true, mayLoad = 1 in
-    def PLDTILECFGV : PseudoI<(outs), (ins opaquemem:$src),
-                              [(int_x86_ldtilecfg_internal addr:$src)]>;
+    let isPseudo = true, mayLoad = 1, hasSideEffects = 1,
+        Defs = [TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7] in
+    def PLDTILECFGV : PseudoI<(outs), (ins opaquemem:$src), []>;
     let isPseudo = true, mayLoad = 1 in
     def PTILELOADDV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
                                                      GR16:$src2,

diff  --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 831142a66d6bb..6763fe57aeef1 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -7360,7 +7360,7 @@ bool X86InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
   // ENDBR instructions should not be scheduled around.
   unsigned Opcode = MI.getOpcode();
   if (Opcode == X86::ENDBR64 || Opcode == X86::ENDBR32 ||
-      Opcode == X86::LDTILECFG)
+      Opcode == X86::PLDTILECFGV)
     return true;
 
   return TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF);

diff  --git a/llvm/lib/Target/X86/X86PreTileConfig.cpp b/llvm/lib/Target/X86/X86PreTileConfig.cpp
index ed0f5a5a36624..479db8585ca03 100644
--- a/llvm/lib/Target/X86/X86PreTileConfig.cpp
+++ b/llvm/lib/Target/X86/X86PreTileConfig.cpp
@@ -369,7 +369,7 @@ bool X86PreTileConfig::runOnMachineFunction(MachineFunction &MF) {
       // multi insert.
       if (VisitedOrInserted.insert(I).second) {
         auto II = I.MI ? I.MI->getIterator() : I.MBB->instr_begin();
-        addFrameReference(BuildMI(*I.MBB, ++II, DL, TII->get(X86::LDTILECFG)),
+        addFrameReference(BuildMI(*I.MBB, ++II, DL, TII->get(X86::PLDTILECFGV)),
                           SS);
       }
     }

diff  --git a/llvm/lib/Target/X86/X86TileConfig.cpp b/llvm/lib/Target/X86/X86TileConfig.cpp
index 490b1ad12b267..c126aa33f9605 100644
--- a/llvm/lib/Target/X86/X86TileConfig.cpp
+++ b/llvm/lib/Target/X86/X86TileConfig.cpp
@@ -90,7 +90,7 @@ bool X86TileConfig::runOnMachineFunction(MachineFunction &MF) {
   int SS = INT_MAX;
   for (MachineBasicBlock &MBB : MF) {
     for (MachineInstr &MI : MBB) {
-      if (MI.getOpcode() == X86::LDTILECFG) {
+      if (MI.getOpcode() == X86::PLDTILECFGV) {
         SS = MI.getOperand(0).getIndex();
         break;
       }
@@ -98,7 +98,7 @@ bool X86TileConfig::runOnMachineFunction(MachineFunction &MF) {
     if (SS != INT_MAX)
       break;
   }
-  // Didn't find LDTILECFG, just return false;
+  // Didn't find PLDTILECFGV, just return false;
   if (SS == INT_MAX)
     return false;
 

diff  --git a/llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi.mir b/llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi.mir
index 65b4bd230ff78..fa79fe27f4d5e 100644
--- a/llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi.mir
+++ b/llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi.mir
@@ -114,7 +114,7 @@ body:             |
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[MOV16ri:%[0-9]+]]:gr16 = MOV16ri 64
   ; CHECK-NEXT:   [[MOV16ri1:%[0-9]+]]:gr16 = MOV16ri 16
-  ; CHECK-NEXT:   LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4)
+  ; CHECK-NEXT:   PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.1, align 4)
   ; CHECK-NEXT:   [[LEA64r:%[0-9]+]]:gr64_nosp = LEA64r %stack.0, 1, $noreg, 0, $noreg
   ; CHECK-NEXT:   [[PTILEZEROV:%[0-9]+]]:tile = PTILEZEROV [[MOV16ri1]], [[MOV16ri]]
   ; CHECK-NEXT:   [[MOV64ri:%[0-9]+]]:gr64_nosp = MOV64ri 64
@@ -127,7 +127,7 @@ body:             |
   ; CHECK-NEXT:   [[MOV32ri64_:%[0-9]+]]:gr64_nosp = MOV32ri64 32
   ; CHECK-NEXT:   [[MOV16ri2:%[0-9]+]]:gr16 = MOV16ri 64
   ; CHECK-NEXT:   [[MOV16ri3:%[0-9]+]]:gr16 = MOV16ri 16
-  ; CHECK-NEXT:   LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4)
+  ; CHECK-NEXT:   PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.1, align 4)
   ; CHECK-NEXT:   [[LEA64r1:%[0-9]+]]:gr64_nosp = LEA64r %stack.2, 1, $noreg, 0, $noreg
   ; CHECK-NEXT:   [[PTILELOADDV:%[0-9]+]]:tile = PTILELOADDV [[MOV16ri3]], [[MOV16ri2]], [[COPY3]], 1, killed [[MOV32ri64_]], 0, $noreg
   ; CHECK-NEXT:   [[MOV64ri1:%[0-9]+]]:gr64_nosp = MOV64ri 64
@@ -139,7 +139,7 @@ body:             |
   ; CHECK-NEXT:   [[PHI:%[0-9]+]]:gr16 = PHI [[MOV16ri]], %bb.1, [[MOV16ri2]], %bb.2
   ; CHECK-NEXT:   [[PHI1:%[0-9]+]]:gr16 = PHI [[MOV16ri1]], %bb.1, [[MOV16ri3]], %bb.2
   ; CHECK-NEXT:   [[PHI2:%[0-9]+]]:gr64_nosp = PHI [[LEA64r]], %bb.1, [[LEA64r1]], %bb.2
-  ; CHECK-NEXT:   LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4)
+  ; CHECK-NEXT:   PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.1, align 4)
   ; CHECK-NEXT:   [[MOV64ri2:%[0-9]+]]:gr64_nosp = MOV64ri 64
   ; CHECK-NEXT:   [[LEA64r2:%[0-9]+]]:gr64_nosp = LEA64r %stack.5, 1, $noreg, 0, $noreg
   ; CHECK-NEXT:   [[PTILELOADDV1:%[0-9]+]]:tile = PTILELOADDV [[PHI1]], [[PHI]], [[PHI2]], 1, killed [[MOV64ri2]], 0, $noreg
@@ -147,13 +147,13 @@ body:             |
   ; CHECK-NEXT:   TILESTORED %stack.5, 1, killed [[MOV64ri3]], 0, $noreg, [[PTILELOADDV1]] :: (store (s8192) into %stack.5)
   ; CHECK-NEXT:   [[MOV16ri4:%[0-9]+]]:gr16 = MOV16ri 64
   ; CHECK-NEXT:   [[MOV16ri5:%[0-9]+]]:gr16 = MOV16ri 16
-  ; CHECK-NEXT:   LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4)
+  ; CHECK-NEXT:   PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.1, align 4)
   ; CHECK-NEXT:   [[PTILEZEROV1:%[0-9]+]]:tile = PTILEZEROV [[MOV16ri5]], [[MOV16ri4]]
   ; CHECK-NEXT:   [[MOV64ri4:%[0-9]+]]:gr64_nosp = MOV64ri 64
   ; CHECK-NEXT:   TILESTORED %stack.4, 1, killed [[MOV64ri4]], 0, $noreg, [[PTILEZEROV1]] :: (store (s8192) into %stack.4)
   ; CHECK-NEXT:   [[MOV16ri6:%[0-9]+]]:gr16 = MOV16ri 64
   ; CHECK-NEXT:   [[MOV16ri7:%[0-9]+]]:gr16 = MOV16ri 16
-  ; CHECK-NEXT:   LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4)
+  ; CHECK-NEXT:   PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.1, align 4)
   ; CHECK-NEXT:   [[PTILEZEROV2:%[0-9]+]]:tile = PTILEZEROV [[MOV16ri7]], [[MOV16ri6]]
   ; CHECK-NEXT:   [[MOV64ri5:%[0-9]+]]:gr64_nosp = MOV64ri 64
   ; CHECK-NEXT:   TILESTORED %stack.3, 1, killed [[MOV64ri5]], 0, $noreg, [[PTILEZEROV2]] :: (store (s8192) into %stack.3)
@@ -161,7 +161,7 @@ body:             |
   ; CHECK-NEXT:   JMP_1 %bb.5
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.4:
-  ; CHECK-NEXT:   LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4)
+  ; CHECK-NEXT:   PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.1, align 4)
   ; CHECK-NEXT:   [[MOV32ri64_1:%[0-9]+]]:gr64_nosp = MOV32ri64 32
   ; CHECK-NEXT:   [[MOV16ri8:%[0-9]+]]:gr16 = MOV16ri 64
   ; CHECK-NEXT:   [[MOV16ri9:%[0-9]+]]:gr16 = MOV16ri 16
@@ -177,14 +177,14 @@ body:             |
   ; CHECK-NEXT:   [[PHI4:%[0-9]+]]:gr16 = PHI [[PHI]], %bb.3, %60, %bb.8
   ; CHECK-NEXT:   [[PHI5:%[0-9]+]]:gr16 = PHI [[PHI1]], %bb.3, %59, %bb.8
   ; CHECK-NEXT:   [[PHI6:%[0-9]+]]:gr64_nosp = PHI [[LEA64r2]], %bb.3, %58, %bb.8
-  ; CHECK-NEXT:   LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4)
+  ; CHECK-NEXT:   PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.1, align 4)
   ; CHECK-NEXT:   [[MOV64ri7:%[0-9]+]]:gr64_nosp = MOV64ri 64
   ; CHECK-NEXT:   [[PTILELOADDV3:%[0-9]+]]:tile = PTILELOADDV [[PHI5]], [[PHI4]], [[PHI6]], 1, killed [[MOV64ri7]], 0, $noreg
   ; CHECK-NEXT:   [[MOV64ri8:%[0-9]+]]:gr64_nosp = MOV64ri 64
   ; CHECK-NEXT:   TILESTORED %stack.8, 1, killed [[MOV64ri8]], 0, $noreg, [[PTILELOADDV3]] :: (store (s8192) into %stack.8)
   ; CHECK-NEXT:   [[MOV16ri10:%[0-9]+]]:gr16 = MOV16ri 64
   ; CHECK-NEXT:   [[MOV16ri11:%[0-9]+]]:gr16 = MOV16ri 16
-  ; CHECK-NEXT:   LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4)
+  ; CHECK-NEXT:   PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.1, align 4)
   ; CHECK-NEXT:   [[MOV64ri9:%[0-9]+]]:gr64_nosp = MOV64ri 64
   ; CHECK-NEXT:   [[PTILELOADDV4:%[0-9]+]]:tile = PTILELOADDV [[MOV16ri7]], [[MOV16ri6]], %stack.3, 1, killed [[MOV64ri9]], 0, $noreg :: (load (s8192) from %stack.3)
   ; CHECK-NEXT:   [[MOV64ri10:%[0-9]+]]:gr64_nosp = MOV64ri 64
@@ -200,7 +200,7 @@ body:             |
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[MOV16ri12:%[0-9]+]]:gr16 = MOV16ri 64
   ; CHECK-NEXT:   [[MOV16ri13:%[0-9]+]]:gr16 = MOV16ri 16
-  ; CHECK-NEXT:   LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4)
+  ; CHECK-NEXT:   PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.1, align 4)
   ; CHECK-NEXT:   [[LEA64r3:%[0-9]+]]:gr64_nosp = LEA64r %stack.6, 1, $noreg, 0, $noreg
   ; CHECK-NEXT:   [[PTILEZEROV3:%[0-9]+]]:tile = PTILEZEROV [[MOV16ri13]], [[MOV16ri12]]
   ; CHECK-NEXT:   [[MOV64ri12:%[0-9]+]]:gr64_nosp = MOV64ri 64
@@ -213,7 +213,7 @@ body:             |
   ; CHECK-NEXT:   [[MOV32ri64_2:%[0-9]+]]:gr64_nosp = MOV32ri64 32
   ; CHECK-NEXT:   [[MOV16ri14:%[0-9]+]]:gr16 = MOV16ri 64
   ; CHECK-NEXT:   [[MOV16ri15:%[0-9]+]]:gr16 = MOV16ri 16
-  ; CHECK-NEXT:   LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4)
+  ; CHECK-NEXT:   PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.1, align 4)
   ; CHECK-NEXT:   [[LEA64r4:%[0-9]+]]:gr64_nosp = LEA64r %stack.7, 1, $noreg, 0, $noreg
   ; CHECK-NEXT:   [[PTILELOADDV7:%[0-9]+]]:tile = PTILELOADDV [[MOV16ri15]], [[MOV16ri14]], [[COPY3]], 1, killed [[MOV32ri64_2]], 0, $noreg
   ; CHECK-NEXT:   [[MOV64ri13:%[0-9]+]]:gr64_nosp = MOV64ri 64
@@ -225,7 +225,7 @@ body:             |
   ; CHECK-NEXT:   [[PHI7:%[0-9]+]]:gr16 = PHI [[MOV16ri12]], %bb.6, [[MOV16ri14]], %bb.7
   ; CHECK-NEXT:   [[PHI8:%[0-9]+]]:gr16 = PHI [[MOV16ri13]], %bb.6, [[MOV16ri15]], %bb.7
   ; CHECK-NEXT:   [[PHI9:%[0-9]+]]:gr64_nosp = PHI [[LEA64r3]], %bb.6, [[LEA64r4]], %bb.7
-  ; CHECK-NEXT:   LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4)
+  ; CHECK-NEXT:   PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.1, align 4)
   ; CHECK-NEXT:   [[MOV64ri14:%[0-9]+]]:gr64_nosp = MOV64ri 64
   ; CHECK-NEXT:   [[PTILELOADDV8:%[0-9]+]]:tile = PTILELOADDV [[PHI8]], [[PHI7]], [[PHI9]], 1, killed [[MOV64ri14]], 0, $noreg
   ; CHECK-NEXT:   [[MOV64ri15:%[0-9]+]]:gr64_nosp = MOV64ri 64

diff  --git a/llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi2.mir b/llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi2.mir
index 4cea456e7e917..97c44ffab66d9 100644
--- a/llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi2.mir
+++ b/llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi2.mir
@@ -51,7 +51,7 @@ body:             |
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:gr32 = COPY killed [[COPY]]
   ; CHECK-NEXT:   %r0:gr16 = MOV16ri 64
   ; CHECK-NEXT:   %c0:gr16 = MOV16ri 16
-  ; CHECK-NEXT:   LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4)
+  ; CHECK-NEXT:   PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.1, align 4)
   ; CHECK-NEXT:   [[LEA64r:%[0-9]+]]:gr64_nosp = LEA64r %stack.0, 1, $noreg, 0, $noreg
   ; CHECK-NEXT:   %t0:tile = PTILEZEROV %r0, %c0
   ; CHECK-NEXT:   [[MOV64ri:%[0-9]+]]:gr64_nosp = MOV64ri 64
@@ -66,7 +66,7 @@ body:             |
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[MOV16ri:%[0-9]+]]:gr16 = MOV16ri 64
   ; CHECK-NEXT:   [[MOV16ri1:%[0-9]+]]:gr16 = MOV16ri 16
-  ; CHECK-NEXT:   LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4)
+  ; CHECK-NEXT:   PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.1, align 4)
   ; CHECK-NEXT:   [[LEA64r1:%[0-9]+]]:gr64_nosp = LEA64r %stack.2, 1, $noreg, 0, $noreg
   ; CHECK-NEXT:   [[PTILEZEROV:%[0-9]+]]:tile = PTILEZEROV [[MOV16ri1]], [[MOV16ri]]
   ; CHECK-NEXT:   [[MOV64ri1:%[0-9]+]]:gr64_nosp = MOV64ri 64
@@ -79,7 +79,7 @@ body:             |
   ; CHECK-NEXT:   [[PHI:%[0-9]+]]:gr16 = PHI %c0, %bb.0, %24, %bb.3
   ; CHECK-NEXT:   [[PHI1:%[0-9]+]]:gr16 = PHI %r0, %bb.0, %23, %bb.3
   ; CHECK-NEXT:   [[PHI2:%[0-9]+]]:gr64_nosp = PHI [[LEA64r]], %bb.0, %22, %bb.3
-  ; CHECK-NEXT:   LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4)
+  ; CHECK-NEXT:   PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.1, align 4)
   ; CHECK-NEXT:   [[MOV64ri2:%[0-9]+]]:gr64_nosp = MOV64ri 64
   ; CHECK-NEXT:   [[PTILELOADDV:%[0-9]+]]:tile = PTILELOADDV [[PHI1]], [[PHI]], [[PHI2]], 1, killed [[MOV64ri2]], 0, $noreg
   ; CHECK-NEXT:   [[MOV64ri3:%[0-9]+]]:gr64_nosp = MOV64ri 64
@@ -92,7 +92,7 @@ body:             |
   ; CHECK-NEXT:   [[PHI3:%[0-9]+]]:gr16 = PHI [[MOV16ri]], %bb.1, [[PHI]], %bb.2
   ; CHECK-NEXT:   [[PHI4:%[0-9]+]]:gr16 = PHI [[MOV16ri1]], %bb.1, [[PHI1]], %bb.2
   ; CHECK-NEXT:   [[PHI5:%[0-9]+]]:gr64_nosp = PHI [[LEA64r1]], %bb.1, [[PHI2]], %bb.2
-  ; CHECK-NEXT:   LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4)
+  ; CHECK-NEXT:   PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.1, align 4)
   ; CHECK-NEXT:   [[MOV64ri4:%[0-9]+]]:gr64_nosp = MOV64ri 64
   ; CHECK-NEXT:   [[PTILELOADDV1:%[0-9]+]]:tile = PTILELOADDV [[PHI4]], [[PHI3]], [[PHI5]], 1, killed [[MOV64ri4]], 0, $noreg
   ; CHECK-NEXT:   [[MOV64ri5:%[0-9]+]]:gr64_nosp = MOV64ri 64

diff  --git a/llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi4.mir b/llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi4.mir
index 14ff3b2996d62..6f0976b3a4894 100644
--- a/llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi4.mir
+++ b/llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi4.mir
@@ -52,7 +52,7 @@ body:             |
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:gr32 = COPY killed [[COPY]]
   ; CHECK-NEXT:   %r0:gr16 = MOV16ri 64
   ; CHECK-NEXT:   %c0:gr16 = MOV16ri 16
-  ; CHECK-NEXT:   LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4)
+  ; CHECK-NEXT:   PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.1, align 4)
   ; CHECK-NEXT:   [[LEA64r:%[0-9]+]]:gr64_nosp = LEA64r %stack.0, 1, $noreg, 0, $noreg
   ; CHECK-NEXT:   [[LEA64r1:%[0-9]+]]:gr64_nosp = LEA64r %stack.0, 1, $noreg, 0, $noreg
   ; CHECK-NEXT:   %t0:tile = PTILEZEROV %r0, %c0
@@ -68,7 +68,7 @@ body:             |
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[MOV16ri:%[0-9]+]]:gr16 = MOV16ri 64
   ; CHECK-NEXT:   [[MOV16ri1:%[0-9]+]]:gr16 = MOV16ri 16
-  ; CHECK-NEXT:   LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4)
+  ; CHECK-NEXT:   PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.1, align 4)
   ; CHECK-NEXT:   [[PTILEZEROV:%[0-9]+]]:tile = PTILEZEROV killed [[MOV16ri1]], killed [[MOV16ri]]
   ; CHECK-NEXT:   JMP_1 %bb.3
   ; CHECK-NEXT: {{  $}}
@@ -81,14 +81,14 @@ body:             |
   ; CHECK-NEXT:   [[PHI3:%[0-9]+]]:gr16 = PHI %c0, %bb.0, %11, %bb.3, %17, %bb.2
   ; CHECK-NEXT:   [[PHI4:%[0-9]+]]:gr16 = PHI %r0, %bb.0, %12, %bb.3, %18, %bb.2
   ; CHECK-NEXT:   [[PHI5:%[0-9]+]]:gr64_nosp = PHI [[LEA64r]], %bb.0, %24, %bb.3, %25, %bb.2
-  ; CHECK-NEXT:   LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4)
+  ; CHECK-NEXT:   PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.1, align 4)
   ; CHECK-NEXT:   [[MOV64ri1:%[0-9]+]]:gr64_nosp = MOV64ri 64
   ; CHECK-NEXT:   [[PTILELOADDV:%[0-9]+]]:tile = PTILELOADDV [[PHI1]], [[PHI]], [[PHI2]], 1, killed [[MOV64ri1]], 0, $noreg
   ; CHECK-NEXT:   [[MOV64ri2:%[0-9]+]]:gr64_nosp = MOV64ri 64
   ; CHECK-NEXT:   [[PTILELOADDV1:%[0-9]+]]:tile = PTILELOADDV [[PHI4]], [[PHI3]], [[PHI5]], 1, killed [[MOV64ri2]], 0, $noreg
   ; CHECK-NEXT:   [[MOV16ri2:%[0-9]+]]:gr16 = MOV16ri 64
   ; CHECK-NEXT:   [[MOV16ri3:%[0-9]+]]:gr16 = MOV16ri 16
-  ; CHECK-NEXT:   LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4)
+  ; CHECK-NEXT:   PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.1, align 4)
   ; CHECK-NEXT:   [[LEA64r2:%[0-9]+]]:gr64_nosp = LEA64r %stack.3, 1, $noreg, 0, $noreg
   ; CHECK-NEXT:   [[LEA64r3:%[0-9]+]]:gr64_nosp = LEA64r %stack.3, 1, $noreg, 0, $noreg
   ; CHECK-NEXT:   [[PTILEZEROV1:%[0-9]+]]:tile = PTILEZEROV [[MOV16ri3]], [[MOV16ri2]]
@@ -102,7 +102,7 @@ body:             |
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[MOV16ri4:%[0-9]+]]:gr16 = MOV16ri 64
   ; CHECK-NEXT:   [[MOV16ri5:%[0-9]+]]:gr16 = MOV16ri 16
-  ; CHECK-NEXT:   LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4)
+  ; CHECK-NEXT:   PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.1, align 4)
   ; CHECK-NEXT:   [[LEA64r4:%[0-9]+]]:gr64_nosp = LEA64r %stack.2, 1, $noreg, 0, $noreg
   ; CHECK-NEXT:   [[LEA64r5:%[0-9]+]]:gr64_nosp = LEA64r %stack.2, 1, $noreg, 0, $noreg
   ; CHECK-NEXT:   [[PTILEZEROV2:%[0-9]+]]:tile = PTILEZEROV [[MOV16ri5]], [[MOV16ri4]]

diff  --git a/llvm/test/CodeGen/X86/AMX/amx-fastconfig-spill.mir b/llvm/test/CodeGen/X86/AMX/amx-fastconfig-spill.mir
index c797ce764c2c9..98744bbe8e147 100644
--- a/llvm/test/CodeGen/X86/AMX/amx-fastconfig-spill.mir
+++ b/llvm/test/CodeGen/X86/AMX/amx-fastconfig-spill.mir
@@ -34,7 +34,7 @@ body:             |
   ; CHECK-NEXT:   MOV8mi %stack.4, 1, $noreg, 0, $noreg, 1 :: (store (s512) into %stack.4, align 4)
   ; CHECK-NEXT:   [[MOV16ri:%[0-9]+]]:gr16 = MOV16ri 32
   ; CHECK-NEXT:   [[MOV16ri1:%[0-9]+]]:gr16 = MOV16ri 8
-  ; CHECK-NEXT:   LDTILECFG %stack.4, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.4, align 4)
+  ; CHECK-NEXT:   PLDTILECFGV %stack.4, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.4, align 4)
   ; CHECK-NEXT:   [[PTILEZEROV:%[0-9]+]]:tile = PTILEZEROV [[MOV16ri1]], [[MOV16ri]]
   ; CHECK-NEXT:   [[MOV64ri:%[0-9]+]]:gr64_nosp = MOV64ri 64
   ; CHECK-NEXT:   TILESTORED %stack.3, 1, killed [[MOV64ri]], 0, $noreg, [[PTILEZEROV]] :: (store (s8192) into %stack.3)
@@ -48,7 +48,7 @@ body:             |
   ; CHECK-NEXT:   JMP_1 %bb.1
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1:
-  ; CHECK-NEXT:   LDTILECFG %stack.4, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.4, align 4)
+  ; CHECK-NEXT:   PLDTILECFGV %stack.4, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.4, align 4)
   ; CHECK-NEXT:   [[PTILELOADDV1:%[0-9]+]]:tile = PTILELOADDV %row, %col, [[LEA64r]], 1, [[MOV32ri64_]], 0, $noreg
   ; CHECK-NEXT:   [[MOV64ri2:%[0-9]+]]:gr64_nosp = MOV64ri 64
   ; CHECK-NEXT:   [[PTILELOADDV2:%[0-9]+]]:tile = PTILELOADDV [[MOV16ri1]], [[MOV16ri]], %stack.2, 1, killed [[MOV64ri2]], 0, $noreg :: (load (s8192) from %stack.2)
@@ -111,7 +111,7 @@ body:             |
   ; CHECK-NEXT:   MOV8mi %stack.4, 1, $noreg, 0, $noreg, 1 :: (store (s512) into %stack.4, align 4)
   ; CHECK-NEXT:   [[MOV16ri:%[0-9]+]]:gr16 = MOV16ri 32
   ; CHECK-NEXT:   [[MOV16ri1:%[0-9]+]]:gr16 = MOV16ri 8
-  ; CHECK-NEXT:   LDTILECFG %stack.4, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.4, align 4)
+  ; CHECK-NEXT:   PLDTILECFGV %stack.4, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.4, align 4)
   ; CHECK-NEXT:   [[PTILEZEROV:%[0-9]+]]:tile = PTILEZEROV [[MOV16ri1]], [[MOV16ri]]
   ; CHECK-NEXT:   [[MOV64ri:%[0-9]+]]:gr64_nosp = MOV64ri 64
   ; CHECK-NEXT:   TILESTORED %stack.3, 1, killed [[MOV64ri]], 0, $noreg, [[PTILEZEROV]] :: (store (s8192) into %stack.3)
@@ -123,7 +123,7 @@ body:             |
   ; CHECK-NEXT:   JMP_1 %bb.1
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1:
-  ; CHECK-NEXT:   LDTILECFG %stack.4, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.4, align 4)
+  ; CHECK-NEXT:   PLDTILECFGV %stack.4, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.4, align 4)
   ; CHECK-NEXT:   [[PTILELOADDV1:%[0-9]+]]:tile = PTILELOADDV [[MOV16ri1]], [[MOV16ri]], [[LEA64r]], 1, [[MOV32ri64_]], 0, $noreg
   ; CHECK-NEXT:   [[MOV64ri2:%[0-9]+]]:gr64_nosp = MOV64ri 64
   ; CHECK-NEXT:   %t:tile = PTILELOADDV [[MOV16ri1]], [[MOV16ri]], %stack.2, 1, killed [[MOV64ri2]], 0, $noreg :: (load (s8192) from %stack.2)

diff  --git a/llvm/test/CodeGen/X86/AMX/amx-fastconfig.mir b/llvm/test/CodeGen/X86/AMX/amx-fastconfig.mir
index 1dfc45c4c0f0f..8da2b78d7944d 100644
--- a/llvm/test/CodeGen/X86/AMX/amx-fastconfig.mir
+++ b/llvm/test/CodeGen/X86/AMX/amx-fastconfig.mir
@@ -102,7 +102,7 @@ body:             |
   ; CHECK-NEXT:   [[MOV32ri64_:%[0-9]+]]:gr64 = MOV32ri64 @buf
   ; CHECK-NEXT:   [[MOV32ri64_1:%[0-9]+]]:gr64_nosp = MOV32ri64 32
   ; CHECK-NEXT:   [[MOV16ri:%[0-9]+]]:gr16 = MOV16ri 8
-  ; CHECK-NEXT:   LDTILECFG %stack.3, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.3, align 4)
+  ; CHECK-NEXT:   PLDTILECFGV %stack.3, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.3, align 4)
   ; CHECK-NEXT:   [[LEA64r:%[0-9]+]]:gr64_nosp = LEA64r %stack.2, 1, $noreg, 0, $noreg
   ; CHECK-NEXT:   [[PTILELOADDV:%[0-9]+]]:tile = PTILELOADDV [[COPY4]], [[MOV16ri]], [[MOV32ri64_]], 1, [[MOV32ri64_1]], 0, $noreg
   ; CHECK-NEXT:   [[MOV64ri:%[0-9]+]]:gr64_nosp = MOV64ri 64
@@ -123,7 +123,7 @@ body:             |
   ; CHECK-NEXT:   [[MOV32ri64_2:%[0-9]+]]:gr64 = MOV32ri64 @buf2
   ; CHECK-NEXT:   [[MOV32ri64_3:%[0-9]+]]:gr64_nosp = MOV32ri64 32
   ; CHECK-NEXT:   [[MOV16ri1:%[0-9]+]]:gr16 = MOV16ri 8
-  ; CHECK-NEXT:   LDTILECFG %stack.3, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.3, align 4)
+  ; CHECK-NEXT:   PLDTILECFGV %stack.3, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.3, align 4)
   ; CHECK-NEXT:   [[LEA64r3:%[0-9]+]]:gr64_nosp = LEA64r %stack.6, 1, $noreg, 0, $noreg
   ; CHECK-NEXT:   [[PTILELOADDV3:%[0-9]+]]:tile = PTILELOADDV [[COPY4]], [[MOV16ri1]], [[MOV32ri64_2]], 1, [[MOV32ri64_3]], 0, $noreg
   ; CHECK-NEXT:   [[MOV64ri3:%[0-9]+]]:gr64_nosp = MOV64ri 64
@@ -147,7 +147,7 @@ body:             |
   ; CHECK-NEXT:   [[PHI6:%[0-9]+]]:gr16 = PHI [[COPY3]], %bb.1, [[COPY3]], %bb.2
   ; CHECK-NEXT:   [[PHI7:%[0-9]+]]:gr16 = PHI [[COPY4]], %bb.1, [[COPY4]], %bb.2
   ; CHECK-NEXT:   [[PHI8:%[0-9]+]]:gr64_nosp = PHI [[LEA64r2]], %bb.1, [[LEA64r5]], %bb.2
-  ; CHECK-NEXT:   LDTILECFG %stack.3, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.3, align 4)
+  ; CHECK-NEXT:   PLDTILECFGV %stack.3, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.3, align 4)
   ; CHECK-NEXT:   [[MOV64ri6:%[0-9]+]]:gr64_nosp = MOV64ri 64
   ; CHECK-NEXT:   [[PTILELOADDV6:%[0-9]+]]:tile = PTILELOADDV [[PHI1]], [[PHI]], [[PHI2]], 1, killed [[MOV64ri6]], 0, $noreg
   ; CHECK-NEXT:   [[MOV64ri7:%[0-9]+]]:gr64_nosp = MOV64ri 64

diff  --git a/llvm/test/CodeGen/X86/AMX/amx-fastpreconfig.mir b/llvm/test/CodeGen/X86/AMX/amx-fastpreconfig.mir
index 53877b5da84a3..8807cc029ccd7 100644
--- a/llvm/test/CodeGen/X86/AMX/amx-fastpreconfig.mir
+++ b/llvm/test/CodeGen/X86/AMX/amx-fastpreconfig.mir
@@ -34,7 +34,7 @@ body:             |
     ; CHECK-NEXT: [[LEA64r:%[0-9]+]]:gr64 = LEA64r %stack.0, 1, $noreg, 0, $noreg
     ; CHECK-NEXT: [[MOV16ri:%[0-9]+]]:gr16 = MOV16ri 32
     ; CHECK-NEXT: [[MOV16ri1:%[0-9]+]]:gr16 = MOV16ri 8
-    ; CHECK-NEXT: LDTILECFG %stack.2, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.2, align 4)
+    ; CHECK-NEXT: PLDTILECFGV %stack.2, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.2, align 4)
     ; CHECK-NEXT: $tmm0 = TILELOADD [[LEA64r]], 1, [[MOV32ri64_]], 0, $noreg
     ; CHECK-NEXT: [[PTILELOADDV:%[0-9]+]]:tile = PTILELOADDV [[MOV16ri1]], [[MOV16ri]], [[LEA64r]], 1, [[MOV32ri64_]], 0, $noreg
     ; CHECK-NEXT: [[PTILELOADDV1:%[0-9]+]]:tile = PTILELOADDV [[MOV16ri1]], [[MOV16ri]], [[LEA64r]], 1, [[MOV32ri64_]], 0, $noreg

diff  --git a/llvm/test/CodeGen/X86/AMX/amx-tile-intrinsics.ll b/llvm/test/CodeGen/X86/AMX/amx-tile-intrinsics.ll
index 4d469c23328e4..e5fb227329d0d 100644
--- a/llvm/test/CodeGen/X86/AMX/amx-tile-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/AMX/amx-tile-intrinsics.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-tile -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -O0 -mtriple=x86_64-unknown-unknown -mattr=+amx-tile -verify-machineinstrs | FileCheck %s
 
 define void @test_amx(i8* %pointer, i8* %base, i64 %stride) {
 ; CHECK-LABEL: test_amx:


        


More information about the llvm-commits mailing list