[llvm] [X86][APX] Support APX + AMX-MOVRS/AMX-TRANSPOSE (PR #123267)
Phoebe Wang via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 16 23:57:11 PST 2025
https://github.com/phoebewang updated https://github.com/llvm/llvm-project/pull/123267
>From 6bb14f8a247550e9fa34d5ea1b16caba5ec3c14b Mon Sep 17 00:00:00 2001
From: "Wang, Phoebe" <phoebe.wang at intel.com>
Date: Fri, 17 Jan 2025 09:33:57 +0800
Subject: [PATCH 1/2] [X86][APX] Support APX + AMX-MOVRS/AMX-TRANSPOSE
Ref.: https://cdrdv2.intel.com/v1/dl/getContent/784266
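
The core of the C++ side of this patch is the existing GET_EGPR_IF_ENABLED pattern: when the subtarget has APX extended GPRs (EGPR), the promoted *_EVEX opcode is selected so the AMX-MOVRS/AMX-TRANSPOSE loads can use r16-r31 as base/index registers; otherwise the legacy VEX form is kept. Below is a minimal, illustrative-only C++ sketch of that selection pattern; the macro and the hasEGPR() query mirror the hunks in this patch, while FakeSubtarget, selectTileLoadRS and the enum values are stand-ins introduced only for this example.

    // Sketch of the opcode-selection pattern used by the expanders in this
    // patch; the enum values are placeholders, not real X86 opcode numbers.
    #include <cstdio>

    namespace X86 { enum { TILELOADDRS = 100, TILELOADDRS_EVEX = 200 }; }

    struct FakeSubtarget {
      bool HasEGPR;
      bool hasEGPR() const { return HasEGPR; }
    };

    // With APX (EGPR) enabled, pick the promoted EVEX encoding; otherwise
    // keep the VEX form. Token pasting appends _EVEX to the opcode name.
    #define GET_EGPR_IF_ENABLED(OPC) (Subtarget.hasEGPR() ? OPC##_EVEX : OPC)

    static unsigned selectTileLoadRS(const FakeSubtarget &Subtarget) {
      return GET_EGPR_IF_ENABLED(X86::TILELOADDRS);
    }

    int main() {
      // Prints "100 200": VEX opcode without EGPR, EVEX opcode with EGPR.
      printf("%u %u\n", selectTileLoadRS({false}), selectTileLoadRS({true}));
    }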
---
llvm/lib/Target/X86/X86ExpandPseudo.cpp | 20 +--
llvm/lib/Target/X86/X86ISelLowering.cpp | 24 +--
llvm/lib/Target/X86/X86InstrAMX.td | 52 ++++++-
llvm/test/CodeGen/X86/amx_movrs_intrinsics.ll | 89 +++++++++++
.../X86/amx_movrs_transpose_intrinsics.ll | 30 ++++
.../CodeGen/X86/amx_transpose_intrinsics.ll | 146 ++++++++++++++++++
.../Disassembler/X86/AMX/x86-64-amx-movrs.txt | 96 ++++++++++++
.../MC/Disassembler/X86/amx-transpose-att.txt | 48 ++++++
llvm/test/MC/X86/AMX/x86-64-amx-movrs-att.s | 90 ++++++++++-
llvm/test/MC/X86/AMX/x86-64-amx-movrs-intel.s | 96 ++++++++++++
llvm/test/MC/X86/amx-transpose-att.s | 48 ++++++
llvm/test/MC/X86/amx-transpose-intel.s | 48 ++++++
llvm/test/TableGen/x86-instr-mapping.inc | 10 ++
13 files changed, 770 insertions(+), 27 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ExpandPseudo.cpp b/llvm/lib/Target/X86/X86ExpandPseudo.cpp
index fc8a0eaed140d0..7fbba7f05e0a5e 100644
--- a/llvm/lib/Target/X86/X86ExpandPseudo.cpp
+++ b/llvm/lib/Target/X86/X86ExpandPseudo.cpp
@@ -578,10 +578,10 @@ bool X86ExpandPseudo::expandMI(MachineBasicBlock &MBB,
unsigned Opc;
switch (Opcode) {
case X86::PTILELOADDRSV:
- Opc = X86::TILELOADDRS;
+ Opc = GET_EGPR_IF_ENABLED(X86::TILELOADDRS);
break;
case X86::PTILELOADDRST1V:
- Opc = X86::TILELOADDRST1;
+ Opc = GET_EGPR_IF_ENABLED(X86::TILELOADDRST1);
break;
case X86::PTILELOADDV:
Opc = GET_EGPR_IF_ENABLED(X86::TILELOADD);
@@ -737,28 +737,28 @@ bool X86ExpandPseudo::expandMI(MachineBasicBlock &MBB,
unsigned Opc;
switch (Opcode) {
case X86::PT2RPNTLVWZ0V:
- Opc = X86::T2RPNTLVWZ0;
+ Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0);
break;
case X86::PT2RPNTLVWZ0T1V:
- Opc = X86::T2RPNTLVWZ0T1;
+ Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0T1);
break;
case X86::PT2RPNTLVWZ1V:
- Opc = X86::T2RPNTLVWZ1;
+ Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1);
break;
case X86::PT2RPNTLVWZ1T1V:
- Opc = X86::T2RPNTLVWZ1T1;
+ Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1T1);
break;
case X86::PT2RPNTLVWZ0RSV:
- Opc = X86::T2RPNTLVWZ0RS;
+ Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0RS);
break;
case X86::PT2RPNTLVWZ0RST1V:
- Opc = X86::T2RPNTLVWZ0RST1;
+ Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0RST1);
break;
case X86::PT2RPNTLVWZ1RSV:
- Opc = X86::T2RPNTLVWZ1RS;
+ Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1RS);
break;
case X86::PT2RPNTLVWZ1RST1V:
- Opc = X86::T2RPNTLVWZ1RST1;
+ Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1RST1);
break;
default:
llvm_unreachable("Impossible Opcode!");
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 90e3e15b1fb46c..6d69665c17565a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -37800,14 +37800,14 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case X86::PTILESTORED:
Opc = GET_EGPR_IF_ENABLED(X86::TILESTORED);
break;
-#undef GET_EGPR_IF_ENABLED
case X86::PTILELOADDRS:
- Opc = X86::TILELOADDRS;
+ Opc = GET_EGPR_IF_ENABLED(X86::TILELOADDRS);
break;
case X86::PTILELOADDRST1:
- Opc = X86::TILELOADDRST1;
+ Opc = GET_EGPR_IF_ENABLED(X86::TILELOADDRST1);
break;
}
+#undef GET_EGPR_IF_ENABLED
MachineInstrBuilder MIB = BuildMI(*BB, MI, MIMD, TII->get(Opc));
unsigned CurOp = 0;
@@ -37838,34 +37838,36 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case X86::PT2RPNTLVWZ1RST1: {
const DebugLoc &DL = MI.getDebugLoc();
unsigned Opc;
+#define GET_EGPR_IF_ENABLED(OPC) (Subtarget.hasEGPR() ? OPC##_EVEX : OPC)
switch (MI.getOpcode()) {
default:
llvm_unreachable("Unexpected instruction!");
case X86::PT2RPNTLVWZ0:
- Opc = X86::T2RPNTLVWZ0;
+ Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0);
break;
case X86::PT2RPNTLVWZ0T1:
- Opc = X86::T2RPNTLVWZ0T1;
+ Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0T1);
break;
case X86::PT2RPNTLVWZ1:
- Opc = X86::T2RPNTLVWZ1;
+ Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1);
break;
case X86::PT2RPNTLVWZ1T1:
- Opc = X86::T2RPNTLVWZ1T1;
+ Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1T1);
break;
case X86::PT2RPNTLVWZ0RS:
- Opc = X86::T2RPNTLVWZ0RS;
+ Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0RS);
break;
case X86::PT2RPNTLVWZ0RST1:
- Opc = X86::T2RPNTLVWZ0RST1;
+ Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0RST1);
break;
case X86::PT2RPNTLVWZ1RS:
- Opc = X86::T2RPNTLVWZ1RS;
+ Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1RS);
break;
case X86::PT2RPNTLVWZ1RST1:
- Opc = X86::T2RPNTLVWZ1RST1;
+ Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1RST1);
break;
}
+#undef GET_EGPR_IF_ENABLED
MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, TII->get(Opc));
MIB.addReg(TMMImmToTMMPair(MI.getOperand(0).getImm()), RegState::Define);
diff --git a/llvm/lib/Target/X86/X86InstrAMX.td b/llvm/lib/Target/X86/X86InstrAMX.td
index a055ba91d3e171..b5d99f52f15c23 100644
--- a/llvm/lib/Target/X86/X86InstrAMX.td
+++ b/llvm/lib/Target/X86/X86InstrAMX.td
@@ -349,22 +349,22 @@ let Predicates = [HasAMXTRANSPOSE, In64BitMode] in {
let SchedRW = [WriteSystem] in {
def T2RPNTLVWZ0 : I<0x6e, MRMSrcMemFSIB, (outs TILEPair:$dst),
(ins sibmem:$src), "t2rpntlvwz0\t{$src, $dst|$dst, $src}",
- []>, VEX, WIG, T8,PS;
+ []>, VEX, T8, PS;
def T2RPNTLVWZ0T1 : I<0x6f, MRMSrcMemFSIB, (outs TILEPair:$dst),
(ins sibmem:$src), "t2rpntlvwz0t1\t{$src, $dst|$dst, $src}",
- []>, VEX, T8,PS;
+ []>, VEX, T8, PS;
def T2RPNTLVWZ1 : I<0x6e, MRMSrcMemFSIB, (outs TILEPair:$dst),
(ins sibmem:$src), "t2rpntlvwz1\t{$src, $dst|$dst, $src}",
- []>, VEX, T8,PD;
+ []>, VEX, T8, PD;
def T2RPNTLVWZ1T1 : I<0x6f, MRMSrcMemFSIB, (outs TILEPair:$dst),
(ins sibmem:$src), "t2rpntlvwz1t1\t{$src, $dst|$dst, $src}",
- []>, VEX, T8,PD;
+ []>, VEX, T8, PD;
def TTRANSPOSED : I<0x5f, MRMSrcReg, (outs TILE:$dst), (ins TILE:$src),
- "ttransposed\t{$src, $dst|$dst, $src}", []>, VEX, T8,XS;
+ "ttransposed\t{$src, $dst|$dst, $src}", []>, VEX, T8, XS;
let isPseudo = true in {
def PT2RPNTLVWZ0V : PseudoI<(outs TILEPair:$dst),
(ins GR16:$src1, GR16:$src2, GR16:$src3, opaquemem:$src4),
@@ -554,6 +554,48 @@ let Predicates = [HasAMXMOVRS, In64BitMode], SchedRW = [WriteSystem] in {
}
} // HasAMXMOVRS, In64BitMode
+let Predicates = [HasAMXTRANSPOSE, HasEGPR, In64BitMode], SchedRW = [WriteSystem] in {
+ def T2RPNTLVWZ0_EVEX : I<0x6e, MRMSrcMemFSIB, (outs TILEPair:$dst),
+ (ins sibmem:$src), "t2rpntlvwz0\t{$src, $dst|$dst, $src}",
+ []>, EVEX, NoCD8, T8, PS;
+
+ def T2RPNTLVWZ0T1_EVEX : I<0x6f, MRMSrcMemFSIB, (outs TILEPair:$dst),
+ (ins sibmem:$src), "t2rpntlvwz0t1\t{$src, $dst|$dst, $src}",
+ []>, EVEX, NoCD8, T8, PS;
+
+ def T2RPNTLVWZ1_EVEX : I<0x6e, MRMSrcMemFSIB, (outs TILEPair:$dst),
+ (ins sibmem:$src), "t2rpntlvwz1\t{$src, $dst|$dst, $src}",
+ []>, EVEX, NoCD8, T8, PD;
+
+ def T2RPNTLVWZ1T1_EVEX : I<0x6f, MRMSrcMemFSIB, (outs TILEPair:$dst),
+ (ins sibmem:$src), "t2rpntlvwz1t1\t{$src, $dst|$dst, $src}",
+ []>, EVEX, NoCD8, T8, PD;
+} // HasAMXTRANSPOSE, HasEGPR, In64BitMode
+
+let Predicates = [HasAMXMOVRS, HasAMXTRANSPOSE, HasEGPR, In64BitMode], SchedRW = [WriteSystem] in {
+ def T2RPNTLVWZ0RS_EVEX : I<0xf8, MRMSrcMemFSIB, (outs TILEPair:$dst),
+ (ins sibmem:$src1), "t2rpntlvwz0rs\t{$src1, $dst|$dst, $src1}",
+ []>, EVEX, NoCD8, T_MAP5;
+ def T2RPNTLVWZ0RST1_EVEX : I<0xf9, MRMSrcMemFSIB, (outs TILEPair:$dst),
+ (ins sibmem:$src1), "t2rpntlvwz0rst1\t{$src1, $dst|$dst, $src1}",
+ []>, EVEX, NoCD8, T_MAP5;
+ def T2RPNTLVWZ1RS_EVEX : I<0xf8, MRMSrcMemFSIB, (outs TILEPair:$dst),
+ (ins sibmem:$src1), "t2rpntlvwz1rs\t{$src1, $dst|$dst, $src1}",
+ []>, EVEX, NoCD8, T_MAP5, PD;
+ def T2RPNTLVWZ1RST1_EVEX : I<0xf9, MRMSrcMemFSIB, (outs TILEPair:$dst),
+ (ins sibmem:$src1), "t2rpntlvwz1rst1\t{$src1, $dst|$dst, $src1}",
+ []>, EVEX, NoCD8, T_MAP5, PD;
+} // HasAMXMOVRS, HasAMXTRANSPOSE, HasEGPR, In64BitMode
+
+let Predicates = [HasAMXMOVRS, HasEGPR, In64BitMode], SchedRW = [WriteSystem] in {
+ def TILELOADDRS_EVEX : I<0x4a, MRMSrcMemFSIB, (outs TILE:$dst),
+ (ins sibmem:$src1), "tileloaddrs\t{$src1, $dst|$dst, $src1}",
+ []>, EVEX, NoCD8, T8, XD;
+ def TILELOADDRST1_EVEX : I<0x4a, MRMSrcMemFSIB, (outs TILE:$dst),
+ (ins sibmem:$src1), "tileloaddrst1\t{$src1, $dst|$dst, $src1}",
+ []>, EVEX, NoCD8, T8, PD;
+} // HasAMXMOVRS, HasEGPR, In64BitMode
+
multiclass m_tcvtrowd2ps {
let Predicates = [HasAMXAVX512, HasAVX10_2_512, In64BitMode] in {
let SchedRW = [WriteSystem] in {
diff --git a/llvm/test/CodeGen/X86/amx_movrs_intrinsics.ll b/llvm/test/CodeGen/X86/amx_movrs_intrinsics.ll
index da212a1850964e..67688326c17500 100755
--- a/llvm/test/CodeGen/X86/amx_movrs_intrinsics.ll
+++ b/llvm/test/CodeGen/X86/amx_movrs_intrinsics.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -O0 -mtriple=x86_64-unknown-unknown -mattr=+amx-tile,+amx-movrs | FileCheck %s
+; RUN: llc < %s -O0 -mtriple=x86_64-unknown-unknown -mattr=+amx-tile,+amx-movrs,+egpr --show-mc-encoding | FileCheck %s --check-prefix=APXF
define void @test_amx_internal(i16 %m, i16 %n, ptr %buf, i64 %s) {
; CHECK-LABEL: test_amx_internal:
@@ -35,6 +36,44 @@ define void @test_amx_internal(i16 %m, i16 %n, ptr %buf, i64 %s) {
; CHECK-NEXT: .cfi_def_cfa %rsp, 8
; CHECK-NEXT: tilerelease
; CHECK-NEXT: retq
+;
+; APXF-LABEL: test_amx_internal:
+; APXF: # %bb.0: # %entry
+; APXF-NEXT: pushq %rbp # encoding: [0x55]
+; APXF-NEXT: .cfi_def_cfa_offset 16
+; APXF-NEXT: .cfi_offset %rbp, -16
+; APXF-NEXT: movq %rsp, %rbp # encoding: [0x48,0x89,0xe5]
+; APXF-NEXT: .cfi_def_cfa_register %rbp
+; APXF-NEXT: andq $-1024, %rsp # encoding: [0x48,0x81,0xe4,0x00,0xfc,0xff,0xff]
+; APXF-NEXT: # imm = 0xFC00
+; APXF-NEXT: subq $3072, %rsp # encoding: [0x48,0x81,0xec,0x00,0x0c,0x00,0x00]
+; APXF-NEXT: # imm = 0xC00
+; APXF-NEXT: xorps %xmm0, %xmm0 # encoding: [0x0f,0x57,0xc0]
+; APXF-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xc0,0x03,0x00,0x00]
+; APXF-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xd0,0x03,0x00,0x00]
+; APXF-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xe0,0x03,0x00,0x00]
+; APXF-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xf0,0x03,0x00,0x00]
+; APXF-NEXT: movb $1, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x84,0x24,0xc0,0x03,0x00,0x00,0x01]
+; APXF-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; APXF-NEXT: # encoding: [0x48,0x89,0x8c,0x24,0xb8,0x03,0x00,0x00]
+; APXF-NEXT: movl %esi, %eax # encoding: [0x89,0xf0]
+; APXF-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; APXF-NEXT: # encoding: [0x48,0x8b,0xb4,0x24,0xb8,0x03,0x00,0x00]
+; APXF-NEXT: movw %ax, %cx # encoding: [0x66,0x89,0xc1]
+; APXF-NEXT: movw %di, %ax # encoding: [0x66,0x89,0xf8]
+; APXF-NEXT: # implicit-def: $al
+; APXF-NEXT: movb %al, {{[0-9]+}}(%rsp) # encoding: [0x88,0x84,0x24,0xf0,0x03,0x00,0x00]
+; APXF-NEXT: movw %cx, {{[0-9]+}}(%rsp) # encoding: [0x66,0x89,0x8c,0x24,0xd0,0x03,0x00,0x00]
+; APXF-NEXT: ldtilecfg {{[0-9]+}}(%rsp) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x49,0x84,0x24,0xc0,0x03,0x00,0x00]
+; APXF-NEXT: tileloaddrs (%rdx,%rsi), %tmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7b,0x4a,0x04,0x32]
+; APXF-NEXT: movl $64, %esi # encoding: [0xbe,0x40,0x00,0x00,0x00]
+; APXF-NEXT: leaq {{[0-9]+}}(%rsp), %rdx # encoding: [0x48,0x8d,0x94,0x24,0x00,0x04,0x00,0x00]
+; APXF-NEXT: tilestored %tmm0, (%rdx,%rsi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x04,0x32]
+; APXF-NEXT: movq %rbp, %rsp # encoding: [0x48,0x89,0xec]
+; APXF-NEXT: popq %rbp # encoding: [0x5d]
+; APXF-NEXT: .cfi_def_cfa %rsp, 8
+; APXF-NEXT: tilerelease # encoding: [0xc4,0xe2,0x78,0x49,0xc0]
+; APXF-NEXT: retq # encoding: [0xc3]
entry:
%t1 = call x86_amx @llvm.x86.tileloaddrs64.internal(i16 %m, i16 %n, ptr %buf, i64 %s)
%t2 = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %t1)
@@ -48,6 +87,12 @@ define void @test_amx_old(i16 %m, i16 %n, ptr %buf) {
; CHECK-NEXT: movl $32, %eax
; CHECK-NEXT: tileloaddrs (%rdx,%rax), %tmm2
; CHECK-NEXT: retq
+;
+; APXF-LABEL: test_amx_old:
+; APXF: # %bb.0: # %entry
+; APXF-NEXT: movl $32, %eax # encoding: [0xb8,0x20,0x00,0x00,0x00]
+; APXF-NEXT: tileloaddrs (%rdx,%rax), %tmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7b,0x4a,0x14,0x02]
+; APXF-NEXT: retq # encoding: [0xc3]
entry:
call void @llvm.x86.tileloaddrs64(i8 2, ptr %buf, i64 32)
ret void
@@ -88,6 +133,44 @@ define void @test_amx_t1_internal(i16 %m, i16 %n, ptr %buf, i64 %s) {
; CHECK-NEXT: .cfi_def_cfa %rsp, 8
; CHECK-NEXT: tilerelease
; CHECK-NEXT: retq
+;
+; APXF-LABEL: test_amx_t1_internal:
+; APXF: # %bb.0: # %entry
+; APXF-NEXT: pushq %rbp # encoding: [0x55]
+; APXF-NEXT: .cfi_def_cfa_offset 16
+; APXF-NEXT: .cfi_offset %rbp, -16
+; APXF-NEXT: movq %rsp, %rbp # encoding: [0x48,0x89,0xe5]
+; APXF-NEXT: .cfi_def_cfa_register %rbp
+; APXF-NEXT: andq $-1024, %rsp # encoding: [0x48,0x81,0xe4,0x00,0xfc,0xff,0xff]
+; APXF-NEXT: # imm = 0xFC00
+; APXF-NEXT: subq $3072, %rsp # encoding: [0x48,0x81,0xec,0x00,0x0c,0x00,0x00]
+; APXF-NEXT: # imm = 0xC00
+; APXF-NEXT: xorps %xmm0, %xmm0 # encoding: [0x0f,0x57,0xc0]
+; APXF-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xc0,0x03,0x00,0x00]
+; APXF-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xd0,0x03,0x00,0x00]
+; APXF-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xe0,0x03,0x00,0x00]
+; APXF-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xf0,0x03,0x00,0x00]
+; APXF-NEXT: movb $1, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x84,0x24,0xc0,0x03,0x00,0x00,0x01]
+; APXF-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; APXF-NEXT: # encoding: [0x48,0x89,0x8c,0x24,0xb8,0x03,0x00,0x00]
+; APXF-NEXT: movl %esi, %eax # encoding: [0x89,0xf0]
+; APXF-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; APXF-NEXT: # encoding: [0x48,0x8b,0xb4,0x24,0xb8,0x03,0x00,0x00]
+; APXF-NEXT: movw %ax, %cx # encoding: [0x66,0x89,0xc1]
+; APXF-NEXT: movw %di, %ax # encoding: [0x66,0x89,0xf8]
+; APXF-NEXT: # implicit-def: $al
+; APXF-NEXT: movb %al, {{[0-9]+}}(%rsp) # encoding: [0x88,0x84,0x24,0xf0,0x03,0x00,0x00]
+; APXF-NEXT: movw %cx, {{[0-9]+}}(%rsp) # encoding: [0x66,0x89,0x8c,0x24,0xd0,0x03,0x00,0x00]
+; APXF-NEXT: ldtilecfg {{[0-9]+}}(%rsp) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x49,0x84,0x24,0xc0,0x03,0x00,0x00]
+; APXF-NEXT: tileloaddrst1 (%rdx,%rsi), %tmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x4a,0x04,0x32]
+; APXF-NEXT: movl $64, %esi # encoding: [0xbe,0x40,0x00,0x00,0x00]
+; APXF-NEXT: leaq {{[0-9]+}}(%rsp), %rdx # encoding: [0x48,0x8d,0x94,0x24,0x00,0x04,0x00,0x00]
+; APXF-NEXT: tilestored %tmm0, (%rdx,%rsi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x04,0x32]
+; APXF-NEXT: movq %rbp, %rsp # encoding: [0x48,0x89,0xec]
+; APXF-NEXT: popq %rbp # encoding: [0x5d]
+; APXF-NEXT: .cfi_def_cfa %rsp, 8
+; APXF-NEXT: tilerelease # encoding: [0xc4,0xe2,0x78,0x49,0xc0]
+; APXF-NEXT: retq # encoding: [0xc3]
entry:
%t1 = call x86_amx @llvm.x86.tileloaddrst164.internal(i16 %m, i16 %n, ptr %buf, i64 %s)
%t2 = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %t1)
@@ -101,6 +184,12 @@ define void @test_amx_t1_old(i16 %m, i16 %n, ptr %buf) {
; CHECK-NEXT: movl $32, %eax
; CHECK-NEXT: tileloaddrst1 (%rdx,%rax), %tmm2
; CHECK-NEXT: retq
+;
+; APXF-LABEL: test_amx_t1_old:
+; APXF: # %bb.0: # %entry
+; APXF-NEXT: movl $32, %eax # encoding: [0xb8,0x20,0x00,0x00,0x00]
+; APXF-NEXT: tileloaddrst1 (%rdx,%rax), %tmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x4a,0x14,0x02]
+; APXF-NEXT: retq # encoding: [0xc3]
entry:
call void @llvm.x86.tileloaddrst164(i8 2, ptr %buf, i64 32)
ret void
diff --git a/llvm/test/CodeGen/X86/amx_movrs_transpose_intrinsics.ll b/llvm/test/CodeGen/X86/amx_movrs_transpose_intrinsics.ll
index 146b69773eb186..0d5b85f2bb1088 100755
--- a/llvm/test/CodeGen/X86/amx_movrs_transpose_intrinsics.ll
+++ b/llvm/test/CodeGen/X86/amx_movrs_transpose_intrinsics.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -O0 -mtriple=x86_64-unknown-unknown -mattr=+amx-transpose,+amx-movrs | FileCheck %s --check-prefixes=CHECK,O0
; RUN: llc < %s -O2 -mtriple=x86_64-unknown-unknown -mattr=+amx-transpose,+amx-movrs | FileCheck %s --check-prefixes=CHECK,O2
+; RUN: llc < %s -O2 -mtriple=x86_64-unknown-unknown -mattr=+amx-transpose,+amx-movrs,+egpr --show-mc-encoding | FileCheck %s --check-prefix=APXF
define void @test_amx(i64 %stride, i8* %addr1) #0 {
; CHECK-LABEL: test_amx:
@@ -10,6 +11,14 @@ define void @test_amx(i64 %stride, i8* %addr1) #0 {
; CHECK-NEXT: t2rpntlvwz1rs (%rsi,%rdi), %tmm0
; CHECK-NEXT: t2rpntlvwz1rst1 (%rsi,%rdi), %tmm2
; CHECK-NEXT: retq
+;
+; APXF-LABEL: test_amx:
+; APXF: # %bb.0:
+; APXF-NEXT: t2rpntlvwz0rs (%rsi,%rdi), %tmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x78,0xf8,0x04,0x3e]
+; APXF-NEXT: t2rpntlvwz0rst1 (%rsi,%rdi), %tmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x78,0xf9,0x14,0x3e]
+; APXF-NEXT: t2rpntlvwz1rs (%rsi,%rdi), %tmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x79,0xf8,0x04,0x3e]
+; APXF-NEXT: t2rpntlvwz1rst1 (%rsi,%rdi), %tmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x79,0xf9,0x14,0x3e]
+; APXF-NEXT: retq # encoding: [0xc3]
call void @llvm.x86.t2rpntlvwz0rs(i8 1, i8* %addr1, i64 %stride)
call void @llvm.x86.t2rpntlvwz0rst1(i8 2, i8* %addr1, i64 %stride)
call void @llvm.x86.t2rpntlvwz1rs(i8 1, i8* %addr1, i64 %stride)
@@ -80,6 +89,27 @@ define void @test_amx2(i8* %base, i64 %stride) #0 {
; O2-NEXT: t2rpntlvwz1rst1 (%rdi,%rsi), %tmm4
; O2-NEXT: tilerelease
; O2-NEXT: retq
+;
+; APXF-LABEL: test_amx2:
+; APXF: # %bb.0:
+; APXF-NEXT: xorps %xmm0, %xmm0 # encoding: [0x0f,0x57,0xc0]
+; APXF-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x44,0x24,0xc0]
+; APXF-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x44,0x24,0xd0]
+; APXF-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x44,0x24,0xe0]
+; APXF-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x44,0x24,0xf0]
+; APXF-NEXT: movb $1, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xc0,0x01]
+; APXF-NEXT: movb $8, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xf4,0x08]
+; APXF-NEXT: movw $8, -{{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0xd8,0x08,0x00]
+; APXF-NEXT: movb $8, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xf5,0x08]
+; APXF-NEXT: movw $8, -{{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0xda,0x08,0x00]
+; APXF-NEXT: ldtilecfg -{{[0-9]+}}(%rsp) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x49,0x44,0x24,0xc0]
+; APXF-NEXT: movw $8, %ax # encoding: [0x66,0xb8,0x08,0x00]
+; APXF-NEXT: t2rpntlvwz0rs (%rdi,%rsi), %tmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x78,0xf8,0x24,0x37]
+; APXF-NEXT: t2rpntlvwz0rst1 (%rdi,%rsi), %tmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x78,0xf9,0x24,0x37]
+; APXF-NEXT: t2rpntlvwz1rs (%rdi,%rsi), %tmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x79,0xf8,0x24,0x37]
+; APXF-NEXT: t2rpntlvwz1rst1 (%rdi,%rsi), %tmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x79,0xf9,0x24,0x37]
+; APXF-NEXT: tilerelease # encoding: [0xc4,0xe2,0x78,0x49,0xc0]
+; APXF-NEXT: retq # encoding: [0xc3]
call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0rs.internal(i16 8, i16 8, i16 8, i8* %base, i64 %stride)
call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0rst1.internal(i16 8, i16 8, i16 8, i8* %base, i64 %stride)
call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz1rs.internal(i16 8, i16 8, i16 8, i8* %base, i64 %stride)
diff --git a/llvm/test/CodeGen/X86/amx_transpose_intrinsics.ll b/llvm/test/CodeGen/X86/amx_transpose_intrinsics.ll
index cc4360317db7db..7495f6c9af4577 100644
--- a/llvm/test/CodeGen/X86/amx_transpose_intrinsics.ll
+++ b/llvm/test/CodeGen/X86/amx_transpose_intrinsics.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+amx-bf16,+amx-fp16,+amx-complex,+amx-transpose | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+amx-bf16,+amx-fp16,+amx-complex,+amx-transpose,+egpr --show-mc-encoding | FileCheck %s --check-prefix=APXF
define void @test_amx(i32 %rv32, i64 %stride, i64 %rvalue, i8* %addr1, <4 x float> %xmm) #0 {
; CHECK-LABEL: test_amx:
@@ -16,6 +17,21 @@ define void @test_amx(i32 %rv32, i64 %stride, i64 %rvalue, i8* %addr1, <4 x floa
; CHECK-NEXT: tconjtcmmimfp16ps %tmm3, %tmm2, %tmm1
; CHECK-NEXT: tconjtfp16 %tmm2, %tmm1
; CHECK-NEXT: retq
+;
+; APXF-LABEL: test_amx:
+; APXF: # %bb.0:
+; APXF-NEXT: t2rpntlvwz0 (%rcx,%rsi), %tmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x6e,0x04,0x31]
+; APXF-NEXT: t2rpntlvwz0t1 (%rcx,%rsi), %tmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x6f,0x14,0x31]
+; APXF-NEXT: t2rpntlvwz1 (%rcx,%rsi), %tmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x6e,0x04,0x31]
+; APXF-NEXT: t2rpntlvwz1t1 (%rcx,%rsi), %tmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x6f,0x14,0x31]
+; APXF-NEXT: ttransposed %tmm3, %tmm1 # encoding: [0xc4,0xe2,0x7a,0x5f,0xcb]
+; APXF-NEXT: ttdpbf16ps %tmm3, %tmm2, %tmm1 # encoding: [0xc4,0xe2,0x62,0x6c,0xca]
+; APXF-NEXT: ttdpfp16ps %tmm6, %tmm5, %tmm4 # encoding: [0xc4,0xe2,0x4b,0x6c,0xe5]
+; APXF-NEXT: ttcmmimfp16ps %tmm3, %tmm2, %tmm1 # encoding: [0xc4,0xe2,0x63,0x6b,0xca]
+; APXF-NEXT: ttcmmrlfp16ps %tmm3, %tmm2, %tmm1 # encoding: [0xc4,0xe2,0x62,0x6b,0xca]
+; APXF-NEXT: tconjtcmmimfp16ps %tmm3, %tmm2, %tmm1 # encoding: [0xc4,0xe2,0x60,0x6b,0xca]
+; APXF-NEXT: tconjtfp16 %tmm2, %tmm1 # encoding: [0xc4,0xe2,0x79,0x6b,0xca]
+; APXF-NEXT: retq # encoding: [0xc3]
call void @llvm.x86.t2rpntlvwz0(i8 1, i8* %addr1, i64 %stride)
call void @llvm.x86.t2rpntlvwz0t1(i8 2, i8* %addr1, i64 %stride)
call void @llvm.x86.t2rpntlvwz1(i8 1, i8* %addr1, i64 %stride)
@@ -78,6 +94,46 @@ define void @test_amx2(i8* %pointer, i8* %base, i64 %stride) #0 {
; CHECK-NEXT: tilerelease
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
+;
+; APXF-LABEL: test_amx2:
+; APXF: # %bb.0:
+; APXF-NEXT: pushq %rbp # encoding: [0x55]
+; APXF-NEXT: subq $2928, %rsp # encoding: [0x48,0x81,0xec,0x70,0x0b,0x00,0x00]
+; APXF-NEXT: # imm = 0xB70
+; APXF-NEXT: vxorps %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc0]
+; APXF-NEXT: vmovups %zmm0, {{[0-9]+}}(%rsp) # encoding: [0x62,0xf1,0x7c,0x48,0x11,0x44,0x24,0x0d]
+; APXF-NEXT: movb $1, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x84,0x24,0x40,0x03,0x00,0x00,0x01]
+; APXF-NEXT: movb $8, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x84,0x24,0x70,0x03,0x00,0x00,0x08]
+; APXF-NEXT: movw $8, {{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x84,0x24,0x50,0x03,0x00,0x00,0x08,0x00]
+; APXF-NEXT: movb $8, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x84,0x24,0x71,0x03,0x00,0x00,0x08]
+; APXF-NEXT: movw $8, {{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x84,0x24,0x52,0x03,0x00,0x00,0x08,0x00]
+; APXF-NEXT: movb $8, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x84,0x24,0x72,0x03,0x00,0x00,0x08]
+; APXF-NEXT: movw $8, {{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x84,0x24,0x54,0x03,0x00,0x00,0x08,0x00]
+; APXF-NEXT: movb $8, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x84,0x24,0x73,0x03,0x00,0x00,0x08]
+; APXF-NEXT: movw $8, {{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x84,0x24,0x56,0x03,0x00,0x00,0x08,0x00]
+; APXF-NEXT: ldtilecfg {{[0-9]+}}(%rsp) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x49,0x84,0x24,0x40,0x03,0x00,0x00]
+; APXF-NEXT: movw $8, %ax # encoding: [0x66,0xb8,0x08,0x00]
+; APXF-NEXT: tileloadd (%rsi,%rdx), %tmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7b,0x4b,0x04,0x16]
+; APXF-NEXT: tilezero %tmm1 # encoding: [0xc4,0xe2,0x7b,0x49,0xc8]
+; APXF-NEXT: tilezero %tmm2 # encoding: [0xc4,0xe2,0x7b,0x49,0xd0]
+; APXF-NEXT: ttdpbf16ps %tmm1, %tmm0, %tmm2 # encoding: [0xc4,0xe2,0x72,0x6c,0xd0]
+; APXF-NEXT: ttdpfp16ps %tmm1, %tmm0, %tmm2 # encoding: [0xc4,0xe2,0x73,0x6c,0xd0]
+; APXF-NEXT: ttcmmimfp16ps %tmm1, %tmm0, %tmm2 # encoding: [0xc4,0xe2,0x73,0x6b,0xd0]
+; APXF-NEXT: ttcmmrlfp16ps %tmm1, %tmm0, %tmm2 # encoding: [0xc4,0xe2,0x72,0x6b,0xd0]
+; APXF-NEXT: movabsq $64, %rbp # encoding: [0x48,0xbd,0x40,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
+; APXF-NEXT: tilestored %tmm2, 896(%rsp,%rbp) # 1024-byte Folded Spill
+; APXF-NEXT: # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x94,0x2c,0x80,0x03,0x00,0x00]
+; APXF-NEXT: tileloadd 896(%rsp,%rbp), %tmm3 # 1024-byte Folded Reload
+; APXF-NEXT: # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7b,0x4b,0x9c,0x2c,0x80,0x03,0x00,0x00]
+; APXF-NEXT: tconjtcmmimfp16ps %tmm1, %tmm0, %tmm3 # encoding: [0xc4,0xe2,0x70,0x6b,0xd8]
+; APXF-NEXT: tconjtfp16 %tmm3, %tmm0 # encoding: [0xc4,0xe2,0x79,0x6b,0xc3]
+; APXF-NEXT: tilestored %tmm2, (%rdi,%rdx) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x14,0x17]
+; APXF-NEXT: addq $2928, %rsp # encoding: [0x48,0x81,0xc4,0x70,0x0b,0x00,0x00]
+; APXF-NEXT: # imm = 0xB70
+; APXF-NEXT: popq %rbp # encoding: [0x5d]
+; APXF-NEXT: tilerelease # encoding: [0xc4,0xe2,0x78,0x49,0xc0]
+; APXF-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
+; APXF-NEXT: retq # encoding: [0xc3]
%a = call x86_amx @llvm.x86.tileloadd64.internal(i16 8, i16 8, i8* %base, i64 %stride)
%b = call x86_amx @llvm.x86.tilezero.internal(i16 8, i16 8)
@@ -117,6 +173,30 @@ define void @test_amx3(i8* %pointer, i8* %base, i64 %stride) #0 {
; CHECK-NEXT: tilerelease
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
+;
+; APXF-LABEL: test_amx3:
+; APXF: # %bb.0:
+; APXF-NEXT: vxorps %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc0]
+; APXF-NEXT: vmovups %zmm0, -{{[0-9]+}}(%rsp) # encoding: [0x62,0xf1,0x7c,0x48,0x11,0x44,0x24,0xff]
+; APXF-NEXT: movb $1, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xc0,0x01]
+; APXF-NEXT: movb $8, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xf0,0x08]
+; APXF-NEXT: movw $8, -{{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0xd0,0x08,0x00]
+; APXF-NEXT: movb $8, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xf4,0x08]
+; APXF-NEXT: movw $8, -{{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0xd8,0x08,0x00]
+; APXF-NEXT: movb $0, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xf5,0x00]
+; APXF-NEXT: movw $0, -{{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0xda,0x00,0x00]
+; APXF-NEXT: ldtilecfg -{{[0-9]+}}(%rsp) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x49,0x44,0x24,0xc0]
+; APXF-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; APXF-NEXT: movw $8, %cx # encoding: [0x66,0xb9,0x08,0x00]
+; APXF-NEXT: t2rpntlvwz0 (%rsi,%rdx), %tmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x6e,0x24,0x16]
+; APXF-NEXT: t2rpntlvwz0t1 (%rsi,%rdx), %tmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x6f,0x24,0x16]
+; APXF-NEXT: t2rpntlvwz1 (%rsi,%rdx), %tmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x6e,0x24,0x16]
+; APXF-NEXT: t2rpntlvwz1t1 (%rsi,%rdx), %tmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x6f,0x24,0x16]
+; APXF-NEXT: ttransposed %tmm4, %tmm0 # encoding: [0xc4,0xe2,0x7a,0x5f,0xc4]
+; APXF-NEXT: tilestored %tmm0, (%rdi,%rdx) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x04,0x17]
+; APXF-NEXT: tilerelease # encoding: [0xc4,0xe2,0x78,0x49,0xc0]
+; APXF-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
+; APXF-NEXT: retq # encoding: [0xc3]
%1 = call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0.internal(i16 8, i16 8, i16 0, i8* %base, i64 %stride)
%2 = call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0t1.internal(i16 8, i16 8, i16 0, i8* %base, i64 %stride)
%3 = call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz1.internal(i16 8, i16 8, i16 0, i8* %base, i64 %stride)
@@ -179,6 +259,72 @@ define void @test_amx_spill(i8* %pointer, i8* %base, i64 %stride) #0 {
; CHECK-NEXT: tilerelease
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
+;
+; APXF-LABEL: test_amx_spill:
+; APXF: # %bb.0:
+; APXF-NEXT: subq $6088, %rsp # encoding: [0x48,0x81,0xec,0xc8,0x17,0x00,0x00]
+; APXF-NEXT: # imm = 0x17C8
+; APXF-NEXT: vxorps %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc0]
+; APXF-NEXT: vmovups %zmm0, -{{[0-9]+}}(%rsp) # encoding: [0x62,0xf1,0x7c,0x48,0x11,0x44,0x24,0xfe]
+; APXF-NEXT: movb $1, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0x80,0x01]
+; APXF-NEXT: movb $8, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xb0,0x08]
+; APXF-NEXT: movw $8, -{{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0x90,0x08,0x00]
+; APXF-NEXT: movb $8, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xb4,0x08]
+; APXF-NEXT: movw $8, -{{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0x98,0x08,0x00]
+; APXF-NEXT: movb $8, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xb5,0x08]
+; APXF-NEXT: movw $8, -{{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0x9a,0x08,0x00]
+; APXF-NEXT: movb $8, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xb6,0x08]
+; APXF-NEXT: movw $8, -{{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0x9c,0x08,0x00]
+; APXF-NEXT: movb $8, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xb7,0x08]
+; APXF-NEXT: movw $8, -{{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0x9e,0x08,0x00]
+; APXF-NEXT: ldtilecfg -{{[0-9]+}}(%rsp) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x49,0x44,0x24,0x80]
+; APXF-NEXT: movw $8, %ax # encoding: [0x66,0xb8,0x08,0x00]
+; APXF-NEXT: tileloadd (%rsi,%rdx), %tmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7b,0x4b,0x04,0x16]
+; APXF-NEXT: t2rpntlvwz0 (%rsi,%rdx), %tmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x6e,0x24,0x16]
+; APXF-NEXT: t2rpntlvwz0t1 (%rsi,%rdx), %tmm6 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x6f,0x34,0x16]
+; APXF-NEXT: movabsq $64, %rcx # encoding: [0x48,0xb9,0x40,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
+; APXF-NEXT: tilestored %tmm6, 4032(%rsp,%rcx) # 1024-byte Folded Spill
+; APXF-NEXT: # encoding: [0xc4,0xe2,0x7a,0x4b,0xb4,0x0c,0xc0,0x0f,0x00,0x00]
+; APXF-NEXT: tilestored %tmm7, 5056(%rsp,%rcx) # 1024-byte Folded Spill
+; APXF-NEXT: # encoding: [0xc4,0xe2,0x7a,0x4b,0xbc,0x0c,0xc0,0x13,0x00,0x00]
+; APXF-NEXT: t2rpntlvwz1 (%rsi,%rdx), %tmm6 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x6e,0x34,0x16]
+; APXF-NEXT: tilestored %tmm6, 1984(%rsp,%rcx) # 1024-byte Folded Spill
+; APXF-NEXT: # encoding: [0xc4,0xe2,0x7a,0x4b,0xb4,0x0c,0xc0,0x07,0x00,0x00]
+; APXF-NEXT: tilestored %tmm7, 3008(%rsp,%rcx) # 1024-byte Folded Spill
+; APXF-NEXT: # encoding: [0xc4,0xe2,0x7a,0x4b,0xbc,0x0c,0xc0,0x0b,0x00,0x00]
+; APXF-NEXT: t2rpntlvwz1t1 (%rsi,%rdx), %tmm6 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x6f,0x34,0x16]
+; APXF-NEXT: tilestored %tmm6, -64(%rsp,%rcx) # 1024-byte Folded Spill
+; APXF-NEXT: # encoding: [0xc4,0xe2,0x7a,0x4b,0x74,0x0c,0xc0]
+; APXF-NEXT: tilestored %tmm7, 960(%rsp,%rcx) # 1024-byte Folded Spill
+; APXF-NEXT: # encoding: [0xc4,0xe2,0x7a,0x4b,0xbc,0x0c,0xc0,0x03,0x00,0x00]
+; APXF-NEXT: t2rpntlvwz0 (%rsi,%rdx), %tmm6 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x6e,0x34,0x16]
+; APXF-NEXT: tilestored %tmm4, (%rsi,%rdx) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x24,0x16]
+; APXF-NEXT: tilestored %tmm5, (%rsi,%rdx) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x2c,0x16]
+; APXF-NEXT: tileloadd 4032(%rsp,%rcx), %tmm4 # 1024-byte Folded Reload
+; APXF-NEXT: # encoding: [0xc4,0xe2,0x7b,0x4b,0xa4,0x0c,0xc0,0x0f,0x00,0x00]
+; APXF-NEXT: tileloadd 5056(%rsp,%rcx), %tmm5 # 1024-byte Folded Reload
+; APXF-NEXT: # encoding: [0xc4,0xe2,0x7b,0x4b,0xac,0x0c,0xc0,0x13,0x00,0x00]
+; APXF-NEXT: tilestored %tmm4, (%rsi,%rdx) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x24,0x16]
+; APXF-NEXT: tilestored %tmm5, (%rsi,%rdx) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x2c,0x16]
+; APXF-NEXT: tileloadd 1984(%rsp,%rcx), %tmm4 # 1024-byte Folded Reload
+; APXF-NEXT: # encoding: [0xc4,0xe2,0x7b,0x4b,0xa4,0x0c,0xc0,0x07,0x00,0x00]
+; APXF-NEXT: tileloadd 3008(%rsp,%rcx), %tmm5 # 1024-byte Folded Reload
+; APXF-NEXT: # encoding: [0xc4,0xe2,0x7b,0x4b,0xac,0x0c,0xc0,0x0b,0x00,0x00]
+; APXF-NEXT: tilestored %tmm4, (%rsi,%rdx) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x24,0x16]
+; APXF-NEXT: tilestored %tmm5, (%rsi,%rdx) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x2c,0x16]
+; APXF-NEXT: tileloadd -64(%rsp,%rcx), %tmm4 # 1024-byte Folded Reload
+; APXF-NEXT: # encoding: [0xc4,0xe2,0x7b,0x4b,0x64,0x0c,0xc0]
+; APXF-NEXT: tileloadd 960(%rsp,%rcx), %tmm5 # 1024-byte Folded Reload
+; APXF-NEXT: # encoding: [0xc4,0xe2,0x7b,0x4b,0xac,0x0c,0xc0,0x03,0x00,0x00]
+; APXF-NEXT: tilestored %tmm4, (%rsi,%rdx) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x24,0x16]
+; APXF-NEXT: tilestored %tmm5, (%rsi,%rdx) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x2c,0x16]
+; APXF-NEXT: tilestored %tmm6, (%rsi,%rdx) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x34,0x16]
+; APXF-NEXT: tilestored %tmm7, (%rsi,%rdx) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x3c,0x16]
+; APXF-NEXT: addq $6088, %rsp # encoding: [0x48,0x81,0xc4,0xc8,0x17,0x00,0x00]
+; APXF-NEXT: # imm = 0x17C8
+; APXF-NEXT: tilerelease # encoding: [0xc4,0xe2,0x78,0x49,0xc0]
+; APXF-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
+; APXF-NEXT: retq # encoding: [0xc3]
%a = call x86_amx @llvm.x86.tileloadd64.internal(i16 8, i16 8, i8* %base, i64 %stride)
%b1 = call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0.internal(i16 8, i16 8, i16 8, i8* %base, i64 %stride)
%b2 = call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0t1.internal(i16 8, i16 8, i16 8, i8* %base, i64 %stride)
diff --git a/llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-movrs.txt b/llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-movrs.txt
index 6df44c87d2332f..57e3153da401bf 100755
--- a/llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-movrs.txt
+++ b/llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-movrs.txt
@@ -96,3 +96,99 @@
# ATT: tileloaddrst1 -32(,%rbp,2), %tmm3
# INTEL: tileloaddrst1 tmm3, [2*rbp - 32]
0xc4,0xe2,0x79,0x4a,0x1c,0x6d,0xe0,0xff,0xff,0xff
+
+# ATT: t2rpntlvwz0rs 268435456(%r16,%r14,8), %tmm6
+# INTEL: t2rpntlvwz0rs tmm6, [r16 + 8*r14 + 268435456]
+0x62,0xbd,0x7c,0x08,0xf8,0xb4,0xf0,0x00,0x00,0x00,0x10
+
+# ATT: t2rpntlvwz0rs 291(%r8,%r17,4), %tmm2
+# INTEL: t2rpntlvwz0rs tmm2, [r8 + 4*r17 + 291]
+0x62,0xd5,0x78,0x08,0xf8,0x94,0x88,0x23,0x01,0x00,0x00
+
+# ATT: t2rpntlvwz0rs 64(%r18), %tmm6
+# INTEL: t2rpntlvwz0rs tmm6, [r18 + 64]
+0x62,0xfd,0x7c,0x08,0xf8,0x74,0x22,0x40
+
+# ATT: t2rpntlvwz0rs -32(,%rbp,2), %tmm2
+# INTEL: t2rpntlvwz0rs tmm2, [2*rbp - 32]
+0x62,0xf5,0x7c,0x08,0xf8,0x14,0x6d,0xe0,0xff,0xff,0xff
+
+# ATT: t2rpntlvwz0rst1 268435456(%r16,%r14,8), %tmm6
+# INTEL: t2rpntlvwz0rst1 tmm6, [r16 + 8*r14 + 268435456]
+0x62,0xbd,0x7c,0x08,0xf9,0xb4,0xf0,0x00,0x00,0x00,0x10
+
+# ATT: t2rpntlvwz0rst1 291(%r8,%r17,4), %tmm2
+# INTEL: t2rpntlvwz0rst1 tmm2, [r8 + 4*r17 + 291]
+0x62,0xd5,0x78,0x08,0xf9,0x94,0x88,0x23,0x01,0x00,0x00
+
+# ATT: t2rpntlvwz0rst1 64(%r18), %tmm6
+# INTEL: t2rpntlvwz0rst1 tmm6, [r18 + 64]
+0x62,0xfd,0x7c,0x08,0xf9,0x74,0x22,0x40
+
+# ATT: t2rpntlvwz0rst1 -32(,%rbp,2), %tmm2
+# INTEL: t2rpntlvwz0rst1 tmm2, [2*rbp - 32]
+0x62,0xf5,0x7c,0x08,0xf9,0x14,0x6d,0xe0,0xff,0xff,0xff
+
+# ATT: t2rpntlvwz1rs 268435456(%r16,%r14,8), %tmm6
+# INTEL: t2rpntlvwz1rs tmm6, [r16 + 8*r14 + 268435456]
+0x62,0xbd,0x7d,0x08,0xf8,0xb4,0xf0,0x00,0x00,0x00,0x10
+
+# ATT: t2rpntlvwz1rs 291(%r8,%r17,4), %tmm2
+# INTEL: t2rpntlvwz1rs tmm2, [r8 + 4*r17 + 291]
+0x62,0xd5,0x79,0x08,0xf8,0x94,0x88,0x23,0x01,0x00,0x00
+
+# ATT: t2rpntlvwz1rs 64(%r18), %tmm6
+# INTEL: t2rpntlvwz1rs tmm6, [r18 + 64]
+0x62,0xfd,0x7d,0x08,0xf8,0x74,0x22,0x40
+
+# ATT: t2rpntlvwz1rs -32(,%rbp,2), %tmm2
+# INTEL: t2rpntlvwz1rs tmm2, [2*rbp - 32]
+0x62,0xf5,0x7d,0x08,0xf8,0x14,0x6d,0xe0,0xff,0xff,0xff
+
+# ATT: t2rpntlvwz1rst1 268435456(%r16,%r14,8), %tmm6
+# INTEL: t2rpntlvwz1rst1 tmm6, [r16 + 8*r14 + 268435456]
+0x62,0xbd,0x7d,0x08,0xf9,0xb4,0xf0,0x00,0x00,0x00,0x10
+
+# ATT: t2rpntlvwz1rst1 291(%r8,%r17,4), %tmm2
+# INTEL: t2rpntlvwz1rst1 tmm2, [r8 + 4*r17 + 291]
+0x62,0xd5,0x79,0x08,0xf9,0x94,0x88,0x23,0x01,0x00,0x00
+
+# ATT: t2rpntlvwz1rst1 64(%r18), %tmm6
+# INTEL: t2rpntlvwz1rst1 tmm6, [r18 + 64]
+0x62,0xfd,0x7d,0x08,0xf9,0x74,0x22,0x40
+
+# ATT: t2rpntlvwz1rst1 -32(,%rbp,2), %tmm2
+# INTEL: t2rpntlvwz1rst1 tmm2, [2*rbp - 32]
+0x62,0xf5,0x7d,0x08,0xf9,0x14,0x6d,0xe0,0xff,0xff,0xff
+
+# ATT: tileloaddrs 268435456(%r16,%r14,8), %tmm6
+# INTEL: tileloaddrs tmm6, [r16 + 8*r14 + 268435456]
+0x62,0xba,0x7f,0x08,0x4a,0xb4,0xf0,0x00,0x00,0x00,0x10
+
+# ATT: tileloaddrs 291(%r8,%r17,4), %tmm3
+# INTEL: tileloaddrs tmm3, [r8 + 4*r17 + 291]
+0x62,0xd2,0x7b,0x08,0x4a,0x9c,0x88,0x23,0x01,0x00,0x00
+
+# ATT: tileloaddrs 64(%r18), %tmm6
+# INTEL: tileloaddrs tmm6, [r18 + 64]
+0x62,0xfa,0x7f,0x08,0x4a,0x74,0x22,0x40
+
+# ATT: tileloaddrs -32(,%rbp,2), %tmm3
+# INTEL: tileloaddrs tmm3, [2*rbp - 32]
+0x62,0xf2,0x7f,0x08,0x4a,0x1c,0x6d,0xe0,0xff,0xff,0xff
+
+# ATT: tileloaddrst1 268435456(%r16,%r14,8), %tmm6
+# INTEL: tileloaddrst1 tmm6, [r16 + 8*r14 + 268435456]
+0x62,0xba,0x7d,0x08,0x4a,0xb4,0xf0,0x00,0x00,0x00,0x10
+
+# ATT: tileloaddrst1 291(%r8,%r17,4), %tmm3
+# INTEL: tileloaddrst1 tmm3, [r8 + 4*r17 + 291]
+0x62,0xd2,0x79,0x08,0x4a,0x9c,0x88,0x23,0x01,0x00,0x00
+
+# ATT: tileloaddrst1 64(%r18), %tmm6
+# INTEL: tileloaddrst1 tmm6, [r18 + 64]
+0x62,0xfa,0x7d,0x08,0x4a,0x74,0x22,0x40
+
+# ATT: tileloaddrst1 -32(,%rbp,2), %tmm3
+# INTEL: tileloaddrst1 tmm3, [2*rbp - 32]
+0x62,0xf2,0x7d,0x08,0x4a,0x1c,0x6d,0xe0,0xff,0xff,0xff
diff --git a/llvm/test/MC/Disassembler/X86/amx-transpose-att.txt b/llvm/test/MC/Disassembler/X86/amx-transpose-att.txt
index 8c6f1be80ba2dc..d768630ac1475f 100644
--- a/llvm/test/MC/Disassembler/X86/amx-transpose-att.txt
+++ b/llvm/test/MC/Disassembler/X86/amx-transpose-att.txt
@@ -49,6 +49,54 @@
# INTEL: t2rpntlvwz1t1 tmm2, [2*rbp - 32]
0xc4,0xe2,0x79,0x6f,0x14,0x6d,0xe0,0xff,0xff,0xff
+# ATT: t2rpntlvwz0 268435456(%r16,%r14,8), %tmm4
+# INTEL: t2rpntlvwz0 tmm4, [r16 + 8*r14 + 268435456]
+0x62,0xba,0x7c,0x08,0x6e,0xa4,0xf0,0x00,0x00,0x00,0x10
+
+# ATT: t2rpntlvwz0 291(%r8,%r17,4), %tmm2
+# INTEL: t2rpntlvwz0 tmm2, [r8 + 4*r17 + 291]
+0x62,0xd2,0x78,0x08,0x6e,0x94,0x88,0x23,0x01,0x00,0x00
+
+# ATT: t2rpntlvwz0 -32(,%rbp,2), %tmm2
+# INTEL: t2rpntlvwz0 tmm2, [2*rbp - 32]
+0x62,0xf2,0x7c,0x08,0x6e,0x14,0x6d,0xe0,0xff,0xff,0xff
+
+# ATT: t2rpntlvwz0t1 268435456(%r16,%r14,8), %tmm4
+# INTEL: t2rpntlvwz0t1 tmm4, [r16 + 8*r14 + 268435456]
+0x62,0xba,0x7c,0x08,0x6f,0xa4,0xf0,0x00,0x00,0x00,0x10
+
+# ATT: t2rpntlvwz0t1 291(%r8,%r17,4), %tmm2
+# INTEL: t2rpntlvwz0t1 tmm2, [r8 + 4*r17 + 291]
+0x62,0xd2,0x78,0x08,0x6f,0x94,0x88,0x23,0x01,0x00,0x00
+
+# ATT: t2rpntlvwz0t1 -32(,%rbp,2), %tmm2
+# INTEL: t2rpntlvwz0t1 tmm2, [2*rbp - 32]
+0x62,0xf2,0x7c,0x08,0x6f,0x14,0x6d,0xe0,0xff,0xff,0xff
+
+# ATT: t2rpntlvwz1 268435456(%r16,%r14,8), %tmm4
+# INTEL: t2rpntlvwz1 tmm4, [r16 + 8*r14 + 268435456]
+0x62,0xba,0x7d,0x08,0x6e,0xa4,0xf0,0x00,0x00,0x00,0x10
+
+# ATT: t2rpntlvwz1 291(%r8,%r17,4), %tmm2
+# INTEL: t2rpntlvwz1 tmm2, [r8 + 4*r17 + 291]
+0x62,0xd2,0x79,0x08,0x6e,0x94,0x88,0x23,0x01,0x00,0x00
+
+# ATT: t2rpntlvwz1 -32(,%rbp,2), %tmm2
+# INTEL: t2rpntlvwz1 tmm2, [2*rbp - 32]
+0x62,0xf2,0x7d,0x08,0x6e,0x14,0x6d,0xe0,0xff,0xff,0xff
+
+# ATT: t2rpntlvwz1t1 268435456(%r16,%r14,8), %tmm4
+# INTEL: t2rpntlvwz1t1 tmm4, [r16 + 8*r14 + 268435456]
+0x62,0xba,0x7d,0x08,0x6f,0xa4,0xf0,0x00,0x00,0x00,0x10
+
+# ATT: t2rpntlvwz1t1 291(%r8,%r17,4), %tmm2
+# INTEL: t2rpntlvwz1t1 tmm2, [r8 + 4*r17 + 291]
+0x62,0xd2,0x79,0x08,0x6f,0x94,0x88,0x23,0x01,0x00,0x00
+
+# ATT: t2rpntlvwz1t1 -32(,%rbp,2), %tmm2
+# INTEL: t2rpntlvwz1t1 tmm2, [2*rbp - 32]
+0x62,0xf2,0x7d,0x08,0x6f,0x14,0x6d,0xe0,0xff,0xff,0xff
+
# ATT: ttransposed %tmm1, %tmm2
# INTEL: ttransposed tmm2, tmm1
0xc4,0xe2,0x7a,0x5f,0xd1
diff --git a/llvm/test/MC/X86/AMX/x86-64-amx-movrs-att.s b/llvm/test/MC/X86/AMX/x86-64-amx-movrs-att.s
index d780ad4f0e3691..92db672e1c82d1 100755
--- a/llvm/test/MC/X86/AMX/x86-64-amx-movrs-att.s
+++ b/llvm/test/MC/X86/AMX/x86-64-amx-movrs-att.s
@@ -86,4 +86,92 @@
// CHECK: tileloaddrst1 -32(,%rbp,2), %tmm3
// CHECK: encoding: [0xc4,0xe2,0x79,0x4a,0x1c,0x6d,0xe0,0xff,0xff,0xff]
- tileloaddrst1 -32(,%rbp,2), %tmm3
\ No newline at end of file
+ tileloaddrst1 -32(,%rbp,2), %tmm3
+
+// CHECK: t2rpntlvwz0rs 268435456(%r16,%r14,8), %tmm6
+// CHECK: encoding: [0x62,0xbd,0x7c,0x08,0xf8,0xb4,0xf0,0x00,0x00,0x00,0x10]
+ t2rpntlvwz0rs 268435456(%r16,%r14,8), %tmm6
+
+// CHECK: t2rpntlvwz0rs 291(%r8,%r17,4), %tmm2
+// CHECK: encoding: [0x62,0xd5,0x78,0x08,0xf8,0x94,0x88,0x23,0x01,0x00,0x00]
+ t2rpntlvwz0rs 291(%r8,%r17,4), %tmm2
+
+// CHECK: t2rpntlvwz0rs 64(%r18), %tmm6
+// CHECK: encoding: [0x62,0xfd,0x7c,0x08,0xf8,0x74,0x22,0x40]
+ t2rpntlvwz0rs 64(%r18), %tmm6
+
+// CHECK: {evex} t2rpntlvwz0rs -32(,%rbp,2), %tmm2
+// CHECK: encoding: [0x62,0xf5,0x7c,0x08,0xf8,0x14,0x6d,0xe0,0xff,0xff,0xff]
+ {evex} t2rpntlvwz0rs -32(,%rbp,2), %tmm2
+
+// CHECK: t2rpntlvwz0rst1 268435456(%r16,%r14,8), %tmm6
+// CHECK: encoding: [0x62,0xbd,0x7c,0x08,0xf9,0xb4,0xf0,0x00,0x00,0x00,0x10]
+ t2rpntlvwz0rst1 268435456(%r16,%r14,8), %tmm6
+
+// CHECK: t2rpntlvwz0rst1 291(%r8,%r17,4), %tmm2
+// CHECK: encoding: [0x62,0xd5,0x78,0x08,0xf9,0x94,0x88,0x23,0x01,0x00,0x00]
+ t2rpntlvwz0rst1 291(%r8,%r17,4), %tmm2
+
+// CHECK: t2rpntlvwz0rst1 64(%r18), %tmm6
+// CHECK: encoding: [0x62,0xfd,0x7c,0x08,0xf9,0x74,0x22,0x40]
+ t2rpntlvwz0rst1 64(%r18), %tmm6
+
+// CHECK: {evex} t2rpntlvwz0rst1 -32(,%rbp,2), %tmm2
+// CHECK: encoding: [0x62,0xf5,0x7c,0x08,0xf9,0x14,0x6d,0xe0,0xff,0xff,0xff]
+ {evex} t2rpntlvwz0rst1 -32(,%rbp,2), %tmm2
+
+// CHECK: t2rpntlvwz1rs 268435456(%r16,%r14,8), %tmm6
+// CHECK: encoding: [0x62,0xbd,0x7d,0x08,0xf8,0xb4,0xf0,0x00,0x00,0x00,0x10]
+ t2rpntlvwz1rs 268435456(%r16,%r14,8), %tmm6
+
+// CHECK: t2rpntlvwz1rs 291(%r8,%r17,4), %tmm2
+// CHECK: encoding: [0x62,0xd5,0x79,0x08,0xf8,0x94,0x88,0x23,0x01,0x00,0x00]
+ t2rpntlvwz1rs 291(%r8,%r17,4), %tmm2
+
+// CHECK: t2rpntlvwz1rs 64(%r18), %tmm6
+// CHECK: encoding: [0x62,0xfd,0x7d,0x08,0xf8,0x74,0x22,0x40]
+ t2rpntlvwz1rs 64(%r18), %tmm6
+
+// CHECK: {evex} t2rpntlvwz1rs -32(,%rbp,2), %tmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0xf8,0x14,0x6d,0xe0,0xff,0xff,0xff]
+ {evex} t2rpntlvwz1rs -32(,%rbp,2), %tmm2
+
+// CHECK: t2rpntlvwz1rst1 268435456(%r16,%r14,8), %tmm6
+// CHECK: encoding: [0x62,0xbd,0x7d,0x08,0xf9,0xb4,0xf0,0x00,0x00,0x00,0x10]
+ t2rpntlvwz1rst1 268435456(%r16,%r14,8), %tmm6
+
+// CHECK: t2rpntlvwz1rst1 291(%r8,%r17,4), %tmm2
+// CHECK: encoding: [0x62,0xd5,0x79,0x08,0xf9,0x94,0x88,0x23,0x01,0x00,0x00]
+ t2rpntlvwz1rst1 291(%r8,%r17,4), %tmm2
+
+// CHECK: t2rpntlvwz1rst1 64(%r18), %tmm6
+// CHECK: encoding: [0x62,0xfd,0x7d,0x08,0xf9,0x74,0x22,0x40]
+ t2rpntlvwz1rst1 64(%r18), %tmm6
+
+// CHECK: {evex} t2rpntlvwz1rst1 -32(,%rbp,2), %tmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0xf9,0x14,0x6d,0xe0,0xff,0xff,0xff]
+ {evex} t2rpntlvwz1rst1 -32(,%rbp,2), %tmm2
+
+// CHECK: tileloaddrs 291(%r16,%rax,4), %tmm3
+// CHECK: encoding: [0x62,0xfa,0x7f,0x08,0x4a,0x9c,0x80,0x23,0x01,0x00,0x00]
+ tileloaddrs 291(%r16,%rax,4), %tmm3
+
+// CHECK: tileloaddrs 291(%r8,%r17,4), %tmm3
+// CHECK: encoding: [0x62,0xd2,0x7b,0x08,0x4a,0x9c,0x88,0x23,0x01,0x00,0x00]
+ tileloaddrs 291(%r8,%r17,4), %tmm3
+
+// CHECK: {evex} tileloaddrs -32(,%rbp,2), %tmm3
+// CHECK: encoding: [0x62,0xf2,0x7f,0x08,0x4a,0x1c,0x6d,0xe0,0xff,0xff,0xff]
+ {evex} tileloaddrs -32(,%rbp,2), %tmm3
+
+// CHECK: tileloaddrst1 291(%r16,%rax,4), %tmm3
+// CHECK: encoding: [0x62,0xfa,0x7d,0x08,0x4a,0x9c,0x80,0x23,0x01,0x00,0x00]
+ tileloaddrst1 291(%r16,%rax,4), %tmm3
+
+// CHECK: tileloaddrst1 291(%r8,%r17,4), %tmm3
+// CHECK: encoding: [0x62,0xd2,0x79,0x08,0x4a,0x9c,0x88,0x23,0x01,0x00,0x00]
+ tileloaddrst1 291(%r8,%r17,4), %tmm3
+
+// CHECK: {evex} tileloaddrst1 -32(,%rbp,2), %tmm3
+// CHECK: encoding: [0x62,0xf2,0x7d,0x08,0x4a,0x1c,0x6d,0xe0,0xff,0xff,0xff]
+ {evex} tileloaddrst1 -32(,%rbp,2), %tmm3
diff --git a/llvm/test/MC/X86/AMX/x86-64-amx-movrs-intel.s b/llvm/test/MC/X86/AMX/x86-64-amx-movrs-intel.s
index ccc7ac51a98a44..140d1aa6b198ea 100755
--- a/llvm/test/MC/X86/AMX/x86-64-amx-movrs-intel.s
+++ b/llvm/test/MC/X86/AMX/x86-64-amx-movrs-intel.s
@@ -95,3 +95,99 @@
// CHECK: tileloaddrst1 tmm3, [2*rbp - 32]
// CHECK: encoding: [0xc4,0xe2,0x79,0x4a,0x1c,0x6d,0xe0,0xff,0xff,0xff]
tileloaddrst1 tmm3, [2*rbp - 32]
+
+// CHECK: t2rpntlvwz0rs tmm6, [r16 + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xbd,0x7c,0x08,0xf8,0xb4,0xf0,0x00,0x00,0x00,0x10]
+ t2rpntlvwz0rs tmm6, [r16 + 8*r14 + 268435456]
+
+// CHECK: t2rpntlvwz0rs tmm2, [r8 + 4*r17 + 291]
+// CHECK: encoding: [0x62,0xd5,0x78,0x08,0xf8,0x94,0x88,0x23,0x01,0x00,0x00]
+ t2rpntlvwz0rs tmm2, [r8 + 4*r17 + 291]
+
+// CHECK: t2rpntlvwz0rs tmm6, [r18 + 64]
+// CHECK: encoding: [0x62,0xfd,0x7c,0x08,0xf8,0x74,0x22,0x40]
+ t2rpntlvwz0rs tmm6, [r18 + 64]
+
+// CHECK: {evex} t2rpntlvwz0rs tmm2, [2*rbp - 32]
+// CHECK: encoding: [0x62,0xf5,0x7c,0x08,0xf8,0x14,0x6d,0xe0,0xff,0xff,0xff]
+ {evex} t2rpntlvwz0rs tmm2, [2*rbp - 32]
+
+// CHECK: t2rpntlvwz0rst1 tmm6, [r16 + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xbd,0x7c,0x08,0xf9,0xb4,0xf0,0x00,0x00,0x00,0x10]
+ t2rpntlvwz0rst1 tmm6, [r16 + 8*r14 + 268435456]
+
+// CHECK: t2rpntlvwz0rst1 tmm2, [r8 + 4*r17 + 291]
+// CHECK: encoding: [0x62,0xd5,0x78,0x08,0xf9,0x94,0x88,0x23,0x01,0x00,0x00]
+ t2rpntlvwz0rst1 tmm2, [r8 + 4*r17 + 291]
+
+// CHECK: t2rpntlvwz0rst1 tmm6, [r18 + 64]
+// CHECK: encoding: [0x62,0xfd,0x7c,0x08,0xf9,0x74,0x22,0x40]
+ t2rpntlvwz0rst1 tmm6, [r18 + 64]
+
+// CHECK: {evex} t2rpntlvwz0rst1 tmm2, [2*rbp - 32]
+// CHECK: encoding: [0x62,0xf5,0x7c,0x08,0xf9,0x14,0x6d,0xe0,0xff,0xff,0xff]
+ {evex} t2rpntlvwz0rst1 tmm2, [2*rbp - 32]
+
+// CHECK: t2rpntlvwz1rs tmm6, [r16 + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xbd,0x7d,0x08,0xf8,0xb4,0xf0,0x00,0x00,0x00,0x10]
+ t2rpntlvwz1rs tmm6, [r16 + 8*r14 + 268435456]
+
+// CHECK: t2rpntlvwz1rs tmm2, [r8 + 4*r17 + 291]
+// CHECK: encoding: [0x62,0xd5,0x79,0x08,0xf8,0x94,0x88,0x23,0x01,0x00,0x00]
+ t2rpntlvwz1rs tmm2, [r8 + 4*r17 + 291]
+
+// CHECK: t2rpntlvwz1rs tmm6, [r18 + 64]
+// CHECK: encoding: [0x62,0xfd,0x7d,0x08,0xf8,0x74,0x22,0x40]
+ t2rpntlvwz1rs tmm6, [r18 + 64]
+
+// CHECK: {evex} t2rpntlvwz1rs tmm2, [2*rbp - 32]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0xf8,0x14,0x6d,0xe0,0xff,0xff,0xff]
+ {evex} t2rpntlvwz1rs tmm2, [2*rbp - 32]
+
+// CHECK: t2rpntlvwz1rst1 tmm6, [r16 + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xbd,0x7d,0x08,0xf9,0xb4,0xf0,0x00,0x00,0x00,0x10]
+ t2rpntlvwz1rst1 tmm6, [r16 + 8*r14 + 268435456]
+
+// CHECK: t2rpntlvwz1rst1 tmm2, [r8 + 4*r17 + 291]
+// CHECK: encoding: [0x62,0xd5,0x79,0x08,0xf9,0x94,0x88,0x23,0x01,0x00,0x00]
+ t2rpntlvwz1rst1 tmm2, [r8 + 4*r17 + 291]
+
+// CHECK: t2rpntlvwz1rst1 tmm6, [r18 + 64]
+// CHECK: encoding: [0x62,0xfd,0x7d,0x08,0xf9,0x74,0x22,0x40]
+ t2rpntlvwz1rst1 tmm6, [r18 + 64]
+
+// CHECK: {evex} t2rpntlvwz1rst1 tmm2, [2*rbp - 32]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0xf9,0x14,0x6d,0xe0,0xff,0xff,0xff]
+ {evex} t2rpntlvwz1rst1 tmm2, [2*rbp - 32]
+
+// CHECK: tileloaddrs tmm6, [r16 + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xba,0x7f,0x08,0x4a,0xb4,0xf0,0x00,0x00,0x00,0x10]
+ tileloaddrs tmm6, [r16 + 8*r14 + 268435456]
+
+// CHECK: tileloaddrs tmm3, [r8 + 4*r17 + 291]
+// CHECK: encoding: [0x62,0xd2,0x7b,0x08,0x4a,0x9c,0x88,0x23,0x01,0x00,0x00]
+ tileloaddrs tmm3, [r8 + 4*r17 + 291]
+
+// CHECK: tileloaddrs tmm6, [r18 + 64]
+// CHECK: encoding: [0x62,0xfa,0x7f,0x08,0x4a,0x74,0x22,0x40]
+ tileloaddrs tmm6, [r18 + 64]
+
+// CHECK: {evex} tileloaddrs tmm3, [2*rbp - 32]
+// CHECK: encoding: [0x62,0xf2,0x7f,0x08,0x4a,0x1c,0x6d,0xe0,0xff,0xff,0xff]
+ {evex} tileloaddrs tmm3, [2*rbp - 32]
+
+// CHECK: tileloaddrst1 tmm6, [r16 + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xba,0x7d,0x08,0x4a,0xb4,0xf0,0x00,0x00,0x00,0x10]
+ tileloaddrst1 tmm6, [r16 + 8*r14 + 268435456]
+
+// CHECK: tileloaddrst1 tmm3, [r8 + 4*r17 + 291]
+// CHECK: encoding: [0x62,0xd2,0x79,0x08,0x4a,0x9c,0x88,0x23,0x01,0x00,0x00]
+ tileloaddrst1 tmm3, [r8 + 4*r17 + 291]
+
+// CHECK: tileloaddrst1 tmm6, [r18 + 64]
+// CHECK: encoding: [0x62,0xfa,0x7d,0x08,0x4a,0x74,0x22,0x40]
+ tileloaddrst1 tmm6, [r18 + 64]
+
+// CHECK: {evex} tileloaddrst1 tmm3, [2*rbp - 32]
+// CHECK: encoding: [0x62,0xf2,0x7d,0x08,0x4a,0x1c,0x6d,0xe0,0xff,0xff,0xff]
+ {evex} tileloaddrst1 tmm3, [2*rbp - 32]
diff --git a/llvm/test/MC/X86/amx-transpose-att.s b/llvm/test/MC/X86/amx-transpose-att.s
index 21bbf258ac6ef8..5158470f8c9053 100644
--- a/llvm/test/MC/X86/amx-transpose-att.s
+++ b/llvm/test/MC/X86/amx-transpose-att.s
@@ -48,6 +48,54 @@
// CHECK: encoding: [0xc4,0xe2,0x79,0x6f,0x14,0x6d,0xe0,0xff,0xff,0xff]
t2rpntlvwz1t1 -32(,%rbp,2), %tmm2
+// CHECK: t2rpntlvwz0 268435456(%r16,%r14,8), %tmm4
+// CHECK: encoding: [0x62,0xba,0x7c,0x08,0x6e,0xa4,0xf0,0x00,0x00,0x00,0x10]
+ t2rpntlvwz0 268435456(%r16,%r14,8), %tmm4
+
+// CHECK: t2rpntlvwz0 291(%r8,%r17,4), %tmm2
+// CHECK: encoding: [0x62,0xd2,0x78,0x08,0x6e,0x94,0x88,0x23,0x01,0x00,0x00]
+ t2rpntlvwz0 291(%r8,%r17,4), %tmm2
+
+// CHECK: {evex} t2rpntlvwz0 -32(,%rbp,2), %tmm2
+// CHECK: encoding: [0x62,0xf2,0x7c,0x08,0x6e,0x14,0x6d,0xe0,0xff,0xff,0xff]
+ {evex} t2rpntlvwz0 -32(,%rbp,2), %tmm2
+
+// CHECK: t2rpntlvwz0t1 268435456(%r16,%r14,8), %tmm4
+// CHECK: encoding: [0x62,0xba,0x7c,0x08,0x6f,0xa4,0xf0,0x00,0x00,0x00,0x10]
+ t2rpntlvwz0t1 268435456(%r16,%r14,8), %tmm4
+
+// CHECK: t2rpntlvwz0t1 291(%r8,%r17,4), %tmm2
+// CHECK: encoding: [0x62,0xd2,0x78,0x08,0x6f,0x94,0x88,0x23,0x01,0x00,0x00]
+ t2rpntlvwz0t1 291(%r8,%r17,4), %tmm2
+
+// CHECK: {evex} t2rpntlvwz0t1 -32(,%rbp,2), %tmm2
+// CHECK: encoding: [0x62,0xf2,0x7c,0x08,0x6f,0x14,0x6d,0xe0,0xff,0xff,0xff]
+ {evex} t2rpntlvwz0t1 -32(,%rbp,2), %tmm2
+
+// CHECK: t2rpntlvwz1 268435456(%r16,%r14,8), %tmm4
+// CHECK: encoding: [0x62,0xba,0x7d,0x08,0x6e,0xa4,0xf0,0x00,0x00,0x00,0x10]
+ t2rpntlvwz1 268435456(%r16,%r14,8), %tmm4
+
+// CHECK: t2rpntlvwz1 291(%r8,%r17,4), %tmm2
+// CHECK: encoding: [0x62,0xd2,0x79,0x08,0x6e,0x94,0x88,0x23,0x01,0x00,0x00]
+ t2rpntlvwz1 291(%r8,%r17,4), %tmm2
+
+// CHECK: {evex} t2rpntlvwz1 -32(,%rbp,2), %tmm2
+// CHECK: encoding: [0x62,0xf2,0x7d,0x08,0x6e,0x14,0x6d,0xe0,0xff,0xff,0xff]
+ {evex} t2rpntlvwz1 -32(,%rbp,2), %tmm2
+
+// CHECK: t2rpntlvwz1t1 268435456(%r16,%r14,8), %tmm4
+// CHECK: encoding: [0x62,0xba,0x7d,0x08,0x6f,0xa4,0xf0,0x00,0x00,0x00,0x10]
+ t2rpntlvwz1t1 268435456(%r16,%r14,8), %tmm4
+
+// CHECK: t2rpntlvwz1t1 291(%r8,%r17,4), %tmm2
+// CHECK: encoding: [0x62,0xd2,0x79,0x08,0x6f,0x94,0x88,0x23,0x01,0x00,0x00]
+ t2rpntlvwz1t1 291(%r8,%r17,4), %tmm2
+
+// CHECK: {evex} t2rpntlvwz1t1 -32(,%rbp,2), %tmm2
+// CHECK: encoding: [0x62,0xf2,0x7d,0x08,0x6f,0x14,0x6d,0xe0,0xff,0xff,0xff]
+ {evex} t2rpntlvwz1t1 -32(,%rbp,2), %tmm2
+
// CHECK: ttransposed %tmm1, %tmm5
// CHECK: encoding: [0xc4,0xe2,0x7a,0x5f,0xe9]
ttransposed %tmm1, %tmm5
diff --git a/llvm/test/MC/X86/amx-transpose-intel.s b/llvm/test/MC/X86/amx-transpose-intel.s
index a772232ddbbf2e..0d2c22f67a1731 100644
--- a/llvm/test/MC/X86/amx-transpose-intel.s
+++ b/llvm/test/MC/X86/amx-transpose-intel.s
@@ -48,6 +48,54 @@
// CHECK: encoding: [0xc4,0xe2,0x79,0x6f,0x14,0x6d,0xe0,0xff,0xff,0xff]
t2rpntlvwz1t1 tmm2, [2*rbp - 32]
+// CHECK: t2rpntlvwz0 tmm4, [r16 + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xba,0x7c,0x08,0x6e,0xa4,0xf0,0x00,0x00,0x00,0x10]
+ t2rpntlvwz0 tmm4, [r16 + 8*r14 + 268435456]
+
+// CHECK: t2rpntlvwz0 tmm2, [r8 + 4*r17 + 291]
+// CHECK: encoding: [0x62,0xd2,0x78,0x08,0x6e,0x94,0x88,0x23,0x01,0x00,0x00]
+ t2rpntlvwz0 tmm2, [r8 + 4*r17 + 291]
+
+// CHECK: {evex} t2rpntlvwz0 tmm2, [2*rbp - 32]
+// CHECK: encoding: [0x62,0xf2,0x7c,0x08,0x6e,0x14,0x6d,0xe0,0xff,0xff,0xff]
+ {evex} t2rpntlvwz0 tmm2, [2*rbp - 32]
+
+// CHECK: t2rpntlvwz0t1 tmm4, [r16 + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xba,0x7c,0x08,0x6f,0xa4,0xf0,0x00,0x00,0x00,0x10]
+ t2rpntlvwz0t1 tmm4, [r16 + 8*r14 + 268435456]
+
+// CHECK: t2rpntlvwz0t1 tmm2, [r8 + 4*r17 + 291]
+// CHECK: encoding: [0x62,0xd2,0x78,0x08,0x6f,0x94,0x88,0x23,0x01,0x00,0x00]
+ t2rpntlvwz0t1 tmm2, [r8 + 4*r17 + 291]
+
+// CHECK: {evex} t2rpntlvwz0t1 tmm2, [2*rbp - 32]
+// CHECK: encoding: [0x62,0xf2,0x7c,0x08,0x6f,0x14,0x6d,0xe0,0xff,0xff,0xff]
+ {evex} t2rpntlvwz0t1 tmm2, [2*rbp - 32]
+
+// CHECK: t2rpntlvwz1 tmm4, [r16 + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xba,0x7d,0x08,0x6e,0xa4,0xf0,0x00,0x00,0x00,0x10]
+ t2rpntlvwz1 tmm4, [r16 + 8*r14 + 268435456]
+
+// CHECK: t2rpntlvwz1 tmm2, [r8 + 4*r17 + 291]
+// CHECK: encoding: [0x62,0xd2,0x79,0x08,0x6e,0x94,0x88,0x23,0x01,0x00,0x00]
+ t2rpntlvwz1 tmm2, [r8 + 4*r17 + 291]
+
+// CHECK: {evex} t2rpntlvwz1 tmm2, [2*rbp - 32]
+// CHECK: encoding: [0x62,0xf2,0x7d,0x08,0x6e,0x14,0x6d,0xe0,0xff,0xff,0xff]
+ {evex} t2rpntlvwz1 tmm2, [2*rbp - 32]
+
+// CHECK: t2rpntlvwz1t1 tmm4, [r16 + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xba,0x7d,0x08,0x6f,0xa4,0xf0,0x00,0x00,0x00,0x10]
+ t2rpntlvwz1t1 tmm4, [r16 + 8*r14 + 268435456]
+
+// CHECK: t2rpntlvwz1t1 tmm2, [r8 + 4*r17 + 291]
+// CHECK: encoding: [0x62,0xd2,0x79,0x08,0x6f,0x94,0x88,0x23,0x01,0x00,0x00]
+ t2rpntlvwz1t1 tmm2, [r8 + 4*r17 + 291]
+
+// CHECK: {evex} t2rpntlvwz1t1 tmm2, [2*rbp - 32]
+// CHECK: encoding: [0x62,0xf2,0x7d,0x08,0x6f,0x14,0x6d,0xe0,0xff,0xff,0xff]
+ {evex} t2rpntlvwz1t1 tmm2, [2*rbp - 32]
+
// CHECK: ttransposed tmm5, tmm1
// CHECK: encoding: [0xc4,0xe2,0x7a,0x5f,0xe9]
ttransposed tmm5, tmm1
diff --git a/llvm/test/TableGen/x86-instr-mapping.inc b/llvm/test/TableGen/x86-instr-mapping.inc
index ed43684db2dfc4..c1a64eb41f7848 100644
--- a/llvm/test/TableGen/x86-instr-mapping.inc
+++ b/llvm/test/TableGen/x86-instr-mapping.inc
@@ -163,6 +163,16 @@ static const X86TableEntry X86CompressEVEXTable[] = {
{ X86::SHRX64rm_EVEX, X86::SHRX64rm },
{ X86::SHRX64rr_EVEX, X86::SHRX64rr },
{ X86::STTILECFG_EVEX, X86::STTILECFG },
+ { X86::T2RPNTLVWZ0RST1_EVEX, X86::T2RPNTLVWZ0RST1 },
+ { X86::T2RPNTLVWZ0RS_EVEX, X86::T2RPNTLVWZ0RS },
+ { X86::T2RPNTLVWZ0T1_EVEX, X86::T2RPNTLVWZ0T1 },
+ { X86::T2RPNTLVWZ0_EVEX, X86::T2RPNTLVWZ0 },
+ { X86::T2RPNTLVWZ1RST1_EVEX, X86::T2RPNTLVWZ1RST1 },
+ { X86::T2RPNTLVWZ1RS_EVEX, X86::T2RPNTLVWZ1RS },
+ { X86::T2RPNTLVWZ1T1_EVEX, X86::T2RPNTLVWZ1T1 },
+ { X86::T2RPNTLVWZ1_EVEX, X86::T2RPNTLVWZ1 },
+ { X86::TILELOADDRST1_EVEX, X86::TILELOADDRST1 },
+ { X86::TILELOADDRS_EVEX, X86::TILELOADDRS },
{ X86::TILELOADDT1_EVEX, X86::TILELOADDT1 },
{ X86::TILELOADD_EVEX, X86::TILELOADD },
{ X86::TILESTORED_EVEX, X86::TILESTORED },
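
The new table entries above are what allow the *_EVEX AMX opcodes to be compressed back to their VEX encodings when no extended GPR is actually used, which is why the codegen tests in this patch print "EVEX TO VEX Compression" comments. The following is a minimal, self-contained sketch of how such a sorted opcode table is typically consulted; it is not the in-tree compression pass, and the field names and opcode numbers are placeholders for illustration only.

    // Illustrative lookup over a sorted (EVEX -> VEX) opcode table, in the
    // spirit of X86CompressEVEXTable above; values are stand-ins.
    #include <algorithm>
    #include <cstdint>
    #include <cstdio>
    #include <iterator>

    struct TableEntry {              // field names are illustrative
      uint16_t OldOpc, NewOpc;
      bool operator<(const TableEntry &RHS) const { return OldOpc < RHS.OldOpc; }
    };

    static const TableEntry CompressTable[] = {
        {300 /* e.g. T2RPNTLVWZ0_EVEX */, 30 /* T2RPNTLVWZ0 */},
        {301 /* e.g. TILELOADDRS_EVEX  */, 31 /* TILELOADDRS  */},
    };

    // Returns the compressed (VEX) opcode, or the input if there is no entry.
    static unsigned compressEVEX(unsigned Opc) {
      const TableEntry Key{static_cast<uint16_t>(Opc), 0};
      const auto *I = std::lower_bound(std::begin(CompressTable),
                                       std::end(CompressTable), Key);
      return (I != std::end(CompressTable) && I->OldOpc == Opc) ? I->NewOpc : Opc;
    }

    int main() {
      printf("%u %u\n", compressEVEX(300), compressEVEX(999)); // "30 999"
    }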
>From 868a7bbafae8bc0fc71157b016efa8ce7a599641 Mon Sep 17 00:00:00 2001
From: "Wang, Phoebe" <phoebe.wang at intel.com>
Date: Fri, 17 Jan 2025 15:56:52 +0800
Subject: [PATCH 2/2] Address review comment
---
llvm/lib/Target/X86/X86InstrAMX.td | 115 +++----
llvm/test/CodeGen/X86/amx_movrs_intrinsics.ll | 170 +++++------
.../X86/amx_movrs_transpose_intrinsics.ll | 56 ++--
.../CodeGen/X86/amx_transpose_intrinsics.ll | 284 +++++++++---------
4 files changed, 289 insertions(+), 336 deletions(-)
diff --git a/llvm/lib/Target/X86/X86InstrAMX.td b/llvm/lib/Target/X86/X86InstrAMX.td
index b5d99f52f15c23..85046228bc8c57 100644
--- a/llvm/lib/Target/X86/X86InstrAMX.td
+++ b/llvm/lib/Target/X86/X86InstrAMX.td
@@ -345,24 +345,31 @@ let Predicates = [HasAMXTILE, In64BitMode], isPseudo = true, SchedRW = [WriteSys
def PTILEPAIRLOAD : PseudoI<(outs TILEPair:$dst), (ins opaquemem:$src), []>;
}
-let Predicates = [HasAMXTRANSPOSE, In64BitMode] in {
- let SchedRW = [WriteSystem] in {
- def T2RPNTLVWZ0 : I<0x6e, MRMSrcMemFSIB, (outs TILEPair:$dst),
- (ins sibmem:$src), "t2rpntlvwz0\t{$src, $dst|$dst, $src}",
- []>, VEX, T8, PS;
+multiclass T2RPNTLVW_Base<bits<8> op1, bits<8> op2, string rs, string suffix> {
+ def Z0#rs#suffix : I<op1, MRMSrcMemFSIB, (outs TILEPair:$dst), (ins sibmem:$src),
+ "t2rpntlvwz0" #!tolower(rs)# "\t{$src, $dst|$dst, $src}", []>, PS;
+ def Z0#rs#T1#suffix : I<op2, MRMSrcMemFSIB, (outs TILEPair:$dst), (ins sibmem:$src),
+ "t2rpntlvwz0" #!tolower(rs)# "t1\t{$src, $dst|$dst, $src}", []>, PS;
+ def Z1#rs#suffix : I<op1, MRMSrcMemFSIB, (outs TILEPair:$dst), (ins sibmem:$src),
+ "t2rpntlvwz1" #!tolower(rs)# "\t{$src, $dst|$dst, $src}", []>, PD;
+ def Z1#rs#T1#suffix : I<op2, MRMSrcMemFSIB, (outs TILEPair:$dst), (ins sibmem:$src),
+ "t2rpntlvwz1" #!tolower(rs)# "t1\t{$src, $dst|$dst, $src}", []>, PD;
+}
- def T2RPNTLVWZ0T1 : I<0x6f, MRMSrcMemFSIB, (outs TILEPair:$dst),
- (ins sibmem:$src), "t2rpntlvwz0t1\t{$src, $dst|$dst, $src}",
- []>, VEX, T8, PS;
+let Predicates = [HasAMXTRANSPOSE, In64BitMode], SchedRW = [WriteSystem] in
+ defm T2RPNTLVW : T2RPNTLVW_Base<0x6e, 0x6f, "", "">, T8, VEX;
- def T2RPNTLVWZ1 : I<0x6e, MRMSrcMemFSIB, (outs TILEPair:$dst),
- (ins sibmem:$src), "t2rpntlvwz1\t{$src, $dst|$dst, $src}",
- []>, VEX, T8, PD;
+let Predicates = [HasAMXTRANSPOSE, HasEGPR, In64BitMode], SchedRW = [WriteSystem] in
+ defm T2RPNTLVW : T2RPNTLVW_Base<0x6e, 0x6f, "", "_EVEX">, T8, EVEX, NoCD8;
- def T2RPNTLVWZ1T1 : I<0x6f, MRMSrcMemFSIB, (outs TILEPair:$dst),
- (ins sibmem:$src), "t2rpntlvwz1t1\t{$src, $dst|$dst, $src}",
- []>, VEX, T8, PD;
+let Predicates = [HasAMXMOVRS, HasAMXTRANSPOSE, In64BitMode], SchedRW = [WriteSystem] in
+ defm T2RPNTLVW : T2RPNTLVW_Base<0xf8, 0xf9, "RS", "">, T_MAP5, VEX;
+let Predicates = [HasAMXMOVRS, HasAMXTRANSPOSE, HasEGPR, In64BitMode], SchedRW = [WriteSystem] in
+ defm T2RPNTLVW : T2RPNTLVW_Base<0xf8, 0xf9, "RS", "_EVEX">, T_MAP5, EVEX, NoCD8;
+
+let Predicates = [HasAMXTRANSPOSE, In64BitMode] in {
+ let SchedRW = [WriteSystem] in {
def TTRANSPOSED : I<0x5f, MRMSrcReg, (outs TILE:$dst), (ins TILE:$src),
"ttransposed\t{$src, $dst|$dst, $src}", []>, VEX, T8, XS;
let isPseudo = true in {
@@ -491,22 +498,6 @@ let Predicates = [HasAMXCOMPLEX, HasAMXTRANSPOSE, In64BitMode], SchedRW = [Write
}
let Predicates = [HasAMXMOVRS, HasAMXTRANSPOSE, In64BitMode], SchedRW = [WriteSystem] in {
- def T2RPNTLVWZ0RS : I<0xf8, MRMSrcMemFSIB, (outs TILEPair:$dst),
- (ins sibmem:$src1),
- "t2rpntlvwz0rs\t{$src1, $dst|$dst, $src1}",
- []>, VEX, T_MAP5;
- def T2RPNTLVWZ0RST1 : I<0xf9, MRMSrcMemFSIB, (outs TILEPair:$dst),
- (ins sibmem:$src1),
- "t2rpntlvwz0rst1\t{$src1, $dst|$dst, $src1}",
- []>, VEX, T_MAP5;
- def T2RPNTLVWZ1RS : I<0xf8, MRMSrcMemFSIB, (outs TILEPair:$dst),
- (ins sibmem:$src1),
- "t2rpntlvwz1rs\t{$src1, $dst|$dst, $src1}",
- []>, VEX, T_MAP5, PD;
- def T2RPNTLVWZ1RST1 : I<0xf9, MRMSrcMemFSIB, (outs TILEPair:$dst),
- (ins sibmem:$src1),
- "t2rpntlvwz1rst1\t{$src1, $dst|$dst, $src1}",
- []>, VEX, T_MAP5, PD;
let isPseudo = true in {
def PT2RPNTLVWZ0RSV : PseudoI<(outs TILEPair:$dst),
(ins GR16:$src1, GR16:$src2, GR16:$src3, opaquemem:$src4),
@@ -529,16 +520,20 @@ let Predicates = [HasAMXMOVRS, HasAMXTRANSPOSE, In64BitMode], SchedRW = [WriteSy
}
} // HasAMXMOVRS, HasAMXTRANSPOSE
-let Predicates = [HasAMXMOVRS, In64BitMode], SchedRW = [WriteSystem] in {
- def TILELOADDRS : I<0x4a, MRMSrcMemFSIB, (outs TILE:$dst),
- (ins sibmem:$src1),
- "tileloaddrs\t{$src1, $dst|$dst, $src1}",
- []>, VEX, T8, XD;
- def TILELOADDRST1 : I<0x4a, MRMSrcMemFSIB, (outs TILE:$dst),
- (ins sibmem:$src1),
- "tileloaddrst1\t{$src1, $dst|$dst, $src1}",
- []>, VEX, T8, PD;
+multiclass TILELOADDRS_Base<string suffix> {
+ def suffix : I<0x4a, MRMSrcMemFSIB, (outs TILE:$dst), (ins sibmem:$src1),
+ "tileloaddrs\t{$src1, $dst|$dst, $src1}", []>, T8, XD;
+ def T1#suffix : I<0x4a, MRMSrcMemFSIB, (outs TILE:$dst), (ins sibmem:$src1),
+ "tileloaddrst1\t{$src1, $dst|$dst, $src1}", []>, T8, PD;
+}
+
+let Predicates = [HasAMXMOVRS, In64BitMode], SchedRW = [WriteSystem] in
+ defm TILELOADDRS : TILELOADDRS_Base<"">, VEX;
+let Predicates = [HasAMXMOVRS, HasEGPR, In64BitMode], SchedRW = [WriteSystem] in
+ defm TILELOADDRS : TILELOADDRS_Base<"_EVEX">, EVEX, NoCD8;
+
+let Predicates = [HasAMXMOVRS, In64BitMode], SchedRW = [WriteSystem] in {
let isPseudo = true, mayLoad = 1 in {
def PTILELOADDRSV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
GR16:$src2,
@@ -554,48 +549,6 @@ let Predicates = [HasAMXMOVRS, In64BitMode], SchedRW = [WriteSystem] in {
}
} // HasAMXMOVRS, In64BitMode
-let Predicates = [HasAMXTRANSPOSE, HasEGPR, In64BitMode], SchedRW = [WriteSystem] in {
- def T2RPNTLVWZ0_EVEX : I<0x6e, MRMSrcMemFSIB, (outs TILEPair:$dst),
- (ins sibmem:$src), "t2rpntlvwz0\t{$src, $dst|$dst, $src}",
- []>, EVEX, NoCD8, T8, PS;
-
- def T2RPNTLVWZ0T1_EVEX : I<0x6f, MRMSrcMemFSIB, (outs TILEPair:$dst),
- (ins sibmem:$src), "t2rpntlvwz0t1\t{$src, $dst|$dst, $src}",
- []>, EVEX, NoCD8, T8, PS;
-
- def T2RPNTLVWZ1_EVEX : I<0x6e, MRMSrcMemFSIB, (outs TILEPair:$dst),
- (ins sibmem:$src), "t2rpntlvwz1\t{$src, $dst|$dst, $src}",
- []>, EVEX, NoCD8, T8, PD;
-
- def T2RPNTLVWZ1T1_EVEX : I<0x6f, MRMSrcMemFSIB, (outs TILEPair:$dst),
- (ins sibmem:$src), "t2rpntlvwz1t1\t{$src, $dst|$dst, $src}",
- []>, EVEX, NoCD8, T8, PD;
-} // HasAMXTRANSPOSE, HasEGPR, In64BitMode
-
-let Predicates = [HasAMXMOVRS, HasAMXTRANSPOSE, HasEGPR, In64BitMode], SchedRW = [WriteSystem] in {
- def T2RPNTLVWZ0RS_EVEX : I<0xf8, MRMSrcMemFSIB, (outs TILEPair:$dst),
- (ins sibmem:$src1), "t2rpntlvwz0rs\t{$src1, $dst|$dst, $src1}",
- []>, EVEX, NoCD8, T_MAP5;
- def T2RPNTLVWZ0RST1_EVEX : I<0xf9, MRMSrcMemFSIB, (outs TILEPair:$dst),
- (ins sibmem:$src1), "t2rpntlvwz0rst1\t{$src1, $dst|$dst, $src1}",
- []>, EVEX, NoCD8, T_MAP5;
- def T2RPNTLVWZ1RS_EVEX : I<0xf8, MRMSrcMemFSIB, (outs TILEPair:$dst),
- (ins sibmem:$src1), "t2rpntlvwz1rs\t{$src1, $dst|$dst, $src1}",
- []>, EVEX, NoCD8, T_MAP5, PD;
- def T2RPNTLVWZ1RST1_EVEX : I<0xf9, MRMSrcMemFSIB, (outs TILEPair:$dst),
- (ins sibmem:$src1), "t2rpntlvwz1rst1\t{$src1, $dst|$dst, $src1}",
- []>, EVEX, NoCD8, T_MAP5, PD;
-} // HasAMXMOVRS, HasAMXTRANSPOSE, HasEGPR, In64BitMode
-
-let Predicates = [HasAMXMOVRS, HasEGPR, In64BitMode], SchedRW = [WriteSystem] in {
- def TILELOADDRS_EVEX : I<0x4a, MRMSrcMemFSIB, (outs TILE:$dst),
- (ins sibmem:$src1), "tileloaddrs\t{$src1, $dst|$dst, $src1}",
- []>, EVEX, NoCD8, T8, XD;
- def TILELOADDRST1_EVEX : I<0x4a, MRMSrcMemFSIB, (outs TILE:$dst),
- (ins sibmem:$src1), "tileloaddrst1\t{$src1, $dst|$dst, $src1}",
- []>, EVEX, NoCD8, T8, PD;
-} // HasAMXMOVRS, HasEGPR, In64BitMode
-
multiclass m_tcvtrowd2ps {
let Predicates = [HasAMXAVX512, HasAVX10_2_512, In64BitMode] in {
let SchedRW = [WriteSystem] in {
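
For readers unfamiliar with TableGen's paste operator, the record-name composition used by T2RPNTLVW_Base and TILELOADDRS_Base above can be reproduced with a minimal, self-contained sketch. Everything below (the Inst class, the T2RPNTLVW_Demo multiclass, the demo.td file name) is hypothetical illustration, not part of this patch, and can be run standalone with llvm-tblgen:

  // demo.td -- shows how def names are built from the defm name, the literal
  // Z0/Z1/T1 tokens, and the rs/suffix multiclass parameters.
  class Inst<string asm> {
    string AsmString = asm;
  }

  multiclass T2RPNTLVW_Demo<string rs, string suffix> {
    def Z0#rs#suffix    : Inst<"t2rpntlvwz0" #!tolower(rs)>;
    def Z0#rs#T1#suffix : Inst<"t2rpntlvwz0" #!tolower(rs)# "t1">;
    def Z1#rs#suffix    : Inst<"t2rpntlvwz1" #!tolower(rs)>;
    def Z1#rs#T1#suffix : Inst<"t2rpntlvwz1" #!tolower(rs)# "t1">;
  }

  // Expands to T2RPNTLVWZ0, T2RPNTLVWZ0T1, T2RPNTLVWZ1, T2RPNTLVWZ1T1.
  defm T2RPNTLVW : T2RPNTLVW_Demo<"", "">;
  // Expands to T2RPNTLVWZ0RS_EVEX, T2RPNTLVWZ0RST1_EVEX, T2RPNTLVWZ1RS_EVEX,
  // T2RPNTLVWZ1RST1_EVEX.
  defm T2RPNTLVW : T2RPNTLVW_Demo<"RS", "_EVEX">;

Running `llvm-tblgen demo.td` dumps the expanded records, which is a quick way to confirm that the VEX and _EVEX variants end up with the names expected by the X86CompressEVEXTable entries added earlier in the patch.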
diff --git a/llvm/test/CodeGen/X86/amx_movrs_intrinsics.ll b/llvm/test/CodeGen/X86/amx_movrs_intrinsics.ll
index 67688326c17500..1b93ae029f27b9 100755
--- a/llvm/test/CodeGen/X86/amx_movrs_intrinsics.ll
+++ b/llvm/test/CodeGen/X86/amx_movrs_intrinsics.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -O0 -mtriple=x86_64-unknown-unknown -mattr=+amx-tile,+amx-movrs | FileCheck %s
-; RUN: llc < %s -O0 -mtriple=x86_64-unknown-unknown -mattr=+amx-tile,+amx-movrs,+egpr --show-mc-encoding | FileCheck %s --check-prefix=APXF
+; RUN: llc < %s -O0 -mtriple=x86_64-unknown-unknown -mattr=+amx-tile,+amx-movrs,+egpr --show-mc-encoding | FileCheck %s --check-prefix=EGPR
define void @test_amx_internal(i16 %m, i16 %n, ptr %buf, i64 %s) {
; CHECK-LABEL: test_amx_internal:
@@ -37,43 +37,43 @@ define void @test_amx_internal(i16 %m, i16 %n, ptr %buf, i64 %s) {
; CHECK-NEXT: tilerelease
; CHECK-NEXT: retq
;
-; APXF-LABEL: test_amx_internal:
-; APXF: # %bb.0: # %entry
-; APXF-NEXT: pushq %rbp # encoding: [0x55]
-; APXF-NEXT: .cfi_def_cfa_offset 16
-; APXF-NEXT: .cfi_offset %rbp, -16
-; APXF-NEXT: movq %rsp, %rbp # encoding: [0x48,0x89,0xe5]
-; APXF-NEXT: .cfi_def_cfa_register %rbp
-; APXF-NEXT: andq $-1024, %rsp # encoding: [0x48,0x81,0xe4,0x00,0xfc,0xff,0xff]
-; APXF-NEXT: # imm = 0xFC00
-; APXF-NEXT: subq $3072, %rsp # encoding: [0x48,0x81,0xec,0x00,0x0c,0x00,0x00]
-; APXF-NEXT: # imm = 0xC00
-; APXF-NEXT: xorps %xmm0, %xmm0 # encoding: [0x0f,0x57,0xc0]
-; APXF-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xc0,0x03,0x00,0x00]
-; APXF-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xd0,0x03,0x00,0x00]
-; APXF-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xe0,0x03,0x00,0x00]
-; APXF-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xf0,0x03,0x00,0x00]
-; APXF-NEXT: movb $1, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x84,0x24,0xc0,0x03,0x00,0x00,0x01]
-; APXF-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; APXF-NEXT: # encoding: [0x48,0x89,0x8c,0x24,0xb8,0x03,0x00,0x00]
-; APXF-NEXT: movl %esi, %eax # encoding: [0x89,0xf0]
-; APXF-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
-; APXF-NEXT: # encoding: [0x48,0x8b,0xb4,0x24,0xb8,0x03,0x00,0x00]
-; APXF-NEXT: movw %ax, %cx # encoding: [0x66,0x89,0xc1]
-; APXF-NEXT: movw %di, %ax # encoding: [0x66,0x89,0xf8]
-; APXF-NEXT: # implicit-def: $al
-; APXF-NEXT: movb %al, {{[0-9]+}}(%rsp) # encoding: [0x88,0x84,0x24,0xf0,0x03,0x00,0x00]
-; APXF-NEXT: movw %cx, {{[0-9]+}}(%rsp) # encoding: [0x66,0x89,0x8c,0x24,0xd0,0x03,0x00,0x00]
-; APXF-NEXT: ldtilecfg {{[0-9]+}}(%rsp) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x49,0x84,0x24,0xc0,0x03,0x00,0x00]
-; APXF-NEXT: tileloaddrs (%rdx,%rsi), %tmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7b,0x4a,0x04,0x32]
-; APXF-NEXT: movl $64, %esi # encoding: [0xbe,0x40,0x00,0x00,0x00]
-; APXF-NEXT: leaq {{[0-9]+}}(%rsp), %rdx # encoding: [0x48,0x8d,0x94,0x24,0x00,0x04,0x00,0x00]
-; APXF-NEXT: tilestored %tmm0, (%rdx,%rsi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x04,0x32]
-; APXF-NEXT: movq %rbp, %rsp # encoding: [0x48,0x89,0xec]
-; APXF-NEXT: popq %rbp # encoding: [0x5d]
-; APXF-NEXT: .cfi_def_cfa %rsp, 8
-; APXF-NEXT: tilerelease # encoding: [0xc4,0xe2,0x78,0x49,0xc0]
-; APXF-NEXT: retq # encoding: [0xc3]
+; EGPR-LABEL: test_amx_internal:
+; EGPR: # %bb.0: # %entry
+; EGPR-NEXT: pushq %rbp # encoding: [0x55]
+; EGPR-NEXT: .cfi_def_cfa_offset 16
+; EGPR-NEXT: .cfi_offset %rbp, -16
+; EGPR-NEXT: movq %rsp, %rbp # encoding: [0x48,0x89,0xe5]
+; EGPR-NEXT: .cfi_def_cfa_register %rbp
+; EGPR-NEXT: andq $-1024, %rsp # encoding: [0x48,0x81,0xe4,0x00,0xfc,0xff,0xff]
+; EGPR-NEXT: # imm = 0xFC00
+; EGPR-NEXT: subq $3072, %rsp # encoding: [0x48,0x81,0xec,0x00,0x0c,0x00,0x00]
+; EGPR-NEXT: # imm = 0xC00
+; EGPR-NEXT: xorps %xmm0, %xmm0 # encoding: [0x0f,0x57,0xc0]
+; EGPR-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xc0,0x03,0x00,0x00]
+; EGPR-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xd0,0x03,0x00,0x00]
+; EGPR-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xe0,0x03,0x00,0x00]
+; EGPR-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xf0,0x03,0x00,0x00]
+; EGPR-NEXT: movb $1, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x84,0x24,0xc0,0x03,0x00,0x00,0x01]
+; EGPR-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; EGPR-NEXT: # encoding: [0x48,0x89,0x8c,0x24,0xb8,0x03,0x00,0x00]
+; EGPR-NEXT: movl %esi, %eax # encoding: [0x89,0xf0]
+; EGPR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; EGPR-NEXT: # encoding: [0x48,0x8b,0xb4,0x24,0xb8,0x03,0x00,0x00]
+; EGPR-NEXT: movw %ax, %cx # encoding: [0x66,0x89,0xc1]
+; EGPR-NEXT: movw %di, %ax # encoding: [0x66,0x89,0xf8]
+; EGPR-NEXT: # implicit-def: $al
+; EGPR-NEXT: movb %al, {{[0-9]+}}(%rsp) # encoding: [0x88,0x84,0x24,0xf0,0x03,0x00,0x00]
+; EGPR-NEXT: movw %cx, {{[0-9]+}}(%rsp) # encoding: [0x66,0x89,0x8c,0x24,0xd0,0x03,0x00,0x00]
+; EGPR-NEXT: ldtilecfg {{[0-9]+}}(%rsp) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x49,0x84,0x24,0xc0,0x03,0x00,0x00]
+; EGPR-NEXT: tileloaddrs (%rdx,%rsi), %tmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7b,0x4a,0x04,0x32]
+; EGPR-NEXT: movl $64, %esi # encoding: [0xbe,0x40,0x00,0x00,0x00]
+; EGPR-NEXT: leaq {{[0-9]+}}(%rsp), %rdx # encoding: [0x48,0x8d,0x94,0x24,0x00,0x04,0x00,0x00]
+; EGPR-NEXT: tilestored %tmm0, (%rdx,%rsi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x04,0x32]
+; EGPR-NEXT: movq %rbp, %rsp # encoding: [0x48,0x89,0xec]
+; EGPR-NEXT: popq %rbp # encoding: [0x5d]
+; EGPR-NEXT: .cfi_def_cfa %rsp, 8
+; EGPR-NEXT: tilerelease # encoding: [0xc4,0xe2,0x78,0x49,0xc0]
+; EGPR-NEXT: retq # encoding: [0xc3]
entry:
%t1 = call x86_amx @llvm.x86.tileloaddrs64.internal(i16 %m, i16 %n, ptr %buf, i64 %s)
%t2 = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %t1)
@@ -88,11 +88,11 @@ define void @test_amx_old(i16 %m, i16 %n, ptr %buf) {
; CHECK-NEXT: tileloaddrs (%rdx,%rax), %tmm2
; CHECK-NEXT: retq
;
-; APXF-LABEL: test_amx_old:
-; APXF: # %bb.0: # %entry
-; APXF-NEXT: movl $32, %eax # encoding: [0xb8,0x20,0x00,0x00,0x00]
-; APXF-NEXT: tileloaddrs (%rdx,%rax), %tmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7b,0x4a,0x14,0x02]
-; APXF-NEXT: retq # encoding: [0xc3]
+; EGPR-LABEL: test_amx_old:
+; EGPR: # %bb.0: # %entry
+; EGPR-NEXT: movl $32, %eax # encoding: [0xb8,0x20,0x00,0x00,0x00]
+; EGPR-NEXT: tileloaddrs (%rdx,%rax), %tmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7b,0x4a,0x14,0x02]
+; EGPR-NEXT: retq # encoding: [0xc3]
entry:
call void @llvm.x86.tileloaddrs64(i8 2, ptr %buf, i64 32)
ret void
@@ -134,43 +134,43 @@ define void @test_amx_t1_internal(i16 %m, i16 %n, ptr %buf, i64 %s) {
; CHECK-NEXT: tilerelease
; CHECK-NEXT: retq
;
-; APXF-LABEL: test_amx_t1_internal:
-; APXF: # %bb.0: # %entry
-; APXF-NEXT: pushq %rbp # encoding: [0x55]
-; APXF-NEXT: .cfi_def_cfa_offset 16
-; APXF-NEXT: .cfi_offset %rbp, -16
-; APXF-NEXT: movq %rsp, %rbp # encoding: [0x48,0x89,0xe5]
-; APXF-NEXT: .cfi_def_cfa_register %rbp
-; APXF-NEXT: andq $-1024, %rsp # encoding: [0x48,0x81,0xe4,0x00,0xfc,0xff,0xff]
-; APXF-NEXT: # imm = 0xFC00
-; APXF-NEXT: subq $3072, %rsp # encoding: [0x48,0x81,0xec,0x00,0x0c,0x00,0x00]
-; APXF-NEXT: # imm = 0xC00
-; APXF-NEXT: xorps %xmm0, %xmm0 # encoding: [0x0f,0x57,0xc0]
-; APXF-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xc0,0x03,0x00,0x00]
-; APXF-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xd0,0x03,0x00,0x00]
-; APXF-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xe0,0x03,0x00,0x00]
-; APXF-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xf0,0x03,0x00,0x00]
-; APXF-NEXT: movb $1, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x84,0x24,0xc0,0x03,0x00,0x00,0x01]
-; APXF-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; APXF-NEXT: # encoding: [0x48,0x89,0x8c,0x24,0xb8,0x03,0x00,0x00]
-; APXF-NEXT: movl %esi, %eax # encoding: [0x89,0xf0]
-; APXF-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
-; APXF-NEXT: # encoding: [0x48,0x8b,0xb4,0x24,0xb8,0x03,0x00,0x00]
-; APXF-NEXT: movw %ax, %cx # encoding: [0x66,0x89,0xc1]
-; APXF-NEXT: movw %di, %ax # encoding: [0x66,0x89,0xf8]
-; APXF-NEXT: # implicit-def: $al
-; APXF-NEXT: movb %al, {{[0-9]+}}(%rsp) # encoding: [0x88,0x84,0x24,0xf0,0x03,0x00,0x00]
-; APXF-NEXT: movw %cx, {{[0-9]+}}(%rsp) # encoding: [0x66,0x89,0x8c,0x24,0xd0,0x03,0x00,0x00]
-; APXF-NEXT: ldtilecfg {{[0-9]+}}(%rsp) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x49,0x84,0x24,0xc0,0x03,0x00,0x00]
-; APXF-NEXT: tileloaddrst1 (%rdx,%rsi), %tmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x4a,0x04,0x32]
-; APXF-NEXT: movl $64, %esi # encoding: [0xbe,0x40,0x00,0x00,0x00]
-; APXF-NEXT: leaq {{[0-9]+}}(%rsp), %rdx # encoding: [0x48,0x8d,0x94,0x24,0x00,0x04,0x00,0x00]
-; APXF-NEXT: tilestored %tmm0, (%rdx,%rsi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x04,0x32]
-; APXF-NEXT: movq %rbp, %rsp # encoding: [0x48,0x89,0xec]
-; APXF-NEXT: popq %rbp # encoding: [0x5d]
-; APXF-NEXT: .cfi_def_cfa %rsp, 8
-; APXF-NEXT: tilerelease # encoding: [0xc4,0xe2,0x78,0x49,0xc0]
-; APXF-NEXT: retq # encoding: [0xc3]
+; EGPR-LABEL: test_amx_t1_internal:
+; EGPR: # %bb.0: # %entry
+; EGPR-NEXT: pushq %rbp # encoding: [0x55]
+; EGPR-NEXT: .cfi_def_cfa_offset 16
+; EGPR-NEXT: .cfi_offset %rbp, -16
+; EGPR-NEXT: movq %rsp, %rbp # encoding: [0x48,0x89,0xe5]
+; EGPR-NEXT: .cfi_def_cfa_register %rbp
+; EGPR-NEXT: andq $-1024, %rsp # encoding: [0x48,0x81,0xe4,0x00,0xfc,0xff,0xff]
+; EGPR-NEXT: # imm = 0xFC00
+; EGPR-NEXT: subq $3072, %rsp # encoding: [0x48,0x81,0xec,0x00,0x0c,0x00,0x00]
+; EGPR-NEXT: # imm = 0xC00
+; EGPR-NEXT: xorps %xmm0, %xmm0 # encoding: [0x0f,0x57,0xc0]
+; EGPR-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xc0,0x03,0x00,0x00]
+; EGPR-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xd0,0x03,0x00,0x00]
+; EGPR-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xe0,0x03,0x00,0x00]
+; EGPR-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xf0,0x03,0x00,0x00]
+; EGPR-NEXT: movb $1, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x84,0x24,0xc0,0x03,0x00,0x00,0x01]
+; EGPR-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; EGPR-NEXT: # encoding: [0x48,0x89,0x8c,0x24,0xb8,0x03,0x00,0x00]
+; EGPR-NEXT: movl %esi, %eax # encoding: [0x89,0xf0]
+; EGPR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; EGPR-NEXT: # encoding: [0x48,0x8b,0xb4,0x24,0xb8,0x03,0x00,0x00]
+; EGPR-NEXT: movw %ax, %cx # encoding: [0x66,0x89,0xc1]
+; EGPR-NEXT: movw %di, %ax # encoding: [0x66,0x89,0xf8]
+; EGPR-NEXT: # implicit-def: $al
+; EGPR-NEXT: movb %al, {{[0-9]+}}(%rsp) # encoding: [0x88,0x84,0x24,0xf0,0x03,0x00,0x00]
+; EGPR-NEXT: movw %cx, {{[0-9]+}}(%rsp) # encoding: [0x66,0x89,0x8c,0x24,0xd0,0x03,0x00,0x00]
+; EGPR-NEXT: ldtilecfg {{[0-9]+}}(%rsp) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x49,0x84,0x24,0xc0,0x03,0x00,0x00]
+; EGPR-NEXT: tileloaddrst1 (%rdx,%rsi), %tmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x4a,0x04,0x32]
+; EGPR-NEXT: movl $64, %esi # encoding: [0xbe,0x40,0x00,0x00,0x00]
+; EGPR-NEXT: leaq {{[0-9]+}}(%rsp), %rdx # encoding: [0x48,0x8d,0x94,0x24,0x00,0x04,0x00,0x00]
+; EGPR-NEXT: tilestored %tmm0, (%rdx,%rsi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x04,0x32]
+; EGPR-NEXT: movq %rbp, %rsp # encoding: [0x48,0x89,0xec]
+; EGPR-NEXT: popq %rbp # encoding: [0x5d]
+; EGPR-NEXT: .cfi_def_cfa %rsp, 8
+; EGPR-NEXT: tilerelease # encoding: [0xc4,0xe2,0x78,0x49,0xc0]
+; EGPR-NEXT: retq # encoding: [0xc3]
entry:
%t1 = call x86_amx @llvm.x86.tileloaddrst164.internal(i16 %m, i16 %n, ptr %buf, i64 %s)
%t2 = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %t1)
@@ -185,11 +185,11 @@ define void @test_amx_t1_old(i16 %m, i16 %n, ptr %buf) {
; CHECK-NEXT: tileloaddrst1 (%rdx,%rax), %tmm2
; CHECK-NEXT: retq
;
-; APXF-LABEL: test_amx_t1_old:
-; APXF: # %bb.0: # %entry
-; APXF-NEXT: movl $32, %eax # encoding: [0xb8,0x20,0x00,0x00,0x00]
-; APXF-NEXT: tileloaddrst1 (%rdx,%rax), %tmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x4a,0x14,0x02]
-; APXF-NEXT: retq # encoding: [0xc3]
+; EGPR-LABEL: test_amx_t1_old:
+; EGPR: # %bb.0: # %entry
+; EGPR-NEXT: movl $32, %eax # encoding: [0xb8,0x20,0x00,0x00,0x00]
+; EGPR-NEXT: tileloaddrst1 (%rdx,%rax), %tmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x4a,0x14,0x02]
+; EGPR-NEXT: retq # encoding: [0xc3]
entry:
call void @llvm.x86.tileloaddrst164(i8 2, ptr %buf, i64 32)
ret void
diff --git a/llvm/test/CodeGen/X86/amx_movrs_transpose_intrinsics.ll b/llvm/test/CodeGen/X86/amx_movrs_transpose_intrinsics.ll
index 0d5b85f2bb1088..1f5758c804b2ba 100755
--- a/llvm/test/CodeGen/X86/amx_movrs_transpose_intrinsics.ll
+++ b/llvm/test/CodeGen/X86/amx_movrs_transpose_intrinsics.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -O0 -mtriple=x86_64-unknown-unknown -mattr=+amx-transpose,+amx-movrs | FileCheck %s --check-prefixes=CHECK,O0
; RUN: llc < %s -O2 -mtriple=x86_64-unknown-unknown -mattr=+amx-transpose,+amx-movrs | FileCheck %s --check-prefixes=CHECK,O2
-; RUN: llc < %s -O2 -mtriple=x86_64-unknown-unknown -mattr=+amx-transpose,+amx-movrs,+egpr --show-mc-encoding | FileCheck %s --check-prefix=APXF
+; RUN: llc < %s -O2 -mtriple=x86_64-unknown-unknown -mattr=+amx-transpose,+amx-movrs,+egpr --show-mc-encoding | FileCheck %s --check-prefix=EGPR
define void @test_amx(i64 %stride, i8* %addr1) #0 {
; CHECK-LABEL: test_amx:
@@ -12,13 +12,13 @@ define void @test_amx(i64 %stride, i8* %addr1) #0 {
; CHECK-NEXT: t2rpntlvwz1rst1 (%rsi,%rdi), %tmm2
; CHECK-NEXT: retq
;
-; APXF-LABEL: test_amx:
-; APXF: # %bb.0:
-; APXF-NEXT: t2rpntlvwz0rs (%rsi,%rdi), %tmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x78,0xf8,0x04,0x3e]
-; APXF-NEXT: t2rpntlvwz0rst1 (%rsi,%rdi), %tmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x78,0xf9,0x14,0x3e]
-; APXF-NEXT: t2rpntlvwz1rs (%rsi,%rdi), %tmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x79,0xf8,0x04,0x3e]
-; APXF-NEXT: t2rpntlvwz1rst1 (%rsi,%rdi), %tmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x79,0xf9,0x14,0x3e]
-; APXF-NEXT: retq # encoding: [0xc3]
+; EGPR-LABEL: test_amx:
+; EGPR: # %bb.0:
+; EGPR-NEXT: t2rpntlvwz0rs (%rsi,%rdi), %tmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x78,0xf8,0x04,0x3e]
+; EGPR-NEXT: t2rpntlvwz0rst1 (%rsi,%rdi), %tmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x78,0xf9,0x14,0x3e]
+; EGPR-NEXT: t2rpntlvwz1rs (%rsi,%rdi), %tmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x79,0xf8,0x04,0x3e]
+; EGPR-NEXT: t2rpntlvwz1rst1 (%rsi,%rdi), %tmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x79,0xf9,0x14,0x3e]
+; EGPR-NEXT: retq # encoding: [0xc3]
call void @llvm.x86.t2rpntlvwz0rs(i8 1, i8* %addr1, i64 %stride)
call void @llvm.x86.t2rpntlvwz0rst1(i8 2, i8* %addr1, i64 %stride)
call void @llvm.x86.t2rpntlvwz1rs(i8 1, i8* %addr1, i64 %stride)
@@ -90,26 +90,26 @@ define void @test_amx2(i8* %base, i64 %stride) #0 {
; O2-NEXT: tilerelease
; O2-NEXT: retq
;
-; APXF-LABEL: test_amx2:
-; APXF: # %bb.0:
-; APXF-NEXT: xorps %xmm0, %xmm0 # encoding: [0x0f,0x57,0xc0]
-; APXF-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x44,0x24,0xc0]
-; APXF-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x44,0x24,0xd0]
-; APXF-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x44,0x24,0xe0]
-; APXF-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x44,0x24,0xf0]
-; APXF-NEXT: movb $1, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xc0,0x01]
-; APXF-NEXT: movb $8, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xf4,0x08]
-; APXF-NEXT: movw $8, -{{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0xd8,0x08,0x00]
-; APXF-NEXT: movb $8, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xf5,0x08]
-; APXF-NEXT: movw $8, -{{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0xda,0x08,0x00]
-; APXF-NEXT: ldtilecfg -{{[0-9]+}}(%rsp) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x49,0x44,0x24,0xc0]
-; APXF-NEXT: movw $8, %ax # encoding: [0x66,0xb8,0x08,0x00]
-; APXF-NEXT: t2rpntlvwz0rs (%rdi,%rsi), %tmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x78,0xf8,0x24,0x37]
-; APXF-NEXT: t2rpntlvwz0rst1 (%rdi,%rsi), %tmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x78,0xf9,0x24,0x37]
-; APXF-NEXT: t2rpntlvwz1rs (%rdi,%rsi), %tmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x79,0xf8,0x24,0x37]
-; APXF-NEXT: t2rpntlvwz1rst1 (%rdi,%rsi), %tmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x79,0xf9,0x24,0x37]
-; APXF-NEXT: tilerelease # encoding: [0xc4,0xe2,0x78,0x49,0xc0]
-; APXF-NEXT: retq # encoding: [0xc3]
+; EGPR-LABEL: test_amx2:
+; EGPR: # %bb.0:
+; EGPR-NEXT: xorps %xmm0, %xmm0 # encoding: [0x0f,0x57,0xc0]
+; EGPR-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x44,0x24,0xc0]
+; EGPR-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x44,0x24,0xd0]
+; EGPR-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x44,0x24,0xe0]
+; EGPR-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x44,0x24,0xf0]
+; EGPR-NEXT: movb $1, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xc0,0x01]
+; EGPR-NEXT: movb $8, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xf4,0x08]
+; EGPR-NEXT: movw $8, -{{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0xd8,0x08,0x00]
+; EGPR-NEXT: movb $8, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xf5,0x08]
+; EGPR-NEXT: movw $8, -{{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0xda,0x08,0x00]
+; EGPR-NEXT: ldtilecfg -{{[0-9]+}}(%rsp) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x49,0x44,0x24,0xc0]
+; EGPR-NEXT: movw $8, %ax # encoding: [0x66,0xb8,0x08,0x00]
+; EGPR-NEXT: t2rpntlvwz0rs (%rdi,%rsi), %tmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x78,0xf8,0x24,0x37]
+; EGPR-NEXT: t2rpntlvwz0rst1 (%rdi,%rsi), %tmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x78,0xf9,0x24,0x37]
+; EGPR-NEXT: t2rpntlvwz1rs (%rdi,%rsi), %tmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x79,0xf8,0x24,0x37]
+; EGPR-NEXT: t2rpntlvwz1rst1 (%rdi,%rsi), %tmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x79,0xf9,0x24,0x37]
+; EGPR-NEXT: tilerelease # encoding: [0xc4,0xe2,0x78,0x49,0xc0]
+; EGPR-NEXT: retq # encoding: [0xc3]
call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0rs.internal(i16 8, i16 8, i16 8, i8* %base, i64 %stride)
call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0rst1.internal(i16 8, i16 8, i16 8, i8* %base, i64 %stride)
call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz1rs.internal(i16 8, i16 8, i16 8, i8* %base, i64 %stride)
diff --git a/llvm/test/CodeGen/X86/amx_transpose_intrinsics.ll b/llvm/test/CodeGen/X86/amx_transpose_intrinsics.ll
index 7495f6c9af4577..4cfd97afe721bc 100644
--- a/llvm/test/CodeGen/X86/amx_transpose_intrinsics.ll
+++ b/llvm/test/CodeGen/X86/amx_transpose_intrinsics.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+amx-bf16,+amx-fp16,+amx-complex,+amx-transpose | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+amx-bf16,+amx-fp16,+amx-complex,+amx-transpose,+egpr --show-mc-encoding | FileCheck %s --check-prefix=APXF
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+amx-bf16,+amx-fp16,+amx-complex,+amx-transpose,+egpr --show-mc-encoding | FileCheck %s --check-prefix=EGPR
define void @test_amx(i32 %rv32, i64 %stride, i64 %rvalue, i8* %addr1, <4 x float> %xmm) #0 {
; CHECK-LABEL: test_amx:
@@ -18,20 +18,20 @@ define void @test_amx(i32 %rv32, i64 %stride, i64 %rvalue, i8* %addr1, <4 x floa
; CHECK-NEXT: tconjtfp16 %tmm2, %tmm1
; CHECK-NEXT: retq
;
-; APXF-LABEL: test_amx:
-; APXF: # %bb.0:
-; APXF-NEXT: t2rpntlvwz0 (%rcx,%rsi), %tmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x6e,0x04,0x31]
-; APXF-NEXT: t2rpntlvwz0t1 (%rcx,%rsi), %tmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x6f,0x14,0x31]
-; APXF-NEXT: t2rpntlvwz1 (%rcx,%rsi), %tmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x6e,0x04,0x31]
-; APXF-NEXT: t2rpntlvwz1t1 (%rcx,%rsi), %tmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x6f,0x14,0x31]
-; APXF-NEXT: ttransposed %tmm3, %tmm1 # encoding: [0xc4,0xe2,0x7a,0x5f,0xcb]
-; APXF-NEXT: ttdpbf16ps %tmm3, %tmm2, %tmm1 # encoding: [0xc4,0xe2,0x62,0x6c,0xca]
-; APXF-NEXT: ttdpfp16ps %tmm6, %tmm5, %tmm4 # encoding: [0xc4,0xe2,0x4b,0x6c,0xe5]
-; APXF-NEXT: ttcmmimfp16ps %tmm3, %tmm2, %tmm1 # encoding: [0xc4,0xe2,0x63,0x6b,0xca]
-; APXF-NEXT: ttcmmrlfp16ps %tmm3, %tmm2, %tmm1 # encoding: [0xc4,0xe2,0x62,0x6b,0xca]
-; APXF-NEXT: tconjtcmmimfp16ps %tmm3, %tmm2, %tmm1 # encoding: [0xc4,0xe2,0x60,0x6b,0xca]
-; APXF-NEXT: tconjtfp16 %tmm2, %tmm1 # encoding: [0xc4,0xe2,0x79,0x6b,0xca]
-; APXF-NEXT: retq # encoding: [0xc3]
+; EGPR-LABEL: test_amx:
+; EGPR: # %bb.0:
+; EGPR-NEXT: t2rpntlvwz0 (%rcx,%rsi), %tmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x6e,0x04,0x31]
+; EGPR-NEXT: t2rpntlvwz0t1 (%rcx,%rsi), %tmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x6f,0x14,0x31]
+; EGPR-NEXT: t2rpntlvwz1 (%rcx,%rsi), %tmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x6e,0x04,0x31]
+; EGPR-NEXT: t2rpntlvwz1t1 (%rcx,%rsi), %tmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x6f,0x14,0x31]
+; EGPR-NEXT: ttransposed %tmm3, %tmm1 # encoding: [0xc4,0xe2,0x7a,0x5f,0xcb]
+; EGPR-NEXT: ttdpbf16ps %tmm3, %tmm2, %tmm1 # encoding: [0xc4,0xe2,0x62,0x6c,0xca]
+; EGPR-NEXT: ttdpfp16ps %tmm6, %tmm5, %tmm4 # encoding: [0xc4,0xe2,0x4b,0x6c,0xe5]
+; EGPR-NEXT: ttcmmimfp16ps %tmm3, %tmm2, %tmm1 # encoding: [0xc4,0xe2,0x63,0x6b,0xca]
+; EGPR-NEXT: ttcmmrlfp16ps %tmm3, %tmm2, %tmm1 # encoding: [0xc4,0xe2,0x62,0x6b,0xca]
+; EGPR-NEXT: tconjtcmmimfp16ps %tmm3, %tmm2, %tmm1 # encoding: [0xc4,0xe2,0x60,0x6b,0xca]
+; EGPR-NEXT: tconjtfp16 %tmm2, %tmm1 # encoding: [0xc4,0xe2,0x79,0x6b,0xca]
+; EGPR-NEXT: retq # encoding: [0xc3]
call void @llvm.x86.t2rpntlvwz0(i8 1, i8* %addr1, i64 %stride)
call void @llvm.x86.t2rpntlvwz0t1(i8 2, i8* %addr1, i64 %stride)
call void @llvm.x86.t2rpntlvwz1(i8 1, i8* %addr1, i64 %stride)
@@ -95,45 +95,45 @@ define void @test_amx2(i8* %pointer, i8* %base, i64 %stride) #0 {
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
;
-; APXF-LABEL: test_amx2:
-; APXF: # %bb.0:
-; APXF-NEXT: pushq %rbp # encoding: [0x55]
-; APXF-NEXT: subq $2928, %rsp # encoding: [0x48,0x81,0xec,0x70,0x0b,0x00,0x00]
-; APXF-NEXT: # imm = 0xB70
-; APXF-NEXT: vxorps %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc0]
-; APXF-NEXT: vmovups %zmm0, {{[0-9]+}}(%rsp) # encoding: [0x62,0xf1,0x7c,0x48,0x11,0x44,0x24,0x0d]
-; APXF-NEXT: movb $1, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x84,0x24,0x40,0x03,0x00,0x00,0x01]
-; APXF-NEXT: movb $8, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x84,0x24,0x70,0x03,0x00,0x00,0x08]
-; APXF-NEXT: movw $8, {{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x84,0x24,0x50,0x03,0x00,0x00,0x08,0x00]
-; APXF-NEXT: movb $8, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x84,0x24,0x71,0x03,0x00,0x00,0x08]
-; APXF-NEXT: movw $8, {{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x84,0x24,0x52,0x03,0x00,0x00,0x08,0x00]
-; APXF-NEXT: movb $8, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x84,0x24,0x72,0x03,0x00,0x00,0x08]
-; APXF-NEXT: movw $8, {{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x84,0x24,0x54,0x03,0x00,0x00,0x08,0x00]
-; APXF-NEXT: movb $8, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x84,0x24,0x73,0x03,0x00,0x00,0x08]
-; APXF-NEXT: movw $8, {{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x84,0x24,0x56,0x03,0x00,0x00,0x08,0x00]
-; APXF-NEXT: ldtilecfg {{[0-9]+}}(%rsp) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x49,0x84,0x24,0x40,0x03,0x00,0x00]
-; APXF-NEXT: movw $8, %ax # encoding: [0x66,0xb8,0x08,0x00]
-; APXF-NEXT: tileloadd (%rsi,%rdx), %tmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7b,0x4b,0x04,0x16]
-; APXF-NEXT: tilezero %tmm1 # encoding: [0xc4,0xe2,0x7b,0x49,0xc8]
-; APXF-NEXT: tilezero %tmm2 # encoding: [0xc4,0xe2,0x7b,0x49,0xd0]
-; APXF-NEXT: ttdpbf16ps %tmm1, %tmm0, %tmm2 # encoding: [0xc4,0xe2,0x72,0x6c,0xd0]
-; APXF-NEXT: ttdpfp16ps %tmm1, %tmm0, %tmm2 # encoding: [0xc4,0xe2,0x73,0x6c,0xd0]
-; APXF-NEXT: ttcmmimfp16ps %tmm1, %tmm0, %tmm2 # encoding: [0xc4,0xe2,0x73,0x6b,0xd0]
-; APXF-NEXT: ttcmmrlfp16ps %tmm1, %tmm0, %tmm2 # encoding: [0xc4,0xe2,0x72,0x6b,0xd0]
-; APXF-NEXT: movabsq $64, %rbp # encoding: [0x48,0xbd,0x40,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
-; APXF-NEXT: tilestored %tmm2, 896(%rsp,%rbp) # 1024-byte Folded Spill
-; APXF-NEXT: # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x94,0x2c,0x80,0x03,0x00,0x00]
-; APXF-NEXT: tileloadd 896(%rsp,%rbp), %tmm3 # 1024-byte Folded Reload
-; APXF-NEXT: # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7b,0x4b,0x9c,0x2c,0x80,0x03,0x00,0x00]
-; APXF-NEXT: tconjtcmmimfp16ps %tmm1, %tmm0, %tmm3 # encoding: [0xc4,0xe2,0x70,0x6b,0xd8]
-; APXF-NEXT: tconjtfp16 %tmm3, %tmm0 # encoding: [0xc4,0xe2,0x79,0x6b,0xc3]
-; APXF-NEXT: tilestored %tmm2, (%rdi,%rdx) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x14,0x17]
-; APXF-NEXT: addq $2928, %rsp # encoding: [0x48,0x81,0xc4,0x70,0x0b,0x00,0x00]
-; APXF-NEXT: # imm = 0xB70
-; APXF-NEXT: popq %rbp # encoding: [0x5d]
-; APXF-NEXT: tilerelease # encoding: [0xc4,0xe2,0x78,0x49,0xc0]
-; APXF-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
-; APXF-NEXT: retq # encoding: [0xc3]
+; EGPR-LABEL: test_amx2:
+; EGPR: # %bb.0:
+; EGPR-NEXT: pushq %rbp # encoding: [0x55]
+; EGPR-NEXT: subq $2928, %rsp # encoding: [0x48,0x81,0xec,0x70,0x0b,0x00,0x00]
+; EGPR-NEXT: # imm = 0xB70
+; EGPR-NEXT: vxorps %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc0]
+; EGPR-NEXT: vmovups %zmm0, {{[0-9]+}}(%rsp) # encoding: [0x62,0xf1,0x7c,0x48,0x11,0x44,0x24,0x0d]
+; EGPR-NEXT: movb $1, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x84,0x24,0x40,0x03,0x00,0x00,0x01]
+; EGPR-NEXT: movb $8, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x84,0x24,0x70,0x03,0x00,0x00,0x08]
+; EGPR-NEXT: movw $8, {{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x84,0x24,0x50,0x03,0x00,0x00,0x08,0x00]
+; EGPR-NEXT: movb $8, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x84,0x24,0x71,0x03,0x00,0x00,0x08]
+; EGPR-NEXT: movw $8, {{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x84,0x24,0x52,0x03,0x00,0x00,0x08,0x00]
+; EGPR-NEXT: movb $8, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x84,0x24,0x72,0x03,0x00,0x00,0x08]
+; EGPR-NEXT: movw $8, {{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x84,0x24,0x54,0x03,0x00,0x00,0x08,0x00]
+; EGPR-NEXT: movb $8, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x84,0x24,0x73,0x03,0x00,0x00,0x08]
+; EGPR-NEXT: movw $8, {{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x84,0x24,0x56,0x03,0x00,0x00,0x08,0x00]
+; EGPR-NEXT: ldtilecfg {{[0-9]+}}(%rsp) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x49,0x84,0x24,0x40,0x03,0x00,0x00]
+; EGPR-NEXT: movw $8, %ax # encoding: [0x66,0xb8,0x08,0x00]
+; EGPR-NEXT: tileloadd (%rsi,%rdx), %tmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7b,0x4b,0x04,0x16]
+; EGPR-NEXT: tilezero %tmm1 # encoding: [0xc4,0xe2,0x7b,0x49,0xc8]
+; EGPR-NEXT: tilezero %tmm2 # encoding: [0xc4,0xe2,0x7b,0x49,0xd0]
+; EGPR-NEXT: ttdpbf16ps %tmm1, %tmm0, %tmm2 # encoding: [0xc4,0xe2,0x72,0x6c,0xd0]
+; EGPR-NEXT: ttdpfp16ps %tmm1, %tmm0, %tmm2 # encoding: [0xc4,0xe2,0x73,0x6c,0xd0]
+; EGPR-NEXT: ttcmmimfp16ps %tmm1, %tmm0, %tmm2 # encoding: [0xc4,0xe2,0x73,0x6b,0xd0]
+; EGPR-NEXT: ttcmmrlfp16ps %tmm1, %tmm0, %tmm2 # encoding: [0xc4,0xe2,0x72,0x6b,0xd0]
+; EGPR-NEXT: movabsq $64, %rbp # encoding: [0x48,0xbd,0x40,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
+; EGPR-NEXT: tilestored %tmm2, 896(%rsp,%rbp) # 1024-byte Folded Spill
+; EGPR-NEXT: # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x94,0x2c,0x80,0x03,0x00,0x00]
+; EGPR-NEXT: tileloadd 896(%rsp,%rbp), %tmm3 # 1024-byte Folded Reload
+; EGPR-NEXT: # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7b,0x4b,0x9c,0x2c,0x80,0x03,0x00,0x00]
+; EGPR-NEXT: tconjtcmmimfp16ps %tmm1, %tmm0, %tmm3 # encoding: [0xc4,0xe2,0x70,0x6b,0xd8]
+; EGPR-NEXT: tconjtfp16 %tmm3, %tmm0 # encoding: [0xc4,0xe2,0x79,0x6b,0xc3]
+; EGPR-NEXT: tilestored %tmm2, (%rdi,%rdx) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x14,0x17]
+; EGPR-NEXT: addq $2928, %rsp # encoding: [0x48,0x81,0xc4,0x70,0x0b,0x00,0x00]
+; EGPR-NEXT: # imm = 0xB70
+; EGPR-NEXT: popq %rbp # encoding: [0x5d]
+; EGPR-NEXT: tilerelease # encoding: [0xc4,0xe2,0x78,0x49,0xc0]
+; EGPR-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
+; EGPR-NEXT: retq # encoding: [0xc3]
%a = call x86_amx @llvm.x86.tileloadd64.internal(i16 8, i16 8, i8* %base, i64 %stride)
%b = call x86_amx @llvm.x86.tilezero.internal(i16 8, i16 8)
@@ -174,29 +174,29 @@ define void @test_amx3(i8* %pointer, i8* %base, i64 %stride) #0 {
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
;
-; APXF-LABEL: test_amx3:
-; APXF: # %bb.0:
-; APXF-NEXT: vxorps %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc0]
-; APXF-NEXT: vmovups %zmm0, -{{[0-9]+}}(%rsp) # encoding: [0x62,0xf1,0x7c,0x48,0x11,0x44,0x24,0xff]
-; APXF-NEXT: movb $1, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xc0,0x01]
-; APXF-NEXT: movb $8, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xf0,0x08]
-; APXF-NEXT: movw $8, -{{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0xd0,0x08,0x00]
-; APXF-NEXT: movb $8, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xf4,0x08]
-; APXF-NEXT: movw $8, -{{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0xd8,0x08,0x00]
-; APXF-NEXT: movb $0, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xf5,0x00]
-; APXF-NEXT: movw $0, -{{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0xda,0x00,0x00]
-; APXF-NEXT: ldtilecfg -{{[0-9]+}}(%rsp) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x49,0x44,0x24,0xc0]
-; APXF-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
-; APXF-NEXT: movw $8, %cx # encoding: [0x66,0xb9,0x08,0x00]
-; APXF-NEXT: t2rpntlvwz0 (%rsi,%rdx), %tmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x6e,0x24,0x16]
-; APXF-NEXT: t2rpntlvwz0t1 (%rsi,%rdx), %tmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x6f,0x24,0x16]
-; APXF-NEXT: t2rpntlvwz1 (%rsi,%rdx), %tmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x6e,0x24,0x16]
-; APXF-NEXT: t2rpntlvwz1t1 (%rsi,%rdx), %tmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x6f,0x24,0x16]
-; APXF-NEXT: ttransposed %tmm4, %tmm0 # encoding: [0xc4,0xe2,0x7a,0x5f,0xc4]
-; APXF-NEXT: tilestored %tmm0, (%rdi,%rdx) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x04,0x17]
-; APXF-NEXT: tilerelease # encoding: [0xc4,0xe2,0x78,0x49,0xc0]
-; APXF-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
-; APXF-NEXT: retq # encoding: [0xc3]
+; EGPR-LABEL: test_amx3:
+; EGPR: # %bb.0:
+; EGPR-NEXT: vxorps %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc0]
+; EGPR-NEXT: vmovups %zmm0, -{{[0-9]+}}(%rsp) # encoding: [0x62,0xf1,0x7c,0x48,0x11,0x44,0x24,0xff]
+; EGPR-NEXT: movb $1, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xc0,0x01]
+; EGPR-NEXT: movb $8, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xf0,0x08]
+; EGPR-NEXT: movw $8, -{{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0xd0,0x08,0x00]
+; EGPR-NEXT: movb $8, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xf4,0x08]
+; EGPR-NEXT: movw $8, -{{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0xd8,0x08,0x00]
+; EGPR-NEXT: movb $0, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xf5,0x00]
+; EGPR-NEXT: movw $0, -{{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0xda,0x00,0x00]
+; EGPR-NEXT: ldtilecfg -{{[0-9]+}}(%rsp) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x49,0x44,0x24,0xc0]
+; EGPR-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; EGPR-NEXT: movw $8, %cx # encoding: [0x66,0xb9,0x08,0x00]
+; EGPR-NEXT: t2rpntlvwz0 (%rsi,%rdx), %tmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x6e,0x24,0x16]
+; EGPR-NEXT: t2rpntlvwz0t1 (%rsi,%rdx), %tmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x6f,0x24,0x16]
+; EGPR-NEXT: t2rpntlvwz1 (%rsi,%rdx), %tmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x6e,0x24,0x16]
+; EGPR-NEXT: t2rpntlvwz1t1 (%rsi,%rdx), %tmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x6f,0x24,0x16]
+; EGPR-NEXT: ttransposed %tmm4, %tmm0 # encoding: [0xc4,0xe2,0x7a,0x5f,0xc4]
+; EGPR-NEXT: tilestored %tmm0, (%rdi,%rdx) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x04,0x17]
+; EGPR-NEXT: tilerelease # encoding: [0xc4,0xe2,0x78,0x49,0xc0]
+; EGPR-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
+; EGPR-NEXT: retq # encoding: [0xc3]
%1 = call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0.internal(i16 8, i16 8, i16 0, i8* %base, i64 %stride)
%2 = call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0t1.internal(i16 8, i16 8, i16 0, i8* %base, i64 %stride)
%3 = call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz1.internal(i16 8, i16 8, i16 0, i8* %base, i64 %stride)
@@ -260,71 +260,71 @@ define void @test_amx_spill(i8* %pointer, i8* %base, i64 %stride) #0 {
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
;
-; APXF-LABEL: test_amx_spill:
-; APXF: # %bb.0:
-; APXF-NEXT: subq $6088, %rsp # encoding: [0x48,0x81,0xec,0xc8,0x17,0x00,0x00]
-; APXF-NEXT: # imm = 0x17C8
-; APXF-NEXT: vxorps %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc0]
-; APXF-NEXT: vmovups %zmm0, -{{[0-9]+}}(%rsp) # encoding: [0x62,0xf1,0x7c,0x48,0x11,0x44,0x24,0xfe]
-; APXF-NEXT: movb $1, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0x80,0x01]
-; APXF-NEXT: movb $8, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xb0,0x08]
-; APXF-NEXT: movw $8, -{{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0x90,0x08,0x00]
-; APXF-NEXT: movb $8, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xb4,0x08]
-; APXF-NEXT: movw $8, -{{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0x98,0x08,0x00]
-; APXF-NEXT: movb $8, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xb5,0x08]
-; APXF-NEXT: movw $8, -{{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0x9a,0x08,0x00]
-; APXF-NEXT: movb $8, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xb6,0x08]
-; APXF-NEXT: movw $8, -{{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0x9c,0x08,0x00]
-; APXF-NEXT: movb $8, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xb7,0x08]
-; APXF-NEXT: movw $8, -{{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0x9e,0x08,0x00]
-; APXF-NEXT: ldtilecfg -{{[0-9]+}}(%rsp) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x49,0x44,0x24,0x80]
-; APXF-NEXT: movw $8, %ax # encoding: [0x66,0xb8,0x08,0x00]
-; APXF-NEXT: tileloadd (%rsi,%rdx), %tmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7b,0x4b,0x04,0x16]
-; APXF-NEXT: t2rpntlvwz0 (%rsi,%rdx), %tmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x6e,0x24,0x16]
-; APXF-NEXT: t2rpntlvwz0t1 (%rsi,%rdx), %tmm6 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x6f,0x34,0x16]
-; APXF-NEXT: movabsq $64, %rcx # encoding: [0x48,0xb9,0x40,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
-; APXF-NEXT: tilestored %tmm6, 4032(%rsp,%rcx) # 1024-byte Folded Spill
-; APXF-NEXT: # encoding: [0xc4,0xe2,0x7a,0x4b,0xb4,0x0c,0xc0,0x0f,0x00,0x00]
-; APXF-NEXT: tilestored %tmm7, 5056(%rsp,%rcx) # 1024-byte Folded Spill
-; APXF-NEXT: # encoding: [0xc4,0xe2,0x7a,0x4b,0xbc,0x0c,0xc0,0x13,0x00,0x00]
-; APXF-NEXT: t2rpntlvwz1 (%rsi,%rdx), %tmm6 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x6e,0x34,0x16]
-; APXF-NEXT: tilestored %tmm6, 1984(%rsp,%rcx) # 1024-byte Folded Spill
-; APXF-NEXT: # encoding: [0xc4,0xe2,0x7a,0x4b,0xb4,0x0c,0xc0,0x07,0x00,0x00]
-; APXF-NEXT: tilestored %tmm7, 3008(%rsp,%rcx) # 1024-byte Folded Spill
-; APXF-NEXT: # encoding: [0xc4,0xe2,0x7a,0x4b,0xbc,0x0c,0xc0,0x0b,0x00,0x00]
-; APXF-NEXT: t2rpntlvwz1t1 (%rsi,%rdx), %tmm6 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x6f,0x34,0x16]
-; APXF-NEXT: tilestored %tmm6, -64(%rsp,%rcx) # 1024-byte Folded Spill
-; APXF-NEXT: # encoding: [0xc4,0xe2,0x7a,0x4b,0x74,0x0c,0xc0]
-; APXF-NEXT: tilestored %tmm7, 960(%rsp,%rcx) # 1024-byte Folded Spill
-; APXF-NEXT: # encoding: [0xc4,0xe2,0x7a,0x4b,0xbc,0x0c,0xc0,0x03,0x00,0x00]
-; APXF-NEXT: t2rpntlvwz0 (%rsi,%rdx), %tmm6 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x6e,0x34,0x16]
-; APXF-NEXT: tilestored %tmm4, (%rsi,%rdx) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x24,0x16]
-; APXF-NEXT: tilestored %tmm5, (%rsi,%rdx) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x2c,0x16]
-; APXF-NEXT: tileloadd 4032(%rsp,%rcx), %tmm4 # 1024-byte Folded Reload
-; APXF-NEXT: # encoding: [0xc4,0xe2,0x7b,0x4b,0xa4,0x0c,0xc0,0x0f,0x00,0x00]
-; APXF-NEXT: tileloadd 5056(%rsp,%rcx), %tmm5 # 1024-byte Folded Reload
-; APXF-NEXT: # encoding: [0xc4,0xe2,0x7b,0x4b,0xac,0x0c,0xc0,0x13,0x00,0x00]
-; APXF-NEXT: tilestored %tmm4, (%rsi,%rdx) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x24,0x16]
-; APXF-NEXT: tilestored %tmm5, (%rsi,%rdx) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x2c,0x16]
-; APXF-NEXT: tileloadd 1984(%rsp,%rcx), %tmm4 # 1024-byte Folded Reload
-; APXF-NEXT: # encoding: [0xc4,0xe2,0x7b,0x4b,0xa4,0x0c,0xc0,0x07,0x00,0x00]
-; APXF-NEXT: tileloadd 3008(%rsp,%rcx), %tmm5 # 1024-byte Folded Reload
-; APXF-NEXT: # encoding: [0xc4,0xe2,0x7b,0x4b,0xac,0x0c,0xc0,0x0b,0x00,0x00]
-; APXF-NEXT: tilestored %tmm4, (%rsi,%rdx) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x24,0x16]
-; APXF-NEXT: tilestored %tmm5, (%rsi,%rdx) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x2c,0x16]
-; APXF-NEXT: tileloadd -64(%rsp,%rcx), %tmm4 # 1024-byte Folded Reload
-; APXF-NEXT: # encoding: [0xc4,0xe2,0x7b,0x4b,0x64,0x0c,0xc0]
-; APXF-NEXT: tileloadd 960(%rsp,%rcx), %tmm5 # 1024-byte Folded Reload
-; APXF-NEXT: # encoding: [0xc4,0xe2,0x7b,0x4b,0xac,0x0c,0xc0,0x03,0x00,0x00]
-; APXF-NEXT: tilestored %tmm4, (%rsi,%rdx) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x24,0x16]
-; APXF-NEXT: tilestored %tmm5, (%rsi,%rdx) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x2c,0x16]
-; APXF-NEXT: tilestored %tmm6, (%rsi,%rdx) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x34,0x16]
-; APXF-NEXT: tilestored %tmm7, (%rsi,%rdx) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x3c,0x16]
-; APXF-NEXT: addq $6088, %rsp # encoding: [0x48,0x81,0xc4,0xc8,0x17,0x00,0x00]
-; APXF-NEXT: # imm = 0x17C8
-; APXF-NEXT: tilerelease # encoding: [0xc4,0xe2,0x78,0x49,0xc0]
-; APXF-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
-; APXF-NEXT: retq # encoding: [0xc3]
+; EGPR-LABEL: test_amx_spill:
+; EGPR: # %bb.0:
+; EGPR-NEXT: subq $6088, %rsp # encoding: [0x48,0x81,0xec,0xc8,0x17,0x00,0x00]
+; EGPR-NEXT: # imm = 0x17C8
+; EGPR-NEXT: vxorps %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc0]
+; EGPR-NEXT: vmovups %zmm0, -{{[0-9]+}}(%rsp) # encoding: [0x62,0xf1,0x7c,0x48,0x11,0x44,0x24,0xfe]
+; EGPR-NEXT: movb $1, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0x80,0x01]
+; EGPR-NEXT: movb $8, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xb0,0x08]
+; EGPR-NEXT: movw $8, -{{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0x90,0x08,0x00]
+; EGPR-NEXT: movb $8, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xb4,0x08]
+; EGPR-NEXT: movw $8, -{{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0x98,0x08,0x00]
+; EGPR-NEXT: movb $8, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xb5,0x08]
+; EGPR-NEXT: movw $8, -{{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0x9a,0x08,0x00]
+; EGPR-NEXT: movb $8, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xb6,0x08]
+; EGPR-NEXT: movw $8, -{{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0x9c,0x08,0x00]
+; EGPR-NEXT: movb $8, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xb7,0x08]
+; EGPR-NEXT: movw $8, -{{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0x9e,0x08,0x00]
+; EGPR-NEXT: ldtilecfg -{{[0-9]+}}(%rsp) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x49,0x44,0x24,0x80]
+; EGPR-NEXT: movw $8, %ax # encoding: [0x66,0xb8,0x08,0x00]
+; EGPR-NEXT: tileloadd (%rsi,%rdx), %tmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7b,0x4b,0x04,0x16]
+; EGPR-NEXT: t2rpntlvwz0 (%rsi,%rdx), %tmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x6e,0x24,0x16]
+; EGPR-NEXT: t2rpntlvwz0t1 (%rsi,%rdx), %tmm6 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x6f,0x34,0x16]
+; EGPR-NEXT: movabsq $64, %rcx # encoding: [0x48,0xb9,0x40,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
+; EGPR-NEXT: tilestored %tmm6, 4032(%rsp,%rcx) # 1024-byte Folded Spill
+; EGPR-NEXT: # encoding: [0xc4,0xe2,0x7a,0x4b,0xb4,0x0c,0xc0,0x0f,0x00,0x00]
+; EGPR-NEXT: tilestored %tmm7, 5056(%rsp,%rcx) # 1024-byte Folded Spill
+; EGPR-NEXT: # encoding: [0xc4,0xe2,0x7a,0x4b,0xbc,0x0c,0xc0,0x13,0x00,0x00]
+; EGPR-NEXT: t2rpntlvwz1 (%rsi,%rdx), %tmm6 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x6e,0x34,0x16]
+; EGPR-NEXT: tilestored %tmm6, 1984(%rsp,%rcx) # 1024-byte Folded Spill
+; EGPR-NEXT: # encoding: [0xc4,0xe2,0x7a,0x4b,0xb4,0x0c,0xc0,0x07,0x00,0x00]
+; EGPR-NEXT: tilestored %tmm7, 3008(%rsp,%rcx) # 1024-byte Folded Spill
+; EGPR-NEXT: # encoding: [0xc4,0xe2,0x7a,0x4b,0xbc,0x0c,0xc0,0x0b,0x00,0x00]
+; EGPR-NEXT: t2rpntlvwz1t1 (%rsi,%rdx), %tmm6 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x6f,0x34,0x16]
+; EGPR-NEXT: tilestored %tmm6, -64(%rsp,%rcx) # 1024-byte Folded Spill
+; EGPR-NEXT: # encoding: [0xc4,0xe2,0x7a,0x4b,0x74,0x0c,0xc0]
+; EGPR-NEXT: tilestored %tmm7, 960(%rsp,%rcx) # 1024-byte Folded Spill
+; EGPR-NEXT: # encoding: [0xc4,0xe2,0x7a,0x4b,0xbc,0x0c,0xc0,0x03,0x00,0x00]
+; EGPR-NEXT: t2rpntlvwz0 (%rsi,%rdx), %tmm6 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x6e,0x34,0x16]
+; EGPR-NEXT: tilestored %tmm4, (%rsi,%rdx) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x24,0x16]
+; EGPR-NEXT: tilestored %tmm5, (%rsi,%rdx) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x2c,0x16]
+; EGPR-NEXT: tileloadd 4032(%rsp,%rcx), %tmm4 # 1024-byte Folded Reload
+; EGPR-NEXT: # encoding: [0xc4,0xe2,0x7b,0x4b,0xa4,0x0c,0xc0,0x0f,0x00,0x00]
+; EGPR-NEXT: tileloadd 5056(%rsp,%rcx), %tmm5 # 1024-byte Folded Reload
+; EGPR-NEXT: # encoding: [0xc4,0xe2,0x7b,0x4b,0xac,0x0c,0xc0,0x13,0x00,0x00]
+; EGPR-NEXT: tilestored %tmm4, (%rsi,%rdx) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x24,0x16]
+; EGPR-NEXT: tilestored %tmm5, (%rsi,%rdx) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x2c,0x16]
+; EGPR-NEXT: tileloadd 1984(%rsp,%rcx), %tmm4 # 1024-byte Folded Reload
+; EGPR-NEXT: # encoding: [0xc4,0xe2,0x7b,0x4b,0xa4,0x0c,0xc0,0x07,0x00,0x00]
+; EGPR-NEXT: tileloadd 3008(%rsp,%rcx), %tmm5 # 1024-byte Folded Reload
+; EGPR-NEXT: # encoding: [0xc4,0xe2,0x7b,0x4b,0xac,0x0c,0xc0,0x0b,0x00,0x00]
+; EGPR-NEXT: tilestored %tmm4, (%rsi,%rdx) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x24,0x16]
+; EGPR-NEXT: tilestored %tmm5, (%rsi,%rdx) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x2c,0x16]
+; EGPR-NEXT: tileloadd -64(%rsp,%rcx), %tmm4 # 1024-byte Folded Reload
+; EGPR-NEXT: # encoding: [0xc4,0xe2,0x7b,0x4b,0x64,0x0c,0xc0]
+; EGPR-NEXT: tileloadd 960(%rsp,%rcx), %tmm5 # 1024-byte Folded Reload
+; EGPR-NEXT: # encoding: [0xc4,0xe2,0x7b,0x4b,0xac,0x0c,0xc0,0x03,0x00,0x00]
+; EGPR-NEXT: tilestored %tmm4, (%rsi,%rdx) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x24,0x16]
+; EGPR-NEXT: tilestored %tmm5, (%rsi,%rdx) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x2c,0x16]
+; EGPR-NEXT: tilestored %tmm6, (%rsi,%rdx) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x34,0x16]
+; EGPR-NEXT: tilestored %tmm7, (%rsi,%rdx) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x3c,0x16]
+; EGPR-NEXT: addq $6088, %rsp # encoding: [0x48,0x81,0xc4,0xc8,0x17,0x00,0x00]
+; EGPR-NEXT: # imm = 0x17C8
+; EGPR-NEXT: tilerelease # encoding: [0xc4,0xe2,0x78,0x49,0xc0]
+; EGPR-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
+; EGPR-NEXT: retq # encoding: [0xc3]
%a = call x86_amx @llvm.x86.tileloadd64.internal(i16 8, i16 8, i8* %base, i64 %stride)
%b1 = call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0.internal(i16 8, i16 8, i16 8, i8* %base, i64 %stride)
%b2 = call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0t1.internal(i16 8, i16 8, i16 8, i8* %base, i64 %stride)