[llvm] [X86][APX] Support APX + AMX-MOVRS/AMX-TRANSPOSE (PR #123267)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 16 17:47:01 PST 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-x86
Author: Phoebe Wang (phoebewang)
Changes:
This patch adds APX (EGPR) support for the AMX-MOVRS and AMX-TRANSPOSE instructions: EVEX-promoted `_EVEX` variants are defined under the `HasEGPR` predicate in X86InstrAMX.td, and the pseudo expansion in X86ExpandPseudo.cpp / X86ISelLowering.cpp now picks them via `GET_EGPR_IF_ENABLED` when the subtarget has extended GPRs.

Ref.: https://cdrdv2.intel.com/v1/dl/getContent/784266
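
For context, here is a minimal standalone sketch of the opcode-selection pattern the patch applies throughout X86ExpandPseudo.cpp and X86ISelLowering.cpp: when the subtarget has APX extended GPRs (EGPR), the pseudo expands to the EVEX-promoted `_EVEX` variant so the memory operand may reference R16-R31; otherwise the existing VEX form is kept. The opcode values and the `X86SubtargetStub` type below are placeholders for illustration, not LLVM's real definitions.

```cpp
// Standalone illustration of the GET_EGPR_IF_ENABLED pattern from the patch.
// The opcode enumerators and the subtarget stub are hypothetical stand-ins,
// not LLVM's generated instruction enum or X86Subtarget.
#include <cassert>

namespace X86 {
enum Opcode {
  TILELOADDRS = 100,       // placeholder value
  TILELOADDRS_EVEX = 101,  // placeholder value
};
} // namespace X86

struct X86SubtargetStub {
  bool HasEGPR = false;
  bool hasEGPR() const { return HasEGPR; }
};

// Same shape as the macro used in the patch: pick the EVEX-promoted form when
// extended GPRs (APX) are available, otherwise keep the VEX encoding.
#define GET_EGPR_IF_ENABLED(OPC) (Subtarget.hasEGPR() ? OPC##_EVEX : OPC)

static unsigned selectTileLoadRS(const X86SubtargetStub &Subtarget) {
  return GET_EGPR_IF_ENABLED(X86::TILELOADDRS);
}

int main() {
  X86SubtargetStub NoAPX{false}, WithAPX{true};
  assert(selectTileLoadRS(NoAPX) == X86::TILELOADDRS);
  assert(selectTileLoadRS(WithAPX) == X86::TILELOADDRS_EVEX);
  return 0;
}
```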
---
Patch is 55.77 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/123267.diff
13 Files Affected:
- (modified) llvm/lib/Target/X86/X86ExpandPseudo.cpp (+10-10)
- (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+13-11)
- (modified) llvm/lib/Target/X86/X86InstrAMX.td (+47-5)
- (modified) llvm/test/CodeGen/X86/amx_movrs_intrinsics.ll (+89)
- (modified) llvm/test/CodeGen/X86/amx_movrs_transpose_intrinsics.ll (+30)
- (modified) llvm/test/CodeGen/X86/amx_transpose_intrinsics.ll (+146)
- (modified) llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-movrs.txt (+96)
- (modified) llvm/test/MC/Disassembler/X86/amx-transpose-att.txt (+48)
- (modified) llvm/test/MC/X86/AMX/x86-64-amx-movrs-att.s (+89-1)
- (modified) llvm/test/MC/X86/AMX/x86-64-amx-movrs-intel.s (+96)
- (modified) llvm/test/MC/X86/amx-transpose-att.s (+48)
- (modified) llvm/test/MC/X86/amx-transpose-intel.s (+48)
- (modified) llvm/test/TableGen/x86-instr-mapping.inc (+10)
``````````diff
diff --git a/llvm/lib/Target/X86/X86ExpandPseudo.cpp b/llvm/lib/Target/X86/X86ExpandPseudo.cpp
index fc8a0eaed140d0..7fbba7f05e0a5e 100644
--- a/llvm/lib/Target/X86/X86ExpandPseudo.cpp
+++ b/llvm/lib/Target/X86/X86ExpandPseudo.cpp
@@ -578,10 +578,10 @@ bool X86ExpandPseudo::expandMI(MachineBasicBlock &MBB,
unsigned Opc;
switch (Opcode) {
case X86::PTILELOADDRSV:
- Opc = X86::TILELOADDRS;
+ Opc = GET_EGPR_IF_ENABLED(X86::TILELOADDRS);
break;
case X86::PTILELOADDRST1V:
- Opc = X86::TILELOADDRST1;
+ Opc = GET_EGPR_IF_ENABLED(X86::TILELOADDRST1);
break;
case X86::PTILELOADDV:
Opc = GET_EGPR_IF_ENABLED(X86::TILELOADD);
@@ -737,28 +737,28 @@ bool X86ExpandPseudo::expandMI(MachineBasicBlock &MBB,
unsigned Opc;
switch (Opcode) {
case X86::PT2RPNTLVWZ0V:
- Opc = X86::T2RPNTLVWZ0;
+ Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0);
break;
case X86::PT2RPNTLVWZ0T1V:
- Opc = X86::T2RPNTLVWZ0T1;
+ Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0T1);
break;
case X86::PT2RPNTLVWZ1V:
- Opc = X86::T2RPNTLVWZ1;
+ Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1);
break;
case X86::PT2RPNTLVWZ1T1V:
- Opc = X86::T2RPNTLVWZ1T1;
+ Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1T1);
break;
case X86::PT2RPNTLVWZ0RSV:
- Opc = X86::T2RPNTLVWZ0RS;
+ Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0RS);
break;
case X86::PT2RPNTLVWZ0RST1V:
- Opc = X86::T2RPNTLVWZ0RST1;
+ Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0RST1);
break;
case X86::PT2RPNTLVWZ1RSV:
- Opc = X86::T2RPNTLVWZ1RS;
+ Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1RS);
break;
case X86::PT2RPNTLVWZ1RST1V:
- Opc = X86::T2RPNTLVWZ1RST1;
+ Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1RST1);
break;
default:
llvm_unreachable("Impossible Opcode!");
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 90e3e15b1fb46c..6d69665c17565a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -37800,14 +37800,14 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case X86::PTILESTORED:
Opc = GET_EGPR_IF_ENABLED(X86::TILESTORED);
break;
-#undef GET_EGPR_IF_ENABLED
case X86::PTILELOADDRS:
- Opc = X86::TILELOADDRS;
+ Opc = GET_EGPR_IF_ENABLED(X86::TILELOADDRS);
break;
case X86::PTILELOADDRST1:
- Opc = X86::TILELOADDRST1;
+ Opc = GET_EGPR_IF_ENABLED(X86::TILELOADDRST1);
break;
}
+#undef GET_EGPR_IF_ENABLED
MachineInstrBuilder MIB = BuildMI(*BB, MI, MIMD, TII->get(Opc));
unsigned CurOp = 0;
@@ -37838,34 +37838,36 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case X86::PT2RPNTLVWZ1RST1: {
const DebugLoc &DL = MI.getDebugLoc();
unsigned Opc;
+#define GET_EGPR_IF_ENABLED(OPC) (Subtarget.hasEGPR() ? OPC##_EVEX : OPC)
switch (MI.getOpcode()) {
default:
llvm_unreachable("Unexpected instruction!");
case X86::PT2RPNTLVWZ0:
- Opc = X86::T2RPNTLVWZ0;
+ Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0);
break;
case X86::PT2RPNTLVWZ0T1:
- Opc = X86::T2RPNTLVWZ0T1;
+ Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0T1);
break;
case X86::PT2RPNTLVWZ1:
- Opc = X86::T2RPNTLVWZ1;
+ Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1);
break;
case X86::PT2RPNTLVWZ1T1:
- Opc = X86::T2RPNTLVWZ1T1;
+ Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1T1);
break;
case X86::PT2RPNTLVWZ0RS:
- Opc = X86::T2RPNTLVWZ0RS;
+ Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0RS);
break;
case X86::PT2RPNTLVWZ0RST1:
- Opc = X86::T2RPNTLVWZ0RST1;
+ Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0RST1);
break;
case X86::PT2RPNTLVWZ1RS:
- Opc = X86::T2RPNTLVWZ1RS;
+ Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1RS);
break;
case X86::PT2RPNTLVWZ1RST1:
- Opc = X86::T2RPNTLVWZ1RST1;
+ Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1RST1);
break;
}
+#undef GET_EGPR_IF_ENABLED
MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, TII->get(Opc));
MIB.addReg(TMMImmToTMMPair(MI.getOperand(0).getImm()), RegState::Define);
diff --git a/llvm/lib/Target/X86/X86InstrAMX.td b/llvm/lib/Target/X86/X86InstrAMX.td
index a055ba91d3e171..b5d99f52f15c23 100644
--- a/llvm/lib/Target/X86/X86InstrAMX.td
+++ b/llvm/lib/Target/X86/X86InstrAMX.td
@@ -349,22 +349,22 @@ let Predicates = [HasAMXTRANSPOSE, In64BitMode] in {
let SchedRW = [WriteSystem] in {
def T2RPNTLVWZ0 : I<0x6e, MRMSrcMemFSIB, (outs TILEPair:$dst),
(ins sibmem:$src), "t2rpntlvwz0\t{$src, $dst|$dst, $src}",
- []>, VEX, WIG, T8,PS;
+ []>, VEX, T8, PS;
def T2RPNTLVWZ0T1 : I<0x6f, MRMSrcMemFSIB, (outs TILEPair:$dst),
(ins sibmem:$src), "t2rpntlvwz0t1\t{$src, $dst|$dst, $src}",
- []>, VEX, T8,PS;
+ []>, VEX, T8, PS;
def T2RPNTLVWZ1 : I<0x6e, MRMSrcMemFSIB, (outs TILEPair:$dst),
(ins sibmem:$src), "t2rpntlvwz1\t{$src, $dst|$dst, $src}",
- []>, VEX, T8,PD;
+ []>, VEX, T8, PD;
def T2RPNTLVWZ1T1 : I<0x6f, MRMSrcMemFSIB, (outs TILEPair:$dst),
(ins sibmem:$src), "t2rpntlvwz1t1\t{$src, $dst|$dst, $src}",
- []>, VEX, T8,PD;
+ []>, VEX, T8, PD;
def TTRANSPOSED : I<0x5f, MRMSrcReg, (outs TILE:$dst), (ins TILE:$src),
- "ttransposed\t{$src, $dst|$dst, $src}", []>, VEX, T8,XS;
+ "ttransposed\t{$src, $dst|$dst, $src}", []>, VEX, T8, XS;
let isPseudo = true in {
def PT2RPNTLVWZ0V : PseudoI<(outs TILEPair:$dst),
(ins GR16:$src1, GR16:$src2, GR16:$src3, opaquemem:$src4),
@@ -554,6 +554,48 @@ let Predicates = [HasAMXMOVRS, In64BitMode], SchedRW = [WriteSystem] in {
}
} // HasAMXMOVRS, In64BitMode
+let Predicates = [HasAMXTRANSPOSE, HasEGPR, In64BitMode], SchedRW = [WriteSystem] in {
+ def T2RPNTLVWZ0_EVEX : I<0x6e, MRMSrcMemFSIB, (outs TILEPair:$dst),
+ (ins sibmem:$src), "t2rpntlvwz0\t{$src, $dst|$dst, $src}",
+ []>, EVEX, NoCD8, T8, PS;
+
+ def T2RPNTLVWZ0T1_EVEX : I<0x6f, MRMSrcMemFSIB, (outs TILEPair:$dst),
+ (ins sibmem:$src), "t2rpntlvwz0t1\t{$src, $dst|$dst, $src}",
+ []>, EVEX, NoCD8, T8, PS;
+
+ def T2RPNTLVWZ1_EVEX : I<0x6e, MRMSrcMemFSIB, (outs TILEPair:$dst),
+ (ins sibmem:$src), "t2rpntlvwz1\t{$src, $dst|$dst, $src}",
+ []>, EVEX, NoCD8, T8, PD;
+
+ def T2RPNTLVWZ1T1_EVEX : I<0x6f, MRMSrcMemFSIB, (outs TILEPair:$dst),
+ (ins sibmem:$src), "t2rpntlvwz1t1\t{$src, $dst|$dst, $src}",
+ []>, EVEX, NoCD8, T8, PD;
+} // HasAMXTRANSPOSE, HasEGPR, In64BitMode
+
+let Predicates = [HasAMXMOVRS, HasAMXTRANSPOSE, HasEGPR, In64BitMode], SchedRW = [WriteSystem] in {
+ def T2RPNTLVWZ0RS_EVEX : I<0xf8, MRMSrcMemFSIB, (outs TILEPair:$dst),
+ (ins sibmem:$src1), "t2rpntlvwz0rs\t{$src1, $dst|$dst, $src1}",
+ []>, EVEX, NoCD8, T_MAP5;
+ def T2RPNTLVWZ0RST1_EVEX : I<0xf9, MRMSrcMemFSIB, (outs TILEPair:$dst),
+ (ins sibmem:$src1), "t2rpntlvwz0rst1\t{$src1, $dst|$dst, $src1}",
+ []>, EVEX, NoCD8, T_MAP5;
+ def T2RPNTLVWZ1RS_EVEX : I<0xf8, MRMSrcMemFSIB, (outs TILEPair:$dst),
+ (ins sibmem:$src1), "t2rpntlvwz1rs\t{$src1, $dst|$dst, $src1}",
+ []>, EVEX, NoCD8, T_MAP5, PD;
+ def T2RPNTLVWZ1RST1_EVEX : I<0xf9, MRMSrcMemFSIB, (outs TILEPair:$dst),
+ (ins sibmem:$src1), "t2rpntlvwz1rst1\t{$src1, $dst|$dst, $src1}",
+ []>, EVEX, NoCD8, T_MAP5, PD;
+} // HasAMXMOVRS, HasAMXTRANSPOSE, HasEGPR, In64BitMode
+
+let Predicates = [HasAMXMOVRS, HasEGPR, In64BitMode], SchedRW = [WriteSystem] in {
+ def TILELOADDRS_EVEX : I<0x4a, MRMSrcMemFSIB, (outs TILE:$dst),
+ (ins sibmem:$src1), "tileloaddrs\t{$src1, $dst|$dst, $src1}",
+ []>, EVEX, NoCD8, T8, XD;
+ def TILELOADDRST1_EVEX : I<0x4a, MRMSrcMemFSIB, (outs TILE:$dst),
+ (ins sibmem:$src1), "tileloaddrst1\t{$src1, $dst|$dst, $src1}",
+ []>, EVEX, NoCD8, T8, PD;
+} // HasAMXMOVRS, HasEGPR, In64BitMode
+
multiclass m_tcvtrowd2ps {
let Predicates = [HasAMXAVX512, HasAVX10_2_512, In64BitMode] in {
let SchedRW = [WriteSystem] in {
diff --git a/llvm/test/CodeGen/X86/amx_movrs_intrinsics.ll b/llvm/test/CodeGen/X86/amx_movrs_intrinsics.ll
index da212a1850964e..67688326c17500 100755
--- a/llvm/test/CodeGen/X86/amx_movrs_intrinsics.ll
+++ b/llvm/test/CodeGen/X86/amx_movrs_intrinsics.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -O0 -mtriple=x86_64-unknown-unknown -mattr=+amx-tile,+amx-movrs | FileCheck %s
+; RUN: llc < %s -O0 -mtriple=x86_64-unknown-unknown -mattr=+amx-tile,+amx-movrs,+egpr --show-mc-encoding | FileCheck %s --check-prefix=APXF
define void @test_amx_internal(i16 %m, i16 %n, ptr %buf, i64 %s) {
; CHECK-LABEL: test_amx_internal:
@@ -35,6 +36,44 @@ define void @test_amx_internal(i16 %m, i16 %n, ptr %buf, i64 %s) {
; CHECK-NEXT: .cfi_def_cfa %rsp, 8
; CHECK-NEXT: tilerelease
; CHECK-NEXT: retq
+;
+; APXF-LABEL: test_amx_internal:
+; APXF: # %bb.0: # %entry
+; APXF-NEXT: pushq %rbp # encoding: [0x55]
+; APXF-NEXT: .cfi_def_cfa_offset 16
+; APXF-NEXT: .cfi_offset %rbp, -16
+; APXF-NEXT: movq %rsp, %rbp # encoding: [0x48,0x89,0xe5]
+; APXF-NEXT: .cfi_def_cfa_register %rbp
+; APXF-NEXT: andq $-1024, %rsp # encoding: [0x48,0x81,0xe4,0x00,0xfc,0xff,0xff]
+; APXF-NEXT: # imm = 0xFC00
+; APXF-NEXT: subq $3072, %rsp # encoding: [0x48,0x81,0xec,0x00,0x0c,0x00,0x00]
+; APXF-NEXT: # imm = 0xC00
+; APXF-NEXT: xorps %xmm0, %xmm0 # encoding: [0x0f,0x57,0xc0]
+; APXF-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xc0,0x03,0x00,0x00]
+; APXF-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xd0,0x03,0x00,0x00]
+; APXF-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xe0,0x03,0x00,0x00]
+; APXF-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xf0,0x03,0x00,0x00]
+; APXF-NEXT: movb $1, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x84,0x24,0xc0,0x03,0x00,0x00,0x01]
+; APXF-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; APXF-NEXT: # encoding: [0x48,0x89,0x8c,0x24,0xb8,0x03,0x00,0x00]
+; APXF-NEXT: movl %esi, %eax # encoding: [0x89,0xf0]
+; APXF-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; APXF-NEXT: # encoding: [0x48,0x8b,0xb4,0x24,0xb8,0x03,0x00,0x00]
+; APXF-NEXT: movw %ax, %cx # encoding: [0x66,0x89,0xc1]
+; APXF-NEXT: movw %di, %ax # encoding: [0x66,0x89,0xf8]
+; APXF-NEXT: # implicit-def: $al
+; APXF-NEXT: movb %al, {{[0-9]+}}(%rsp) # encoding: [0x88,0x84,0x24,0xf0,0x03,0x00,0x00]
+; APXF-NEXT: movw %cx, {{[0-9]+}}(%rsp) # encoding: [0x66,0x89,0x8c,0x24,0xd0,0x03,0x00,0x00]
+; APXF-NEXT: ldtilecfg {{[0-9]+}}(%rsp) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x49,0x84,0x24,0xc0,0x03,0x00,0x00]
+; APXF-NEXT: tileloaddrs (%rdx,%rsi), %tmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7b,0x4a,0x04,0x32]
+; APXF-NEXT: movl $64, %esi # encoding: [0xbe,0x40,0x00,0x00,0x00]
+; APXF-NEXT: leaq {{[0-9]+}}(%rsp), %rdx # encoding: [0x48,0x8d,0x94,0x24,0x00,0x04,0x00,0x00]
+; APXF-NEXT: tilestored %tmm0, (%rdx,%rsi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x04,0x32]
+; APXF-NEXT: movq %rbp, %rsp # encoding: [0x48,0x89,0xec]
+; APXF-NEXT: popq %rbp # encoding: [0x5d]
+; APXF-NEXT: .cfi_def_cfa %rsp, 8
+; APXF-NEXT: tilerelease # encoding: [0xc4,0xe2,0x78,0x49,0xc0]
+; APXF-NEXT: retq # encoding: [0xc3]
entry:
%t1 = call x86_amx @llvm.x86.tileloaddrs64.internal(i16 %m, i16 %n, ptr %buf, i64 %s)
%t2 = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %t1)
@@ -48,6 +87,12 @@ define void @test_amx_old(i16 %m, i16 %n, ptr %buf) {
; CHECK-NEXT: movl $32, %eax
; CHECK-NEXT: tileloaddrs (%rdx,%rax), %tmm2
; CHECK-NEXT: retq
+;
+; APXF-LABEL: test_amx_old:
+; APXF: # %bb.0: # %entry
+; APXF-NEXT: movl $32, %eax # encoding: [0xb8,0x20,0x00,0x00,0x00]
+; APXF-NEXT: tileloaddrs (%rdx,%rax), %tmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7b,0x4a,0x14,0x02]
+; APXF-NEXT: retq # encoding: [0xc3]
entry:
call void @llvm.x86.tileloaddrs64(i8 2, ptr %buf, i64 32)
ret void
@@ -88,6 +133,44 @@ define void @test_amx_t1_internal(i16 %m, i16 %n, ptr %buf, i64 %s) {
; CHECK-NEXT: .cfi_def_cfa %rsp, 8
; CHECK-NEXT: tilerelease
; CHECK-NEXT: retq
+;
+; APXF-LABEL: test_amx_t1_internal:
+; APXF: # %bb.0: # %entry
+; APXF-NEXT: pushq %rbp # encoding: [0x55]
+; APXF-NEXT: .cfi_def_cfa_offset 16
+; APXF-NEXT: .cfi_offset %rbp, -16
+; APXF-NEXT: movq %rsp, %rbp # encoding: [0x48,0x89,0xe5]
+; APXF-NEXT: .cfi_def_cfa_register %rbp
+; APXF-NEXT: andq $-1024, %rsp # encoding: [0x48,0x81,0xe4,0x00,0xfc,0xff,0xff]
+; APXF-NEXT: # imm = 0xFC00
+; APXF-NEXT: subq $3072, %rsp # encoding: [0x48,0x81,0xec,0x00,0x0c,0x00,0x00]
+; APXF-NEXT: # imm = 0xC00
+; APXF-NEXT: xorps %xmm0, %xmm0 # encoding: [0x0f,0x57,0xc0]
+; APXF-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xc0,0x03,0x00,0x00]
+; APXF-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xd0,0x03,0x00,0x00]
+; APXF-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xe0,0x03,0x00,0x00]
+; APXF-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xf0,0x03,0x00,0x00]
+; APXF-NEXT: movb $1, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x84,0x24,0xc0,0x03,0x00,0x00,0x01]
+; APXF-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; APXF-NEXT: # encoding: [0x48,0x89,0x8c,0x24,0xb8,0x03,0x00,0x00]
+; APXF-NEXT: movl %esi, %eax # encoding: [0x89,0xf0]
+; APXF-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; APXF-NEXT: # encoding: [0x48,0x8b,0xb4,0x24,0xb8,0x03,0x00,0x00]
+; APXF-NEXT: movw %ax, %cx # encoding: [0x66,0x89,0xc1]
+; APXF-NEXT: movw %di, %ax # encoding: [0x66,0x89,0xf8]
+; APXF-NEXT: # implicit-def: $al
+; APXF-NEXT: movb %al, {{[0-9]+}}(%rsp) # encoding: [0x88,0x84,0x24,0xf0,0x03,0x00,0x00]
+; APXF-NEXT: movw %cx, {{[0-9]+}}(%rsp) # encoding: [0x66,0x89,0x8c,0x24,0xd0,0x03,0x00,0x00]
+; APXF-NEXT: ldtilecfg {{[0-9]+}}(%rsp) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x49,0x84,0x24,0xc0,0x03,0x00,0x00]
+; APXF-NEXT: tileloaddrst1 (%rdx,%rsi), %tmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x4a,0x04,0x32]
+; APXF-NEXT: movl $64, %esi # encoding: [0xbe,0x40,0x00,0x00,0x00]
+; APXF-NEXT: leaq {{[0-9]+}}(%rsp), %rdx # encoding: [0x48,0x8d,0x94,0x24,0x00,0x04,0x00,0x00]
+; APXF-NEXT: tilestored %tmm0, (%rdx,%rsi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x04,0x32]
+; APXF-NEXT: movq %rbp, %rsp # encoding: [0x48,0x89,0xec]
+; APXF-NEXT: popq %rbp # encoding: [0x5d]
+; APXF-NEXT: .cfi_def_cfa %rsp, 8
+; APXF-NEXT: tilerelease # encoding: [0xc4,0xe2,0x78,0x49,0xc0]
+; APXF-NEXT: retq # encoding: [0xc3]
entry:
%t1 = call x86_amx @llvm.x86.tileloaddrst164.internal(i16 %m, i16 %n, ptr %buf, i64 %s)
%t2 = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %t1)
@@ -101,6 +184,12 @@ define void @test_amx_t1_old(i16 %m, i16 %n, ptr %buf) {
; CHECK-NEXT: movl $32, %eax
; CHECK-NEXT: tileloaddrst1 (%rdx,%rax), %tmm2
; CHECK-NEXT: retq
+;
+; APXF-LABEL: test_amx_t1_old:
+; APXF: # %bb.0: # %entry
+; APXF-NEXT: movl $32, %eax # encoding: [0xb8,0x20,0x00,0x00,0x00]
+; APXF-NEXT: tileloaddrst1 (%rdx,%rax), %tmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x4a,0x14,0x02]
+; APXF-NEXT: retq # encoding: [0xc3]
entry:
call void @llvm.x86.tileloaddrst164(i8 2, ptr %buf, i64 32)
ret void
diff --git a/llvm/test/CodeGen/X86/amx_movrs_transpose_intrinsics.ll b/llvm/test/CodeGen/X86/amx_movrs_transpose_intrinsics.ll
index 146b69773eb186..0d5b85f2bb1088 100755
--- a/llvm/test/CodeGen/X86/amx_movrs_transpose_intrinsics.ll
+++ b/llvm/test/CodeGen/X86/amx_movrs_transpose_intrinsics.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -O0 -mtriple=x86_64-unknown-unknown -mattr=+amx-transpose,+amx-movrs | FileCheck %s --check-prefixes=CHECK,O0
; RUN: llc < %s -O2 -mtriple=x86_64-unknown-unknown -mattr=+amx-transpose,+amx-movrs | FileCheck %s --check-prefixes=CHECK,O2
+; RUN: llc < %s -O2 -mtriple=x86_64-unknown-unknown -mattr=+amx-transpose,+amx-movrs,+egpr --show-mc-encoding | FileCheck %s --check-prefix=APXF
define void @test_amx(i64 %stride, i8* %addr1) #0 {
; CHECK-LABEL: test_amx:
@@ -10,6 +11,14 @@ define void @test_amx(i64 %stride, i8* %addr1) #0 {
; CHECK-NEXT: t2rpntlvwz1rs (%rsi,%rdi), %tmm0
; CHECK-NEXT: t2rpntlvwz1rst1 (%rsi,%rdi), %tmm2
; CHECK-NEXT: retq
+;
+; APXF-LABEL: test_amx:
+; APXF: # %bb.0:
+; APXF-NEXT: t2rpntlvwz0rs (%rsi,%rdi), %tmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x78,0xf8,0x04,0x3e]
+; APXF-NEXT: t2rpntlvwz0rst1 (%rsi,%rdi), %tmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x78,0xf9,0x14,0x3e]
+; APXF-NEXT: t2rpntlvwz1rs (%rsi,%rdi), %tmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x79,0xf8,0x04,0x3e]
+; APXF-NEXT: t2rpntlvwz1rst1 (%rsi,%rdi), %tmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x79,0xf9,0x14,0x3e]
+; APXF-NEXT: retq # encoding: [0xc3]
call void @llvm.x86.t2rpntlvwz0rs(i8 1, i8* %addr1, i64 %stride)
call void @llvm.x86.t2rpntlvwz0rst1(i8 2, i8* %addr1, i64 %stride)
call void @llvm.x86.t2rpntlvwz1rs(i8 1, i8* %addr1, i64 %stride)
@@ -80,6 +89,27 @@ define void @test_amx2(i8* %base, i64 %stride) #0 {
; O2-NEXT: t2rpntlvwz1rst1 (%rdi,%rsi), %tmm4
; O2-NEXT: tilerelease
; O2-NEXT: retq
+;
+; APXF-LABEL: test_amx2:
+; APXF: # %bb.0:
+; APXF-NEXT: xorps %xmm0, %xmm0 # encoding: [0x0f,0x57,0xc0]
+; APXF-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x44,0x24,0xc0]
+; APXF-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x44,0x24,0xd0]
+; APXF-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x44,0x24,0xe0]
+; APXF-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x44,0x24,0xf0]
+; APXF-NEXT: movb $1, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xc0,0x01]
+; APXF-NEXT: movb $8, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xf4,0x08]
+; APXF-NEXT: movw $8, -{{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0xd8,0x08,0x00]
+; APXF-NEXT: movb $8, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xf5,0x08]
+; APXF-NEXT: movw $8, -{{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0xda,0x08,0x00]
+; APXF-NEXT: ldtilecfg -{{[0-9]+}}(%rsp) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x49,0x44,0x24,0xc0]
+; APXF-NEXT: movw $8, %ax # encoding: [0x66,0xb8,0x08,0x00]
+; APXF-NEXT: t2rpntlvwz0rs (%rdi,%rsi), %tmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x78,0xf8,0x24,0x37]
+; APXF-NEXT: t2rpntlvwz0rst1 (%rdi,%rsi), %tmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x78,0xf9,0x24,0x37]
+; APXF-NEXT: t2rpntlvwz1rs (%rdi,%rsi), %tmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x79,0xf8,0x24,0x37]
+; APXF-NEXT: t2rpntlvwz1rst1 (%rdi,%rsi), %tmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x79,0xf9,0x24,0x37]
+; APXF-NEXT: tilerelease # encoding: [0xc4,0xe2,0x78,0x49,0xc0]
+; APXF-NEXT: retq # encoding: [0xc3]
call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0rs.internal(i16 8, i16 8, i16 8, i8* %base, i64 %...
[truncated]
``````````
https://github.com/llvm/llvm-project/pull/123267