[clang] [llvm] Revert "[X86][AMX] Support AMX-AVX512" (PR #115570)
Alan Zhao via cfe-commits
cfe-commits at lists.llvm.org
Fri Nov 8 16:14:05 PST 2024
https://github.com/alanzhao1 created https://github.com/llvm/llvm-project/pull/115570
Reverts llvm/llvm-project#114070
Reason: The reverted change causes `immintrin.h` to fail to compile when both `-msse` and `-mno-sse2` are passed to clang. See https://github.com/llvm/llvm-project/pull/114070#issuecomment-2465926700
>From e06af1d045a57e93ebf3c86c4ac70aa752a93fa1 Mon Sep 17 00:00:00 2001
From: Alan Zhao <azhao101 at gmail.com>
Date: Fri, 8 Nov 2024 16:12:54 -0800
Subject: [PATCH] Revert "[X86][AMX] Support AMX-AVX512 (#114070)"
This reverts commit 58a17e1bbc54357385d0b89cfc5635e402c31ef6.
---
clang/docs/ReleaseNotes.rst | 1 -
clang/include/clang/Basic/BuiltinsX86_64.def | 13 -
clang/include/clang/Driver/Options.td | 2 -
clang/lib/Basic/Targets/X86.cpp | 6 -
clang/lib/Basic/Targets/X86.h | 1 -
clang/lib/Headers/CMakeLists.txt | 1 -
clang/lib/Headers/amxavx512intrin.h | 382 ------------------
clang/lib/Headers/immintrin.h | 4 -
clang/lib/Sema/SemaX86.cpp | 6 -
clang/test/CodeGen/X86/amx_avx512_api.c | 52 ---
clang/test/CodeGen/X86/amxavx512-builtins.c | 41 --
clang/test/CodeGen/attr-target-x86.c | 8 +-
clang/test/Driver/x86-target-features.c | 7 -
clang/test/Preprocessor/x86_target_features.c | 12 -
llvm/include/llvm/IR/IntrinsicsX86.td | 51 ---
.../llvm/TargetParser/X86TargetParser.def | 1 -
llvm/lib/Target/X86/X86.td | 4 -
llvm/lib/Target/X86/X86ExpandPseudo.cpp | 67 +--
llvm/lib/Target/X86/X86ISelLowering.cpp | 76 ----
llvm/lib/Target/X86/X86InstrAMX.td | 147 -------
llvm/lib/Target/X86/X86InstrPredicates.td | 1 -
llvm/lib/Target/X86/X86LowerAMXType.cpp | 11 -
llvm/lib/Target/X86/X86PreTileConfig.cpp | 18 +-
llvm/lib/TargetParser/Host.cpp | 1 -
llvm/lib/TargetParser/X86TargetParser.cpp | 2 -
.../CodeGen/X86/amx-across-func-tilemovrow.ll | 171 --------
.../test/CodeGen/X86/amx-avx512-intrinsics.ll | 116 ------
.../CodeGen/X86/amx-tile-avx512-internals.ll | 61 ---
llvm/test/MC/Disassembler/X86/amx-avx512.txt | 106 -----
llvm/test/MC/X86/amx-avx512-att.s | 105 -----
llvm/test/MC/X86/amx-avx512-intel.s | 105 -----
31 files changed, 12 insertions(+), 1567 deletions(-)
delete mode 100644 clang/lib/Headers/amxavx512intrin.h
delete mode 100644 clang/test/CodeGen/X86/amx_avx512_api.c
delete mode 100644 clang/test/CodeGen/X86/amxavx512-builtins.c
delete mode 100644 llvm/test/CodeGen/X86/amx-across-func-tilemovrow.ll
delete mode 100644 llvm/test/CodeGen/X86/amx-avx512-intrinsics.ll
delete mode 100644 llvm/test/CodeGen/X86/amx-tile-avx512-internals.ll
delete mode 100644 llvm/test/MC/Disassembler/X86/amx-avx512.txt
delete mode 100644 llvm/test/MC/X86/amx-avx512-att.s
delete mode 100644 llvm/test/MC/X86/amx-avx512-intel.s
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index c3424e0e6f34c9..f82fbb73b12162 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -739,7 +739,6 @@ X86 Support
* Supported intrinsics of ``_mm(256|512)_(mask(z))_loadrs_epi(8|16|32|64)``.
- Support ISA of ``AMX-FP8``.
- Support ISA of ``AMX-TRANSPOSE``.
-- Support ISA of ``AMX-AVX512``.
Arm and AArch64 Support
^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/clang/include/clang/Basic/BuiltinsX86_64.def b/clang/include/clang/Basic/BuiltinsX86_64.def
index 9f7462b1e0d962..d95e8455a304b6 100644
--- a/clang/include/clang/Basic/BuiltinsX86_64.def
+++ b/clang/include/clang/Basic/BuiltinsX86_64.def
@@ -133,12 +133,6 @@ TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz0t1_internal, "vUsUsUsV256i*V256i*vC*z",
TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz1_internal, "vUsUsUsV256i*V256i*vC*z", "n", "amx-transpose")
TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz1t1_internal, "vUsUsUsV256i*V256i*vC*z", "n", "amx-transpose")
TARGET_BUILTIN(__builtin_ia32_ttransposed_internal, "V256iUsUsV256i", "n", "amx-transpose")
-TARGET_BUILTIN(__builtin_ia32_tcvtrowd2ps_internal, "V16fUsUsV256iUi", "n", "amx-avx512,avx10.2-512")
-TARGET_BUILTIN(__builtin_ia32_tcvtrowps2pbf16h_internal, "V32yUsUsV256iUi", "n", "amx-avx512,avx10.2-512")
-TARGET_BUILTIN(__builtin_ia32_tcvtrowps2pbf16l_internal, "V32yUsUsV256iUi", "n", "amx-avx512,avx10.2-512")
-TARGET_BUILTIN(__builtin_ia32_tcvtrowps2phh_internal, "V32xUsUsV256iUi", "n", "amx-avx512,avx10.2-512")
-TARGET_BUILTIN(__builtin_ia32_tcvtrowps2phl_internal, "V32xUsUsV256iUi", "n", "amx-avx512,avx10.2-512")
-TARGET_BUILTIN(__builtin_ia32_tilemovrow_internal, "V16iUsUsV256iUi", "n", "amx-avx512,avx10.2-512")
// AMX
TARGET_BUILTIN(__builtin_ia32_tile_loadconfig, "vvC*", "n", "amx-tile")
TARGET_BUILTIN(__builtin_ia32_tile_storeconfig, "vvC*", "n", "amx-tile")
@@ -165,13 +159,6 @@ TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz1, "vIUcvC*z", "n", "amx-transpose")
TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz1t1, "vIUcvC*z", "n","amx-transpose")
TARGET_BUILTIN(__builtin_ia32_ttransposed, "vIUcIUc", "n", "amx-transpose")
-TARGET_BUILTIN(__builtin_ia32_tcvtrowd2ps, "V16fIUcUi", "n", "amx-avx512,avx10.2-512")
-TARGET_BUILTIN(__builtin_ia32_tcvtrowps2pbf16h, "V32yIUcUi", "n", "amx-avx512,avx10.2-512")
-TARGET_BUILTIN(__builtin_ia32_tcvtrowps2pbf16l, "V32yIUcUi", "n", "amx-avx512,avx10.2-512")
-TARGET_BUILTIN(__builtin_ia32_tcvtrowps2phh, "V32xIUcUi", "n", "amx-avx512,avx10.2-512")
-TARGET_BUILTIN(__builtin_ia32_tcvtrowps2phl, "V32xIUcUi", "n", "amx-avx512,avx10.2-512")
-TARGET_BUILTIN(__builtin_ia32_tilemovrow, "V16iIUcUi", "n", "amx-avx512,avx10.2-512")
-
TARGET_BUILTIN(__builtin_ia32_prefetchi, "vvC*Ui", "nc", "prefetchi")
TARGET_BUILTIN(__builtin_ia32_cmpccxadd32, "Siv*SiSiIi", "n", "cmpccxadd")
TARGET_BUILTIN(__builtin_ia32_cmpccxadd64, "SLLiSLLi*SLLiSLLiIi", "n", "cmpccxadd")
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 0dba5672c5a85d..8887e0c1495d2a 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -6285,8 +6285,6 @@ def mno_80387 : Flag<["-"], "mno-80387">, Alias<mno_x87>;
def mno_fp_ret_in_387 : Flag<["-"], "mno-fp-ret-in-387">, Alias<mno_x87>;
def mmmx : Flag<["-"], "mmmx">, Group<m_x86_Features_Group>;
def mno_mmx : Flag<["-"], "mno-mmx">, Group<m_x86_Features_Group>;
-def mamx_avx512 : Flag<["-"], "mamx-avx512">, Group<m_x86_Features_Group>;
-def mno_amx_avx512 : Flag<["-"], "mno-amx-avx512">, Group<m_x86_Features_Group>;
def mamx_bf16 : Flag<["-"], "mamx-bf16">, Group<m_x86_Features_Group>;
def mno_amx_bf16 : Flag<["-"], "mno-amx-bf16">, Group<m_x86_Features_Group>;
def mamx_complex : Flag<["-"], "mamx-complex">, Group<m_x86_Features_Group>;
diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp
index 3c3dbfa13e452b..d7d3adef42c79a 100644
--- a/clang/lib/Basic/Targets/X86.cpp
+++ b/clang/lib/Basic/Targets/X86.cpp
@@ -432,8 +432,6 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
HasAMXFP8 = true;
} else if (Feature == "+amx-transpose") {
HasAMXTRANSPOSE = true;
- } else if (Feature == "+amx-avx512") {
- HasAMXAVX512 = true;
} else if (Feature == "+cmpccxadd") {
HasCMPCCXADD = true;
} else if (Feature == "+raoint") {
@@ -957,8 +955,6 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
Builder.defineMacro("__AMX_FP8__");
if (HasAMXTRANSPOSE)
Builder.defineMacro("__AMX_TRANSPOSE__");
- if (HasAMXAVX512)
- Builder.defineMacro("__AMX_AVX512__");
if (HasCMPCCXADD)
Builder.defineMacro("__CMPCCXADD__");
if (HasRAOINT)
@@ -1084,7 +1080,6 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const {
return llvm::StringSwitch<bool>(Name)
.Case("adx", true)
.Case("aes", true)
- .Case("amx-avx512", true)
.Case("amx-bf16", true)
.Case("amx-complex", true)
.Case("amx-fp16", true)
@@ -1205,7 +1200,6 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
return llvm::StringSwitch<bool>(Feature)
.Case("adx", HasADX)
.Case("aes", HasAES)
- .Case("amx-avx512", HasAMXAVX512)
.Case("amx-bf16", HasAMXBF16)
.Case("amx-complex", HasAMXCOMPLEX)
.Case("amx-fp16", HasAMXFP16)
diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h
index 70047731b17295..e2eba63b992355 100644
--- a/clang/lib/Basic/Targets/X86.h
+++ b/clang/lib/Basic/Targets/X86.h
@@ -159,7 +159,6 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
bool HasAMXCOMPLEX = false;
bool HasAMXFP8 = false;
bool HasAMXTRANSPOSE = false;
- bool HasAMXAVX512 = false;
bool HasSERIALIZE = false;
bool HasTSXLDTRK = false;
bool HasUSERMSR = false;
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index 76366ca1f108e9..67242cd4d981bc 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -146,7 +146,6 @@ set(x86_files
adcintrin.h
adxintrin.h
ammintrin.h
- amxavx512intrin.h
amxcomplexintrin.h
amxfp16intrin.h
amxfp8intrin.h
diff --git a/clang/lib/Headers/amxavx512intrin.h b/clang/lib/Headers/amxavx512intrin.h
deleted file mode 100644
index 945edea543e706..00000000000000
--- a/clang/lib/Headers/amxavx512intrin.h
+++ /dev/null
@@ -1,382 +0,0 @@
-/*===--------------------- amxavx512intrin.h - AMXAVX512 --------------------===
- *
- * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- * See https://llvm.org/LICENSE.txt for license information.
- * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- *
- *===------------------------------------------------------------------------===
- */
-#ifndef __IMMINTRIN_H
-#error "Never use <amxavx512intrin.h> directly; include <immintrin.h> instead."
-#endif // __IMMINTRIN_H
-
-#ifndef __AMX_AVX512INTRIN_H
-#define __AMX_AVX512INTRIN_H
-#ifdef __x86_64__
-
-#define __DEFAULT_FN_ATTRS_AVX512 \
- __attribute__((__always_inline__, __nodebug__, \
- __target__("amx-avx512,avx10.2-512")))
-
-/// Moves a row from a tile register to a zmm destination register, converting
-/// the int32 source elements to fp32. The row of the tile is selected by a
-/// 32b GPR.
-///
-/// \headerfile <x86intrin.h>
-///
-/// \code
-/// __m512i _tile_cvtrowd2ps(__tile tsrc, unsigned int row);
-/// \endcode
-///
-/// \code{.operation}
-/// VL := 512
-/// VL_bytes := VL >> 3
-/// row_index := row & 0xffff
-/// row_chunk := ((row >> 16) & 0xffff) * VL_bytes
-/// FOR i := 0 TO (VL_bytes / 4) - 1
-/// IF i + row_chunk / 4 >= tsrc.colsb / 4
-/// dst.dword[i] := 0
-/// ELSE
-/// dst.f32[i] := CONVERT_INT32_TO_FP32(tsrc.row[row_index].dword[row_chunk/4+i], RNE)
-/// FI
-/// ENDFOR
-/// dst[MAX_VL-1:VL] := 0
-/// zero_tileconfig_start()
-/// \endcode
-///
-/// This intrinsic corresponds to the \c TCVTROWD2PS instruction.
-///
-/// \param tsrc
-/// The source tile. Max size is 1024 Bytes.
-/// \param row
-/// The row of the source tile
-#define _tile_cvtrowd2ps(tsrc, row) __builtin_ia32_tcvtrowd2ps(tsrc, row)
-
-/// Moves a row from a tile register to a zmm destination register, converting
-/// the fp32 source elements to bf16. It places the resulting bf16 elements
-/// in the high 16 bits within each dword. The row of the tile is selected
-/// by a 32b GPR.
-///
-/// \headerfile <x86intrin.h>
-///
-/// \code
-/// __m512i _tile_cvtrowps2pbf16h(__tile tsrc, unsigned int row);
-/// \endcode
-///
-/// \code{.operation}
-/// VL := 512
-/// VL_bytes := VL >> 3
-/// row_index := row & 0xffff
-/// row_chunk := ((row >> 16) & 0xffff) * VL_bytes
-/// FOR i := 0 TO (VL_bytes / 4) - 1
-/// IF i + row_chunk / 4 >= tsrc.colsb / 4
-/// dst.dword[i] := 0
-/// ELSE
-/// dst.word[2*i+0] := 0
-/// dst.bf16[2*i+1] := CONVERT_FP32_TO_BF16(tsrc.row[row_index].fp32[row_chunk/4+i], RNE)
-/// FI
-/// ENDFOR
-/// dst[MAX_VL-1:VL] := 0
-/// zero_tileconfig_start()
-/// \endcode
-///
-/// This intrinsic corresponds to the \c TCVTROWPS2PBF16H instruction.
-///
-/// \param tsrc
-/// The source tile. Max size is 1024 Bytes.
-/// \param row
-/// The the row of the source tile.
-#define _tile_cvtrowps2pbf16h(tsrc, row) \
- __builtin_ia32_tcvtrowps2pbf16h(tsrc, row)
-
-/// Moves a row from a tile register to a zmm destination register, converting
-/// the fp32 source elements to bf16. It places the resulting bf16 elements
-/// in the low 16 bits within each dword. The row of the tile is selected
-/// by a 32b GPR.
-///
-/// \headerfile <x86intrin.h>
-///
-/// \code
-/// __m512i _tile_cvtrowps2pbf16l(__tile tsrc, unsigned int row);
-/// \endcode
-///
-/// \code{.operation}
-/// VL := 512
-/// VL_bytes := VL >> 3
-/// row_index := row & 0xffff
-/// row_chunk := ((row >> 16) & 0xffff) * VL_bytes
-/// FOR i := 0 TO (VL_bytes / 4) - 1
-/// IF i + row_chunk / 4 >= tsrc.colsb / 4
-/// dst.dword[i] := 0
-/// ELSE
-/// dst.word[2*i+1] := 0
-/// dst.bf16[2*i+0] := CONVERT_FP32_TO_BF16(tsrc.row[row_index].fp32[row_chunk/4+i], RNE)
-/// FI
-/// ENDFOR
-/// dst[MAX_VL-1:VL] := 0
-/// zero_tileconfig_start()
-/// \endcode
-///
-/// This intrinsic corresponds to the \c TCVTROWPS2PBF16L instruction.
-///
-/// \param tsrc
-/// The source tile. Max size is 1024 Bytes.
-/// \param row
-/// The the row of the source tile.
-#define _tile_cvtrowps2pbf16l(tsrc, row) \
- __builtin_ia32_tcvtrowps2pbf16l(tsrc, row)
-
-/// Moves a row from a tile register to a zmm destination register, converting
-/// the fp32 source elements to fp16. It places the resulting fp16 elements
-/// in the high 16 bits within each dword. The row of the tile is selected
-/// by a 32b GPR.
-///
-/// \headerfile <x86intrin.h>
-///
-/// \code
-/// __m512i _tile_cvtrowps2phh(__tile tsrc, unsigned int row);
-/// \endcode
-///
-/// \code{.operation}
-/// VL := 512
-/// VL_bytes := VL >> 3
-/// row_index := row & 0xffff
-/// row_chunk := ((row >> 16) & 0xffff) * VL_bytes
-/// FOR i := 0 TO (VL_bytes / 4) - 1
-/// IF i + row_chunk / 4 >= tsrc.colsb / 4
-/// dst.dword[i] := 0
-/// ELSE
-/// dst.word[2*i+0] := 0
-/// dst.fp16[2*i+1] := CONVERT_FP32_TO_FP16(tsrc.row[row_index].fp32[row_chunk/4+i], RNE)
-/// FI
-/// ENDFOR
-/// dst[MAX_VL-1:VL] := 0
-/// zero_tileconfig_start()
-/// \endcode
-///
-/// This intrinsic corresponds to the \c TCVTROWPS2PHH instruction.
-///
-/// \param tsrc
-/// The source tile. Max size is 1024 Bytes.
-/// \param row
-/// The the row of the source tile.
-#define _tile_cvtrowps2phh(tsrc, row) __builtin_ia32_tcvtrowps2phh(tsrc, row)
-
-/// Moves a row from a tile register to a zmm destination register, converting
-/// the fp32 source elements to fp16. It places the resulting fp16 elements
-/// in the low 16 bits within each dword. The row of the tile is selected
-/// by a 32b GPR.
-///
-/// \headerfile <x86intrin.h>
-///
-/// \code
-/// __m512i _tile_cvtrowps2phl(__tile tsrc, unsigned int row);
-/// \endcode
-///
-/// \code{.operation}
-/// VL := 512
-/// VL_bytes := VL >> 3
-/// row_index := row & 0xffff
-/// row_chunk := ((row >> 16) & 0xffff) * VL_bytes
-/// FOR i := 0 TO (VL_bytes / 4) - 1
-/// IF i + row_chunk / 4 >= tsrc.colsb / 4
-/// dst.dword[i] := 0
-/// ELSE
-/// dst.word[2*i+1] := 0
-/// dst.fp16[2*i+0] := CONVERT_FP32_TO_FP16(tsrc.row[row_index].fp32[row_chunk/4+i], RNE)
-/// FI
-/// ENDFOR
-/// dst[MAX_VL-1:VL] := 0
-/// zero_tileconfig_start()
-/// \endcode
-///
-/// This intrinsic corresponds to the \c TCVTROWPS2PHL instruction.
-///
-/// \param tsrc
-/// The source tile. Max size is 1024 Bytes.
-/// \param row
-/// The the row of the source tile.
-#define _tile_cvtrowps2phl(tsrc, row) __builtin_ia32_tcvtrowps2phl(tsrc, row)
-
-/// Move one row of a tile data to a v16f32 data.
-/// The row of the tile is selected by a 32b GPR.
-///
-/// \headerfile <immintrin.h>
-///
-/// \code
-/// __m512 _tile_movrow(__tile a, unsigned b);
-/// \endcode
-///
-/// This intrinsic corresponds to the <c> TILEMOVROW </c> instruction.
-///
-/// \param a
-/// The 1st source tile. Max size is 1024 Bytes.
-/// \param b
-/// The 2nd source r32. Size is 4 Bytes.
-/// \returns
-/// The destination v16f32 data. Size is 64 Bytes.
-///
-/// \code{.operation}
-/// VL := 512
-/// VL_bytes := VL>>3
-/// row_index := b&0xffff
-/// row_chunk := ((b>>16)&0xffff) * VL_bytes
-/// FOR i := 0 TO (VL_bytes-1)
-/// IF (row_chunk + i >= a.colsb)
-/// dst.byte[i] := 0
-/// ELSE
-/// dst.byte[i] := a.row[row_index].byte[row_chunk+i]
-/// ENDFOR
-/// \endcode
-#define _tile_movrow(a, b) __builtin_ia32_tilemovrow(a, b)
-
-/// This is internal intrinsic. C/C++ user should avoid calling it directly.
-
-static __inline__ __m512 __DEFAULT_FN_ATTRS_AVX512 _tile_cvtrowd2ps_internal(
- unsigned short m, unsigned short n, _tile1024i src, unsigned u) {
- return __builtin_ia32_tcvtrowd2ps_internal(m, n, src, u);
-}
-
-static __inline__ __m512bh __DEFAULT_FN_ATTRS_AVX512
-_tile_cvtrowps2pbf16h_internal(unsigned short m, unsigned short n,
- _tile1024i src, unsigned u) {
- return __builtin_ia32_tcvtrowps2pbf16h_internal(m, n, src, u);
-}
-
-static __inline__ __m512bh __DEFAULT_FN_ATTRS_AVX512
-_tile_cvtrowps2pbf16l_internal(unsigned short m, unsigned short n,
- _tile1024i src, unsigned u) {
- return __builtin_ia32_tcvtrowps2pbf16l_internal(m, n, src, u);
-}
-
-static __inline__ __m512h __DEFAULT_FN_ATTRS_AVX512 _tile_cvtrowps2phh_internal(
- unsigned short m, unsigned short n, _tile1024i src, unsigned u) {
- return __builtin_ia32_tcvtrowps2phh_internal(m, n, src, u);
-}
-
-static __inline__ __m512h __DEFAULT_FN_ATTRS_AVX512 _tile_cvtrowps2phl_internal(
- unsigned short m, unsigned short n, _tile1024i src, unsigned u) {
- return __builtin_ia32_tcvtrowps2phl_internal(m, n, src, u);
-}
-
-static __inline__ __m512i __DEFAULT_FN_ATTRS_AVX512 _tile_movrow_internal(
- unsigned short m, unsigned short n, _tile1024i src, unsigned u) {
- return (__m512i)__builtin_ia32_tilemovrow_internal(m, n, src, u);
-}
-
-/// Move a row from a tile (src0) to a v16f32 dst, converting the int32 source
-/// elements to fp32. No SIMD exceptions are generated. Rounding is done as if
-/// MXCSR.RC=RNE. Embedded rounding is not supported.
-/// The row and chunk elements of tile is fetched from 32bit src1.
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the <c> TCVTROWD2PS </c> instruction.
-///
-/// \param src0
-/// The 1st source tile. Max size is 1024 Bytes.
-/// \param src1
-/// The 2nd source r32. Size is 4 Bytes.
-/// \returns
-/// The destination v16f32 data. Size is 64 Bytes.
-__DEFAULT_FN_ATTRS_AVX512
-static __m512 __tile_cvtrowd2ps(__tile1024i src0, unsigned src1) {
- return _tile_cvtrowd2ps_internal(src0.row, src0.col, src0.tile, src1);
-}
-
-/// Move a row from a tile (src0) to a v32bf16 dst, converting the fp32 source
-/// elements to bf16 at high 16-bits of each dword.
-/// The row and chunk elements of tile is fetched from 32bit src1.
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the <c> TCVTROWPS2PBF16H </c> instruction.
-///
-/// \param src0
-/// The 1st source tile. Max size is 1024 Bytes.
-/// \param src1
-/// The 2nd source r32. Size is 4 Bytes.
-/// \returns
-/// The destination v32bf16 data. Size is 64 Bytes.
-__DEFAULT_FN_ATTRS_AVX512
-static __m512bh __tile_cvtrowps2pbf16h(__tile1024i src0, unsigned src1) {
- return _tile_cvtrowps2pbf16h_internal(src0.row, src0.col, src0.tile, src1);
-}
-
-/// Move a row from a tile (src0) to a v32bf16 dst, converting the fp32 source
-/// elements to bf16 at low 16-bits of each dword.
-/// The row and chunk elements of tile is fetched from 32bit src1.
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the <c> TCVTROWPS2PBF16L </c> instruction.
-///
-/// \param src0
-/// The 1st source tile. Max size is 1024 Bytes.
-/// \param src1
-/// The 2nd source r32. Size is 4 Bytes.
-/// \returns
-/// The destination v32bf16 data. Size is 64 Bytes.
-__DEFAULT_FN_ATTRS_AVX512
-static __m512bh __tile_cvtrowps2pbf16l(__tile1024i src0, unsigned src1) {
- return _tile_cvtrowps2pbf16l_internal(src0.row, src0.col, src0.tile, src1);
-}
-
-/// Move a row from a tile (src0) to a v32fp16 dst, converting the fp32 source
-/// elements to fp16 at high 16-bits of each dword.
-/// The row and chunk elements of tile is fetched from 32bit src1.
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the <c> TCVTROWPS2PHH </c> instruction.
-///
-/// \param src0
-/// The 1st source tile. Max size is 1024 Bytes.
-/// \param src1
-/// The 2nd source r32. Size is 4 Bytes.
-/// \returns
-/// The destination v32fp16 data. Size is 64 Bytes.
-__DEFAULT_FN_ATTRS_AVX512
-static __m512h __tile_cvtrowps2phh(__tile1024i src0, unsigned src1) {
- return _tile_cvtrowps2phh_internal(src0.row, src0.col, src0.tile, src1);
-}
-
-/// Move a row from a tile (src0) to a v32fp16 dst, converting the fp32 source
-/// elements to fp16 at low 16-bits of each dword.
-/// The row and chunk elements of tile is fetched from 32bit src1.
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the <c> TCVTROWPS2PHL </c> instruction.
-///
-/// \param src0
-/// The 1st source tile. Max size is 1024 Bytes.
-/// \param src1
-/// The 2nd source r32. Size is 4 Bytes.
-/// \returns
-/// The destination v32fp16 data. Size is 64 Bytes.
-__DEFAULT_FN_ATTRS_AVX512
-static __m512h __tile_cvtrowps2phl(__tile1024i src0, unsigned src1) {
- return _tile_cvtrowps2phl_internal(src0.row, src0.col, src0.tile, src1);
-}
-
-/// Move one row of a tile data to a v16f32 data.
-/// The row of the tile is selected by a 32b GPR.
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the <c> TILEMOVROW </c> instruction.
-///
-/// \param src0
-/// The 1st source tile. Max size is 1024 Bytes.
-/// \param src1
-/// The 2nd source r32. Size is 4 Bytes.
-/// \returns
-/// The destination v16i32 data. Size is 64 Bytes.
-__DEFAULT_FN_ATTRS_AVX512
-static __m512i __tile_movrow(__tile1024i src0, unsigned src1) {
- return (__m512i)_tile_movrow_internal(src0.row, src0.col, src0.tile, src1);
-}
-
-#endif // __x86_64__
-#endif // __AMX_AVX512INTRIN_H
diff --git a/clang/lib/Headers/immintrin.h b/clang/lib/Headers/immintrin.h
index bc240e28d59142..4bf7eac4195eec 100644
--- a/clang/lib/Headers/immintrin.h
+++ b/clang/lib/Headers/immintrin.h
@@ -656,10 +656,6 @@ _storebe_i64(void * __P, long long __D) {
#include <amxtransposeintrin.h>
#endif
-#if !defined(__SCE__) || __has_feature(modules) || defined(__AMX_AVX512__)
-#include <amxavx512intrin.h>
-#endif
-
#if !defined(__SCE__) || __has_feature(modules) || \
defined(__AVX512VP2INTERSECT__)
#include <avx512vp2intersectintrin.h>
diff --git a/clang/lib/Sema/SemaX86.cpp b/clang/lib/Sema/SemaX86.cpp
index 1155a5edc73c34..ef878d16d445fd 100644
--- a/clang/lib/Sema/SemaX86.cpp
+++ b/clang/lib/Sema/SemaX86.cpp
@@ -635,12 +635,6 @@ bool SemaX86::CheckBuiltinTileArguments(unsigned BuiltinID, CallExpr *TheCall) {
case X86::BI__builtin_ia32_t2rpntlvwz0t1:
case X86::BI__builtin_ia32_t2rpntlvwz1:
case X86::BI__builtin_ia32_t2rpntlvwz1t1:
- case X86::BI__builtin_ia32_tcvtrowps2pbf16h:
- case X86::BI__builtin_ia32_tcvtrowps2pbf16l:
- case X86::BI__builtin_ia32_tcvtrowps2phh:
- case X86::BI__builtin_ia32_tcvtrowps2phl:
- case X86::BI__builtin_ia32_tcvtrowd2ps:
- case X86::BI__builtin_ia32_tilemovrow:
return CheckBuiltinTileArgumentsRange(TheCall, 0);
case X86::BI__builtin_ia32_tdpbssd:
case X86::BI__builtin_ia32_tdpbsud:
diff --git a/clang/test/CodeGen/X86/amx_avx512_api.c b/clang/test/CodeGen/X86/amx_avx512_api.c
deleted file mode 100644
index aea790d61268d3..00000000000000
--- a/clang/test/CodeGen/X86/amx_avx512_api.c
+++ /dev/null
@@ -1,52 +0,0 @@
-// RUN: %clang_cc1 %s -flax-vector-conversions=none -ffreestanding -triple=x86_64-unknown-unknown \
-// RUN: -target-feature +amx-avx512 -target-feature +avx10.2-512 \
-// RUN: -emit-llvm -o - -Werror -pedantic | FileCheck %s --check-prefixes=CHECK
-
-#include <immintrin.h>
-
-char buf[1024];
-#define STRIDE 32
-
-char buf2[1024];
-
-__m512 test_tile_cvtrowd2ps(__tile1024i a, unsigned b) {
- //CHECK-LABEL: @test_tile_cvtrowd2ps
- //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
- //CHECK-DAG: call <16 x float> @llvm.x86.tcvtrowd2ps.internal
- return __tile_cvtrowd2ps(a, b);
-}
-
-__m512bh test_tile_cvtrowps2pbf16h(__tile1024i a, unsigned b) {
- //CHECK-LABEL: @test_tile_cvtrowps2pbf16h
- //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
- //CHECK-DAG: call <32 x bfloat> @llvm.x86.tcvtrowps2pbf16h.internal
- return __tile_cvtrowps2pbf16h(a, b);
-}
-
-__m512bh test_tile_cvtrowps2pbf16l(__tile1024i a, unsigned b) {
- //CHECK-LABEL: @test_tile_cvtrowps2pbf16l
- //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
- //CHECK-DAG: call <32 x bfloat> @llvm.x86.tcvtrowps2pbf16l.internal
- return __tile_cvtrowps2pbf16l(a, b);
-}
-
-__m512h test_tile_cvtrowps2phh(__tile1024i a, unsigned b) {
- //CHECK-LABEL: @test_tile_cvtrowps2phh
- //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
- //CHECK-DAG: call <32 x half> @llvm.x86.tcvtrowps2phh.internal
- return __tile_cvtrowps2phh(a, b);
-}
-
-__m512h test_tile_cvtrowps2phl(__tile1024i a, unsigned b) {
- //CHECK-LABEL: @test_tile_cvtrowps2phl
- //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
- //CHECK-DAG: call <32 x half> @llvm.x86.tcvtrowps2phl.internal
- return __tile_cvtrowps2phl(a, b);
-}
-
-__m512i test_tile_movrow(__tile1024i a, unsigned b) {
- //CHECK-LABEL: @test_tile_movrow
- //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
- //CHECK-DAG: call <16 x i32> @llvm.x86.tilemovrow.internal
- return __tile_movrow(a, b);
-}
diff --git a/clang/test/CodeGen/X86/amxavx512-builtins.c b/clang/test/CodeGen/X86/amxavx512-builtins.c
deleted file mode 100644
index 172b5ae8f53081..00000000000000
--- a/clang/test/CodeGen/X86/amxavx512-builtins.c
+++ /dev/null
@@ -1,41 +0,0 @@
-// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +amx-tile -target-feature +amx-avx512 \
-// RUN: -target-feature +avx10.2-512 -emit-llvm -o - -Wall -Werror -pedantic -Wno-gnu-statement-expression | FileCheck %s
-
-#include <immintrin.h>
-#include <stddef.h>
-
-__m512 test_tile_cvtrowd2ps(unsigned int A) {
- // CHECK-LABEL: @test_tile_cvtrowd2ps(
- // CHECK: call <16 x float> @llvm.x86.tcvtrowd2ps(i8 1, i32 %{{.*}})
- return _tile_cvtrowd2ps(1, A);
-}
-
-__m512bh test_tile_cvtrowps2pbf16h(unsigned int A) {
- // CHECK-LABEL: @test_tile_cvtrowps2pbf16h(
- // CHECK: call <32 x bfloat> @llvm.x86.tcvtrowps2pbf16h(i8 1, i32 %{{.*}})
- return _tile_cvtrowps2pbf16h(1, A);
-}
-
-__m512bh test_tile_cvtrowps2pbf16l(unsigned int A) {
- // CHECK-LABEL: @test_tile_cvtrowps2pbf16l(
- // CHECK: call <32 x bfloat> @llvm.x86.tcvtrowps2pbf16l(i8 1, i32 %{{.*}})
- return _tile_cvtrowps2pbf16l(1, A);
-}
-
-__m512h test_tile_cvtrowps2phh(unsigned int A) {
- // CHECK-LABEL: @test_tile_cvtrowps2phh(
- // CHECK: call <32 x half> @llvm.x86.tcvtrowps2phh(i8 1, i32 %{{.*}})
- return _tile_cvtrowps2phh(1, A);
-}
-
-__m512h test_tile_cvtrowps2phl(unsigned int A) {
- // CHECK-LABEL: @test_tile_cvtrowps2phl(
- // CHECK: call <32 x half> @llvm.x86.tcvtrowps2phl(i8 1, i32 %{{.*}})
- return _tile_cvtrowps2phl(1, A);
-}
-
-__m512i test_tile_movrow(unsigned int A) {
- // CHECK-LABEL: @test_tile_movrow
- // CHECK: %1 = call <16 x i32> @llvm.x86.tilemovrow(i8 1, i32 %{{.*}})
- return _tile_movrow(1, A);
-}
diff --git a/clang/test/CodeGen/attr-target-x86.c b/clang/test/CodeGen/attr-target-x86.c
index 2033a8b4c335f9..593ccffbcda095 100644
--- a/clang/test/CodeGen/attr-target-x86.c
+++ b/clang/test/CodeGen/attr-target-x86.c
@@ -59,10 +59,10 @@ void __attribute__((target("avx10.1-512"))) avx10_1_512(void) {}
// CHECK: #0 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87" "tune-cpu"="i686"
// CHECK: #1 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cmov,+crc32,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt"
// CHECK-NOT: tune-cpu
-// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-aes,-amx-avx512,-avx,-avx10.1-256,-avx10.1-512,-avx10.2-256,-avx10.2-512,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512fp16,-avx512ifma,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-gfni,-kl,-pclmul,-sha,-sha512,-sm3,-sm4,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-xop" "tune-cpu"="i686"
+// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-aes,-avx,-avx10.1-256,-avx10.1-512,-avx10.2-256,-avx10.2-512,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512fp16,-avx512ifma,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-gfni,-kl,-pclmul,-sha,-sha512,-sm3,-sm4,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-xop" "tune-cpu"="i686"
// CHECK: #3 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+crc32,+cx8,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" "tune-cpu"="i686"
-// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-amx-avx512,-avx,-avx10.1-256,-avx10.1-512,-avx10.2-256,-avx10.2-512,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512fp16,-avx512ifma,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-sha512,-sm3,-sm4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop" "tune-cpu"="i686"
-// CHECK: #5 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cmov,+crc32,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes,-amx-avx512,-avx10.1-256,-avx10.1-512,-avx10.2-256,-avx10.2-512,-vaes"
+// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-avx,-avx10.1-256,-avx10.1-512,-avx10.2-256,-avx10.2-512,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512fp16,-avx512ifma,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-sha512,-sm3,-sm4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop" "tune-cpu"="i686"
+// CHECK: #5 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cmov,+crc32,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes,-avx10.1-256,-avx10.1-512,-avx10.2-256,-avx10.2-512,-vaes"
// CHECK-NOT: tune-cpu
// CHECK: #6 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-mmx"
// CHECK: #7 = {{.*}}"target-cpu"="lakemont" "target-features"="+cx8,+mmx"
@@ -76,5 +76,5 @@ void __attribute__((target("avx10.1-512"))) avx10_1_512(void) {}
// CHECK: "target-cpu"="x86-64-v4"
// CHECK-SAME: "target-features"="+avx,+avx2,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512vl,+bmi,+bmi2,+cmov,+crc32,+cx16,+cx8,+evex512,+f16c,+fma,+fxsr,+lzcnt,+mmx,+movbe,+popcnt,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave"
-// CHECK: #12 = {{.*}}"target-cpu"="i686" "target-features"="+aes,+avx,+avx10.1-256,+avx2,+avx512bf16,+avx512bitalg,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512fp16,+avx512ifma,+avx512vbmi,+avx512vbmi2,+avx512vl,+avx512vnni,+avx512vpopcntdq,+cmov,+crc32,+cx8,+f16c,+fma,+mmx,+pclmul,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+vaes,+vpclmulqdq,+x87,+xsave,-amx-avx512,-avx10.1-512,-avx10.2-512,-evex512"
+// CHECK: #12 = {{.*}}"target-cpu"="i686" "target-features"="+aes,+avx,+avx10.1-256,+avx2,+avx512bf16,+avx512bitalg,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512fp16,+avx512ifma,+avx512vbmi,+avx512vbmi2,+avx512vl,+avx512vnni,+avx512vpopcntdq,+cmov,+crc32,+cx8,+f16c,+fma,+mmx,+pclmul,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+vaes,+vpclmulqdq,+x87,+xsave,-avx10.1-512,-avx10.2-512,-evex512"
// CHECK: #13 = {{.*}}"target-cpu"="i686" "target-features"="+aes,+avx,+avx10.1-256,+avx10.1-512,+avx2,+avx512bf16,+avx512bitalg,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512fp16,+avx512ifma,+avx512vbmi,+avx512vbmi2,+avx512vl,+avx512vnni,+avx512vpopcntdq,+cmov,+crc32,+cx8,+evex512,+f16c,+fma,+mmx,+pclmul,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+vaes,+vpclmulqdq,+x87,+xsave"
diff --git a/clang/test/Driver/x86-target-features.c b/clang/test/Driver/x86-target-features.c
index 822c997f71744f..e8c439ab48f21f 100644
--- a/clang/test/Driver/x86-target-features.c
+++ b/clang/test/Driver/x86-target-features.c
@@ -311,13 +311,6 @@
// AMX-TRANSPOSE: "-target-feature" "+amx-transpose"
// NO-AMX-TRANSPOSE: "-target-feature" "-amx-transpose"
-// RUN: %clang -target x86_64-unknown-linux-gnu -mamx-avx512 %s \
-// RUN: -### -o %t.o 2>&1 | FileCheck -check-prefix=AMX-AVX512 %s
-// RUN: %clang -target x86_64-unknown-linux-gnu -mno-amx-avx512 %s \
-// RUN: -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-AMX-AVX512 %s
-// AMX-AVX512: "-target-feature" "+amx-avx512"
-// NO-AMX-AVX512: "-target-feature" "-amx-avx512"
-
// RUN: %clang --target=i386 -march=i386 -mhreset %s -### 2>&1 | FileCheck -check-prefix=HRESET %s
// RUN: %clang --target=i386 -march=i386 -mno-hreset %s -### 2>&1 | FileCheck -check-prefix=NO-HRESET %s
// HRESET: "-target-feature" "+hreset"
diff --git a/clang/test/Preprocessor/x86_target_features.c b/clang/test/Preprocessor/x86_target_features.c
index 8e4ddb1526626e..c240b27c91a479 100644
--- a/clang/test/Preprocessor/x86_target_features.c
+++ b/clang/test/Preprocessor/x86_target_features.c
@@ -558,18 +558,6 @@
// NO-AMX-TRANSPOSE-NOT: #define __AMX_TRANSPOSE__ 1
-// RUN: %clang -target x86_64-unknown-linux-gnu -march=x86-64 -mamx-avx512 -x c \
-// RUN: -E -dM -o - %s | FileCheck -check-prefix=AMX-AVX512 %s
-
-// AMX-AVX512: #define __AMX_AVX512__ 1
-
-// RUN: %clang -target x86_64-unknown-linux-gnu -march=x86-64 -mno-amx-avx512 -x c \
-// RUN: -E -dM -o - %s | FileCheck -check-prefix=NO-AMX-AVX512 %s
-// RUN: %clang -target x86_64-unknown-linux-gnu -march=x86-64 -mamx-avx512 -mno-amx-tile \
-// RUN: -x c -E -dM -o - %s | FileCheck -check-prefix=NO-AMX-AVX512 %s
-
-// NO-AMX-AVX512-NOT: #define __AMX_AVX512__ 1
-
// RUN: %clang -target i386-unknown-unknown -march=atom -mavxvnni -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVXVNNI %s
// AVXVNNI: #define __AVX2__ 1
diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td
index 3003f9887e239c..c42397024e45a7 100644
--- a/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -5952,26 +5952,6 @@ let TargetPrefix = "x86" in {
Intrinsic<[], [llvm_i8_ty, llvm_i8_ty],
[ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>]>;
- // AMX-AVX512
- def int_x86_tcvtrowd2ps : ClangBuiltin<"__builtin_ia32_tcvtrowd2ps">,
- Intrinsic<[llvm_v16f32_ty], [llvm_i8_ty, llvm_i32_ty],
- [ImmArg<ArgIndex<0>>]>;
- def int_x86_tcvtrowps2pbf16h : ClangBuiltin<"__builtin_ia32_tcvtrowps2pbf16h">,
- Intrinsic<[llvm_v32bf16_ty], [llvm_i8_ty, llvm_i32_ty],
- [ImmArg<ArgIndex<0>>]>;
- def int_x86_tcvtrowps2pbf16l : ClangBuiltin<"__builtin_ia32_tcvtrowps2pbf16l">,
- Intrinsic<[llvm_v32bf16_ty], [llvm_i8_ty, llvm_i32_ty],
- [ImmArg<ArgIndex<0>>]>;
- def int_x86_tcvtrowps2phh : ClangBuiltin<"__builtin_ia32_tcvtrowps2phh">,
- Intrinsic<[llvm_v32f16_ty], [llvm_i8_ty, llvm_i32_ty],
- [ImmArg<ArgIndex<0>>]>;
- def int_x86_tcvtrowps2phl : ClangBuiltin<"__builtin_ia32_tcvtrowps2phl">,
- Intrinsic<[llvm_v32f16_ty], [llvm_i8_ty, llvm_i32_ty],
- [ImmArg<ArgIndex<0>>]>;
- def int_x86_tilemovrow : ClangBuiltin<"__builtin_ia32_tilemovrow">,
- Intrinsic<[llvm_v16i32_ty], [llvm_i8_ty, llvm_i32_ty],
- [ImmArg<ArgIndex<0>>]>;
-
// AMX - internal intrinsics
def int_x86_ldtilecfg_internal :
ClangBuiltin<"__builtin_ia32_tile_loadconfig_internal">,
@@ -6070,37 +6050,6 @@ let TargetPrefix = "x86" in {
ClangBuiltin<"__builtin_ia32_ttransposed_internal">,
Intrinsic<[llvm_x86amx_ty],
[llvm_i16_ty, llvm_i16_ty, llvm_x86amx_ty], []>;
-
- def int_x86_tcvtrowd2ps_internal :
- ClangBuiltin<"__builtin_ia32_tcvtrowd2ps_internal">,
- Intrinsic<[llvm_v16f32_ty],
- [llvm_i16_ty, llvm_i16_ty, llvm_x86amx_ty, llvm_i32_ty],
- []>;
- def int_x86_tcvtrowps2pbf16h_internal :
- ClangBuiltin<"__builtin_ia32_tcvtrowps2pbf16h_internal">,
- Intrinsic<[llvm_v32bf16_ty],
- [llvm_i16_ty, llvm_i16_ty, llvm_x86amx_ty, llvm_i32_ty],
- []>;
- def int_x86_tcvtrowps2pbf16l_internal :
- ClangBuiltin<"__builtin_ia32_tcvtrowps2pbf16l_internal">,
- Intrinsic<[llvm_v32bf16_ty],
- [llvm_i16_ty, llvm_i16_ty, llvm_x86amx_ty, llvm_i32_ty],
- []>;
- def int_x86_tcvtrowps2phh_internal :
- ClangBuiltin<"__builtin_ia32_tcvtrowps2phh_internal">,
- Intrinsic<[llvm_v32f16_ty],
- [llvm_i16_ty, llvm_i16_ty, llvm_x86amx_ty, llvm_i32_ty],
- []>;
- def int_x86_tcvtrowps2phl_internal :
- ClangBuiltin<"__builtin_ia32_tcvtrowps2phl_internal">,
- Intrinsic<[llvm_v32f16_ty],
- [llvm_i16_ty, llvm_i16_ty, llvm_x86amx_ty, llvm_i32_ty],
- []>;
- def int_x86_tilemovrow_internal :
- ClangBuiltin<"__builtin_ia32_tilemovrow_internal">,
- Intrinsic<[llvm_v16i32_ty],
- [llvm_i16_ty, llvm_i16_ty, llvm_x86amx_ty, llvm_i32_ty],
- []>;
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/include/llvm/TargetParser/X86TargetParser.def b/llvm/include/llvm/TargetParser/X86TargetParser.def
index 815556e374bef5..a62b4df420ec6a 100644
--- a/llvm/include/llvm/TargetParser/X86TargetParser.def
+++ b/llvm/include/llvm/TargetParser/X86TargetParser.def
@@ -266,7 +266,6 @@ X86_FEATURE (MOVRS, "movrs")
X86_FEATURE (ZU, "zu")
X86_FEATURE (AMX_FP8, "amx-fp8")
X86_FEATURE (AMX_TRANSPOSE, "amx-transpose")
-X86_FEATURE (AMX_AVX512, "amx-avx512")
// These features aren't really CPU features, but the frontend can set them.
X86_FEATURE (RETPOLINE_EXTERNAL_THUNK, "retpoline-external-thunk")
X86_FEATURE (RETPOLINE_INDIRECT_BRANCHES, "retpoline-indirect-branches")
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index 59780ba5b99fcf..160e7c0fc0310a 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -276,10 +276,6 @@ def FeatureAMXFP8 : SubtargetFeature<"amx-fp8", "HasAMXFP8", "true",
def FeatureAMXTRANSPOSE : SubtargetFeature<"amx-transpose", "HasAMXTRANSPOSE", "true",
"Support AMX amx-transpose instructions",
[FeatureAMXTILE]>;
-def FeatureAMXAVX512 : SubtargetFeature<"amx-avx512",
- "HasAMXAVX512", "true",
- "Support AMX-AVX512 instructions",
- [FeatureAMXTILE]>;
def FeatureCMPCCXADD : SubtargetFeature<"cmpccxadd", "HasCMPCCXADD", "true",
"Support CMPCCXADD instructions">;
def FeatureRAOINT : SubtargetFeature<"raoint", "HasRAOINT", "true",
diff --git a/llvm/lib/Target/X86/X86ExpandPseudo.cpp b/llvm/lib/Target/X86/X86ExpandPseudo.cpp
index 9511a82f0e97d2..f832955d1202fa 100644
--- a/llvm/lib/Target/X86/X86ExpandPseudo.cpp
+++ b/llvm/lib/Target/X86/X86ExpandPseudo.cpp
@@ -559,68 +559,12 @@ bool X86ExpandPseudo::expandMI(MachineBasicBlock &MBB,
return true;
}
case X86::PTILELOADDV:
- case X86::PTILELOADDT1V:
- case X86::PTCVTROWD2PSrreV:
- case X86::PTCVTROWD2PSrriV:
- case X86::PTCVTROWPS2PBF16HrreV:
- case X86::PTCVTROWPS2PBF16HrriV:
- case X86::PTCVTROWPS2PBF16LrreV:
- case X86::PTCVTROWPS2PBF16LrriV:
- case X86::PTCVTROWPS2PHHrreV:
- case X86::PTCVTROWPS2PHHrriV:
- case X86::PTCVTROWPS2PHLrreV:
- case X86::PTCVTROWPS2PHLrriV:
- case X86::PTILEMOVROWrreV:
- case X86::PTILEMOVROWrriV: {
+ case X86::PTILELOADDT1V: {
for (unsigned i = 2; i > 0; --i)
MI.removeOperand(i);
- unsigned Opc;
- switch (Opcode) {
- case X86::PTILELOADDV:
- Opc = GET_EGPR_IF_ENABLED(X86::TILELOADD);
- break;
- case X86::PTILELOADDT1V:
- Opc = GET_EGPR_IF_ENABLED(X86::TILELOADDT1);
- break;
- case X86::PTCVTROWD2PSrreV:
- Opc = X86::TCVTROWD2PSrre;
- break;
- case X86::PTCVTROWD2PSrriV:
- Opc = X86::TCVTROWD2PSrri;
- break;
- case X86::PTCVTROWPS2PBF16HrreV:
- Opc = X86::TCVTROWPS2PBF16Hrre;
- break;
- case X86::PTCVTROWPS2PBF16HrriV:
- Opc = X86::TCVTROWPS2PBF16Hrri;
- break;
- case X86::PTCVTROWPS2PBF16LrreV:
- Opc = X86::TCVTROWPS2PBF16Lrre;
- break;
- case X86::PTCVTROWPS2PBF16LrriV:
- Opc = X86::TCVTROWPS2PBF16Lrri;
- break;
- case X86::PTCVTROWPS2PHHrreV:
- Opc = X86::TCVTROWPS2PHHrre;
- break;
- case X86::PTCVTROWPS2PHHrriV:
- Opc = X86::TCVTROWPS2PHHrri;
- break;
- case X86::PTCVTROWPS2PHLrreV:
- Opc = X86::TCVTROWPS2PHLrre;
- break;
- case X86::PTCVTROWPS2PHLrriV:
- Opc = X86::TCVTROWPS2PHLrri;
- break;
- case X86::PTILEMOVROWrreV:
- Opc = X86::TILEMOVROWrre;
- break;
- case X86::PTILEMOVROWrriV:
- Opc = X86::TILEMOVROWrri;
- break;
- default:
- llvm_unreachable("Unexpected Opcode");
- }
+ unsigned Opc = Opcode == X86::PTILELOADDV
+ ? GET_EGPR_IF_ENABLED(X86::TILELOADD)
+ : GET_EGPR_IF_ENABLED(X86::TILELOADDT1);
MI.setDesc(TII->get(Opc));
return true;
}
@@ -770,8 +714,7 @@ bool X86ExpandPseudo::expandMI(MachineBasicBlock &MBB,
case X86::PTDPBUUDV: Opc = X86::TDPBUUD; break;
case X86::PTDPBF16PSV: Opc = X86::TDPBF16PS; break;
case X86::PTDPFP16PSV: Opc = X86::TDPFP16PS; break;
- default:
- llvm_unreachable("Unexpected Opcode");
+ default: llvm_unreachable("Impossible Opcode!");
}
MI.setDesc(TII->get(Opc));
MI.tieOperands(0, 1);
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 19a85a6d7ec6ce..91e48f1e77db12 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -37613,82 +37613,6 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MI.eraseFromParent(); // The pseudo is gone now.
return BB;
}
- case X86::PTCVTROWPS2PBF16Hrri:
- case X86::PTCVTROWPS2PBF16Lrri:
- case X86::PTCVTROWPS2PHHrri:
- case X86::PTCVTROWPS2PHLrri:
- case X86::PTCVTROWD2PSrri:
- case X86::PTILEMOVROWrri: {
- const DebugLoc &DL = MI.getDebugLoc();
- unsigned Opc;
- switch (MI.getOpcode()) {
- default:
- llvm_unreachable("Unexpected instruction!");
- case X86::PTCVTROWD2PSrri:
- Opc = X86::TCVTROWD2PSrri;
- break;
- case X86::PTCVTROWPS2PBF16Hrri:
- Opc = X86::TCVTROWPS2PBF16Hrri;
- break;
- case X86::PTCVTROWPS2PHHrri:
- Opc = X86::TCVTROWPS2PHHrri;
- break;
- case X86::PTCVTROWPS2PBF16Lrri:
- Opc = X86::TCVTROWPS2PBF16Lrri;
- break;
- case X86::PTCVTROWPS2PHLrri:
- Opc = X86::TCVTROWPS2PHLrri;
- break;
- case X86::PTILEMOVROWrri:
- Opc = X86::TILEMOVROWrri;
- break;
- }
- MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, TII->get(Opc));
- MIB.add(MI.getOperand(0));
- MIB.addReg(TMMImmToTMMReg(MI.getOperand(1).getImm()), RegState::Undef);
- MIB.addImm(MI.getOperand(2).getImm());
-
- MI.eraseFromParent(); // The pseudo is gone now.
- return BB;
- }
- case X86::PTCVTROWPS2PBF16Hrre:
- case X86::PTCVTROWPS2PBF16Lrre:
- case X86::PTCVTROWPS2PHHrre:
- case X86::PTCVTROWPS2PHLrre:
- case X86::PTCVTROWD2PSrre:
- case X86::PTILEMOVROWrre: {
- const DebugLoc &DL = MI.getDebugLoc();
- unsigned Opc;
- switch (MI.getOpcode()) {
- default:
- llvm_unreachable("Unexpected instruction!");
- case X86::PTCVTROWD2PSrre:
- Opc = X86::TCVTROWD2PSrre;
- break;
- case X86::PTCVTROWPS2PBF16Hrre:
- Opc = X86::TCVTROWPS2PBF16Hrre;
- break;
- case X86::PTCVTROWPS2PBF16Lrre:
- Opc = X86::TCVTROWPS2PBF16Lrre;
- break;
- case X86::PTCVTROWPS2PHHrre:
- Opc = X86::TCVTROWPS2PHHrre;
- break;
- case X86::PTCVTROWPS2PHLrre:
- Opc = X86::TCVTROWPS2PHLrre;
- break;
- case X86::PTILEMOVROWrre:
- Opc = X86::TILEMOVROWrre;
- break;
- }
- MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, TII->get(Opc));
- MIB.add(MI.getOperand(0));
- MIB.addReg(TMMImmToTMMReg(MI.getOperand(1).getImm()), RegState::Undef);
- MIB.add(MI.getOperand(2));
-
- MI.eraseFromParent(); // The pseudo is gone now.
- return BB;
- }
}
}
diff --git a/llvm/lib/Target/X86/X86InstrAMX.td b/llvm/lib/Target/X86/X86InstrAMX.td
index b954c977f8c6c9..947a8bec2890ef 100644
--- a/llvm/lib/Target/X86/X86InstrAMX.td
+++ b/llvm/lib/Target/X86/X86InstrAMX.td
@@ -369,150 +369,3 @@ let Predicates = [HasAMXTRANSPOSE, In64BitMode] in {
}
}
} // HasAMXTILE, HasAMXTRANSPOSE
-
-multiclass m_tcvtrowd2ps {
- let Predicates = [HasAMXAVX512, HasAVX10_2_512, In64BitMode] in {
- let SchedRW = [WriteSystem] in {
- def rri : Ii8<0x7, MRMSrcReg, (outs VR512:$dst),
- (ins TILE:$src1, i32u8imm:$src2),
- "tcvtrowd2ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- []>, TA,XS, EVEX, EVEX_V512;
- def rre : I<0x4A, MRMSrcReg4VOp3, (outs VR512:$dst),
- (ins TILE:$src1, GR32:$src2),
- "tcvtrowd2ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- []>, T8,XS, EVEX, VVVV, EVEX_V512;
- }
- } // HasAMXAVX512, HasAVX10_2_512, In64BitMode
-}
-
-defm TCVTROWD2PS : m_tcvtrowd2ps;
-
-let Predicates = [HasAMXAVX512, HasAVX10_2_512, In64BitMode] in {
- let SchedRW = [WriteSystem] in {
- let usesCustomInserter = 1 in {
- def PTCVTROWD2PSrri : PseudoI<(outs VR512:$dst), (ins u8imm:$src1, i32u8imm:$src2),
- [(set VR512:$dst, (int_x86_tcvtrowd2ps timm:$src1, imm:$src2))]>;
- def PTCVTROWD2PSrre : PseudoI<(outs VR512:$dst), (ins u8imm:$src1, GR32:$src2),
- [(set VR512:$dst, (int_x86_tcvtrowd2ps timm:$src1, GR32:$src2))]>;
- }
-
- def PTCVTROWD2PSrriV : PseudoI<(outs VR512:$dst),
- (ins GR16:$src1, GR16:$src2, TILE:$src3, i32u8imm:$src4),
- [(set VR512: $dst,
- (int_x86_tcvtrowd2ps_internal GR16:$src1, GR16:$src2,
- TILE:$src3, imm:$src4))]>;
- def PTCVTROWD2PSrreV : PseudoI<(outs VR512:$dst),
- (ins GR16:$src1, GR16:$src2, TILE:$src3, GR32:$src4),
- [(set VR512: $dst,
- (int_x86_tcvtrowd2ps_internal GR16:$src1, GR16:$src2,
- TILE:$src3, GR32:$src4))]>;
- def PTCVTROWPS2PBF16HrriV : PseudoI<(outs VR512:$dst),
- (ins GR16:$src1, GR16:$src2, TILE:$src3, i32u8imm:$src4),
- [(set VR512: $dst,
- (int_x86_tcvtrowps2pbf16h_internal GR16:$src1, GR16:$src2,
- TILE:$src3, imm:$src4))]>;
- def PTCVTROWPS2PBF16HrreV : PseudoI<(outs VR512:$dst),
- (ins GR16:$src1, GR16:$src2, TILE:$src3, GR32:$src4),
- [(set VR512: $dst,
- (int_x86_tcvtrowps2pbf16h_internal GR16:$src1, GR16:$src2,
- TILE:$src3, GR32:$src4))]>;
- def PTCVTROWPS2PBF16LrriV : PseudoI<(outs VR512:$dst),
- (ins GR16:$src1, GR16:$src2, TILE:$src3, i32u8imm:$src4),
- [(set VR512: $dst,
- (int_x86_tcvtrowps2pbf16l_internal GR16:$src1, GR16:$src2,
- TILE:$src3, imm:$src4))]>;
- def PTCVTROWPS2PBF16LrreV : PseudoI<(outs VR512:$dst),
- (ins GR16:$src1, GR16:$src2, TILE:$src3, GR32:$src4),
- [(set VR512: $dst,
- (int_x86_tcvtrowps2pbf16l_internal GR16:$src1, GR16:$src2,
- TILE:$src3, GR32:$src4))]>;
- def PTCVTROWPS2PHHrriV : PseudoI<(outs VR512:$dst),
- (ins GR16:$src1, GR16:$src2, TILE:$src3, i32u8imm:$src4),
- [(set VR512: $dst,
- (int_x86_tcvtrowps2phh_internal GR16:$src1, GR16:$src2,
- TILE:$src3, imm:$src4))]>;
- def PTCVTROWPS2PHHrreV : PseudoI<(outs VR512:$dst),
- (ins GR16:$src1, GR16:$src2, TILE:$src3, GR32:$src4),
- [(set VR512: $dst,
- (int_x86_tcvtrowps2phh_internal GR16:$src1, GR16:$src2,
- TILE:$src3, GR32:$src4))]>;
- def PTCVTROWPS2PHLrriV : PseudoI<(outs VR512:$dst),
- (ins GR16:$src1, GR16:$src2, TILE:$src3, i32u8imm:$src4),
- [(set VR512: $dst,
- (int_x86_tcvtrowps2phl_internal GR16:$src1, GR16:$src2,
- TILE:$src3, imm:$src4))]>;
- def PTCVTROWPS2PHLrreV : PseudoI<(outs VR512:$dst),
- (ins GR16:$src1, GR16:$src2, TILE:$src3, GR32:$src4),
- [(set VR512: $dst,
- (int_x86_tcvtrowps2phl_internal GR16:$src1, GR16:$src2,
- TILE:$src3, GR32:$src4))]>;
- }
-}
-
-multiclass AMXAVX512_BASE<bits<8> Opcode1, bits<8> Opcode2, string Opstr,
- Prefix P1, Prefix P2> {
- let Predicates = [HasAMXAVX512, HasAVX10_2_512, In64BitMode], SchedRW = [WriteSystem] in {
- let OpPrefix = P1 in
- def rre : I<Opcode1, MRMSrcReg4VOp3, (outs VR512:$dst),
- (ins TILE:$src1, GR32:$src2),
- !strconcat(Opstr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, EVEX, VVVV, EVEX_V512, T8;
- let OpPrefix = P2 in
- def rri : Ii8<Opcode2, MRMSrcReg, (outs VR512:$dst),
- (ins TILE:$src1, i32u8imm:$src2),
- !strconcat(Opstr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, EVEX, EVEX_V512, TA;
- let usesCustomInserter = 1 in {
- def "P"#NAME#"rre" : PseudoI<(outs VR512:$dst), (ins u8imm:$src1, GR32:$src2),
- [(set VR512:$dst,
- (!cast<Intrinsic>("int_x86_"#Opstr) timm:$src1, GR32:$src2))]>;
- def "P"#NAME#"rri" : PseudoI<(outs VR512:$dst), (ins u8imm:$src1, i32u8imm:$src2),
- [(set VR512:$dst,
- (!cast<Intrinsic>("int_x86_"#Opstr) timm:$src1, imm:$src2))]>;
- }
- }
-}
-
-defm TCVTROWPS2PHH : AMXAVX512_BASE<0x6d, 0x07, "tcvtrowps2phh", PS, PS>;
-defm TCVTROWPS2PHL : AMXAVX512_BASE<0x6d, 0x77, "tcvtrowps2phl", PD, XD>;
-defm TCVTROWPS2PBF16H : AMXAVX512_BASE<0x6d, 0x07, "tcvtrowps2pbf16h", XD, XD>;
-defm TCVTROWPS2PBF16L : AMXAVX512_BASE<0x6d, 0x77, "tcvtrowps2pbf16l", XS, XS>;
-
-multiclass m_tilemovrow {
- let Predicates = [HasAMXAVX512, HasAVX10_2_512, In64BitMode] in {
- let SchedRW = [WriteSystem] in {
- def rri : Ii8<0x7, MRMSrcReg, (outs VR512:$dst),
- (ins TILE:$src1, u8imm:$src2),
- "tilemovrow\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- []>, TA,PD, EVEX, EVEX_V512;
- def rre : I<0x4A, MRMSrcReg4VOp3, (outs VR512:$dst),
- (ins TILE:$src1, GR32:$src2),
- "tilemovrow\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- []>, T8,PD, EVEX, VVVV, EVEX_V512;
- }
- } // HasAMXAVX512, HasAVX10_2_512, In64BitMode
-}
-
-defm TILEMOVROW : m_tilemovrow;
-
-let Predicates = [HasAMXAVX512, HasAVX10_2_512, In64BitMode] in {
- let SchedRW = [WriteSystem] in {
- let usesCustomInserter = 1 in {
- def PTILEMOVROWrri : PseudoI<(outs VR512:$dst), (ins u8imm:$src1, i32u8imm:$src2),
- [(set VR512:$dst, (int_x86_tilemovrow timm:$src1, imm:$src2))]>;
- def PTILEMOVROWrre : PseudoI<(outs VR512:$dst), (ins u8imm:$src1, GR32:$src2),
- [(set VR512:$dst, (int_x86_tilemovrow timm:$src1, GR32:$src2))]>;
- }
-
- def PTILEMOVROWrriV : PseudoI<(outs VR512:$dst),
- (ins GR16:$src1, GR16:$src2, TILE:$src3, i32u8imm:$src4),
- [(set VR512: $dst,
- (int_x86_tilemovrow_internal GR16:$src1, GR16:$src2,
- TILE:$src3, imm:$src4))]>;
- def PTILEMOVROWrreV : PseudoI<(outs VR512:$dst),
- (ins GR16:$src1, GR16:$src2, TILE:$src3, GR32:$src4),
- [(set VR512: $dst,
- (int_x86_tilemovrow_internal GR16:$src1, GR16:$src2,
- TILE:$src3, GR32:$src4))]>;
- }
-}
diff --git a/llvm/lib/Target/X86/X86InstrPredicates.td b/llvm/lib/Target/X86/X86InstrPredicates.td
index 2eb4e4fb941b29..d22e7dadaaa262 100644
--- a/llvm/lib/Target/X86/X86InstrPredicates.td
+++ b/llvm/lib/Target/X86/X86InstrPredicates.td
@@ -185,7 +185,6 @@ def HasAMXINT8 : Predicate<"Subtarget->hasAMXINT8()">;
def HasAMXCOMPLEX : Predicate<"Subtarget->hasAMXCOMPLEX()">;
def HasAMXFP8 : Predicate<"Subtarget->hasAMXFP8()">;
def HasAMXTRANSPOSE : Predicate<"Subtarget->hasAMXTRANSPOSE()">;
-def HasAMXAVX512 : Predicate<"Subtarget->hasAMXAVX512()">;
def HasUINTR : Predicate<"Subtarget->hasUINTR()">;
def HasUSERMSR : Predicate<"Subtarget->hasUSERMSR()">;
def HasCRC32 : Predicate<"Subtarget->hasCRC32()">;
diff --git a/llvm/lib/Target/X86/X86LowerAMXType.cpp b/llvm/lib/Target/X86/X86LowerAMXType.cpp
index af6fb04295bdec..688e886cf3b13a 100644
--- a/llvm/lib/Target/X86/X86LowerAMXType.cpp
+++ b/llvm/lib/Target/X86/X86LowerAMXType.cpp
@@ -266,17 +266,6 @@ std::pair<Value *, Value *> ShapeCalculator::getShape(IntrinsicInst *II,
Col = getColFromRow(II, II->getArgOperand(0), 4);
break;
}
- case Intrinsic::x86_tcvtrowd2ps_internal:
- case Intrinsic::x86_tcvtrowps2pbf16h_internal:
- case Intrinsic::x86_tcvtrowps2pbf16l_internal:
- case Intrinsic::x86_tcvtrowps2phh_internal:
- case Intrinsic::x86_tcvtrowps2phl_internal:
- case Intrinsic::x86_tilemovrow_internal: {
- assert(OpNo == 2 && "Illegal Operand Number.");
- Row = II->getArgOperand(0);
- Col = II->getArgOperand(1);
- break;
- }
}
return std::make_pair(Row, Col);
diff --git a/llvm/lib/Target/X86/X86PreTileConfig.cpp b/llvm/lib/Target/X86/X86PreTileConfig.cpp
index d232a1d706549f..d20bfdcdb7f9c1 100644
--- a/llvm/lib/Target/X86/X86PreTileConfig.cpp
+++ b/llvm/lib/Target/X86/X86PreTileConfig.cpp
@@ -118,22 +118,10 @@ class X86PreTileConfig : public MachineFunctionPass {
bool isAMXInstruction(MachineInstr &MI) {
if (MI.isPHI() || MI.isDebugInstr() || MI.getNumOperands() < 3)
return false;
- switch (MI.getOpcode()) {
- case X86::PTILESTOREDV:
- case X86::PTCVTROWD2PSrreV:
- case X86::PTCVTROWD2PSrriV:
- case X86::PTCVTROWPS2PBF16HrreV:
- case X86::PTCVTROWPS2PBF16HrriV:
- case X86::PTCVTROWPS2PBF16LrreV:
- case X86::PTCVTROWPS2PBF16LrriV:
- case X86::PTCVTROWPS2PHHrreV:
- case X86::PTCVTROWPS2PHHrriV:
- case X86::PTCVTROWPS2PHLrreV:
- case X86::PTCVTROWPS2PHLrriV:
- case X86::PTILEMOVROWrreV:
- case X86::PTILEMOVROWrriV:
+
+ // PTILESTOREDV is the only exception that doesn't def a AMX register.
+ if (MI.getOpcode() == X86::PTILESTOREDV)
return true;
- }
// We can simply check if it is AMX instruction by its def.
// But we should exclude old API which uses physical registers.
diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp
index a973aaaa4806e6..93911bc51a207d 100644
--- a/llvm/lib/TargetParser/Host.cpp
+++ b/llvm/lib/TargetParser/Host.cpp
@@ -1880,7 +1880,6 @@ const StringMap<bool> sys::getHostCPUFeatures() {
!getX86CpuIDAndInfoEx(0x1e, 0x1, &EAX, &EBX, &ECX, &EDX);
Features["amx-fp8"] = HasLeaf1E && ((EAX >> 4) & 1) && HasAMXSave;
Features["amx-transpose"] = HasLeaf1E && ((EAX >> 5) & 1) && HasAMXSave;
- Features["amx-avx512"] = HasLeaf1E && ((EAX >> 7) & 1) && HasAMXSave;
bool HasLeaf24 =
MaxLevel >= 0x24 && !getX86CpuIDAndInfo(0x24, &EAX, &EBX, &ECX, &EDX);
diff --git a/llvm/lib/TargetParser/X86TargetParser.cpp b/llvm/lib/TargetParser/X86TargetParser.cpp
index eb55e6fc9134c8..691809b6d4b5ad 100644
--- a/llvm/lib/TargetParser/X86TargetParser.cpp
+++ b/llvm/lib/TargetParser/X86TargetParser.cpp
@@ -600,8 +600,6 @@ constexpr FeatureBitset ImpliedFeaturesAMX_INT8 = FeatureAMX_TILE;
constexpr FeatureBitset ImpliedFeaturesAMX_COMPLEX = FeatureAMX_TILE;
constexpr FeatureBitset ImpliedFeaturesAMX_FP8 = FeatureAMX_TILE;
constexpr FeatureBitset ImpliedFeaturesAMX_TRANSPOSE = FeatureAMX_TILE;
-constexpr FeatureBitset ImpliedFeaturesAMX_AVX512 =
- FeatureAMX_TILE | FeatureAVX10_2_512;
constexpr FeatureBitset ImpliedFeaturesHRESET = {};
constexpr FeatureBitset ImpliedFeaturesPREFETCHI = {};
diff --git a/llvm/test/CodeGen/X86/amx-across-func-tilemovrow.ll b/llvm/test/CodeGen/X86/amx-across-func-tilemovrow.ll
deleted file mode 100644
index 71f8f231747fe7..00000000000000
--- a/llvm/test/CodeGen/X86/amx-across-func-tilemovrow.ll
+++ /dev/null
@@ -1,171 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-int8 -mattr=+avx10.2-512 -mattr=+amx-avx512 -verify-machineinstrs | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-int8 -mattr=+avx10.2-512 -mattr=+amx-avx512 -verify-machineinstrs -enable-ipra | FileCheck -check-prefix=IPRA %s
-; RUN: llc < %s -O0 -mtriple=x86_64-unknown-unknown -mattr=+amx-int8 -mattr=+avx10.2-512 -mattr=+amx-avx512 -verify-machineinstrs | FileCheck -check-prefix=O0 %s
-
-@buf = dso_local global [3072 x i8] zeroinitializer, align 64
-
-define internal void @foo() {
-; CHECK-LABEL: foo:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: retq
-;
-; IPRA-LABEL: foo:
-; IPRA: # %bb.0: # %entry
-; IPRA-NEXT: retq
-;
-; O0-LABEL: foo:
-; O0: # %bb.0: # %entry
-; O0-NEXT: retq
-entry:
- ret void
-}
-
-define dso_local <16 x i32> @test_api(i16 signext %0, i16 signext %1) nounwind {
-; CHECK-LABEL: test_api:
-; CHECK: # %bb.0:
-; CHECK-NEXT: pushq %rbp
-; CHECK-NEXT: pushq %r14
-; CHECK-NEXT: pushq %rbx
-; CHECK-NEXT: subq $2112, %rsp # imm = 0x840
-; CHECK-NEXT: movl %esi, %ebx
-; CHECK-NEXT: movl %edi, %ebp
-; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; CHECK-NEXT: vmovups %zmm0, (%rsp)
-; CHECK-NEXT: movb $1, (%rsp)
-; CHECK-NEXT: movw $8, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movb $8, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movw %bx, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movb %bpl, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: ldtilecfg (%rsp)
-; CHECK-NEXT: movl $buf, %eax
-; CHECK-NEXT: movl $32, %ecx
-; CHECK-NEXT: movw $8, %r14w
-; CHECK-NEXT: tileloadd (%rax,%rcx), %tmm0
-; CHECK-NEXT: movabsq $64, %rax
-; CHECK-NEXT: tilestored %tmm0, 1088(%rsp,%rax) # 1024-byte Folded Spill
-; CHECK-NEXT: movl $buf+1024, %eax
-; CHECK-NEXT: tileloadd (%rax,%rcx), %tmm1
-; CHECK-NEXT: movabsq $64, %rax
-; CHECK-NEXT: tilestored %tmm1, 64(%rsp,%rax) # 1024-byte Folded Spill
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: callq foo
-; CHECK-NEXT: ldtilecfg (%rsp)
-; CHECK-NEXT: movabsq $64, %rax
-; CHECK-NEXT: tileloadd 64(%rsp,%rax), %tmm1 # 1024-byte Folded Reload
-; CHECK-NEXT: tilemovrow $2, %tmm1, %zmm0
-; CHECK-NEXT: tileloadd 1088(%rsp,%rax), %tmm0 # 1024-byte Folded Reload
-; CHECK-NEXT: tilemovrow $2, %tmm0, %zmm1
-; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0
-; CHECK-NEXT: addq $2112, %rsp # imm = 0x840
-; CHECK-NEXT: popq %rbx
-; CHECK-NEXT: popq %r14
-; CHECK-NEXT: popq %rbp
-; CHECK-NEXT: tilerelease
-; CHECK-NEXT: retq
-;
-; IPRA-LABEL: test_api:
-; IPRA: # %bb.0:
-; IPRA-NEXT: subq $72, %rsp
-; IPRA-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; IPRA-NEXT: vmovups %zmm0, {{[0-9]+}}(%rsp)
-; IPRA-NEXT: movb $1, {{[0-9]+}}(%rsp)
-; IPRA-NEXT: movw $8, {{[0-9]+}}(%rsp)
-; IPRA-NEXT: movb $8, {{[0-9]+}}(%rsp)
-; IPRA-NEXT: movw %si, {{[0-9]+}}(%rsp)
-; IPRA-NEXT: movb %dil, {{[0-9]+}}(%rsp)
-; IPRA-NEXT: ldtilecfg {{[0-9]+}}(%rsp)
-; IPRA-NEXT: movl $buf, %eax
-; IPRA-NEXT: movl $32, %ecx
-; IPRA-NEXT: movw $8, %dx
-; IPRA-NEXT: tileloadd (%rax,%rcx), %tmm0
-; IPRA-NEXT: movl $buf+1024, %eax
-; IPRA-NEXT: tileloadd (%rax,%rcx), %tmm1
-; IPRA-NEXT: callq foo
-; IPRA-NEXT: tilemovrow $2, %tmm1, %zmm0
-; IPRA-NEXT: tilemovrow $2, %tmm0, %zmm1
-; IPRA-NEXT: vpaddd %zmm1, %zmm0, %zmm0
-; IPRA-NEXT: addq $72, %rsp
-; IPRA-NEXT: tilerelease
-; IPRA-NEXT: retq
-;
-; O0-LABEL: test_api:
-; O0: # %bb.0:
-; O0-NEXT: pushq %rbp
-; O0-NEXT: movq %rsp, %rbp
-; O0-NEXT: andq $-1024, %rsp # imm = 0xFC00
-; O0-NEXT: subq $4096, %rsp # imm = 0x1000
-; O0-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; O0-NEXT: vmovups %zmm0, {{[0-9]+}}(%rsp)
-; O0-NEXT: movb $1, {{[0-9]+}}(%rsp)
-; O0-NEXT: movw %si, %cx
-; O0-NEXT: movw %cx, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
-; O0-NEXT: movw %di, %ax
-; O0-NEXT: movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
-; O0-NEXT: movl $buf, %esi
-; O0-NEXT: movl $32, %edi
-; O0-NEXT: movw $8, %dx
-; O0-NEXT: # implicit-def: $al
-; O0-NEXT: movb %al, {{[0-9]+}}(%rsp)
-; O0-NEXT: movw %dx, {{[0-9]+}}(%rsp)
-; O0-NEXT: ldtilecfg {{[0-9]+}}(%rsp)
-; O0-NEXT: tileloadd (%rsi,%rdi), %tmm0
-; O0-NEXT: movl $64, %edi
-; O0-NEXT: leaq {{[0-9]+}}(%rsp), %rsi
-; O0-NEXT: movw $8, %dx
-; O0-NEXT: tilestored %tmm0, (%rsi,%rdi)
-; O0-NEXT: movl $32, %esi
-; O0-NEXT: movl $buf+1024, %edx
-; O0-NEXT: movw $8, %ax
-; O0-NEXT: # implicit-def: $al
-; O0-NEXT: movb %al, {{[0-9]+}}(%rsp)
-; O0-NEXT: movw %cx, {{[0-9]+}}(%rsp)
-; O0-NEXT: ldtilecfg {{[0-9]+}}(%rsp)
-; O0-NEXT: tileloadd (%rdx,%rsi), %tmm0
-; O0-NEXT: movl $64, %esi
-; O0-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
-; O0-NEXT: movw $8, %ax
-; O0-NEXT: tilestored %tmm0, (%rdx,%rsi)
-; O0-NEXT: vzeroupper
-; O0-NEXT: callq foo
-; O0-NEXT: movw {{[-0-9]+}}(%r{{[sb]}}p), %dx # 2-byte Reload
-; O0-NEXT: movw {{[-0-9]+}}(%r{{[sb]}}p), %ax # 2-byte Reload
-; O0-NEXT: movl $64, %edi
-; O0-NEXT: leaq {{[0-9]+}}(%rsp), %rsi
-; O0-NEXT: movw $8, %cx
-; O0-NEXT: # implicit-def: $cl
-; O0-NEXT: movb %cl, {{[0-9]+}}(%rsp)
-; O0-NEXT: movw %dx, {{[0-9]+}}(%rsp)
-; O0-NEXT: ldtilecfg {{[0-9]+}}(%rsp)
-; O0-NEXT: tileloadd (%rsi,%rdi), %tmm0
-; O0-NEXT: movw $8, %cx
-; O0-NEXT: tilemovrow $2, %tmm0, %zmm0
-; O0-NEXT: movl $64, %esi
-; O0-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
-; O0-NEXT: movw $8, %cx
-; O0-NEXT: # implicit-def: $al
-; O0-NEXT: movb %al, {{[0-9]+}}(%rsp)
-; O0-NEXT: movw %cx, {{[0-9]+}}(%rsp)
-; O0-NEXT: ldtilecfg {{[0-9]+}}(%rsp)
-; O0-NEXT: tileloadd (%rdx,%rsi), %tmm0
-; O0-NEXT: movw $8, %cx
-; O0-NEXT: tilemovrow $2, %tmm0, %zmm1
-; O0-NEXT: vpaddd %zmm1, %zmm0, %zmm0
-; O0-NEXT: movq %rbp, %rsp
-; O0-NEXT: popq %rbp
-; O0-NEXT: tilerelease
-; O0-NEXT: retq
- %3 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 %0, i16 8, ptr @buf, i64 32)
- %4 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 8, i16 %1, ptr getelementptr inbounds ([3072 x i8], ptr @buf, i64 0, i64 1024), i64 32)
- call void @foo()
- %5 = call <16 x i32> @llvm.x86.tilemovrow.internal(i16 8, i16 %1, x86_amx %4, i32 2)
- %6 = call <16 x i32> @llvm.x86.tilemovrow.internal(i16 %0, i16 8, x86_amx %3, i32 2)
- %7 = add <16 x i32> %5, %6
- ret <16 x i32> %7
-}
-
-
-declare x86_amx @llvm.x86.tileloadd64.internal(i16, i16, ptr, i64)
-declare x86_amx @llvm.x86.tdpbssd.internal(i16, i16, i16, x86_amx, x86_amx, x86_amx)
-declare void @llvm.x86.tilestored64.internal(i16, i16, ptr, i64, x86_amx)
-declare <16 x i32> @llvm.x86.tilemovrow.internal(i16, i16, x86_amx, i32)
diff --git a/llvm/test/CodeGen/X86/amx-avx512-intrinsics.ll b/llvm/test/CodeGen/X86/amx-avx512-intrinsics.ll
deleted file mode 100644
index da7fedee88821b..00000000000000
--- a/llvm/test/CodeGen/X86/amx-avx512-intrinsics.ll
+++ /dev/null
@@ -1,116 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+amx-tile,+amx-avx512,+avx10.2-512 | FileCheck %s
-
-define <16 x float> @test_tcvtrowd2ps(i32 %A) {
-; CHECK-LABEL: test_tcvtrowd2ps:
-; CHECK: # %bb.0:
-; CHECK-NEXT: tcvtrowd2ps %edi, %tmm1, %zmm0 # encoding: [0x62,0xf2,0x46,0x48,0x4a,0xc1]
-; CHECK-NEXT: retq # encoding: [0xc3]
- %ret = call <16 x float> @llvm.x86.tcvtrowd2ps(i8 1, i32 %A)
- ret <16 x float> %ret
-}
-
-define <16 x float> @test_tcvtrowd2psi() {
-; CHECK-LABEL: test_tcvtrowd2psi:
-; CHECK: # %bb.0:
-; CHECK-NEXT: tcvtrowd2ps $127, %tmm1, %zmm0 # encoding: [0x62,0xf3,0x7e,0x48,0x07,0xc1,0x7f]
-; CHECK-NEXT: retq # encoding: [0xc3]
- %ret = call <16 x float> @llvm.x86.tcvtrowd2ps(i8 1, i32 127)
- ret <16 x float> %ret
-}
-declare <16 x float> @llvm.x86.tcvtrowd2ps(i8 %A, i32 %B)
-
-define <32 x bfloat> @test_tcvtrowps2pbf16h(i32 %A) {
-; CHECK-LABEL: test_tcvtrowps2pbf16h:
-; CHECK: # %bb.0:
-; CHECK-NEXT: tcvtrowps2pbf16h %edi, %tmm1, %zmm0 # encoding: [0x62,0xf2,0x47,0x48,0x6d,0xc1]
-; CHECK-NEXT: retq # encoding: [0xc3]
- %ret = call <32 x bfloat> @llvm.x86.tcvtrowps2pbf16h(i8 1, i32 %A)
- ret <32 x bfloat> %ret
-}
-
-define <32 x bfloat> @test_tcvtrowps2pbf16hi() {
-; CHECK-LABEL: test_tcvtrowps2pbf16hi:
-; CHECK: # %bb.0:
-; CHECK-NEXT: tcvtrowps2pbf16h $127, %tmm1, %zmm0 # encoding: [0x62,0xf3,0x7f,0x48,0x07,0xc1,0x7f]
-; CHECK-NEXT: retq # encoding: [0xc3]
- %ret = call <32 x bfloat> @llvm.x86.tcvtrowps2pbf16h(i8 1, i32 127)
- ret <32 x bfloat> %ret
-}
-declare <32 x bfloat> @llvm.x86.tcvtrowps2pbf16h(i8 %A, i32 %B)
-
-define <32 x bfloat> @test_tcvtrowps2pbf16l(i32 %A) {
-; CHECK-LABEL: test_tcvtrowps2pbf16l:
-; CHECK: # %bb.0:
-; CHECK-NEXT: tcvtrowps2pbf16l %edi, %tmm1, %zmm0 # encoding: [0x62,0xf2,0x46,0x48,0x6d,0xc1]
-; CHECK-NEXT: retq # encoding: [0xc3]
- %ret = call <32 x bfloat> @llvm.x86.tcvtrowps2pbf16l(i8 1, i32 %A)
- ret <32 x bfloat> %ret
-}
-
-define <32 x bfloat> @test_tcvtrowps2pbf16li() {
-; CHECK-LABEL: test_tcvtrowps2pbf16li:
-; CHECK: # %bb.0:
-; CHECK-NEXT: tcvtrowps2pbf16l $127, %tmm1, %zmm0 # encoding: [0x62,0xf3,0x7e,0x48,0x77,0xc1,0x7f]
-; CHECK-NEXT: retq # encoding: [0xc3]
- %ret = call <32 x bfloat> @llvm.x86.tcvtrowps2pbf16l(i8 1, i32 127)
- ret <32 x bfloat> %ret
-}
-declare <32 x bfloat> @llvm.x86.tcvtrowps2pbf16l(i8 %A, i32 %B)
-
-define <32 x half> @test_tcvtrowps2phh(i32 %A) {
-; CHECK-LABEL: test_tcvtrowps2phh:
-; CHECK: # %bb.0:
-; CHECK-NEXT: tcvtrowps2phh %edi, %tmm1, %zmm0 # encoding: [0x62,0xf2,0x44,0x48,0x6d,0xc1]
-; CHECK-NEXT: retq # encoding: [0xc3]
- %ret = call <32 x half> @llvm.x86.tcvtrowps2phh(i8 1, i32 %A)
- ret <32 x half> %ret
-}
-
-define <32 x half> @test_tcvtrowps2phhi() {
-; CHECK-LABEL: test_tcvtrowps2phhi:
-; CHECK: # %bb.0:
-; CHECK-NEXT: tcvtrowps2phh $127, %tmm1, %zmm0 # encoding: [0x62,0xf3,0x7c,0x48,0x07,0xc1,0x7f]
-; CHECK-NEXT: retq # encoding: [0xc3]
- %ret = call <32 x half> @llvm.x86.tcvtrowps2phh(i8 1, i32 127)
- ret <32 x half> %ret
-}
-declare <32 x half> @llvm.x86.tcvtrowps2phh(i8 %A, i32 %B)
-
-define <32 x half> @test_tcvtrowps2phl(i32 %A) {
-; CHECK-LABEL: test_tcvtrowps2phl:
-; CHECK: # %bb.0:
-; CHECK-NEXT: tcvtrowps2phl %edi, %tmm1, %zmm0 # encoding: [0x62,0xf2,0x45,0x48,0x6d,0xc1]
-; CHECK-NEXT: retq # encoding: [0xc3]
- %ret = call <32 x half> @llvm.x86.tcvtrowps2phl(i8 1, i32 %A)
- ret <32 x half> %ret
-}
-
-define <32 x half> @test_tcvtrowps2phli() {
-; CHECK-LABEL: test_tcvtrowps2phli:
-; CHECK: # %bb.0:
-; CHECK-NEXT: tcvtrowps2phl $127, %tmm1, %zmm0 # encoding: [0x62,0xf3,0x7f,0x48,0x77,0xc1,0x7f]
-; CHECK-NEXT: retq # encoding: [0xc3]
- %ret = call <32 x half> @llvm.x86.tcvtrowps2phl(i8 1, i32 127)
- ret <32 x half> %ret
-}
-declare <32 x half> @llvm.x86.tcvtrowps2phl(i8 %A, i32 %B)
-
-define <16 x i32> @test_tilemovrow(i32 %A) {
-; CHECK-LABEL: test_tilemovrow:
-; CHECK: # %bb.0:
-; CHECK-NEXT: tilemovrow %edi, %tmm1, %zmm0 # encoding: [0x62,0xf2,0x45,0x48,0x4a,0xc1]
-; CHECK-NEXT: retq # encoding: [0xc3]
- %ret = call <16 x i32> @llvm.x86.tilemovrow(i8 1, i32 %A)
- ret <16 x i32> %ret
-}
-
-define <16 x i32> @test_tilemovrowi() {
-; CHECK-LABEL: test_tilemovrowi:
-; CHECK: # %bb.0:
-; CHECK-NEXT: tilemovrow $127, %tmm1, %zmm0 # encoding: [0x62,0xf3,0x7d,0x48,0x07,0xc1,0x7f]
-; CHECK-NEXT: retq # encoding: [0xc3]
- %ret = call <16 x i32> @llvm.x86.tilemovrow(i8 1, i32 127)
- ret <16 x i32> %ret
-}
-declare <16 x i32> @llvm.x86.tilemovrow(i8 %A, i32 %B)
diff --git a/llvm/test/CodeGen/X86/amx-tile-avx512-internals.ll b/llvm/test/CodeGen/X86/amx-tile-avx512-internals.ll
deleted file mode 100644
index b4a5c90bbea330..00000000000000
--- a/llvm/test/CodeGen/X86/amx-tile-avx512-internals.ll
+++ /dev/null
@@ -1,61 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-tile,+amx-bf16,+avx10.2-512, \
-; RUN: -mattr=+amx-avx512 -verify-machineinstrs | FileCheck %s
-
-define void @test_amx(i8* %pointer, i8* %base, i32 %index, i64 %stride) {
-; CHECK-LABEL: test_amx:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; CHECK-NEXT: vmovups %zmm0, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT: movb $1, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT: movb $8, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT: movw $8, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT: ldtilecfg -{{[0-9]+}}(%rsp)
-; CHECK-NEXT: movw $8, %ax
-; CHECK-NEXT: tileloadd (%rsi,%rcx), %tmm0
-; CHECK-NEXT: tcvtrowd2ps %edx, %tmm0, %zmm0
-; CHECK-NEXT: tcvtrowd2ps $16, %tmm0, %zmm0
-; CHECK-NEXT: tcvtrowps2pbf16h %edx, %tmm0, %zmm0
-; CHECK-NEXT: tcvtrowps2pbf16h $16, %tmm0, %zmm0
-; CHECK-NEXT: tcvtrowps2pbf16l %edx, %tmm0, %zmm0
-; CHECK-NEXT: tcvtrowps2pbf16l $16, %tmm0, %zmm0
-; CHECK-NEXT: tcvtrowps2phh %edx, %tmm0, %zmm0
-; CHECK-NEXT: tcvtrowps2phh $16, %tmm0, %zmm0
-; CHECK-NEXT: tcvtrowps2phl %edx, %tmm0, %zmm0
-; CHECK-NEXT: tcvtrowps2phl $16, %tmm0, %zmm0
-; CHECK-NEXT: tilemovrow %edx, %tmm0, %zmm0
-; CHECK-NEXT: tilemovrow $16, %tmm0, %zmm0
-; CHECK-NEXT: tilestored %tmm0, (%rdi,%rcx)
-; CHECK-NEXT: tilerelease
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
-
- %a = call x86_amx @llvm.x86.tileloadd64.internal(i16 8, i16 8, i8* %base, i64 %stride)
- call <16 x float> @llvm.x86.tcvtrowd2ps.internal(i16 8, i16 8, x86_amx %a, i32 %index)
- call <16 x float> @llvm.x86.tcvtrowd2ps.internal(i16 8, i16 8, x86_amx %a, i32 16)
- call <32 x bfloat> @llvm.x86.tcvtrowps2pbf16h.internal(i16 8, i16 8, x86_amx %a, i32 %index)
- call <32 x bfloat> @llvm.x86.tcvtrowps2pbf16h.internal(i16 8, i16 8, x86_amx %a, i32 16)
- call <32 x bfloat> @llvm.x86.tcvtrowps2pbf16l.internal(i16 8, i16 8, x86_amx %a, i32 %index)
- call <32 x bfloat> @llvm.x86.tcvtrowps2pbf16l.internal(i16 8, i16 8, x86_amx %a, i32 16)
- call <32 x half> @llvm.x86.tcvtrowps2phh.internal(i16 8, i16 8, x86_amx %a, i32 %index)
- call <32 x half> @llvm.x86.tcvtrowps2phh.internal(i16 8, i16 8, x86_amx %a, i32 16)
- call <32 x half> @llvm.x86.tcvtrowps2phl.internal(i16 8, i16 8, x86_amx %a, i32 %index)
- call <32 x half> @llvm.x86.tcvtrowps2phl.internal(i16 8, i16 8, x86_amx %a, i32 16)
- call <16 x i32> @llvm.x86.tilemovrow.internal(i16 8, i16 8, x86_amx %a, i32 %index)
- call <16 x i32> @llvm.x86.tilemovrow.internal(i16 8, i16 8, x86_amx %a, i32 16)
-
- call void @llvm.x86.tilestored64.internal(i16 8, i16 8, i8* %pointer, i64 %stride, x86_amx %a)
- ret void
-}
-
-declare x86_amx @llvm.x86.tilezero.internal(i16, i16)
-declare x86_amx @llvm.x86.tileloadd64.internal(i16, i16, i8*, i64)
-declare x86_amx @llvm.x86.tileloaddt164.internal(i16, i16, i8*, i64)
-declare void @llvm.x86.tilestored64.internal(i16, i16, i8*, i64, x86_amx)
-
-declare <16 x float> @llvm.x86.tcvtrowd2ps.internal(i16, i16, x86_amx, i32)
-declare <32 x bfloat> @llvm.x86.tcvtrowps2pbf16h.internal(i16, i16, x86_amx, i32)
-declare <32 x bfloat> @llvm.x86.tcvtrowps2pbf16l.internal(i16, i16, x86_amx, i32)
-declare <32 x half> @llvm.x86.tcvtrowps2phh.internal(i16, i16, x86_amx, i32)
-declare <32 x half> @llvm.x86.tcvtrowps2phl.internal(i16, i16, x86_amx, i32)
-declare <16 x i32> @llvm.x86.tilemovrow.internal(i16, i16, x86_amx, i32)
diff --git a/llvm/test/MC/Disassembler/X86/amx-avx512.txt b/llvm/test/MC/Disassembler/X86/amx-avx512.txt
deleted file mode 100644
index 0a162af1b4bc02..00000000000000
--- a/llvm/test/MC/Disassembler/X86/amx-avx512.txt
+++ /dev/null
@@ -1,106 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s --check-prefixes=ATT
-# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL
-
-# ATT: tcvtrowd2ps %ecx, %tmm5, %zmm22
-# INTEL: tcvtrowd2ps zmm22, tmm5, ecx
-0x62,0xe2,0x76,0x48,0x4a,0xf5
-
-# ATT: tcvtrowd2ps %ecx, %tmm2, %zmm22
-# INTEL: tcvtrowd2ps zmm22, tmm2, ecx
-0x62,0xe2,0x76,0x48,0x4a,0xf2
-
-# ATT: tcvtrowd2ps $123, %tmm5, %zmm22
-# INTEL: tcvtrowd2ps zmm22, tmm5, 123
-0x62,0xe3,0x7e,0x48,0x07,0xf5,0x7b
-
-# ATT: tcvtrowd2ps $123, %tmm2, %zmm22
-# INTEL: tcvtrowd2ps zmm22, tmm2, 123
-0x62,0xe3,0x7e,0x48,0x07,0xf2,0x7b
-
-# ATT: tcvtrowps2pbf16h %ecx, %tmm5, %zmm22
-# INTEL: tcvtrowps2pbf16h zmm22, tmm5, ecx
-0x62,0xe2,0x77,0x48,0x6d,0xf5
-
-# ATT: tcvtrowps2pbf16h %ecx, %tmm2, %zmm22
-# INTEL: tcvtrowps2pbf16h zmm22, tmm2, ecx
-0x62,0xe2,0x77,0x48,0x6d,0xf2
-
-# ATT: tcvtrowps2pbf16h $123, %tmm5, %zmm22
-# INTEL: tcvtrowps2pbf16h zmm22, tmm5, 123
-0x62,0xe3,0x7f,0x48,0x07,0xf5,0x7b
-
-# ATT: tcvtrowps2pbf16h $123, %tmm2, %zmm22
-# INTEL: tcvtrowps2pbf16h zmm22, tmm2, 123
-0x62,0xe3,0x7f,0x48,0x07,0xf2,0x7b
-
-# ATT: tcvtrowps2pbf16l %ecx, %tmm5, %zmm22
-# INTEL: tcvtrowps2pbf16l zmm22, tmm5, ecx
-0x62,0xe2,0x76,0x48,0x6d,0xf5
-
-# ATT: tcvtrowps2pbf16l %ecx, %tmm2, %zmm22
-# INTEL: tcvtrowps2pbf16l zmm22, tmm2, ecx
-0x62,0xe2,0x76,0x48,0x6d,0xf2
-
-# ATT: tcvtrowps2pbf16l $123, %tmm5, %zmm22
-# INTEL: tcvtrowps2pbf16l zmm22, tmm5, 123
-0x62,0xe3,0x7e,0x48,0x77,0xf5,0x7b
-
-# ATT: tcvtrowps2pbf16l $123, %tmm2, %zmm22
-# INTEL: tcvtrowps2pbf16l zmm22, tmm2, 123
-0x62,0xe3,0x7e,0x48,0x77,0xf2,0x7b
-
-# ATT: tcvtrowps2phh %ecx, %tmm5, %zmm22
-# INTEL: tcvtrowps2phh zmm22, tmm5, ecx
-0x62,0xe2,0x74,0x48,0x6d,0xf5
-
-# ATT: tcvtrowps2phh %ecx, %tmm2, %zmm22
-# INTEL: tcvtrowps2phh zmm22, tmm2, ecx
-0x62,0xe2,0x74,0x48,0x6d,0xf2
-
-# ATT: tcvtrowps2phh $123, %tmm5, %zmm22
-# INTEL: tcvtrowps2phh zmm22, tmm5, 123
-0x62,0xe3,0x7c,0x48,0x07,0xf5,0x7b
-
-# ATT: tcvtrowps2phh $123, %tmm2, %zmm22
-# INTEL: tcvtrowps2phh zmm22, tmm2, 123
-0x62,0xe3,0x7c,0x48,0x07,0xf2,0x7b
-
-# ATT: tcvtrowps2phl %ecx, %tmm5, %zmm22
-# INTEL: tcvtrowps2phl zmm22, tmm5, ecx
-0x62,0xe2,0x75,0x48,0x6d,0xf5
-
-# ATT: tcvtrowps2phl %ecx, %tmm2, %zmm22
-# INTEL: tcvtrowps2phl zmm22, tmm2, ecx
-0x62,0xe2,0x75,0x48,0x6d,0xf2
-
-# ATT: tcvtrowps2phl $123, %tmm5, %zmm22
-# INTEL: tcvtrowps2phl zmm22, tmm5, 123
-0x62,0xe3,0x7f,0x48,0x77,0xf5,0x7b
-
-# ATT: tcvtrowps2phl $123, %tmm2, %zmm22
-# INTEL: tcvtrowps2phl zmm22, tmm2, 123
-0x62,0xe3,0x7f,0x48,0x77,0xf2,0x7b
-
-# ATT: tilemovrow %ecx, %tmm3, %zmm22
-# INTEL: tilemovrow zmm22, tmm3, ecx
-0x62,0xe2,0x75,0x48,0x4a,0xf3
-
-# ATT: tilemovrow %ecx, %tmm2, %zmm22
-# INTEL: tilemovrow zmm22, tmm2, ecx
-0x62,0xe2,0x75,0x48,0x4a,0xf2
-
-# ATT: tilemovrow $123, %tmm3, %zmm22
-# INTEL: tilemovrow zmm22, tmm3, 123
-0x62,0xe3,0x7d,0x48,0x07,0xf3,0x7b
-
-# ATT: tilemovrow $123, %tmm2, %zmm22
-# INTEL: tilemovrow zmm22, tmm2, 123
-0x62,0xe3,0x7d,0x48,0x07,0xf2,0x7b
-
-# ATT: tilemovrow %edx, %tmm0, %zmm22
-# INTEL: tilemovrow zmm22, tmm0, edx
-0x62,0xe2,0x6d,0x48,0x4a,0xf0
-
-# ATT: tilemovrow $123, %tmm0, %zmm22
-# INTEL: tilemovrow zmm22, tmm0, 123
-0x62,0xe3,0x7d,0x48,0x07,0xf0,0x7b
diff --git a/llvm/test/MC/X86/amx-avx512-att.s b/llvm/test/MC/X86/amx-avx512-att.s
deleted file mode 100644
index 6da4ede82c6217..00000000000000
--- a/llvm/test/MC/X86/amx-avx512-att.s
+++ /dev/null
@@ -1,105 +0,0 @@
-// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding < %s | FileCheck %s
-
-// CHECK: tcvtrowd2ps %ecx, %tmm5, %zmm22
-// CHECK: encoding: [0x62,0xe2,0x76,0x48,0x4a,0xf5]
- tcvtrowd2ps %ecx, %tmm5, %zmm22
-
-// CHECK: tcvtrowd2ps %ecx, %tmm2, %zmm22
-// CHECK: encoding: [0x62,0xe2,0x76,0x48,0x4a,0xf2]
- tcvtrowd2ps %ecx, %tmm2, %zmm22
-
-// CHECK: tcvtrowd2ps $123, %tmm5, %zmm22
-// CHECK: encoding: [0x62,0xe3,0x7e,0x48,0x07,0xf5,0x7b]
- tcvtrowd2ps $123, %tmm5, %zmm22
-
-// CHECK: tcvtrowd2ps $123, %tmm2, %zmm22
-// CHECK: encoding: [0x62,0xe3,0x7e,0x48,0x07,0xf2,0x7b]
- tcvtrowd2ps $123, %tmm2, %zmm22
-
-// CHECK: tcvtrowps2pbf16h %ecx, %tmm5, %zmm22
-// CHECK: encoding: [0x62,0xe2,0x77,0x48,0x6d,0xf5]
- tcvtrowps2pbf16h %ecx, %tmm5, %zmm22
-
-// CHECK: tcvtrowps2pbf16h %ecx, %tmm2, %zmm22
-// CHECK: encoding: [0x62,0xe2,0x77,0x48,0x6d,0xf2]
- tcvtrowps2pbf16h %ecx, %tmm2, %zmm22
-
-// CHECK: tcvtrowps2pbf16h $123, %tmm5, %zmm22
-// CHECK: encoding: [0x62,0xe3,0x7f,0x48,0x07,0xf5,0x7b]
- tcvtrowps2pbf16h $123, %tmm5, %zmm22
-
-// CHECK: tcvtrowps2pbf16h $123, %tmm2, %zmm22
-// CHECK: encoding: [0x62,0xe3,0x7f,0x48,0x07,0xf2,0x7b]
- tcvtrowps2pbf16h $123, %tmm2, %zmm22
-
-// CHECK: tcvtrowps2pbf16l %ecx, %tmm5, %zmm22
-// CHECK: encoding: [0x62,0xe2,0x76,0x48,0x6d,0xf5]
- tcvtrowps2pbf16l %ecx, %tmm5, %zmm22
-
-// CHECK: tcvtrowps2pbf16l %ecx, %tmm2, %zmm22
-// CHECK: encoding: [0x62,0xe2,0x76,0x48,0x6d,0xf2]
- tcvtrowps2pbf16l %ecx, %tmm2, %zmm22
-
-// CHECK: tcvtrowps2pbf16l $123, %tmm5, %zmm22
-// CHECK: encoding: [0x62,0xe3,0x7e,0x48,0x77,0xf5,0x7b]
- tcvtrowps2pbf16l $123, %tmm5, %zmm22
-
-// CHECK: tcvtrowps2pbf16l $123, %tmm2, %zmm22
-// CHECK: encoding: [0x62,0xe3,0x7e,0x48,0x77,0xf2,0x7b]
- tcvtrowps2pbf16l $123, %tmm2, %zmm22
-
-// CHECK: tcvtrowps2phh %ecx, %tmm5, %zmm22
-// CHECK: encoding: [0x62,0xe2,0x74,0x48,0x6d,0xf5]
- tcvtrowps2phh %ecx, %tmm5, %zmm22
-
-// CHECK: tcvtrowps2phh %ecx, %tmm2, %zmm22
-// CHECK: encoding: [0x62,0xe2,0x74,0x48,0x6d,0xf2]
- tcvtrowps2phh %ecx, %tmm2, %zmm22
-
-// CHECK: tcvtrowps2phh $123, %tmm5, %zmm22
-// CHECK: encoding: [0x62,0xe3,0x7c,0x48,0x07,0xf5,0x7b]
- tcvtrowps2phh $123, %tmm5, %zmm22
-
-// CHECK: tcvtrowps2phh $123, %tmm2, %zmm22
-// CHECK: encoding: [0x62,0xe3,0x7c,0x48,0x07,0xf2,0x7b]
- tcvtrowps2phh $123, %tmm2, %zmm22
-
-// CHECK: tcvtrowps2phl %ecx, %tmm5, %zmm22
-// CHECK: encoding: [0x62,0xe2,0x75,0x48,0x6d,0xf5]
- tcvtrowps2phl %ecx, %tmm5, %zmm22
-
-// CHECK: tcvtrowps2phl %ecx, %tmm2, %zmm22
-// CHECK: encoding: [0x62,0xe2,0x75,0x48,0x6d,0xf2]
- tcvtrowps2phl %ecx, %tmm2, %zmm22
-
-// CHECK: tcvtrowps2phl $123, %tmm5, %zmm22
-// CHECK: encoding: [0x62,0xe3,0x7f,0x48,0x77,0xf5,0x7b]
- tcvtrowps2phl $123, %tmm5, %zmm22
-
-// CHECK: tcvtrowps2phl $123, %tmm2, %zmm22
-// CHECK: encoding: [0x62,0xe3,0x7f,0x48,0x77,0xf2,0x7b]
- tcvtrowps2phl $123, %tmm2, %zmm22
-
-// CHECK: tilemovrow %ecx, %tmm3, %zmm22
-// CHECK: encoding: [0x62,0xe2,0x75,0x48,0x4a,0xf3]
- tilemovrow %ecx, %tmm3, %zmm22
-
-// CHECK: tilemovrow %ecx, %tmm2, %zmm22
-// CHECK: encoding: [0x62,0xe2,0x75,0x48,0x4a,0xf2]
- tilemovrow %ecx, %tmm2, %zmm22
-
-// CHECK: tilemovrow $123, %tmm3, %zmm22
-// CHECK: encoding: [0x62,0xe3,0x7d,0x48,0x07,0xf3,0x7b]
- tilemovrow $123, %tmm3, %zmm22
-
-// CHECK: tilemovrow $123, %tmm2, %zmm22
-// CHECK: encoding: [0x62,0xe3,0x7d,0x48,0x07,0xf2,0x7b]
- tilemovrow $123, %tmm2, %zmm22
-
-// CHECK: tilemovrow %edx, %tmm0, %zmm22
-// CHECK: encoding: [0x62,0xe2,0x6d,0x48,0x4a,0xf0]
- tilemovrow %edx, %tmm0, %zmm22
-
-// CHECK: tilemovrow $123, %tmm0, %zmm22
-// CHECK: encoding: [0x62,0xe3,0x7d,0x48,0x07,0xf0,0x7b]
- tilemovrow $123, %tmm0, %zmm22
diff --git a/llvm/test/MC/X86/amx-avx512-intel.s b/llvm/test/MC/X86/amx-avx512-intel.s
deleted file mode 100644
index 3a517a6cd1aabb..00000000000000
--- a/llvm/test/MC/X86/amx-avx512-intel.s
+++ /dev/null
@@ -1,105 +0,0 @@
-// RUN: llvm-mc -triple x86_64-unknown-unknown -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
-
-// CHECK: tcvtrowd2ps zmm22, tmm5, ecx
-// CHECK: encoding: [0x62,0xe2,0x76,0x48,0x4a,0xf5]
- tcvtrowd2ps zmm22, tmm5, ecx
-
-// CHECK: tcvtrowd2ps zmm22, tmm2, ecx
-// CHECK: encoding: [0x62,0xe2,0x76,0x48,0x4a,0xf2]
- tcvtrowd2ps zmm22, tmm2, ecx
-
-// CHECK: tcvtrowd2ps zmm22, tmm5, 123
-// CHECK: encoding: [0x62,0xe3,0x7e,0x48,0x07,0xf5,0x7b]
- tcvtrowd2ps zmm22, tmm5, 123
-
-// CHECK: tcvtrowd2ps zmm22, tmm2, 123
-// CHECK: encoding: [0x62,0xe3,0x7e,0x48,0x07,0xf2,0x7b]
- tcvtrowd2ps zmm22, tmm2, 123
-
-// CHECK: tcvtrowps2pbf16h zmm22, tmm5, ecx
-// CHECK: encoding: [0x62,0xe2,0x77,0x48,0x6d,0xf5]
- tcvtrowps2pbf16h zmm22, tmm5, ecx
-
-// CHECK: tcvtrowps2pbf16h zmm22, tmm2, ecx
-// CHECK: encoding: [0x62,0xe2,0x77,0x48,0x6d,0xf2]
- tcvtrowps2pbf16h zmm22, tmm2, ecx
-
-// CHECK: tcvtrowps2pbf16h zmm22, tmm5, 123
-// CHECK: encoding: [0x62,0xe3,0x7f,0x48,0x07,0xf5,0x7b]
- tcvtrowps2pbf16h zmm22, tmm5, 123
-
-// CHECK: tcvtrowps2pbf16h zmm22, tmm2, 123
-// CHECK: encoding: [0x62,0xe3,0x7f,0x48,0x07,0xf2,0x7b]
- tcvtrowps2pbf16h zmm22, tmm2, 123
-
-// CHECK: tcvtrowps2pbf16l zmm22, tmm5, ecx
-// CHECK: encoding: [0x62,0xe2,0x76,0x48,0x6d,0xf5]
- tcvtrowps2pbf16l zmm22, tmm5, ecx
-
-// CHECK: tcvtrowps2pbf16l zmm22, tmm2, ecx
-// CHECK: encoding: [0x62,0xe2,0x76,0x48,0x6d,0xf2]
- tcvtrowps2pbf16l zmm22, tmm2, ecx
-
-// CHECK: tcvtrowps2pbf16l zmm22, tmm5, 123
-// CHECK: encoding: [0x62,0xe3,0x7e,0x48,0x77,0xf5,0x7b]
- tcvtrowps2pbf16l zmm22, tmm5, 123
-
-// CHECK: tcvtrowps2pbf16l zmm22, tmm2, 123
-// CHECK: encoding: [0x62,0xe3,0x7e,0x48,0x77,0xf2,0x7b]
- tcvtrowps2pbf16l zmm22, tmm2, 123
-
-// CHECK: tcvtrowps2phh zmm22, tmm5, ecx
-// CHECK: encoding: [0x62,0xe2,0x74,0x48,0x6d,0xf5]
- tcvtrowps2phh zmm22, tmm5, ecx
-
-// CHECK: tcvtrowps2phh zmm22, tmm2, ecx
-// CHECK: encoding: [0x62,0xe2,0x74,0x48,0x6d,0xf2]
- tcvtrowps2phh zmm22, tmm2, ecx
-
-// CHECK: tcvtrowps2phh zmm22, tmm5, 123
-// CHECK: encoding: [0x62,0xe3,0x7c,0x48,0x07,0xf5,0x7b]
- tcvtrowps2phh zmm22, tmm5, 123
-
-// CHECK: tcvtrowps2phh zmm22, tmm2, 123
-// CHECK: encoding: [0x62,0xe3,0x7c,0x48,0x07,0xf2,0x7b]
- tcvtrowps2phh zmm22, tmm2, 123
-
-// CHECK: tcvtrowps2phl zmm22, tmm5, ecx
-// CHECK: encoding: [0x62,0xe2,0x75,0x48,0x6d,0xf5]
- tcvtrowps2phl zmm22, tmm5, ecx
-
-// CHECK: tcvtrowps2phl zmm22, tmm2, ecx
-// CHECK: encoding: [0x62,0xe2,0x75,0x48,0x6d,0xf2]
- tcvtrowps2phl zmm22, tmm2, ecx
-
-// CHECK: tcvtrowps2phl zmm22, tmm5, 123
-// CHECK: encoding: [0x62,0xe3,0x7f,0x48,0x77,0xf5,0x7b]
- tcvtrowps2phl zmm22, tmm5, 123
-
-// CHECK: tcvtrowps2phl zmm22, tmm2, 123
-// CHECK: encoding: [0x62,0xe3,0x7f,0x48,0x77,0xf2,0x7b]
- tcvtrowps2phl zmm22, tmm2, 123
-
-// CHECK: tilemovrow zmm22, tmm3, ecx
-// CHECK: encoding: [0x62,0xe2,0x75,0x48,0x4a,0xf3]
- tilemovrow zmm22, tmm3, ecx
-
-// CHECK: tilemovrow zmm22, tmm2, ecx
-// CHECK: encoding: [0x62,0xe2,0x75,0x48,0x4a,0xf2]
- tilemovrow zmm22, tmm2, ecx
-
-// CHECK: tilemovrow zmm22, tmm3, 123
-// CHECK: encoding: [0x62,0xe3,0x7d,0x48,0x07,0xf3,0x7b]
- tilemovrow zmm22, tmm3, 123
-
-// CHECK: tilemovrow zmm22, tmm2, 123
-// CHECK: encoding: [0x62,0xe3,0x7d,0x48,0x07,0xf2,0x7b]
- tilemovrow zmm22, tmm2, 123
-
-// CHECK: tilemovrow zmm22, tmm0, edx
-// CHECK: encoding: [0x62,0xe2,0x6d,0x48,0x4a,0xf0]
- tilemovrow zmm22, tmm0, edx
-
-// CHECK: tilemovrow zmm22, tmm0, 123
-// CHECK: encoding: [0x62,0xe3,0x7d,0x48,0x07,0xf0,0x7b]
- tilemovrow zmm22, tmm0, 123
More information about the cfe-commits
mailing list