[clang] [llvm] [X86][AVX10.2] Decouple AMX-AVX512 from AVX10.2 (PR #148633)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 14 06:39:13 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-x86
@llvm/pr-subscribers-clang
Author: Evgenii Kudriashov (e-kud)
<details>
<summary>Changes</summary>
According to AVX10.2 rev. 4:
> AMX-AVX512's explicit AVX10.2 sensitivity is removed and the instructions are removed in favor of inclusion in the ISE/SDM. Users of AMX-AVX512 ISA should follow enabling and checking rules for both AMX and Intel® AVX-512/AVX10.
Ref.: https://cdrdv2.intel.com/v1/dl/getContent/828965
We set `amx-avx512` to imply `amx-tile`, `avx512f` and `evex512`, while `avx512fp16` and `avx512bf16` need to be specified separately.
---
Patch is 21.56 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/148633.diff
10 Files Affected:
- (modified) clang/include/clang/Basic/BuiltinsX86_64.td (+16-4)
- (modified) clang/lib/Headers/amxavx512intrin.h (+21-11)
- (modified) clang/test/CodeGen/X86/amx_avx512_api.c (+6-2)
- (modified) clang/test/CodeGen/X86/amxavx512-builtins.c (+5-1)
- (modified) llvm/lib/Target/X86/X86.td (+2-1)
- (modified) llvm/lib/Target/X86/X86InstrAMX.td (+7-7)
- (modified) llvm/lib/TargetParser/X86TargetParser.cpp (+1-1)
- (modified) llvm/test/CodeGen/X86/amx-across-func-tilemovrow.ll (+4-4)
- (modified) llvm/test/CodeGen/X86/amx-avx512-intrinsics.ll (+9-9)
- (modified) llvm/test/CodeGen/X86/amx-tile-avx512-internals.ll (+2-2)
``````````diff
diff --git a/clang/include/clang/Basic/BuiltinsX86_64.td b/clang/include/clang/Basic/BuiltinsX86_64.td
index f2b35874e3876..fecaaed37a868 100644
--- a/clang/include/clang/Basic/BuiltinsX86_64.td
+++ b/clang/include/clang/Basic/BuiltinsX86_64.td
@@ -290,13 +290,19 @@ let Features = "amx-complex,amx-transpose", Attributes = [NoThrow] in {
def tconjtfp16_internal : X86Builtin<"_Vector<256, int>(unsigned short, unsigned short, _Vector<256, int>)">;
}
-let Features = "amx-avx512,avx10.2-512", Attributes = [NoThrow] in {
+let Features = "amx-avx512", Attributes = [NoThrow] in {
def tcvtrowd2ps_internal : X86Builtin<"_Vector<16, float>(unsigned short, unsigned short, _Vector<256, int>, unsigned int)">;
+ def tilemovrow_internal : X86Builtin<"_Vector<16, int>(unsigned short, unsigned short, _Vector<256, int>, unsigned int)">;
+}
+
+let Features = "amx-avx512,avx512bf16", Attributes = [NoThrow] in {
def tcvtrowps2bf16h_internal : X86Builtin<"_Vector<32, __bf16>(unsigned short, unsigned short, _Vector<256, int>, unsigned int)">;
def tcvtrowps2bf16l_internal : X86Builtin<"_Vector<32, __bf16>(unsigned short, unsigned short, _Vector<256, int>, unsigned int)">;
+}
+
+let Features = "amx-avx512,avx512fp16", Attributes = [NoThrow] in {
def tcvtrowps2phh_internal : X86Builtin<"_Vector<32, _Float16>(unsigned short, unsigned short, _Vector<256, int>, unsigned int)">;
def tcvtrowps2phl_internal : X86Builtin<"_Vector<32, _Float16>(unsigned short, unsigned short, _Vector<256, int>, unsigned int)">;
- def tilemovrow_internal : X86Builtin<"_Vector<16, int>(unsigned short, unsigned short, _Vector<256, int>, unsigned int)">;
}
let Features = "amx-tf32", Attributes = [NoThrow] in {
@@ -382,13 +388,19 @@ let Features = "amx-complex,amx-transpose", Attributes = [NoThrow] in {
def tconjtfp16 : X86Builtin<"void(_Constant unsigned char, _Constant unsigned char)">;
}
-let Features = "amx-avx512,avx10.2-512", Attributes = [NoThrow] in {
+let Features = "amx-avx512", Attributes = [NoThrow] in {
def tcvtrowd2ps : X86Builtin<"_Vector<16, float>(_Constant unsigned char, unsigned int)">;
+ def tilemovrow : X86Builtin<"_Vector<16, int>(_Constant unsigned char, unsigned int)">;
+}
+
+let Features = "amx-avx512,avx512bf16", Attributes = [NoThrow] in {
def tcvtrowps2bf16h : X86Builtin<"_Vector<32, __bf16>(_Constant unsigned char, unsigned int)">;
def tcvtrowps2bf16l : X86Builtin<"_Vector<32, __bf16>(_Constant unsigned char, unsigned int)">;
+}
+
+let Features = "amx-avx512,avx512fp16", Attributes = [NoThrow] in {
def tcvtrowps2phh : X86Builtin<"_Vector<32, _Float16>(_Constant unsigned char, unsigned int)">;
def tcvtrowps2phl : X86Builtin<"_Vector<32, _Float16>(_Constant unsigned char, unsigned int)">;
- def tilemovrow : X86Builtin<"_Vector<16, int>(_Constant unsigned char, unsigned int)">;
}
let Features = "amx-fp16", Attributes = [NoThrow] in {
diff --git a/clang/lib/Headers/amxavx512intrin.h b/clang/lib/Headers/amxavx512intrin.h
index bbde44fc265b3..e6c58e5c138a1 100644
--- a/clang/lib/Headers/amxavx512intrin.h
+++ b/clang/lib/Headers/amxavx512intrin.h
@@ -16,7 +16,15 @@
#define __DEFAULT_FN_ATTRS_AVX512 \
__attribute__((__always_inline__, __nodebug__, \
- __target__("amx-avx512,avx10.2-512")))
+ __target__("amx-avx512")))
+
+#define __DEFAULT_FN_ATTRS_AVX512BF16 \
+ __attribute__((__always_inline__, __nodebug__, \
+ __target__("amx-avx512,avx512bf16")))
+
+#define __DEFAULT_FN_ATTRS_AVX512FP16 \
+ __attribute__((__always_inline__, __nodebug__, \
+ __target__("amx-avx512,avx512fp16")))
/// Moves a row from a tile register to a zmm destination register, converting
/// the int32 source elements to fp32. The row of the tile is selected by a
@@ -237,25 +245,27 @@ static __inline__ __m512 __DEFAULT_FN_ATTRS_AVX512 _tile_cvtrowd2ps_internal(
return __builtin_ia32_tcvtrowd2ps_internal(m, n, src, u);
}
-static __inline__ __m512bh __DEFAULT_FN_ATTRS_AVX512
+static __inline__ __m512bh __DEFAULT_FN_ATTRS_AVX512BF16
_tile_cvtrowps2bf16h_internal(unsigned short m, unsigned short n,
_tile1024i src, unsigned u) {
return __builtin_ia32_tcvtrowps2bf16h_internal(m, n, src, u);
}
-static __inline__ __m512bh __DEFAULT_FN_ATTRS_AVX512
+static __inline__ __m512bh __DEFAULT_FN_ATTRS_AVX512BF16
_tile_cvtrowps2bf16l_internal(unsigned short m, unsigned short n,
_tile1024i src, unsigned u) {
return __builtin_ia32_tcvtrowps2bf16l_internal(m, n, src, u);
}
-static __inline__ __m512h __DEFAULT_FN_ATTRS_AVX512 _tile_cvtrowps2phh_internal(
- unsigned short m, unsigned short n, _tile1024i src, unsigned u) {
+static __inline__ __m512h __DEFAULT_FN_ATTRS_AVX512FP16
+_tile_cvtrowps2phh_internal(unsigned short m, unsigned short n,
+ _tile1024i src, unsigned u) {
return __builtin_ia32_tcvtrowps2phh_internal(m, n, src, u);
}
-static __inline__ __m512h __DEFAULT_FN_ATTRS_AVX512 _tile_cvtrowps2phl_internal(
- unsigned short m, unsigned short n, _tile1024i src, unsigned u) {
+static __inline__ __m512h __DEFAULT_FN_ATTRS_AVX512FP16
+_tile_cvtrowps2phl_internal(unsigned short m, unsigned short n,
+ _tile1024i src, unsigned u) {
return __builtin_ia32_tcvtrowps2phl_internal(m, n, src, u);
}
@@ -298,7 +308,7 @@ static __m512 __tile_cvtrowd2ps(__tile1024i src0, unsigned src1) {
/// The 2nd source r32. Size is 4 Bytes.
/// \returns
/// The destination v32bf16 data. Size is 64 Bytes.
-__DEFAULT_FN_ATTRS_AVX512
+__DEFAULT_FN_ATTRS_AVX512BF16
static __m512bh __tile_cvtrowps2bf16h(__tile1024i src0, unsigned src1) {
return _tile_cvtrowps2bf16h_internal(src0.row, src0.col, src0.tile, src1);
}
@@ -317,7 +327,7 @@ static __m512bh __tile_cvtrowps2bf16h(__tile1024i src0, unsigned src1) {
/// The 2nd source r32. Size is 4 Bytes.
/// \returns
/// The destination v32bf16 data. Size is 64 Bytes.
-__DEFAULT_FN_ATTRS_AVX512
+__DEFAULT_FN_ATTRS_AVX512BF16
static __m512bh __tile_cvtrowps2bf16l(__tile1024i src0, unsigned src1) {
return _tile_cvtrowps2bf16l_internal(src0.row, src0.col, src0.tile, src1);
}
@@ -336,7 +346,7 @@ static __m512bh __tile_cvtrowps2bf16l(__tile1024i src0, unsigned src1) {
/// The 2nd source r32. Size is 4 Bytes.
/// \returns
/// The destination v32fp16 data. Size is 64 Bytes.
-__DEFAULT_FN_ATTRS_AVX512
+__DEFAULT_FN_ATTRS_AVX512FP16
static __m512h __tile_cvtrowps2phh(__tile1024i src0, unsigned src1) {
return _tile_cvtrowps2phh_internal(src0.row, src0.col, src0.tile, src1);
}
@@ -355,7 +365,7 @@ static __m512h __tile_cvtrowps2phh(__tile1024i src0, unsigned src1) {
/// The 2nd source r32. Size is 4 Bytes.
/// \returns
/// The destination v32fp16 data. Size is 64 Bytes.
-__DEFAULT_FN_ATTRS_AVX512
+__DEFAULT_FN_ATTRS_AVX512FP16
static __m512h __tile_cvtrowps2phl(__tile1024i src0, unsigned src1) {
return _tile_cvtrowps2phl_internal(src0.row, src0.col, src0.tile, src1);
}
diff --git a/clang/test/CodeGen/X86/amx_avx512_api.c b/clang/test/CodeGen/X86/amx_avx512_api.c
index fac41ea6c214f..54bf72a8f389b 100644
--- a/clang/test/CodeGen/X86/amx_avx512_api.c
+++ b/clang/test/CodeGen/X86/amx_avx512_api.c
@@ -1,6 +1,6 @@
// RUN: %clang_cc1 %s -flax-vector-conversions=none -ffreestanding -triple=x86_64-unknown-unknown \
-// RUN: -target-feature +amx-avx512 -target-feature +avx10.2-512 \
-// RUN: -emit-llvm -o - -Werror -pedantic | FileCheck %s --check-prefixes=CHECK
+// RUN: -target-feature +amx-avx512 -emit-llvm -o - -Werror -pedantic | \
+// RUN: FileCheck %s --check-prefixes=CHECK
#include <immintrin.h>
@@ -16,6 +16,7 @@ __m512 test_tile_cvtrowd2ps(__tile1024i a, unsigned b) {
return __tile_cvtrowd2ps(a, b);
}
+__attribute__((__target__("avx512bf16")))
__m512bh test_tile_cvtrowps2bf16h(__tile1024i a, unsigned b) {
//CHECK-LABEL: @test_tile_cvtrowps2bf16h
//CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
@@ -23,6 +24,7 @@ __m512bh test_tile_cvtrowps2bf16h(__tile1024i a, unsigned b) {
return __tile_cvtrowps2bf16h(a, b);
}
+__attribute__((__target__("avx512bf16")))
__m512bh test_tile_cvtrowps2bf16l(__tile1024i a, unsigned b) {
//CHECK-LABEL: @test_tile_cvtrowps2bf16l
//CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
@@ -30,6 +32,7 @@ __m512bh test_tile_cvtrowps2bf16l(__tile1024i a, unsigned b) {
return __tile_cvtrowps2bf16l(a, b);
}
+__attribute__((__target__("avx512fp16")))
__m512h test_tile_cvtrowps2phh(__tile1024i a, unsigned b) {
//CHECK-LABEL: @test_tile_cvtrowps2phh
//CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
@@ -37,6 +40,7 @@ __m512h test_tile_cvtrowps2phh(__tile1024i a, unsigned b) {
return __tile_cvtrowps2phh(a, b);
}
+__attribute__((__target__("avx512fp16")))
__m512h test_tile_cvtrowps2phl(__tile1024i a, unsigned b) {
//CHECK-LABEL: @test_tile_cvtrowps2phl
//CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
diff --git a/clang/test/CodeGen/X86/amxavx512-builtins.c b/clang/test/CodeGen/X86/amxavx512-builtins.c
index d60929994901a..f6b8cb421407e 100644
--- a/clang/test/CodeGen/X86/amxavx512-builtins.c
+++ b/clang/test/CodeGen/X86/amxavx512-builtins.c
@@ -1,5 +1,5 @@
// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +amx-tile -target-feature +amx-avx512 \
-// RUN: -target-feature +avx10.2-512 -emit-llvm -o - -Wall -Werror -pedantic -Wno-gnu-statement-expression -flax-vector-conversions=none | FileCheck %s
+// RUN: -emit-llvm -o - -Wall -Werror -pedantic -Wno-gnu-statement-expression -flax-vector-conversions=none | FileCheck %s
#include <immintrin.h>
#include <stddef.h>
@@ -10,24 +10,28 @@ __m512 test_tile_cvtrowd2ps(unsigned int A) {
return _tile_cvtrowd2ps(1, A);
}
+__attribute__((__target__("avx512bf16")))
__m512bh test_tile_cvtrowps2bf16h(unsigned int A) {
// CHECK-LABEL: @test_tile_cvtrowps2bf16h(
// CHECK: call <32 x bfloat> @llvm.x86.tcvtrowps2bf16h(i8 1, i32 %{{.*}})
return _tile_cvtrowps2bf16h(1, A);
}
+__attribute__((__target__("avx512bf16")))
__m512bh test_tile_cvtrowps2bf16l(unsigned int A) {
// CHECK-LABEL: @test_tile_cvtrowps2bf16l(
// CHECK: call <32 x bfloat> @llvm.x86.tcvtrowps2bf16l(i8 1, i32 %{{.*}})
return _tile_cvtrowps2bf16l(1, A);
}
+__attribute__((__target__("avx512fp16")))
__m512h test_tile_cvtrowps2phh(unsigned int A) {
// CHECK-LABEL: @test_tile_cvtrowps2phh(
// CHECK: call <32 x half> @llvm.x86.tcvtrowps2phh(i8 1, i32 %{{.*}})
return _tile_cvtrowps2phh(1, A);
}
+__attribute__((__target__("avx512fp16")))
__m512h test_tile_cvtrowps2phl(unsigned int A) {
// CHECK-LABEL: @test_tile_cvtrowps2phl(
// CHECK: call <32 x half> @llvm.x86.tcvtrowps2phl(i8 1, i32 %{{.*}})
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index 990b381341f07..83b633b73cd5d 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -277,7 +277,8 @@ def FeatureAMXTRANSPOSE : SubtargetFeature<"amx-transpose", "HasAMXTRANSPOSE", "
def FeatureAMXAVX512 : SubtargetFeature<"amx-avx512",
"HasAMXAVX512", "true",
"Support AMX-AVX512 instructions",
- [FeatureAMXTILE]>;
+ [FeatureAMXTILE, FeatureAVX512,
+ FeatureEVEX512]>;
def FeatureAMXTF32 : SubtargetFeature<"amx-tf32", "HasAMXTF32", "true",
"Support AMX-TF32 instructions",
[FeatureAMXTILE]>;
diff --git a/llvm/lib/Target/X86/X86InstrAMX.td b/llvm/lib/Target/X86/X86InstrAMX.td
index 1beaaafb159e3..5d93e6a4089ca 100644
--- a/llvm/lib/Target/X86/X86InstrAMX.td
+++ b/llvm/lib/Target/X86/X86InstrAMX.td
@@ -550,7 +550,7 @@ let Predicates = [HasAMXMOVRS, In64BitMode], SchedRW = [WriteSystem] in {
} // HasAMXMOVRS, In64BitMode
multiclass m_tcvtrowd2ps {
- let Predicates = [HasAMXAVX512, HasAVX10_2_512, In64BitMode] in {
+ let Predicates = [HasAMXAVX512, In64BitMode] in {
let SchedRW = [WriteSystem] in {
def rri : Ii8<0x7, MRMSrcReg, (outs VR512:$dst),
(ins TILE:$src1, i32u8imm:$src2),
@@ -561,12 +561,12 @@ multiclass m_tcvtrowd2ps {
"tcvtrowd2ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[]>, T8,XS, EVEX, VVVV, EVEX_V512;
}
- } // HasAMXAVX512, HasAVX10_2_512, In64BitMode
+ } // HasAMXAVX512, In64BitMode
}
defm TCVTROWD2PS : m_tcvtrowd2ps;
-let Predicates = [HasAMXAVX512, HasAVX10_2_512, In64BitMode] in {
+let Predicates = [HasAMXAVX512, In64BitMode] in {
let SchedRW = [WriteSystem] in {
let usesCustomInserter = 1 in {
def PTCVTROWD2PSrri : PseudoI<(outs VR512:$dst), (ins u8imm:$src1, i32u8imm:$src2),
@@ -630,7 +630,7 @@ let Predicates = [HasAMXAVX512, HasAVX10_2_512, In64BitMode] in {
multiclass AMXAVX512_BASE<bits<8> Opcode1, bits<8> Opcode2, string Opstr,
Prefix P1, Prefix P2> {
- let Predicates = [HasAMXAVX512, HasAVX10_2_512, In64BitMode], SchedRW = [WriteSystem] in {
+ let Predicates = [HasAMXAVX512, In64BitMode], SchedRW = [WriteSystem] in {
let OpPrefix = P1 in
def rre : I<Opcode1, MRMSrcReg4VOp3, (outs VR512:$dst),
(ins TILE:$src1, GR32:$src2),
@@ -658,7 +658,7 @@ defm TCVTROWPS2BF16H : AMXAVX512_BASE<0x6d, 0x07, "tcvtrowps2bf16h", XD, XD>;
defm TCVTROWPS2BF16L : AMXAVX512_BASE<0x6d, 0x77, "tcvtrowps2bf16l", XS, XS>;
multiclass m_tilemovrow {
- let Predicates = [HasAMXAVX512, HasAVX10_2_512, In64BitMode] in {
+ let Predicates = [HasAMXAVX512, In64BitMode] in {
let SchedRW = [WriteSystem] in {
def rri : Ii8<0x7, MRMSrcReg, (outs VR512:$dst),
(ins TILE:$src1, u8imm:$src2),
@@ -669,12 +669,12 @@ multiclass m_tilemovrow {
"tilemovrow\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[]>, T8,PD, EVEX, VVVV, EVEX_V512;
}
- } // HasAMXAVX512, HasAVX10_2_512, In64BitMode
+ } // HasAMXAVX512, In64BitMode
}
defm TILEMOVROW : m_tilemovrow;
-let Predicates = [HasAMXAVX512, HasAVX10_2_512, In64BitMode] in {
+let Predicates = [HasAMXAVX512, In64BitMode] in {
let SchedRW = [WriteSystem] in {
let usesCustomInserter = 1 in {
def PTILEMOVROWrri : PseudoI<(outs VR512:$dst), (ins u8imm:$src1, i32u8imm:$src2),
diff --git a/llvm/lib/TargetParser/X86TargetParser.cpp b/llvm/lib/TargetParser/X86TargetParser.cpp
index 57fbc71fa22ee..f0349fec8af4e 100644
--- a/llvm/lib/TargetParser/X86TargetParser.cpp
+++ b/llvm/lib/TargetParser/X86TargetParser.cpp
@@ -616,7 +616,7 @@ constexpr FeatureBitset ImpliedFeaturesAMX_FP8 = FeatureAMX_TILE;
constexpr FeatureBitset ImpliedFeaturesAMX_TRANSPOSE = FeatureAMX_TILE;
constexpr FeatureBitset ImpliedFeaturesAMX_MOVRS = FeatureAMX_TILE;
constexpr FeatureBitset ImpliedFeaturesAMX_AVX512 =
- FeatureAMX_TILE | FeatureAVX10_2_512;
+ FeatureAMX_TILE | FeatureAVX512F | FeatureEVEX512;
constexpr FeatureBitset ImpliedFeaturesAMX_TF32 = FeatureAMX_TILE;
constexpr FeatureBitset ImpliedFeaturesHRESET = {};
diff --git a/llvm/test/CodeGen/X86/amx-across-func-tilemovrow.ll b/llvm/test/CodeGen/X86/amx-across-func-tilemovrow.ll
index 71f8f231747fe..9e73d8c494443 100644
--- a/llvm/test/CodeGen/X86/amx-across-func-tilemovrow.ll
+++ b/llvm/test/CodeGen/X86/amx-across-func-tilemovrow.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-int8 -mattr=+avx10.2-512 -mattr=+amx-avx512 -verify-machineinstrs | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-int8 -mattr=+avx10.2-512 -mattr=+amx-avx512 -verify-machineinstrs -enable-ipra | FileCheck -check-prefix=IPRA %s
-; RUN: llc < %s -O0 -mtriple=x86_64-unknown-unknown -mattr=+amx-int8 -mattr=+avx10.2-512 -mattr=+amx-avx512 -verify-machineinstrs | FileCheck -check-prefix=O0 %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-int8,+amx-avx512 -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-int8,+amx-avx512 -verify-machineinstrs -enable-ipra | FileCheck -check-prefix=IPRA %s
+; RUN: llc < %s -O0 -mtriple=x86_64-unknown-unknown -mattr=+amx-int8,+amx-avx512 -verify-machineinstrs | FileCheck -check-prefix=O0 %s
@buf = dso_local global [3072 x i8] zeroinitializer, align 64
@@ -95,7 +95,7 @@ define dso_local <16 x i32> @test_api(i16 signext %0, i16 signext %1) nounwind {
; O0-NEXT: movq %rsp, %rbp
; O0-NEXT: andq $-1024, %rsp # imm = 0xFC00
; O0-NEXT: subq $4096, %rsp # imm = 0x1000
-; O0-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; O0-NEXT: vxorps %xmm0, %xmm0, %xmm0
; O0-NEXT: vmovups %zmm0, {{[0-9]+}}(%rsp)
; O0-NEXT: movb $1, {{[0-9]+}}(%rsp)
; O0-NEXT: movw %si, %cx
diff --git a/llvm/test/CodeGen/X86/amx-avx512-intrinsics.ll b/llvm/test/CodeGen/X86/amx-avx512-intrinsics.ll
index 8f82bd2587ec3..94db7609dfd5d 100644
--- a/llvm/test/CodeGen/X86/amx-avx512-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/amx-avx512-intrinsics.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+amx-tile,+amx-avx512,+avx10.2-512 | FileCheck %s
+; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+amx-tile,+amx-avx512 | FileCheck %s
define <16 x float> @test_tcvtrowd2ps(i32 %A) {
; CHECK-LABEL: test_tcvtrowd2ps:
@@ -20,7 +20,7 @@ define <16 x float> @test_tcvtrowd2psi() {
}
declare <16 x float> @llvm.x86.tcvtrowd2ps(i8 %A, i32 %B)
-define <32 x bfloat> @test_tcvtrowps2bf16h(i32 %A) {
+define <32 x bfloat> @test_tcvtrowps2bf16h(i32 %A) "target-features"="+avx512bf16" {
; CHECK-LABEL: test_tcvtrowps2bf16h:
; CHECK: # %bb.0:
; CHECK-NEXT: tcvtrowps2bf16h %edi, %tmm1, %zmm0 # encoding: [0x62,0xf2,0x47,0x48,0x6d,0xc1]
@@ -29,7 +29,7 @@ define <32 x bfloat> @test_tcvtrowps2bf16h(i32 %A) {
ret <32 x bfloat> %ret
}
-define <32 x bfloat> @test_tcvtrowps2bf16hi() {
+define <32 x bfloat> @test_tcvtrowps2bf16hi() "target-features"="+avx512bf16" {
; CHECK-LABEL: test_tcvtrowps2bf16hi:
; CHECK: # %bb.0:
; CHECK-NEXT: tcvtrowps2bf16h $127, %tmm1, %zmm0 # encoding: [0x62,0xf3,0x7f,0x48,0x07,0xc1,0x7f]
@@ -39,7 +39,7 @@ define <32 x bfloat> @test_tcvtrowps2bf16hi() {
}
declare <32 x bfloat> @llvm.x86.tcvtrowps2bf16h(i8 %A, i32 %B)
-define <32 x bfloat> @test_tcvtrowps2bf16l(i32 %A) {
+define <32 x bfloat> @test_tcvtrowps2bf16l(i32 %A) "target-features"="+avx512bf16" {
; CHECK-LABEL: test_tcvtrowps2bf16l:
; CHECK: # %bb.0:
; CHECK-NEXT: tcvtrowps2bf16l %edi, %tmm1, %zmm0 # encoding: [0x62,0xf2,0x46,0x48,0x6d,0xc1]
@@ -48,7 +48,7 @@ define <32 x bfloat> @test_tcvtrowps2bf16l(i32 %A) {
ret <32 x bfloat> %ret
}
-define <32 x bfloat> @test_tcvtrowps2bf16li() {
+define <32 x bfloat> @test_tcvtrowps2bf16li() "target-features"="+avx512bf16" {
; CHECK-LABEL: test_tcvtrowps2bf16li:
; CHECK: # %bb.0:
; CHECK-NEXT: tcvtrowps2bf16l $127, %tmm1, %zmm0 # encoding: [0x62,0xf3,0x7e,0x48,0x77,0xc1,0x7f]
@@ -58,7 +58,7 @@ define <32 x bfloat> @test_tcvtrowps2bf16li() {
}
declare <32 x bfloat> @llvm.x86.tcvtrowps2bf16l(i8 %A, i32 %B)
-define <32 x half> @test_tcvtrowps2phh(i32 %A) {
+define <32 x half> @test_tcvtrowps2phh(i32 %A) "target-features"="+avx512fp16" {
; CHECK-LABEL: test_tcvtrowps2phh:
; CHECK: # %bb.0:
; CHECK-NEXT: tcvtrowps2phh %edi, %tmm1, %zmm0 # encoding: [0x62,0xf2,0x44,0x48,0x6d,0xc1]
@@ -67,7 +67,7 @@ define <32 x half> @test_tcvtrowps2phh(i32 %A) {
ret <32 x half> %ret
}
-define <32 x half> @test_tcvtrowps2phhi() {
+define <32 x half> @test_tcvtrowps2phhi() "target-features"="+avx512fp16" {
; CHECK-LABEL: test_tcvtrowps2phhi:
; CHECK: # %bb.0:
; CHECK-NEXT: tcvtrowps2phh $127, %tmm1, %zmm0 # encoding: [0x62,0xf3,0x7c,0x4...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/148633
More information about the llvm-commits
mailing list