[clang] aee2a35 - [X86] Add AVX-NE-CONVERT instructions.

Freddy Ye via cfe-commits cfe-commits at lists.llvm.org
Mon Oct 31 08:43:30 PDT 2022


Author: Freddy Ye
Date: 2022-10-31T23:39:38+08:00
New Revision: aee2a35ac4ab4fe62bb0ce4e314966ab9207efd1

URL: https://github.com/llvm/llvm-project/commit/aee2a35ac4ab4fe62bb0ce4e314966ab9207efd1
DIFF: https://github.com/llvm/llvm-project/commit/aee2a35ac4ab4fe62bb0ce4e314966ab9207efd1.diff

LOG: [X86] Add AVX-NE-CONVERT instructions.

For more details about these instructions, please refer to the latest ISE document: https://www.intel.com/content/www/us/en/develop/download/intel-architecture-instruction-set-extensions-programming-reference.html

Reviewed By: pengfei

Differential Revision: https://reviews.llvm.org/D135930

Added: 
    clang/lib/Headers/avxneconvertintrin.h
    clang/test/CodeGen/X86/avxneconvert-builtins.c
    llvm/test/CodeGen/X86/avxneconvert-intrinsics-shared.ll
    llvm/test/CodeGen/X86/avxneconvert-intrinsics.ll
    llvm/test/MC/Disassembler/X86/avx_ne_convert-32.txt
    llvm/test/MC/Disassembler/X86/avx_ne_convert-64.txt
    llvm/test/MC/X86/avx_ne_convert-32-att.s
    llvm/test/MC/X86/avx_ne_convert-32-intel.s
    llvm/test/MC/X86/avx_ne_convert-64-att.s
    llvm/test/MC/X86/avx_ne_convert-64-intel.s

Modified: 
    clang/docs/ReleaseNotes.rst
    clang/include/clang/Basic/BuiltinsX86.def
    clang/include/clang/Driver/Options.td
    clang/lib/Basic/Targets/X86.cpp
    clang/lib/Basic/Targets/X86.h
    clang/lib/Headers/CMakeLists.txt
    clang/lib/Headers/avx512vlbf16intrin.h
    clang/lib/Headers/cpuid.h
    clang/lib/Headers/immintrin.h
    clang/test/CodeGen/X86/avx512vlbf16-builtins.c
    clang/test/CodeGen/attr-target-x86.c
    clang/test/Driver/x86-target-features.c
    clang/test/Preprocessor/x86_target_features.c
    llvm/docs/ReleaseNotes.rst
    llvm/include/llvm/IR/IntrinsicsX86.td
    llvm/include/llvm/Support/X86TargetParser.def
    llvm/lib/Support/Host.cpp
    llvm/lib/Support/X86TargetParser.cpp
    llvm/lib/Target/X86/X86.td
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/lib/Target/X86/X86InstrAVX512.td
    llvm/lib/Target/X86/X86InstrInfo.td
    llvm/lib/Target/X86/X86InstrSSE.td

Removed: 
    


################################################################################
diff  --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 641f75eeeb64c..e00ea45b986dd 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -662,6 +662,14 @@ X86 Support in Clang
   * Support intrinsic of ``_mm(256)_dpbssd(s)_epi32``.
   * Support intrinsic of ``_mm(256)_dpbsud(s)_epi32``.
   * Support intrinsic of ``_mm(256)_dpbuud(s)_epi32``.
+- Support ISA of ``AVX-NE-CONVERT``.
+  * Support intrinsic of ``_mm(256)_bcstnebf16_ps``.
+  * Support intrinsic of ``_mm(256)_bcstnesh_ps``.
+  * Support intrinsic of ``_mm(256)_cvtneebf16_ps``.
+  * Support intrinsic of ``_mm(256)_cvtneeph_ps``.
+  * Support intrinsic of ``_mm(256)_cvtneobf16_ps``.
+  * Support intrinsic of ``_mm(256)_cvtneoph_ps``.
+  * Support intrinsic of ``_mm(256)_cvtneps_avx_pbh``.
 
 WebAssembly Support in Clang
 ----------------------------

diff  --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index 3fc77c95b7664..fff7d58fb0d3c 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -2116,6 +2116,22 @@ TARGET_HEADER_BUILTIN(__readgsword,  "UsUNi", "nh", "intrin.h", ALL_MS_LANGUAGES
 TARGET_HEADER_BUILTIN(__readgsdword, "UNiUNi", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
 TARGET_HEADER_BUILTIN(__readgsqword, "ULLiUNi", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
 
+// AVX-NE-CONVERT
+TARGET_BUILTIN(__builtin_ia32_vbcstnebf162ps128, "V4fyC*", "nV:128:", "avxneconvert")
+TARGET_BUILTIN(__builtin_ia32_vbcstnebf162ps256, "V8fyC*", "nV:256:", "avxneconvert")
+TARGET_BUILTIN(__builtin_ia32_vbcstnesh2ps128, "V4fxC*", "nV:128:", "avxneconvert")
+TARGET_BUILTIN(__builtin_ia32_vbcstnesh2ps256, "V8fxC*", "nV:256:", "avxneconvert")
+TARGET_BUILTIN(__builtin_ia32_vcvtneebf162ps128, "V4fV8yC*", "nV:128:", "avxneconvert")
+TARGET_BUILTIN(__builtin_ia32_vcvtneebf162ps256, "V8fV16yC*", "nV:256:", "avxneconvert")
+TARGET_BUILTIN(__builtin_ia32_vcvtneeph2ps128, "V4fV8xC*", "nV:128:", "avxneconvert")
+TARGET_BUILTIN(__builtin_ia32_vcvtneeph2ps256, "V8fV16xC*", "nV:256:", "avxneconvert")
+TARGET_BUILTIN(__builtin_ia32_vcvtneobf162ps128, "V4fV8yC*", "nV:128:", "avxneconvert")
+TARGET_BUILTIN(__builtin_ia32_vcvtneobf162ps256, "V8fV16yC*", "nV:256:", "avxneconvert")
+TARGET_BUILTIN(__builtin_ia32_vcvtneoph2ps128, "V4fV8xC*", "nV:128:", "avxneconvert")
+TARGET_BUILTIN(__builtin_ia32_vcvtneoph2ps256, "V8fV16xC*", "nV:256:", "avxneconvert")
+TARGET_BUILTIN(__builtin_ia32_vcvtneps2bf16128, "V8yV4f", "nV:128:", "avx512bf16,avx512vl|avxneconvert")
+TARGET_BUILTIN(__builtin_ia32_vcvtneps2bf16256, "V8yV8f", "nV:256:", "avx512bf16,avx512vl|avxneconvert")
+
 TARGET_HEADER_BUILTIN(_InterlockedAnd64,         "WiWiD*Wi", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
 TARGET_HEADER_BUILTIN(_InterlockedDecrement64,   "WiWiD*",   "nh", "intrin.h", ALL_MS_LANGUAGES, "")
 TARGET_HEADER_BUILTIN(_InterlockedExchange64,    "WiWiD*Wi", "nh", "intrin.h", ALL_MS_LANGUAGES, "")

diff  --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index aa199e4608741..679c565126fca 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -4592,6 +4592,8 @@ def mavx512vp2intersect : Flag<["-"], "mavx512vp2intersect">, Group<m_x86_Featur
 def mno_avx512vp2intersect : Flag<["-"], "mno-avx512vp2intersect">, Group<m_x86_Features_Group>;
 def mavxifma : Flag<["-"], "mavxifma">, Group<m_x86_Features_Group>;
 def mno_avxifma : Flag<["-"], "mno-avxifma">, Group<m_x86_Features_Group>;
+def mavxneconvert : Flag<["-"], "mavxneconvert">, Group<m_x86_Features_Group>;
+def mno_avxneconvert : Flag<["-"], "mno-avxneconvert">, Group<m_x86_Features_Group>;
 def mavxvnniint8 : Flag<["-"], "mavxvnniint8">, Group<m_x86_Features_Group>;
 def mno_avxvnniint8 : Flag<["-"], "mno-avxvnniint8">, Group<m_x86_Features_Group>;
 def mavxvnni : Flag<["-"], "mavxvnni">, Group<m_x86_Features_Group>;

diff  --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp
index 442503a13025c..2d3f3d10c5716 100644
--- a/clang/lib/Basic/Targets/X86.cpp
+++ b/clang/lib/Basic/Targets/X86.cpp
@@ -340,6 +340,8 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
       HasRAOINT = true;
     } else if (Feature == "+avxifma") {
       HasAVXIFMA = true;
+    } else if (Feature == "+avxneconvert") {
+      HasAVXNECONVERT= true;
     } else if (Feature == "+avxvnni") {
       HasAVXVNNI = true;
     } else if (Feature == "+avxvnniint8") {
@@ -796,6 +798,8 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
     Builder.defineMacro("__RAOINT__");
   if (HasAVXIFMA)
     Builder.defineMacro("__AVXIFMA__");
+  if (HasAVXNECONVERT)
+    Builder.defineMacro("__AVXNECONVERT__");
   if (HasAVXVNNI)
     Builder.defineMacro("__AVXVNNI__");
   if (HasAVXVNNIINT8)
@@ -923,6 +927,7 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const {
       .Case("avx512ifma", true)
       .Case("avx512vp2intersect", true)
       .Case("avxifma", true)
+      .Case("avxneconvert", true)
       .Case("avxvnni", true)
       .Case("avxvnniint8", true)
       .Case("bmi", true)
@@ -1023,7 +1028,7 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
       .Case("avx512ifma", HasAVX512IFMA)
       .Case("avx512vp2intersect", HasAVX512VP2INTERSECT)
       .Case("avxifma", HasAVXIFMA)
-      .Case("avxvnni", HasAVXVNNI)
+      .Case("avxneconvert", HasAVXNECONVERT)
       .Case("avxvnni", HasAVXVNNI)
       .Case("avxvnniint8", HasAVXVNNIINT8)
       .Case("bmi", HasBMI)

diff  --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h
index 825087838941f..71ab946018584 100644
--- a/clang/lib/Basic/Targets/X86.h
+++ b/clang/lib/Basic/Targets/X86.h
@@ -142,6 +142,7 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
   bool HasCMPCCXADD = false;
   bool HasRAOINT = false;
   bool HasAVXVNNIINT8 = false;
+  bool HasAVXNECONVERT = false;
   bool HasKL = false;      // For key locker
   bool HasWIDEKL = false; // For wide key locker
   bool HasHRESET = false;

diff  --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index fdf3024045779..f69bf14891440 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -144,6 +144,7 @@ set(x86_files
   avx512vpopcntdqvlintrin.h
   avxifmaintrin.h
   avxintrin.h
+  avxneconvertintrin.h
   avxvnniint8intrin.h
   avxvnniintrin.h
   bmi2intrin.h

diff  --git a/clang/lib/Headers/avx512vlbf16intrin.h b/clang/lib/Headers/avx512vlbf16intrin.h
index 9edd5c5542ab6..f5b8911fac2ae 100644
--- a/clang/lib/Headers/avx512vlbf16intrin.h
+++ b/clang/lib/Headers/avx512vlbf16intrin.h
@@ -160,12 +160,8 @@ _mm256_maskz_cvtne2ps_pbh(__mmask16 __U, __m256 __A, __m256 __B) {
 ///    A 128-bit vector of [4 x float].
 /// \returns A 128-bit vector of [8 x bfloat] whose lower 64 bits come from
 ///    conversion of __A, and higher 64 bits are 0.
-static __inline__ __m128bh __DEFAULT_FN_ATTRS128
-_mm_cvtneps_pbh(__m128 __A) {
-  return (__m128bh)__builtin_ia32_cvtneps2bf16_128_mask((__v4sf) __A,
-                                                  (__v8bf)_mm_undefined_si128(),
-                                                  (__mmask8)-1);
-}
+#define _mm_cvtneps_pbh(A)                                                     \
+  ((__m128bh)__builtin_ia32_vcvtneps2bf16128((__v4sf)(A)))
 
 /// Convert Packed Single Data to Packed BF16 Data.
 ///
@@ -218,12 +214,8 @@ _mm_maskz_cvtneps_pbh(__mmask8 __U, __m128 __A) {
 /// \param __A
 ///    A 256-bit vector of [8 x float].
 /// \returns A 128-bit vector of [8 x bfloat] comes from conversion of __A.
-static __inline__ __m128bh __DEFAULT_FN_ATTRS256
-_mm256_cvtneps_pbh(__m256 __A) {
-  return (__m128bh)__builtin_ia32_cvtneps2bf16_256_mask((__v8sf)__A,
-                                                  (__v8bf)_mm_undefined_si128(),
-                                                  (__mmask8)-1);
-}
+#define _mm256_cvtneps_pbh(A)                                                  \
+  ((__m128bh)__builtin_ia32_vcvtneps2bf16256((__v8sf)(A)))
 
 /// Convert Packed Single Data to Packed BF16 Data.
 ///

diff  --git a/clang/lib/Headers/avxneconvertintrin.h b/clang/lib/Headers/avxneconvertintrin.h
new file mode 100644
index 0000000000000..1bef1c8937879
--- /dev/null
+++ b/clang/lib/Headers/avxneconvertintrin.h
@@ -0,0 +1,484 @@
+/*===-------------- avxneconvertintrin.h - AVXNECONVERT --------------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __IMMINTRIN_H
+#error                                                                         \
+    "Never use <avxneconvertintrin.h> directly; include <immintrin.h> instead."
+#endif // __IMMINTRIN_H
+
+#ifdef __SSE2__
+
+#ifndef __AVXNECONVERTINTRIN_H
+#define __AVXNECONVERTINTRIN_H
+
+/* Define the default attributes for the functions in this file. */
+#define __DEFAULT_FN_ATTRS128                                                  \
+  __attribute__((__always_inline__, __nodebug__, __target__("avxneconvert"),   \
+                 __min_vector_width__(128)))
+#define __DEFAULT_FN_ATTRS256                                                  \
+  __attribute__((__always_inline__, __nodebug__, __target__("avxneconvert"),   \
+                 __min_vector_width__(256)))
+
+/// Convert the scalar BF16 (16-bit) floating-point element stored at the
+/// memory location \a __A to a single-precision (32-bit) floating-point
+/// value, broadcast it to every packed single-precision (32-bit)
+/// floating-point element of the result, and store the result in
+/// \a dst.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// _mm_bcstnebf16_ps(const void *__A);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c VBCSTNEBF162PS instruction.
+///
+/// \param __A
+///    A pointer to a 16-bit memory location. The address of the memory
+///    location does not have to be aligned.
+/// \returns
+///    A 128-bit vector of [4 x float].
+///
+/// \code{.operation}
+/// b := Convert_BF16_To_FP32(MEM[__A+15:__A])
+/// FOR j := 0 to 3
+///   m := j*32
+///   dst[m+31:m] := b
+/// ENDFOR
+/// dst[MAX:128] := 0
+/// \endcode
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
+_mm_bcstnebf16_ps(const void *__A) {
+  return (__m128)__builtin_ia32_vbcstnebf162ps128((const __bf16 *)__A);
+}
+
+/// Convert the scalar BF16 (16-bit) floating-point element stored at the
+/// memory location \a __A to a single-precision (32-bit) floating-point
+/// value, broadcast it to every packed single-precision (32-bit)
+/// floating-point element of the result, and store the result in
+/// \a dst.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// _mm256_bcstnebf16_ps(const void *__A);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c VBCSTNEBF162PS instruction.
+///
+/// \param __A
+///    A pointer to a 16-bit memory location. The address of the memory
+///    location does not have to be aligned.
+/// \returns
+///    A 256-bit vector of [8 x float].
+///
+/// \code{.operation}
+/// b := Convert_BF16_To_FP32(MEM[__A+15:__A])
+/// FOR j := 0 to 7
+///   m := j*32
+///   dst[m+31:m] := b
+/// ENDFOR
+/// dst[MAX:256] := 0
+/// \endcode
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
+_mm256_bcstnebf16_ps(const void *__A) {
+  return (__m256)__builtin_ia32_vbcstnebf162ps256((const __bf16 *)__A);
+}
+
+/// Convert the scalar half-precision (16-bit) floating-point element stored
+/// at the memory location \a __A to a single-precision (32-bit)
+/// floating-point value, broadcast it to every packed single-precision
+/// (32-bit) floating-point element of the result, and store the result in
+/// \a dst.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// _mm_bcstnesh_ps(const void *__A);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c VBCSTNESH2PS instruction.
+///
+/// \param __A
+///    A pointer to a 16-bit memory location. The address of the memory
+///    location does not have to be aligned.
+/// \returns
+///    A 128-bit vector of [4 x float].
+///
+/// \code{.operation}
+/// b := Convert_FP16_To_FP32(MEM[__A+15:__A])
+/// FOR j := 0 to 3
+///   m := j*32
+///   dst[m+31:m] := b
+/// ENDFOR
+/// dst[MAX:128] := 0
+/// \endcode
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
+_mm_bcstnesh_ps(const void *__A) {
+  return (__m128)__builtin_ia32_vbcstnesh2ps128((const _Float16 *)__A);
+}
+
+/// Convert the scalar half-precision (16-bit) floating-point element stored
+/// at the memory location \a __A to a single-precision (32-bit)
+/// floating-point value, broadcast it to every packed single-precision
+/// (32-bit) floating-point element of the result, and store the result in
+/// \a dst.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// _mm256_bcstnesh_ps(const void *__A);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c VBCSTNESH2PS instruction.
+///
+/// \param __A
+///    A pointer to a 16-bit memory location. The address of the memory
+///    location does not have to be aligned.
+/// \returns
+///    A 256-bit vector of [8 x float].
+///
+/// \code{.operation}
+/// b := Convert_FP16_To_FP32(MEM[__A+15:__A])
+/// FOR j := 0 to 7
+///   m := j*32
+///   dst[m+31:m] := b
+/// ENDFOR
+/// dst[MAX:256] := 0
+/// \endcode
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
+_mm256_bcstnesh_ps(const void *__A) {
+  return (__m256)__builtin_ia32_vbcstnesh2ps256((const _Float16 *)__A);
+}
+
+/// Convert packed BF16 (16-bit) floating-point even-indexed elements
+/// stored at memory locations starting at location \a __A to packed
+/// single-precision (32-bit) floating-point elements, and store the results in
+/// \a dst.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// _mm_cvtneebf16_ps(const __m128bh *__A);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c VCVTNEEBF162PS instruction.
+///
+/// \param __A
+///    A pointer to a 128-bit memory location containing 8 consecutive
+///    BF16 (16-bit) floating-point values.
+/// \returns
+///    A 128-bit vector of [4 x float].
+///
+/// \code{.operation}
+/// FOR j := 0 to 3
+/// 	k := j*2
+/// 	i := k*16
+/// 	m := j*32
+/// 	dst[m+31:m] := Convert_BF16_To_FP32(MEM[__A+i+15:__A+i])
+/// ENDFOR
+/// dst[MAX:128] := 0
+/// \endcode
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
+_mm_cvtneebf16_ps(const __m128bh *__A) {
+  return (__m128)__builtin_ia32_vcvtneebf162ps128((const __v8bf *)__A);
+}
+
+/// Convert packed BF16 (16-bit) floating-point even-indexed elements
+/// stored at memory locations starting at location \a __A to packed
+/// single-precision (32-bit) floating-point elements, and store the results in
+/// \a dst.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// _mm256_cvtneebf16_ps(const __m256bh *__A);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c VCVTNEEBF162PS instruction.
+///
+/// \param __A
+///    A pointer to a 256-bit memory location containing 16 consecutive
+///    BF16 (16-bit) floating-point values.
+/// \returns
+///    A 256-bit vector of [8 x float].
+///
+/// \code{.operation}
+/// FOR j := 0 to 7
+/// 	k := j*2
+/// 	i := k*16
+/// 	m := j*32
+/// 	dst[m+31:m] := Convert_BF16_To_FP32(MEM[__A+i+15:__A+i])
+/// ENDFOR
+/// dst[MAX:256] := 0
+/// \endcode
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
+_mm256_cvtneebf16_ps(const __m256bh *__A) {
+  return (__m256)__builtin_ia32_vcvtneebf162ps256((const __v16bf *)__A);
+}
+
+/// Convert packed half-precision (16-bit) floating-point even-indexed elements
+/// stored at memory locations starting at location \a __A to packed
+/// single-precision (32-bit) floating-point elements, and store the results in
+/// \a dst.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// _mm_cvtneeph_ps(const __m128h *__A);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c VCVTNEEPH2PS instruction.
+///
+/// \param __A
+///    A pointer to a 128-bit memory location containing 8 consecutive
+///    half-precision (16-bit) floating-point values.
+/// \returns
+///    A 128-bit vector of [4 x float].
+///
+/// \code{.operation}
+/// FOR j := 0 to 3
+/// 	k := j*2
+/// 	i := k*16
+/// 	m := j*32
+/// 	dst[m+31:m] := Convert_FP16_To_FP32(MEM[__A+i+15:__A+i])
+/// ENDFOR
+/// dst[MAX:128] := 0
+/// \endcode
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
+_mm_cvtneeph_ps(const __m128h *__A) {
+  return (__m128)__builtin_ia32_vcvtneeph2ps128((const __v8hf *)__A);
+}
+
+/// Convert packed half-precision (16-bit) floating-point even-indexed elements
+/// stored at memory locations starting at location \a __A to packed
+/// single-precision (32-bit) floating-point elements, and store the results in
+/// \a dst.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// _mm256_cvtneeph_ps(const __m256h *__A);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c VCVTNEEPH2PS instruction.
+///
+/// \param __A
+///    A pointer to a 256-bit memory location containing 16 consecutive
+///    half-precision (16-bit) floating-point values.
+/// \returns
+///    A 256-bit vector of [8 x float].
+///
+/// \code{.operation}
+/// FOR j := 0 to 7
+/// 	k := j*2
+/// 	i := k*16
+/// 	m := j*32
+/// 	dst[m+31:m] := Convert_FP16_To_FP32(MEM[__A+i+15:__A+i])
+/// ENDFOR
+/// dst[MAX:256] := 0
+/// \endcode
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
+_mm256_cvtneeph_ps(const __m256h *__A) {
+  return (__m256)__builtin_ia32_vcvtneeph2ps256((const __v16hf *)__A);
+}
+
+/// Convert packed BF16 (16-bit) floating-point odd-indexed elements
+/// stored at memory locations starting at location \a __A to packed
+/// single-precision (32-bit) floating-point elements, and store the results in
+/// \a dst.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// _mm_cvtneobf16_ps(const __m128bh *__A);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c VCVTNEOBF162PS instruction.
+///
+/// \param __A
+///    A pointer to a 128-bit memory location containing 8 consecutive
+///    BF16 (16-bit) floating-point values.
+/// \returns
+///    A 128-bit vector of [4 x float].
+///
+/// \code{.operation}
+/// FOR j := 0 to 3
+/// 	k := j*2+1
+/// 	i := k*16
+/// 	m := j*32
+/// 	dst[m+31:m] := Convert_BF16_To_FP32(MEM[__A+i+15:__A+i])
+/// ENDFOR
+/// dst[MAX:128] := 0
+/// \endcode
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
+_mm_cvtneobf16_ps(const __m128bh *__A) {
+  return (__m128)__builtin_ia32_vcvtneobf162ps128((const __v8bf *)__A);
+}
+
+/// Convert packed BF16 (16-bit) floating-point odd-indexed elements
+/// stored at memory locations starting at location \a __A to packed
+/// single-precision (32-bit) floating-point elements, and store the results in
+/// \a dst.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// _mm256_cvtneobf16_ps(const __m256bh *__A);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c VCVTNEOBF162PS instruction.
+///
+/// \param __A
+///    A pointer to a 256-bit memory location containing 16 consecutive
+///    BF16 (16-bit) floating-point values.
+/// \returns
+///    A 256-bit vector of [8 x float].
+///
+/// \code{.operation}
+/// FOR j := 0 to 7
+/// 	k := j*2+1
+/// 	i := k*16
+/// 	m := j*32
+/// 	dst[m+31:m] := Convert_BF16_To_FP32(MEM[__A+i+15:__A+i])
+/// ENDFOR
+/// dst[MAX:256] := 0
+/// \endcode
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
+_mm256_cvtneobf16_ps(const __m256bh *__A) {
+  return (__m256)__builtin_ia32_vcvtneobf162ps256((const __v16bf *)__A);
+}
+
+/// Convert packed half-precision (16-bit) floating-point odd-indexed elements
+/// stored at memory locations starting at location \a __A to packed
+/// single-precision (32-bit) floating-point elements, and store the results in
+/// \a dst.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// _mm_cvtneoph_ps(const __m128h *__A);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c VCVTNEOPH2PS instruction.
+///
+/// \param __A
+///    A pointer to a 128-bit memory location containing 8 consecutive
+///    half-precision (16-bit) floating-point values.
+/// \returns
+///    A 128-bit vector of [4 x float].
+///
+/// \code{.operation}
+/// FOR j := 0 to 3
+/// 	k := j*2+1
+/// 	i := k*16
+/// 	m := j*32
+/// 	dst[m+31:m] := Convert_FP16_To_FP32(MEM[__A+i+15:__A+i])
+/// ENDFOR
+/// dst[MAX:128] := 0
+/// \endcode
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
+_mm_cvtneoph_ps(const __m128h *__A) {
+  return (__m128)__builtin_ia32_vcvtneoph2ps128((const __v8hf *)__A);
+}
+
+/// Convert packed half-precision (16-bit) floating-point odd-indexed elements
+/// stored at memory locations starting at location \a __A to packed
+/// single-precision (32-bit) floating-point elements, and store the results in
+/// \a dst.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// _mm256_cvtneoph_ps(const __m256h *__A);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c VCVTNEOPH2PS instruction.
+///
+/// \param __A
+///    A pointer to a 256-bit memory location containing 16 consecutive
+///    half-precision (16-bit) floating-point values.
+/// \returns
+///    A 256-bit vector of [8 x float].
+///
+/// \code{.operation}
+/// FOR j := 0 to 7
+/// 	k := j*2+1
+/// 	i := k*16
+/// 	m := j*32
+/// 	dst[m+31:m] := Convert_FP16_To_FP32(MEM[__A+i+15:__A+i])
+/// ENDFOR
+/// dst[MAX:256] := 0
+/// \endcode
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
+_mm256_cvtneoph_ps(const __m256h *__A) {
+  return (__m256)__builtin_ia32_vcvtneoph2ps256((const __v16hf *)__A);
+}
+
+/// Convert packed single-precision (32-bit) floating-point elements in \a __A
+/// to packed BF16 (16-bit) floating-point elements, and store the results in \a
+/// dst.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// _mm_cvtneps_avx_pbh(__m128 __A);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c VCVTNEPS2BF16 instruction.
+///
+/// \param __A
+///    A 128-bit vector of [4 x float].
+/// \returns
+///    A 128-bit vector of [8 x bfloat].
+///
+/// \code{.operation}
+/// FOR j := 0 to 3
+/// 	dst.word[j] := Convert_FP32_To_BF16(__A.fp32[j])
+/// ENDFOR
+/// dst[MAX:64] := 0
+/// \endcode
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+_mm_cvtneps_avx_pbh(__m128 __A) {
+  return (__m128bh)__builtin_ia32_vcvtneps2bf16128((__v4sf)__A);
+}
+
+/// Convert packed single-precision (32-bit) floating-point elements in \a __A
+/// to packed BF16 (16-bit) floating-point elements, and store the results in \a
+/// dst.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// _mm256_cvtneps_avx_pbh(__m256 __A);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c VCVTNEPS2BF16 instruction.
+///
+/// \param __A
+///    A 256-bit vector of [8 x float].
+/// \returns
+///    A 128-bit vector of [8 x bfloat].
+///
+/// \code{.operation}
+/// FOR j := 0 to 7
+/// 	dst.word[j] := Convert_FP32_To_BF16(__A.fp32[j])
+/// ENDFOR
+/// dst[MAX:128] := 0
+/// \endcode
+static __inline__ __m128bh __DEFAULT_FN_ATTRS256
+_mm256_cvtneps_avx_pbh(__m256 __A) {
+  return (__m128bh)__builtin_ia32_vcvtneps2bf16256((__v8sf)__A);
+}
+
+#undef __DEFAULT_FN_ATTRS128
+#undef __DEFAULT_FN_ATTRS256
+
+#endif // __AVXNECONVERTINTRIN_H
+#endif // __SSE2__

diff  --git a/clang/lib/Headers/cpuid.h b/clang/lib/Headers/cpuid.h
index f5884c23eedcf..1ad6853a97c9d 100644
--- a/clang/lib/Headers/cpuid.h
+++ b/clang/lib/Headers/cpuid.h
@@ -210,6 +210,7 @@
 
 /* Features in %edx for leaf 7 sub-leaf 1 */
 #define bit_AVXVNNIINT8   0x00000010
+#define bit_AVXNECONVERT  0x00000020
 #define bit_PREFETCHI     0x00004000
 
 /* Features in %eax for leaf 13 sub-leaf 1 */

diff  --git a/clang/lib/Headers/immintrin.h b/clang/lib/Headers/immintrin.h
index 1204dc700c63a..9d87dd780b650 100644
--- a/clang/lib/Headers/immintrin.h
+++ b/clang/lib/Headers/immintrin.h
@@ -264,6 +264,11 @@
 #include <avxvnniint8intrin.h>
 #endif
 
+#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
+    defined(__AVXNECONVERT__)
+#include <avxneconvertintrin.h>
+#endif
+
 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
     defined(__RDPID__)
 /// Returns the value of the IA32_TSC_AUX MSR (0xc0000103).

diff  --git a/clang/test/CodeGen/X86/avx512vlbf16-builtins.c b/clang/test/CodeGen/X86/avx512vlbf16-builtins.c
index 539c6f8c43b2b..f62ba46cfd95c 100644
--- a/clang/test/CodeGen/X86/avx512vlbf16-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vlbf16-builtins.c
@@ -75,7 +75,7 @@ __m512bh test_mm512_mask_cvtne2ps2bf16(__m512bh C, __mmask32 U, __m512 A, __m512
 
 __m128bh test_mm_cvtneps2bf16(__m128 A) {
   // CHECK-LABEL: @test_mm_cvtneps2bf16
-  // CHECK: @llvm.x86.avx512bf16.mask.cvtneps2bf16.128
+  // CHECK: @llvm.x86.vcvtneps2bf16128
   // CHECK: ret <8 x bfloat> %{{.*}}
   return _mm_cvtneps_pbh(A);
 }
@@ -96,7 +96,7 @@ __m128bh test_mm_maskz_cvtneps2bf16(__m128 A, __mmask8 U) {
 
 __m128bh test_mm256_cvtneps2bf16(__m256 A) {
   // CHECK-LABEL: @test_mm256_cvtneps2bf16
-  // CHECK: @llvm.x86.avx512bf16.cvtneps2bf16.256
+  // CHECK: @llvm.x86.vcvtneps2bf16256
   // CHECK: ret <8 x bfloat> %{{.*}}
   return _mm256_cvtneps_pbh(A);
 }

diff  --git a/clang/test/CodeGen/X86/avxneconvert-builtins.c b/clang/test/CodeGen/X86/avxneconvert-builtins.c
new file mode 100644
index 0000000000000..8f2d0b4573dde
--- /dev/null
+++ b/clang/test/CodeGen/X86/avxneconvert-builtins.c
@@ -0,0 +1,91 @@
+// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx2 -target-feature +avxneconvert \
+// RUN: -emit-llvm -o - -Wall -Werror -pedantic -Wno-gnu-statement-expression | FileCheck %s
+// RUN: %clang_cc1 %s -ffreestanding -triple=i386-unknown-unknown  -target-feature +avx2 -target-feature +avxneconvert \
+// RUN: -emit-llvm -o - -Wall -Werror -pedantic -Wno-gnu-statement-expression | FileCheck %s
+
+#include <immintrin.h>
+#include <stddef.h>
+
+__m128 test_mm_bcstnebf16_ps(const void *__A) {
+  // CHECK-LABEL: @test_mm_bcstnebf16_ps(
+  // CHECK: call <4 x float> @llvm.x86.vbcstnebf162ps128(ptr %{{.*}})
+  return _mm_bcstnebf16_ps(__A);
+}
+
+__m256 test_mm256_bcstnebf16_ps(const void *__A) {
+  // CHECK-LABEL: @test_mm256_bcstnebf16_ps(
+  // CHECK: call <8 x float> @llvm.x86.vbcstnebf162ps256(ptr %{{.*}})
+  return _mm256_bcstnebf16_ps(__A);
+}
+
+__m128 test_mm_bcstnesh_ps(const void *__A) {
+  // CHECK-LABEL: @test_mm_bcstnesh_ps(
+  // CHECK: call <4 x float> @llvm.x86.vbcstnesh2ps128(ptr %{{.*}})
+  return _mm_bcstnesh_ps(__A);
+}
+
+__m256 test_mm256_bcstnesh_ps(const void *__A) {
+  // CHECK-LABEL: @test_mm256_bcstnesh_ps(
+  // CHECK: call <8 x float> @llvm.x86.vbcstnesh2ps256(ptr %{{.*}})
+  return _mm256_bcstnesh_ps(__A);
+}
+
+__m128 test_mm_cvtneebf16_ps(const __m128bh *__A) {
+  // CHECK-LABEL: @test_mm_cvtneebf16_ps(
+  // CHECK: call <4 x float> @llvm.x86.vcvtneebf162ps128(ptr %{{.*}})
+  return _mm_cvtneebf16_ps(__A);
+}
+
+__m256 test_mm256_cvtneebf16_ps(const __m256bh *__A) {
+  // CHECK-LABEL: @test_mm256_cvtneebf16_ps(
+  // CHECK: call <8 x float> @llvm.x86.vcvtneebf162ps256(ptr %{{.*}})
+  return _mm256_cvtneebf16_ps(__A);
+}
+
+__m128 test_mm_cvtneeph_ps(const __m128h *__A) {
+  // CHECK-LABEL: @test_mm_cvtneeph_ps(
+  // CHECK: call <4 x float> @llvm.x86.vcvtneeph2ps128(ptr %{{.*}})
+  return _mm_cvtneeph_ps(__A);
+}
+
+__m256 test_mm256_cvtneeph_ps(const __m256h *__A) {
+  // CHECK-LABEL: @test_mm256_cvtneeph_ps(
+  // CHECK: call <8 x float> @llvm.x86.vcvtneeph2ps256(ptr %{{.*}})
+  return _mm256_cvtneeph_ps(__A);
+}
+
+__m128 test_mm_cvtneobf16_ps(const __m128bh *__A) {
+  // CHECK-LABEL: @test_mm_cvtneobf16_ps(
+  // CHECK: call <4 x float> @llvm.x86.vcvtneobf162ps128(ptr %{{.*}})
+  return _mm_cvtneobf16_ps(__A);
+}
+
+__m256 test_mm256_cvtneobf16_ps(const __m256bh *__A) {
+  // CHECK-LABEL: @test_mm256_cvtneobf16_ps(
+  // CHECK: call <8 x float> @llvm.x86.vcvtneobf162ps256(ptr %{{.*}})
+  return _mm256_cvtneobf16_ps(__A);
+}
+
+__m128 test_mm_cvtneoph_ps(const __m128h *__A) {
+  // CHECK-LABEL: @test_mm_cvtneoph_ps(
+  // CHECK: call <4 x float> @llvm.x86.vcvtneoph2ps128(ptr %{{.*}})
+  return _mm_cvtneoph_ps(__A);
+}
+
+__m256 test_mm256_cvtneoph_ps(const __m256h *__A) {
+  // CHECK-LABEL: @test_mm256_cvtneoph_ps(
+  // CHECK: call <8 x float> @llvm.x86.vcvtneoph2ps256(ptr %{{.*}})
+  return _mm256_cvtneoph_ps(__A);
+}
+
+__m128bh test_mm_cvtneps_avx_pbh(__m128 __A) {
+  // CHECK-LABEL: @test_mm_cvtneps_avx_pbh(
+  // CHECK: call <8 x bfloat> @llvm.x86.vcvtneps2bf16128(<4 x float> %{{.*}})
+  return _mm_cvtneps_avx_pbh(__A);
+}
+
+__m128bh test_mm256_cvtneps_avx_pbh(__m256 __A) {
+  // CHECK-LABEL: @test_mm256_cvtneps_avx_pbh(
+  // CHECK: call <8 x bfloat> @llvm.x86.vcvtneps2bf16256(<8 x float> %{{.*}})
+  return _mm256_cvtneps_avx_pbh(__A);
+}

diff  --git a/clang/test/CodeGen/attr-target-x86.c b/clang/test/CodeGen/attr-target-x86.c
index 9a8a6643a6c60..dbbbd11402349 100644
--- a/clang/test/CodeGen/attr-target-x86.c
+++ b/clang/test/CodeGen/attr-target-x86.c
@@ -54,9 +54,9 @@ void __attribute__((target("arch=x86-64-v4"))) x86_64_v4(void) {}
 // CHECK: #0 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87" "tune-cpu"="i686"
 // CHECK: #1 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+crc32,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt"
 // CHECK-NOT: tune-cpu
-// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-aes,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512fp16,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxvnni,-avxvnniint8,-f16c,-fma,-fma4,-gfni,-kl,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-xop" "tune-cpu"="i686"
+// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-aes,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512fp16,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint8,-f16c,-fma,-fma4,-gfni,-kl,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-xop" "tune-cpu"="i686"
 // CHECK: #3 = {{.*}}"target-cpu"="i686" "target-features"="+crc32,+cx8,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" "tune-cpu"="i686"
-// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512fp16,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxvnni,-avxvnniint8,-f16c,-fma,-fma4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop" "tune-cpu"="i686"
+// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512fp16,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint8,-f16c,-fma,-fma4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop" "tune-cpu"="i686"
 // CHECK: #5 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+crc32,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes,-vaes"
 // CHECK-NOT: tune-cpu
 // CHECK: #6 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-3dnow,-3dnowa,-mmx"

diff  --git a/clang/test/Driver/x86-target-features.c b/clang/test/Driver/x86-target-features.c
index 2c69d7903b012..d5e40ffb1807b 100644
--- a/clang/test/Driver/x86-target-features.c
+++ b/clang/test/Driver/x86-target-features.c
@@ -337,6 +337,11 @@
 // AVX-VNNIINT8: "-target-feature" "+avxvnniint8"
 // NO-AVX-VNNIINT8: "-target-feature" "-avxvnniint8"
 
+// RUN: %clang --target=i386 -mavxneconvert %s -### -o %t.o 2>&1 | FileCheck -check-prefix=AVXNECONVERT %s
+// RUN: %clang --target=i386 -mno-avxneconvert %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-AVXNECONVERT %s
+// AVXNECONVERT: "-target-feature" "+avxneconvert"
+// NO-AVXNECONVERT: "-target-feature" "-avxneconvert"
+
 // RUN: %clang --target=i386 -march=i386 -mcrc32 %s -### 2>&1 | FileCheck -check-prefix=CRC32 %s
 // RUN: %clang --target=i386 -march=i386 -mno-crc32 %s -### 2>&1 | FileCheck -check-prefix=NO-CRC32 %s
 // CRC32: "-target-feature" "+crc32"

diff  --git a/clang/test/Preprocessor/x86_target_features.c b/clang/test/Preprocessor/x86_target_features.c
index 46e76a3517afd..1b4a36a4b2505 100644
--- a/clang/test/Preprocessor/x86_target_features.c
+++ b/clang/test/Preprocessor/x86_target_features.c
@@ -634,6 +634,20 @@
 // AVXVNNIINT8NOAVX2-NOT: #define __AVX2__ 1
 // AVXVNNIINT8NOAVX2-NOT: #define __AVXVNNIINT8__ 1
 
+// RUN: %clang -target i386-unknown-unknown -march=atom -mavxneconvert -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVXNECONVERT %s
+
+// AVXNECONVERT: #define __AVX2__ 1
+// AVXNECONVERT: #define __AVXNECONVERT__ 1
+
+// RUN: %clang -target i386-unknown-unknown -march=atom -mno-avxneconvert -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=NOAVXNECONVERT %s
+
+// NOAVXNECONVERT-NOT: #define __AVXNECONVERT__ 1
+
+// RUN: %clang -target i386-unknown-unknown -march=atom -mavxneconvert -mno-avx2 -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVXNECONVERTNOAVX2 %s
+
+// AVXNECONVERTNOAVX2-NOT: #define __AVX2__ 1
+// AVXNECONVERTNOAVX2-NOT: #define __AVXNECONVERT__ 1
+
 // RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mcrc32 -x c -E -dM -o - %s | FileCheck -check-prefix=CRC32 %s
 
 // CRC32: #define __CRC32__ 1

diff  --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst
index 0b9e08a3e830b..6f0a64fd43468 100644
--- a/llvm/docs/ReleaseNotes.rst
+++ b/llvm/docs/ReleaseNotes.rst
@@ -138,13 +138,14 @@ Changes to the Windows Target
 
 Changes to the X86 Backend
 --------------------------
-* Support ISA of ``AVX-IFMA``.
 
 * Add support for the ``RDMSRLIST and WRMSRLIST`` instructions.
 * Add support for the ``WRMSRNS`` instruction.
 * Support ISA of ``AMX-FP16`` which contains ``tdpfp16ps`` instruction.
 * Support ISA of ``CMPCCXADD``.
+* Support ISA of ``AVX-IFMA``.
 * Support ISA of ``AVX-VNNI-INT8``.
+* Support ISA of ``AVX-NE-CONVERT``.
 
 Changes to the OCaml bindings
 -----------------------------

diff  --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td
index 2fef04ebf5ac1..26a9a9bafaba1 100644
--- a/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -5234,6 +5234,34 @@ let TargetPrefix = "x86" in {
                           Intrinsic<[], [llvm_i8_ty, llvm_i8_ty, llvm_i8_ty],
                                     [ImmArg<ArgIndex<0>>,
                                     ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>]>;
+def int_x86_vbcstnebf162ps128 : ClangBuiltin<"__builtin_ia32_vbcstnebf162ps128">,
+        Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty], [IntrReadMem]>;
+def int_x86_vbcstnebf162ps256 : ClangBuiltin<"__builtin_ia32_vbcstnebf162ps256">,
+        Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], [IntrReadMem]>;
+def int_x86_vbcstnesh2ps128 : ClangBuiltin<"__builtin_ia32_vbcstnesh2ps128">,
+        Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty], [IntrReadMem]>;
+def int_x86_vbcstnesh2ps256 : ClangBuiltin<"__builtin_ia32_vbcstnesh2ps256">,
+        Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], [IntrReadMem]>;
+def int_x86_vcvtneebf162ps128 : ClangBuiltin<"__builtin_ia32_vcvtneebf162ps128">,
+        Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty], [IntrReadMem]>;
+def int_x86_vcvtneebf162ps256 : ClangBuiltin<"__builtin_ia32_vcvtneebf162ps256">,
+        Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], [IntrReadMem]>;
+def int_x86_vcvtneeph2ps128 : ClangBuiltin<"__builtin_ia32_vcvtneeph2ps128">,
+        Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty], [IntrReadMem]>;
+def int_x86_vcvtneeph2ps256 : ClangBuiltin<"__builtin_ia32_vcvtneeph2ps256">,
+        Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], [IntrReadMem]>;
+def int_x86_vcvtneobf162ps128 : ClangBuiltin<"__builtin_ia32_vcvtneobf162ps128">,
+        Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty], [IntrReadMem]>;
+def int_x86_vcvtneobf162ps256 : ClangBuiltin<"__builtin_ia32_vcvtneobf162ps256">,
+        Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], [IntrReadMem]>;
+def int_x86_vcvtneoph2ps128 : ClangBuiltin<"__builtin_ia32_vcvtneoph2ps128">,
+        Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty], [IntrReadMem]>;
+def int_x86_vcvtneoph2ps256 : ClangBuiltin<"__builtin_ia32_vcvtneoph2ps256">,
+        Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], [IntrReadMem]>;
+def int_x86_vcvtneps2bf16128 : ClangBuiltin<"__builtin_ia32_vcvtneps2bf16128">,
+        Intrinsic<[llvm_v8bf16_ty], [llvm_v4f32_ty], [ IntrNoMem ]>;
+def int_x86_vcvtneps2bf16256 : ClangBuiltin<"__builtin_ia32_vcvtneps2bf16256">,
+        Intrinsic<[llvm_v8bf16_ty], [llvm_v8f32_ty], [ IntrNoMem ]>;
 }
 //===----------------------------------------------------------------------===//
 // RAO-INT intrinsics

diff  --git a/llvm/include/llvm/Support/X86TargetParser.def b/llvm/include/llvm/Support/X86TargetParser.def
index 2c656e19d0d19..6b6c740a9b1f8 100644
--- a/llvm/include/llvm/Support/X86TargetParser.def
+++ b/llvm/include/llvm/Support/X86TargetParser.def
@@ -205,6 +205,7 @@ X86_FEATURE       (RAOINT,          "raoint")
 X86_FEATURE       (AVX512FP16,      "avx512fp16")
 X86_FEATURE       (AMX_FP16,        "amx-fp16")
 X86_FEATURE       (CMPCCXADD,       "cmpccxadd")
+X86_FEATURE       (AVXNECONVERT,    "avxneconvert")
 X86_FEATURE       (AVXVNNI,         "avxvnni")
 X86_FEATURE       (AVXIFMA,         "avxifma")
 X86_FEATURE       (AVXVNNIINT8,     "avxvnniint8")

diff  --git a/llvm/lib/Support/Host.cpp b/llvm/lib/Support/Host.cpp
index d1f01fce62a15..70cae1e221b2b 100644
--- a/llvm/lib/Support/Host.cpp
+++ b/llvm/lib/Support/Host.cpp
@@ -1813,6 +1813,7 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
   Features["hreset"]     = HasLeaf7Subleaf1 && ((EAX >> 22) & 1);
   Features["avxifma"]    = HasLeaf7Subleaf1 && ((EAX >> 23) & 1) && HasAVXSave;
   Features["avxvnniint8"] = HasLeaf7Subleaf1 && ((EDX >> 4) & 1) && HasAVXSave;
+  Features["avxneconvert"] = HasLeaf7Subleaf1 && ((EDX >> 5) & 1) && HasAVXSave;
   Features["prefetchi"]  = HasLeaf7Subleaf1 && ((EDX >> 14) & 1);
 
   bool HasLeafD = MaxLevel >= 0xd &&

diff  --git a/llvm/lib/Support/X86TargetParser.cpp b/llvm/lib/Support/X86TargetParser.cpp
index e6074b1e904ec..20bcfb3b9094a 100644
--- a/llvm/lib/Support/X86TargetParser.cpp
+++ b/llvm/lib/Support/X86TargetParser.cpp
@@ -582,11 +582,12 @@ constexpr FeatureBitset ImpliedFeaturesAMX_FP16 = FeatureAMX_TILE;
 constexpr FeatureBitset ImpliedFeaturesAMX_INT8 = FeatureAMX_TILE;
 constexpr FeatureBitset ImpliedFeaturesHRESET = {};
 
-constexpr FeatureBitset ImpliedFeaturesAVXVNNIINT8 = FeatureAVX2;
 constexpr FeatureBitset ImpliedFeaturesPREFETCHI = {};
 constexpr FeatureBitset ImpliedFeaturesCMPCCXADD = {};
 constexpr FeatureBitset ImpliedFeaturesRAOINT = {};
+constexpr FeatureBitset ImpliedFeaturesAVXVNNIINT8 = FeatureAVX2;
 constexpr FeatureBitset ImpliedFeaturesAVXIFMA = FeatureAVX2;
+constexpr FeatureBitset ImpliedFeaturesAVXNECONVERT = FeatureAVX2;
 constexpr FeatureBitset ImpliedFeaturesAVX512FP16 =
     FeatureAVX512BW | FeatureAVX512DQ | FeatureAVX512VL;
 // Key Locker Features

diff  --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index a860352acad62..b137529269bc8 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -272,6 +272,9 @@ def FeatureCMPCCXADD : SubtargetFeature<"cmpccxadd", "HasCMPCCXADD", "true",
 def FeatureRAOINT : SubtargetFeature<"raoint", "HasRAOINT", "true",
                                      "Support RAO-INT instructions",
                                      []>;
+def FeatureAVXNECONVERT : SubtargetFeature<"avxneconvert", "HasAVXNECONVERT", "true",
+                                           "Support AVX-NE-CONVERT instructions",
+                                           [FeatureAVX2]>;
 def FeatureINVPCID : SubtargetFeature<"invpcid", "HasINVPCID", "true",
                                       "Invalidate Process-Context Identifier">;
 def FeatureSGX     : SubtargetFeature<"sgx", "HasSGX", "true",

diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index c5a591a23f3cf..eae7164e819a6 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2178,15 +2178,15 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     }
   }
 
-  if (!Subtarget.useSoftFloat() && Subtarget.hasBF16()) {
+  if (!Subtarget.useSoftFloat() &&
+      (Subtarget.hasAVXNECONVERT() || Subtarget.hasBF16())) {
     addRegisterClass(MVT::v8bf16, &X86::VR128XRegClass);
     addRegisterClass(MVT::v16bf16, &X86::VR256XRegClass);
-    addRegisterClass(MVT::v32bf16, &X86::VR512RegClass);
     // We set the type action of bf16 to TypeSoftPromoteHalf, but we don't
     // provide the method to promote BUILD_VECTOR. Set the operation action
     // Custom to do the customization later.
     setOperationAction(ISD::BUILD_VECTOR, MVT::bf16, Custom);
-    for (auto VT : { MVT::v8bf16, MVT::v16bf16, MVT::v32bf16 }) {
+    for (auto VT : {MVT::v8bf16, MVT::v16bf16}) {
       setF16Action(VT, Expand);
       setOperationAction(ISD::FADD, VT, Expand);
       setOperationAction(ISD::FSUB, VT, Expand);
@@ -2197,6 +2197,16 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     addLegalFPImmediate(APFloat::getZero(APFloat::BFloat()));
   }
 
+  if (!Subtarget.useSoftFloat() && Subtarget.hasBF16()) {
+    addRegisterClass(MVT::v32bf16, &X86::VR512RegClass);
+    setF16Action(MVT::v32bf16, Expand);
+    setOperationAction(ISD::FADD, MVT::v32bf16, Expand);
+    setOperationAction(ISD::FSUB, MVT::v32bf16, Expand);
+    setOperationAction(ISD::FMUL, MVT::v32bf16, Expand);
+    setOperationAction(ISD::FDIV, MVT::v32bf16, Expand);
+    setOperationAction(ISD::BUILD_VECTOR, MVT::v32bf16, Custom);
+  }
+
   if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) {
     setTruncStoreAction(MVT::v4i64, MVT::v4i8,  Legal);
     setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal);

diff  --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 2cf4f3ac67f23..35664b606ea1d 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -12947,6 +12947,16 @@ let Predicates = [HasBF16, HasVLX] in {
   def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)),
                               v8bf16x_info.ImmAllZerosV, VK4WM:$mask),
             (VCVTNEPS2BF16Z128rmbkz VK4WM:$mask, addr:$src)>;
+
+  def : Pat<(v8bf16 (int_x86_vcvtneps2bf16128 (v4f32 VR128X:$src))),
+            (VCVTNEPS2BF16Z128rr VR128X:$src)>;
+  def : Pat<(v8bf16 (int_x86_vcvtneps2bf16128 (loadv4f32 addr:$src))),
+            (VCVTNEPS2BF16Z128rm addr:$src)>;
+
+  def : Pat<(v8bf16 (int_x86_vcvtneps2bf16256 (v8f32 VR256X:$src))),
+            (VCVTNEPS2BF16Z256rr VR256X:$src)>;
+  def : Pat<(v8bf16 (int_x86_vcvtneps2bf16256 (loadv8f32 addr:$src))),
+            (VCVTNEPS2BF16Z256rm addr:$src)>;
 }
 
 let Constraints = "$src1 = $dst" in {

diff  --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td
index 9ce2afec869b4..93405cab6e34b 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@@ -993,6 +993,7 @@ def HasPCONFIG   : Predicate<"Subtarget->hasPCONFIG()">;
 def HasENQCMD    : Predicate<"Subtarget->hasENQCMD()">;
 def HasAMXFP16   : Predicate<"Subtarget->hasAMXFP16()">;
 def HasCMPCCXADD : Predicate<"Subtarget->hasCMPCCXADD()">;
+def HasAVXNECONVERT : Predicate<"Subtarget->hasAVXNECONVERT()">;
 def HasKL        : Predicate<"Subtarget->hasKL()">;
 def HasRAOINT    : Predicate<"Subtarget->hasRAOINT()">;
 def HasWIDEKL    : Predicate<"Subtarget->hasWIDEKL()">;

diff  --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 99d575b570bb1..91d29cf5d2ef1 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -8123,6 +8123,7 @@ let isCommutable = 0 in {
                                              X86GF2P8affineqb>, TAPD;
 }
 
+// AVX-IFMA
 let Predicates = [HasAVXIFMA, NoVLX_Or_NoIFMA], Constraints = "$src1 = $dst",
     checkVEXPredicate = 1 in
 multiclass avx_ifma_rm<bits<8> opc, string OpcodeStr, SDNode OpNode> {
@@ -8161,6 +8162,7 @@ multiclass avx_ifma_rm<bits<8> opc, string OpcodeStr, SDNode OpNode> {
 defm VPMADD52HUQ : avx_ifma_rm<0xb5, "vpmadd52huq", x86vpmadd52h>, VEX_W, ExplicitVEXPrefix;
 defm VPMADD52LUQ : avx_ifma_rm<0xb4, "vpmadd52luq", x86vpmadd52l>, VEX_W, ExplicitVEXPrefix;
 
+// AVX-VNNI-INT8
 let Constraints = "$src1 = $dst" in
 multiclass avx_dotprod_rm<bits<8> Opc, string OpcodeStr, ValueType OpVT,
                           RegisterClass RC, PatFrag MemOpFrag,
@@ -8219,3 +8221,59 @@ let Predicates = [HasAVXVNNIINT8] in {
                                    i256mem, X86vpdpbsuds, SchedWriteVecIMul.YMM,
                                    0>, VEX_L, T8XS;
 }
+
+// AVX-NE-CONVERT
+multiclass AVX_NE_CONVERT_BASE<bits<8> Opcode, string OpcodeStr,
+                  X86MemOperand MemOp128, X86MemOperand MemOp256> {
+  def rm : I<Opcode, MRMSrcMem, (outs VR128:$dst), (ins MemOp128:$src),
+              !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+              [(set VR128:$dst,
+                (!cast<Intrinsic>("int_x86_"#OpcodeStr#"128") addr:$src))]>,
+              Sched<[WriteCvtPH2PS]>, VEX;
+  def Yrm : I<Opcode, MRMSrcMem, (outs VR256:$dst), (ins MemOp256:$src),
+              !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+              [(set VR256:$dst,
+                (!cast<Intrinsic>("int_x86_"#OpcodeStr#"256") addr:$src))]>,
+              Sched<[WriteCvtPH2PSY]>, VEX, VEX_L;
+}
+
+multiclass VCVTNEPS2BF16_BASE {
+  def rr : I<0x72, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+             "vcvtneps2bf16\t{$src, $dst|$dst, $src}",
+             [(set VR128:$dst, (int_x86_vcvtneps2bf16128 VR128:$src))]>,
+             Sched<[WriteCvtPH2PS]>;
+  def rm : I<0x72, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+             "vcvtneps2bf16{x}\t{$src, $dst|$dst, $src}",
+             [(set VR128:$dst, (int_x86_vcvtneps2bf16128 (loadv4f32 addr:$src)))]>,
+             Sched<[WriteCvtPH2PS]>;
+  def Yrr : I<0x72, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
+             "vcvtneps2bf16\t{$src, $dst|$dst, $src}",
+             [(set VR128:$dst, (int_x86_vcvtneps2bf16256 VR256:$src))]>,
+             Sched<[WriteCvtPH2PSY]>, VEX_L;
+  def Yrm : I<0x72, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
+             "vcvtneps2bf16{y}\t{$src, $dst|$dst, $src}",
+             [(set VR128:$dst, (int_x86_vcvtneps2bf16256 (loadv8f32 addr:$src)))]>,
+             Sched<[WriteCvtPH2PSY]>, VEX_L;
+}
+
+let Predicates = [HasAVXNECONVERT] in {
+  defm VBCSTNEBF162PS : AVX_NE_CONVERT_BASE<0xb1, "vbcstnebf162ps", f16mem,
+       f16mem>, T8XS;
+  defm VBCSTNESH2PS : AVX_NE_CONVERT_BASE<0xb1, "vbcstnesh2ps", f16mem, f16mem>,
+       T8PD;
+  defm VCVTNEEBF162PS : AVX_NE_CONVERT_BASE<0xb0, "vcvtneebf162ps", f128mem,
+       f256mem>, T8XS;
+  defm VCVTNEEPH2PS : AVX_NE_CONVERT_BASE<0xb0, "vcvtneeph2ps", f128mem,
+       f256mem>, T8PD;
+  defm VCVTNEOBF162PS : AVX_NE_CONVERT_BASE<0xb0, "vcvtneobf162ps", f128mem,
+       f256mem>, T8XD;
+  defm VCVTNEOPH2PS : AVX_NE_CONVERT_BASE<0xb0, "vcvtneoph2ps", f128mem,
+       f256mem>, T8PS;
+  let checkVEXPredicate = 1 in
+  defm VCVTNEPS2BF16 : VCVTNEPS2BF16_BASE, VEX, T8XS, ExplicitVEXPrefix;
+}
+
+def : InstAlias<"vcvtneps2bf16x\t{$src, $dst|$dst, $src}",
+                (VCVTNEPS2BF16rr VR128:$dst, VR128:$src), 0, "att">;
+def : InstAlias<"vcvtneps2bf16y\t{$src, $dst|$dst, $src}",
+                (VCVTNEPS2BF16Yrr VR128:$dst, VR256:$src), 0, "att">;

diff  --git a/llvm/test/CodeGen/X86/avxneconvert-intrinsics-shared.ll b/llvm/test/CodeGen/X86/avxneconvert-intrinsics-shared.ll
new file mode 100644
index 0000000000000..d88e95c34a4a3
--- /dev/null
+++ b/llvm/test/CodeGen/X86/avxneconvert-intrinsics-shared.ll
@@ -0,0 +1,40 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avxneconvert,+avx512bf16,+avx512vl | FileCheck %s --check-prefix=AVX512BF16-COMMON
+; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=i686-unknown-unknown --show-mc-encoding -mattr=+avxneconvert,+avx512bf16,+avx512vl | FileCheck %s --check-prefix=AVX512BF16-COMMON
+; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx512bf16,+avx512vl | FileCheck %s --check-prefix=AVX512BF16
+; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=i686-unknown-unknown --show-mc-encoding -mattr=+avx512bf16,+avx512vl | FileCheck %s --check-prefix=AVX512BF16
+
+define <8 x bfloat> @test_int_x86_vcvtneps2bf16128(<4 x float> %A) {
+; AVX512BF16-COMMON-LABEL: test_int_x86_vcvtneps2bf16128:
+; AVX512BF16-COMMON:       # %bb.0:
+; AVX512BF16-COMMON-NEXT:    {vex} vcvtneps2bf16 %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x7a,0x72,0xc0]
+; AVX512BF16-COMMON-NEXT:    # kill: def $xmm1 killed $xmm0
+; AVX512BF16-COMMON-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+;
+; AVX512BF16-LABEL: test_int_x86_vcvtneps2bf16128:
+; AVX512BF16:       # %bb.0:
+; AVX512BF16-NEXT:    vcvtneps2bf16 %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x72,0xc0]
+; AVX512BF16-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <8 x bfloat> @llvm.x86.vcvtneps2bf16128(<4 x float> %A)
+  ret <8 x bfloat> %ret
+}
+declare <8 x bfloat> @llvm.x86.vcvtneps2bf16128(<4 x float> %A)
+
+define <8 x bfloat> @test_int_x86_vcvtneps2bf16256(<8 x float> %A) {
+; AVX512BF16-COMMON-LABEL: test_int_x86_vcvtneps2bf16256:
+; AVX512BF16-COMMON:       # %bb.0:
+; AVX512BF16-COMMON-NEXT:    {vex} vcvtneps2bf16 %ymm0, %xmm0 # encoding: [0xc4,0xe2,0x7e,0x72,0xc0]
+; AVX512BF16-COMMON-NEXT:    # kill: def $xmm1 killed $xmm0
+; AVX512BF16-COMMON-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; AVX512BF16-COMMON-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+;
+; AVX512BF16-LABEL: test_int_x86_vcvtneps2bf16256:
+; AVX512BF16:       # %bb.0:
+; AVX512BF16-NEXT:    vcvtneps2bf16 %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x72,0xc0]
+; AVX512BF16-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; AVX512BF16-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <8 x bfloat> @llvm.x86.vcvtneps2bf16256(<8 x float> %A)
+  ret <8 x bfloat> %ret
+}
+declare <8 x bfloat> @llvm.x86.vcvtneps2bf16256(<8 x float> %A)
+

diff  --git a/llvm/test/CodeGen/X86/avxneconvert-intrinsics.ll b/llvm/test/CodeGen/X86/avxneconvert-intrinsics.ll
new file mode 100644
index 0000000000000..e7bc936546254
--- /dev/null
+++ b/llvm/test/CodeGen/X86/avxneconvert-intrinsics.ll
@@ -0,0 +1,219 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avxneconvert | FileCheck %s --check-prefixes=CHECK,X64
+; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=i686-unknown-unknown --show-mc-encoding -mattr=+avxneconvert | FileCheck %s --check-prefixes=CHECK,X86
+
+define <4 x float> @test_int_x86_vbcstnebf162ps128(i8* %A) {
+; X64-LABEL: test_int_x86_vbcstnebf162ps128:
+; X64:       # %bb.0:
+; X64-NEXT:    vbcstnebf162ps (%rdi), %xmm0 # encoding: [0xc4,0xe2,0x7a,0xb1,0x07]
+; X64-NEXT:    retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_vbcstnebf162ps128:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT:    vbcstnebf162ps (%eax), %xmm0 # encoding: [0xc4,0xe2,0x7a,0xb1,0x00]
+; X86-NEXT:    retl # encoding: [0xc3]
+  %ret = call <4 x float> @llvm.x86.vbcstnebf162ps128(i8* %A)
+  ret <4 x float> %ret
+}
+declare <4 x float> @llvm.x86.vbcstnebf162ps128(i8* %A)
+
+define <8 x float> @test_int_x86_vbcstnebf162ps256(i8* %A) {
+; X64-LABEL: test_int_x86_vbcstnebf162ps256:
+; X64:       # %bb.0:
+; X64-NEXT:    vbcstnebf162ps (%rdi), %ymm0 # encoding: [0xc4,0xe2,0x7e,0xb1,0x07]
+; X64-NEXT:    retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_vbcstnebf162ps256:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT:    vbcstnebf162ps (%eax), %ymm0 # encoding: [0xc4,0xe2,0x7e,0xb1,0x00]
+; X86-NEXT:    retl # encoding: [0xc3]
+  %ret = call <8 x float> @llvm.x86.vbcstnebf162ps256(i8* %A)
+  ret <8 x float> %ret
+}
+declare <8 x float> @llvm.x86.vbcstnebf162ps256(i8* %A)
+
+define <4 x float> @test_int_x86_vbcstnesh2ps128(i8* %A) {
+; X64-LABEL: test_int_x86_vbcstnesh2ps128:
+; X64:       # %bb.0:
+; X64-NEXT:    vbcstnesh2ps (%rdi), %xmm0 # encoding: [0xc4,0xe2,0x79,0xb1,0x07]
+; X64-NEXT:    retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_vbcstnesh2ps128:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT:    vbcstnesh2ps (%eax), %xmm0 # encoding: [0xc4,0xe2,0x79,0xb1,0x00]
+; X86-NEXT:    retl # encoding: [0xc3]
+  %ret = call <4 x float> @llvm.x86.vbcstnesh2ps128(i8* %A)
+  ret <4 x float> %ret
+}
+declare <4 x float> @llvm.x86.vbcstnesh2ps128(i8* %A)
+
+define <8 x float> @test_int_x86_vbcstnesh2ps256(i8* %A) {
+; X64-LABEL: test_int_x86_vbcstnesh2ps256:
+; X64:       # %bb.0:
+; X64-NEXT:    vbcstnesh2ps (%rdi), %ymm0 # encoding: [0xc4,0xe2,0x7d,0xb1,0x07]
+; X64-NEXT:    retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_vbcstnesh2ps256:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT:    vbcstnesh2ps (%eax), %ymm0 # encoding: [0xc4,0xe2,0x7d,0xb1,0x00]
+; X86-NEXT:    retl # encoding: [0xc3]
+  %ret = call <8 x float> @llvm.x86.vbcstnesh2ps256(i8* %A)
+  ret <8 x float> %ret
+}
+declare <8 x float> @llvm.x86.vbcstnesh2ps256(i8* %A)
+
+define <4 x float> @test_int_x86_vcvtneebf162ps128(i8* %A) {
+; X64-LABEL: test_int_x86_vcvtneebf162ps128:
+; X64:       # %bb.0:
+; X64-NEXT:    vcvtneebf162ps (%rdi), %xmm0 # encoding: [0xc4,0xe2,0x7a,0xb0,0x07]
+; X64-NEXT:    retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_vcvtneebf162ps128:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT:    vcvtneebf162ps (%eax), %xmm0 # encoding: [0xc4,0xe2,0x7a,0xb0,0x00]
+; X86-NEXT:    retl # encoding: [0xc3]
+  %ret = call <4 x float> @llvm.x86.vcvtneebf162ps128(i8* %A)
+  ret <4 x float> %ret
+}
+declare <4 x float> @llvm.x86.vcvtneebf162ps128(i8* %A)
+
+define <8 x float> @test_int_x86_vcvtneebf162ps256(i8* %A) {
+; X64-LABEL: test_int_x86_vcvtneebf162ps256:
+; X64:       # %bb.0:
+; X64-NEXT:    vcvtneebf162ps (%rdi), %ymm0 # encoding: [0xc4,0xe2,0x7e,0xb0,0x07]
+; X64-NEXT:    retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_vcvtneebf162ps256:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT:    vcvtneebf162ps (%eax), %ymm0 # encoding: [0xc4,0xe2,0x7e,0xb0,0x00]
+; X86-NEXT:    retl # encoding: [0xc3]
+  %ret = call <8 x float> @llvm.x86.vcvtneebf162ps256(i8* %A)
+  ret <8 x float> %ret
+}
+declare <8 x float> @llvm.x86.vcvtneebf162ps256(i8* %A)
+
+define <4 x float> @test_int_x86_vcvtneeph2ps128(i8* %A) {
+; X64-LABEL: test_int_x86_vcvtneeph2ps128:
+; X64:       # %bb.0:
+; X64-NEXT:    vcvtneeph2ps (%rdi), %xmm0 # encoding: [0xc4,0xe2,0x79,0xb0,0x07]
+; X64-NEXT:    retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_vcvtneeph2ps128:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT:    vcvtneeph2ps (%eax), %xmm0 # encoding: [0xc4,0xe2,0x79,0xb0,0x00]
+; X86-NEXT:    retl # encoding: [0xc3]
+  %ret = call <4 x float> @llvm.x86.vcvtneeph2ps128(i8* %A)
+  ret <4 x float> %ret
+}
+declare <4 x float> @llvm.x86.vcvtneeph2ps128(i8* %A)
+
+define <8 x float> @test_int_x86_vcvtneeph2ps256(i8* %A) {
+; X64-LABEL: test_int_x86_vcvtneeph2ps256:
+; X64:       # %bb.0:
+; X64-NEXT:    vcvtneeph2ps (%rdi), %ymm0 # encoding: [0xc4,0xe2,0x7d,0xb0,0x07]
+; X64-NEXT:    retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_vcvtneeph2ps256:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT:    vcvtneeph2ps (%eax), %ymm0 # encoding: [0xc4,0xe2,0x7d,0xb0,0x00]
+; X86-NEXT:    retl # encoding: [0xc3]
+  %ret = call <8 x float> @llvm.x86.vcvtneeph2ps256(i8* %A)
+  ret <8 x float> %ret
+}
+declare <8 x float> @llvm.x86.vcvtneeph2ps256(i8* %A)
+
+define <4 x float> @test_int_x86_vcvtneobf162ps128(i8* %A) {
+; X64-LABEL: test_int_x86_vcvtneobf162ps128:
+; X64:       # %bb.0:
+; X64-NEXT:    vcvtneobf162ps (%rdi), %xmm0 # encoding: [0xc4,0xe2,0x7b,0xb0,0x07]
+; X64-NEXT:    retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_vcvtneobf162ps128:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT:    vcvtneobf162ps (%eax), %xmm0 # encoding: [0xc4,0xe2,0x7b,0xb0,0x00]
+; X86-NEXT:    retl # encoding: [0xc3]
+  %ret = call <4 x float> @llvm.x86.vcvtneobf162ps128(i8* %A)
+  ret <4 x float> %ret
+}
+declare <4 x float> @llvm.x86.vcvtneobf162ps128(i8* %A)
+
+define <8 x float> @test_int_x86_vcvtneobf162ps256(i8* %A) {
+; X64-LABEL: test_int_x86_vcvtneobf162ps256:
+; X64:       # %bb.0:
+; X64-NEXT:    vcvtneobf162ps (%rdi), %ymm0 # encoding: [0xc4,0xe2,0x7f,0xb0,0x07]
+; X64-NEXT:    retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_vcvtneobf162ps256:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT:    vcvtneobf162ps (%eax), %ymm0 # encoding: [0xc4,0xe2,0x7f,0xb0,0x00]
+; X86-NEXT:    retl # encoding: [0xc3]
+  %ret = call <8 x float> @llvm.x86.vcvtneobf162ps256(i8* %A)
+  ret <8 x float> %ret
+}
+declare <8 x float> @llvm.x86.vcvtneobf162ps256(i8* %A)
+
+define <4 x float> @test_int_x86_vcvtneoph2ps128(i8* %A) {
+; X64-LABEL: test_int_x86_vcvtneoph2ps128:
+; X64:       # %bb.0:
+; X64-NEXT:    vcvtneoph2ps (%rdi), %xmm0 # encoding: [0xc4,0xe2,0x78,0xb0,0x07]
+; X64-NEXT:    retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_vcvtneoph2ps128:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT:    vcvtneoph2ps (%eax), %xmm0 # encoding: [0xc4,0xe2,0x78,0xb0,0x00]
+; X86-NEXT:    retl # encoding: [0xc3]
+  %ret = call <4 x float> @llvm.x86.vcvtneoph2ps128(i8* %A)
+  ret <4 x float> %ret
+}
+declare <4 x float> @llvm.x86.vcvtneoph2ps128(i8* %A)
+
+define <8 x float> @test_int_x86_vcvtneoph2ps256(i8* %A) {
+; X64-LABEL: test_int_x86_vcvtneoph2ps256:
+; X64:       # %bb.0:
+; X64-NEXT:    vcvtneoph2ps (%rdi), %ymm0 # encoding: [0xc4,0xe2,0x7c,0xb0,0x07]
+; X64-NEXT:    retq # encoding: [0xc3]
+;
+; X86-LABEL: test_int_x86_vcvtneoph2ps256:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT:    vcvtneoph2ps (%eax), %ymm0 # encoding: [0xc4,0xe2,0x7c,0xb0,0x00]
+; X86-NEXT:    retl # encoding: [0xc3]
+  %ret = call <8 x float> @llvm.x86.vcvtneoph2ps256(i8* %A)
+  ret <8 x float> %ret
+}
+declare <8 x float> @llvm.x86.vcvtneoph2ps256(i8* %A)
+
+define <8 x bfloat> @test_int_x86_vcvtneps2bf16128(<4 x float> %A) {
+; CHECK-LABEL: test_int_x86_vcvtneps2bf16128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    {vex} vcvtneps2bf16 %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x7a,0x72,0xc0]
+; CHECK-NEXT:    # kill: def $xmm1 killed $xmm0
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <8 x bfloat> @llvm.x86.vcvtneps2bf16128(<4 x float> %A)
+  ret <8 x bfloat> %ret
+}
+declare <8 x bfloat> @llvm.x86.vcvtneps2bf16128(<4 x float> %A)
+
+define <8 x bfloat> @test_int_x86_vcvtneps2bf16256(<8 x float> %A) {
+; CHECK-LABEL: test_int_x86_vcvtneps2bf16256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    {vex} vcvtneps2bf16 %ymm0, %xmm0 # encoding: [0xc4,0xe2,0x7e,0x72,0xc0]
+; CHECK-NEXT:    # kill: def $xmm1 killed $xmm0
+; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <8 x bfloat> @llvm.x86.vcvtneps2bf16256(<8 x float> %A)
+  ret <8 x bfloat> %ret
+}
+declare <8 x bfloat> @llvm.x86.vcvtneps2bf16256(<8 x float> %A)
+

diff  --git a/llvm/test/MC/Disassembler/X86/avx_ne_convert-32.txt b/llvm/test/MC/Disassembler/X86/avx_ne_convert-32.txt
new file mode 100644
index 0000000000000..6dda0057fc6c5
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/avx_ne_convert-32.txt
@@ -0,0 +1,335 @@
+# RUN: llvm-mc --disassemble %s -triple=i386-unknown-unknown | FileCheck %s --check-prefixes=ATT
+# RUN: llvm-mc --disassemble %s -triple=i386-unknown-unknown -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL
+
+# ATT:        vbcstnebf162ps  268435456(%esp,%esi,8), %xmm2
+# INTEL:      vbcstnebf162ps xmm2, word ptr [esp + 8*esi + 268435456]
+0xc4,0xe2,0x7a,0xb1,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:        vbcstnebf162ps  291(%edi,%eax,4), %xmm2
+# INTEL:      vbcstnebf162ps xmm2, word ptr [edi + 4*eax + 291]
+0xc4,0xe2,0x7a,0xb1,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT:        vbcstnebf162ps  (%eax), %xmm2
+# INTEL:      vbcstnebf162ps xmm2, word ptr [eax]
+0xc4,0xe2,0x7a,0xb1,0x10
+
+# ATT:        vbcstnebf162ps  -64(,%ebp,2), %xmm2
+# INTEL:      vbcstnebf162ps xmm2, word ptr [2*ebp - 64]
+0xc4,0xe2,0x7a,0xb1,0x14,0x6d,0xc0,0xff,0xff,0xff
+
+# ATT:        vbcstnebf162ps  254(%ecx), %xmm2
+# INTEL:      vbcstnebf162ps xmm2, word ptr [ecx + 254]
+0xc4,0xe2,0x7a,0xb1,0x91,0xfe,0x00,0x00,0x00
+
+# ATT:        vbcstnebf162ps  -256(%edx), %xmm2
+# INTEL:      vbcstnebf162ps xmm2, word ptr [edx - 256]
+0xc4,0xe2,0x7a,0xb1,0x92,0x00,0xff,0xff,0xff
+
+# ATT:        vbcstnebf162ps  268435456(%esp,%esi,8), %ymm2
+# INTEL:      vbcstnebf162ps ymm2, word ptr [esp + 8*esi + 268435456]
+0xc4,0xe2,0x7e,0xb1,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:        vbcstnebf162ps  291(%edi,%eax,4), %ymm2
+# INTEL:      vbcstnebf162ps ymm2, word ptr [edi + 4*eax + 291]
+0xc4,0xe2,0x7e,0xb1,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT:        vbcstnebf162ps  (%eax), %ymm2
+# INTEL:      vbcstnebf162ps ymm2, word ptr [eax]
+0xc4,0xe2,0x7e,0xb1,0x10
+
+# ATT:        vbcstnebf162ps  -64(,%ebp,2), %ymm2
+# INTEL:      vbcstnebf162ps ymm2, word ptr [2*ebp - 64]
+0xc4,0xe2,0x7e,0xb1,0x14,0x6d,0xc0,0xff,0xff,0xff
+
+# ATT:        vbcstnebf162ps  254(%ecx), %ymm2
+# INTEL:      vbcstnebf162ps ymm2, word ptr [ecx + 254]
+0xc4,0xe2,0x7e,0xb1,0x91,0xfe,0x00,0x00,0x00
+
+# ATT:        vbcstnebf162ps  -256(%edx), %ymm2
+# INTEL:      vbcstnebf162ps ymm2, word ptr [edx - 256]
+0xc4,0xe2,0x7e,0xb1,0x92,0x00,0xff,0xff,0xff
+
+# ATT:        vbcstnesh2ps  268435456(%esp,%esi,8), %xmm2
+# INTEL:      vbcstnesh2ps xmm2, word ptr [esp + 8*esi + 268435456]
+0xc4,0xe2,0x79,0xb1,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:        vbcstnesh2ps  291(%edi,%eax,4), %xmm2
+# INTEL:      vbcstnesh2ps xmm2, word ptr [edi + 4*eax + 291]
+0xc4,0xe2,0x79,0xb1,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT:        vbcstnesh2ps  (%eax), %xmm2
+# INTEL:      vbcstnesh2ps xmm2, word ptr [eax]
+0xc4,0xe2,0x79,0xb1,0x10
+
+# ATT:        vbcstnesh2ps  -64(,%ebp,2), %xmm2
+# INTEL:      vbcstnesh2ps xmm2, word ptr [2*ebp - 64]
+0xc4,0xe2,0x79,0xb1,0x14,0x6d,0xc0,0xff,0xff,0xff
+
+# ATT:        vbcstnesh2ps  254(%ecx), %xmm2
+# INTEL:      vbcstnesh2ps xmm2, word ptr [ecx + 254]
+0xc4,0xe2,0x79,0xb1,0x91,0xfe,0x00,0x00,0x00
+
+# ATT:        vbcstnesh2ps  -256(%edx), %xmm2
+# INTEL:      vbcstnesh2ps xmm2, word ptr [edx - 256]
+0xc4,0xe2,0x79,0xb1,0x92,0x00,0xff,0xff,0xff
+
+# ATT:        vbcstnesh2ps  268435456(%esp,%esi,8), %ymm2
+# INTEL:      vbcstnesh2ps ymm2, word ptr [esp + 8*esi + 268435456]
+0xc4,0xe2,0x7d,0xb1,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:        vbcstnesh2ps  291(%edi,%eax,4), %ymm2
+# INTEL:      vbcstnesh2ps ymm2, word ptr [edi + 4*eax + 291]
+0xc4,0xe2,0x7d,0xb1,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT:        vbcstnesh2ps  (%eax), %ymm2
+# INTEL:      vbcstnesh2ps ymm2, word ptr [eax]
+0xc4,0xe2,0x7d,0xb1,0x10
+
+# ATT:        vbcstnesh2ps  -64(,%ebp,2), %ymm2
+# INTEL:      vbcstnesh2ps ymm2, word ptr [2*ebp - 64]
+0xc4,0xe2,0x7d,0xb1,0x14,0x6d,0xc0,0xff,0xff,0xff
+
+# ATT:        vbcstnesh2ps  254(%ecx), %ymm2
+# INTEL:      vbcstnesh2ps ymm2, word ptr [ecx + 254]
+0xc4,0xe2,0x7d,0xb1,0x91,0xfe,0x00,0x00,0x00
+
+# ATT:        vbcstnesh2ps  -256(%edx), %ymm2
+# INTEL:      vbcstnesh2ps ymm2, word ptr [edx - 256]
+0xc4,0xe2,0x7d,0xb1,0x92,0x00,0xff,0xff,0xff
+
+# ATT:        vcvtneebf162ps  268435456(%esp,%esi,8), %xmm2
+# INTEL:      vcvtneebf162ps xmm2, xmmword ptr [esp + 8*esi + 268435456]
+0xc4,0xe2,0x7a,0xb0,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:        vcvtneebf162ps  291(%edi,%eax,4), %xmm2
+# INTEL:      vcvtneebf162ps xmm2, xmmword ptr [edi + 4*eax + 291]
+0xc4,0xe2,0x7a,0xb0,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT:        vcvtneebf162ps  (%eax), %xmm2
+# INTEL:      vcvtneebf162ps xmm2, xmmword ptr [eax]
+0xc4,0xe2,0x7a,0xb0,0x10
+
+# ATT:        vcvtneebf162ps  -512(,%ebp,2), %xmm2
+# INTEL:      vcvtneebf162ps xmm2, xmmword ptr [2*ebp - 512]
+0xc4,0xe2,0x7a,0xb0,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT:        vcvtneebf162ps  2032(%ecx), %xmm2
+# INTEL:      vcvtneebf162ps xmm2, xmmword ptr [ecx + 2032]
+0xc4,0xe2,0x7a,0xb0,0x91,0xf0,0x07,0x00,0x00
+
+# ATT:        vcvtneebf162ps  -2048(%edx), %xmm2
+# INTEL:      vcvtneebf162ps xmm2, xmmword ptr [edx - 2048]
+0xc4,0xe2,0x7a,0xb0,0x92,0x00,0xf8,0xff,0xff
+
+# ATT:        vcvtneebf162ps  268435456(%esp,%esi,8), %ymm2
+# INTEL:      vcvtneebf162ps ymm2, ymmword ptr [esp + 8*esi + 268435456]
+0xc4,0xe2,0x7e,0xb0,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:        vcvtneebf162ps  291(%edi,%eax,4), %ymm2
+# INTEL:      vcvtneebf162ps ymm2, ymmword ptr [edi + 4*eax + 291]
+0xc4,0xe2,0x7e,0xb0,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT:        vcvtneebf162ps  (%eax), %ymm2
+# INTEL:      vcvtneebf162ps ymm2, ymmword ptr [eax]
+0xc4,0xe2,0x7e,0xb0,0x10
+
+# ATT:        vcvtneebf162ps  -1024(,%ebp,2), %ymm2
+# INTEL:      vcvtneebf162ps ymm2, ymmword ptr [2*ebp - 1024]
+0xc4,0xe2,0x7e,0xb0,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT:        vcvtneebf162ps  4064(%ecx), %ymm2
+# INTEL:      vcvtneebf162ps ymm2, ymmword ptr [ecx + 4064]
+0xc4,0xe2,0x7e,0xb0,0x91,0xe0,0x0f,0x00,0x00
+
+# ATT:        vcvtneebf162ps  -4096(%edx), %ymm2
+# INTEL:      vcvtneebf162ps ymm2, ymmword ptr [edx - 4096]
+0xc4,0xe2,0x7e,0xb0,0x92,0x00,0xf0,0xff,0xff
+
+# ATT:        vcvtneeph2ps  268435456(%esp,%esi,8), %xmm2
+# INTEL:      vcvtneeph2ps xmm2, xmmword ptr [esp + 8*esi + 268435456]
+0xc4,0xe2,0x79,0xb0,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:        vcvtneeph2ps  291(%edi,%eax,4), %xmm2
+# INTEL:      vcvtneeph2ps xmm2, xmmword ptr [edi + 4*eax + 291]
+0xc4,0xe2,0x79,0xb0,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT:        vcvtneeph2ps  (%eax), %xmm2
+# INTEL:      vcvtneeph2ps xmm2, xmmword ptr [eax]
+0xc4,0xe2,0x79,0xb0,0x10
+
+# ATT:        vcvtneeph2ps  -512(,%ebp,2), %xmm2
+# INTEL:      vcvtneeph2ps xmm2, xmmword ptr [2*ebp - 512]
+0xc4,0xe2,0x79,0xb0,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT:        vcvtneeph2ps  2032(%ecx), %xmm2
+# INTEL:      vcvtneeph2ps xmm2, xmmword ptr [ecx + 2032]
+0xc4,0xe2,0x79,0xb0,0x91,0xf0,0x07,0x00,0x00
+
+# ATT:        vcvtneeph2ps  -2048(%edx), %xmm2
+# INTEL:      vcvtneeph2ps xmm2, xmmword ptr [edx - 2048]
+0xc4,0xe2,0x79,0xb0,0x92,0x00,0xf8,0xff,0xff
+
+# ATT:        vcvtneeph2ps  268435456(%esp,%esi,8), %ymm2
+# INTEL:      vcvtneeph2ps ymm2, ymmword ptr [esp + 8*esi + 268435456]
+0xc4,0xe2,0x7d,0xb0,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:        vcvtneeph2ps  291(%edi,%eax,4), %ymm2
+# INTEL:      vcvtneeph2ps ymm2, ymmword ptr [edi + 4*eax + 291]
+0xc4,0xe2,0x7d,0xb0,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT:        vcvtneeph2ps  (%eax), %ymm2
+# INTEL:      vcvtneeph2ps ymm2, ymmword ptr [eax]
+0xc4,0xe2,0x7d,0xb0,0x10
+
+# ATT:        vcvtneeph2ps  -1024(,%ebp,2), %ymm2
+# INTEL:      vcvtneeph2ps ymm2, ymmword ptr [2*ebp - 1024]
+0xc4,0xe2,0x7d,0xb0,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT:        vcvtneeph2ps  4064(%ecx), %ymm2
+# INTEL:      vcvtneeph2ps ymm2, ymmword ptr [ecx + 4064]
+0xc4,0xe2,0x7d,0xb0,0x91,0xe0,0x0f,0x00,0x00
+
+# ATT:        vcvtneeph2ps  -4096(%edx), %ymm2
+# INTEL:      vcvtneeph2ps ymm2, ymmword ptr [edx - 4096]
+0xc4,0xe2,0x7d,0xb0,0x92,0x00,0xf0,0xff,0xff
+
+# ATT:        vcvtneobf162ps  268435456(%esp,%esi,8), %xmm2
+# INTEL:      vcvtneobf162ps xmm2, xmmword ptr [esp + 8*esi + 268435456]
+0xc4,0xe2,0x7b,0xb0,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:        vcvtneobf162ps  291(%edi,%eax,4), %xmm2
+# INTEL:      vcvtneobf162ps xmm2, xmmword ptr [edi + 4*eax + 291]
+0xc4,0xe2,0x7b,0xb0,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT:        vcvtneobf162ps  (%eax), %xmm2
+# INTEL:      vcvtneobf162ps xmm2, xmmword ptr [eax]
+0xc4,0xe2,0x7b,0xb0,0x10
+
+# ATT:        vcvtneobf162ps  -512(,%ebp,2), %xmm2
+# INTEL:      vcvtneobf162ps xmm2, xmmword ptr [2*ebp - 512]
+0xc4,0xe2,0x7b,0xb0,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT:        vcvtneobf162ps  2032(%ecx), %xmm2
+# INTEL:      vcvtneobf162ps xmm2, xmmword ptr [ecx + 2032]
+0xc4,0xe2,0x7b,0xb0,0x91,0xf0,0x07,0x00,0x00
+
+# ATT:        vcvtneobf162ps  -2048(%edx), %xmm2
+# INTEL:      vcvtneobf162ps xmm2, xmmword ptr [edx - 2048]
+0xc4,0xe2,0x7b,0xb0,0x92,0x00,0xf8,0xff,0xff
+
+# ATT:        vcvtneobf162ps  268435456(%esp,%esi,8), %ymm2
+# INTEL:      vcvtneobf162ps ymm2, ymmword ptr [esp + 8*esi + 268435456]
+0xc4,0xe2,0x7f,0xb0,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:        vcvtneobf162ps  291(%edi,%eax,4), %ymm2
+# INTEL:      vcvtneobf162ps ymm2, ymmword ptr [edi + 4*eax + 291]
+0xc4,0xe2,0x7f,0xb0,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT:        vcvtneobf162ps  (%eax), %ymm2
+# INTEL:      vcvtneobf162ps ymm2, ymmword ptr [eax]
+0xc4,0xe2,0x7f,0xb0,0x10
+
+# ATT:        vcvtneobf162ps  -1024(,%ebp,2), %ymm2
+# INTEL:      vcvtneobf162ps ymm2, ymmword ptr [2*ebp - 1024]
+0xc4,0xe2,0x7f,0xb0,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT:        vcvtneobf162ps  4064(%ecx), %ymm2
+# INTEL:      vcvtneobf162ps ymm2, ymmword ptr [ecx + 4064]
+0xc4,0xe2,0x7f,0xb0,0x91,0xe0,0x0f,0x00,0x00
+
+# ATT:        vcvtneobf162ps  -4096(%edx), %ymm2
+# INTEL:      vcvtneobf162ps ymm2, ymmword ptr [edx - 4096]
+0xc4,0xe2,0x7f,0xb0,0x92,0x00,0xf0,0xff,0xff
+
+# ATT:        vcvtneoph2ps  268435456(%esp,%esi,8), %xmm2
+# INTEL:      vcvtneoph2ps xmm2, xmmword ptr [esp + 8*esi + 268435456]
+0xc4,0xe2,0x78,0xb0,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:        vcvtneoph2ps  291(%edi,%eax,4), %xmm2
+# INTEL:      vcvtneoph2ps xmm2, xmmword ptr [edi + 4*eax + 291]
+0xc4,0xe2,0x78,0xb0,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT:        vcvtneoph2ps  (%eax), %xmm2
+# INTEL:      vcvtneoph2ps xmm2, xmmword ptr [eax]
+0xc4,0xe2,0x78,0xb0,0x10
+
+# ATT:        vcvtneoph2ps  -512(,%ebp,2), %xmm2
+# INTEL:      vcvtneoph2ps xmm2, xmmword ptr [2*ebp - 512]
+0xc4,0xe2,0x78,0xb0,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT:        vcvtneoph2ps  2032(%ecx), %xmm2
+# INTEL:      vcvtneoph2ps xmm2, xmmword ptr [ecx + 2032]
+0xc4,0xe2,0x78,0xb0,0x91,0xf0,0x07,0x00,0x00
+
+# ATT:        vcvtneoph2ps  -2048(%edx), %xmm2
+# INTEL:      vcvtneoph2ps xmm2, xmmword ptr [edx - 2048]
+0xc4,0xe2,0x78,0xb0,0x92,0x00,0xf8,0xff,0xff
+
+# ATT:        vcvtneoph2ps  268435456(%esp,%esi,8), %ymm2
+# INTEL:      vcvtneoph2ps ymm2, ymmword ptr [esp + 8*esi + 268435456]
+0xc4,0xe2,0x7c,0xb0,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:        vcvtneoph2ps  291(%edi,%eax,4), %ymm2
+# INTEL:      vcvtneoph2ps ymm2, ymmword ptr [edi + 4*eax + 291]
+0xc4,0xe2,0x7c,0xb0,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT:        vcvtneoph2ps  (%eax), %ymm2
+# INTEL:      vcvtneoph2ps ymm2, ymmword ptr [eax]
+0xc4,0xe2,0x7c,0xb0,0x10
+
+# ATT:        vcvtneoph2ps  -1024(,%ebp,2), %ymm2
+# INTEL:      vcvtneoph2ps ymm2, ymmword ptr [2*ebp - 1024]
+0xc4,0xe2,0x7c,0xb0,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT:        vcvtneoph2ps  4064(%ecx), %ymm2
+# INTEL:      vcvtneoph2ps ymm2, ymmword ptr [ecx + 4064]
+0xc4,0xe2,0x7c,0xb0,0x91,0xe0,0x0f,0x00,0x00
+
+# ATT:        vcvtneoph2ps  -4096(%edx), %ymm2
+# INTEL:      vcvtneoph2ps ymm2, ymmword ptr [edx - 4096]
+0xc4,0xe2,0x7c,0xb0,0x92,0x00,0xf0,0xff,0xff
+
+# ATT:        {vex} vcvtneps2bf16 %xmm3, %xmm2
+# INTEL:      {vex} vcvtneps2bf16 xmm2, xmm3
+0xc4,0xe2,0x7a,0x72,0xd3
+
+# ATT:        {vex} vcvtneps2bf16 %ymm3, %xmm2
+# INTEL:      {vex} vcvtneps2bf16 xmm2, ymm3
+0xc4,0xe2,0x7e,0x72,0xd3
+
+# ATT:        {vex} vcvtneps2bf16x  268435456(%esp,%esi,8), %xmm2
+# INTEL:      {vex} vcvtneps2bf16 xmm2, xmmword ptr [esp + 8*esi + 268435456]
+0xc4,0xe2,0x7a,0x72,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:        {vex} vcvtneps2bf16x  291(%edi,%eax,4), %xmm2
+# INTEL:      {vex} vcvtneps2bf16 xmm2, xmmword ptr [edi + 4*eax + 291]
+0xc4,0xe2,0x7a,0x72,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT:        {vex} vcvtneps2bf16x  (%eax), %xmm2
+# INTEL:      {vex} vcvtneps2bf16 xmm2, xmmword ptr [eax]
+0xc4,0xe2,0x7a,0x72,0x10
+
+# ATT:        {vex} vcvtneps2bf16x  -512(,%ebp,2), %xmm2
+# INTEL:      {vex} vcvtneps2bf16 xmm2, xmmword ptr [2*ebp - 512]
+0xc4,0xe2,0x7a,0x72,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT:        {vex} vcvtneps2bf16x  2032(%ecx), %xmm2
+# INTEL:      {vex} vcvtneps2bf16 xmm2, xmmword ptr [ecx + 2032]
+0xc4,0xe2,0x7a,0x72,0x91,0xf0,0x07,0x00,0x00
+
+# ATT:        {vex} vcvtneps2bf16x  -2048(%edx), %xmm2
+# INTEL:      {vex} vcvtneps2bf16 xmm2, xmmword ptr [edx - 2048]
+0xc4,0xe2,0x7a,0x72,0x92,0x00,0xf8,0xff,0xff
+
+# ATT:        {vex} vcvtneps2bf16y  -1024(,%ebp,2), %xmm2
+# INTEL:      {vex} vcvtneps2bf16 xmm2, ymmword ptr [2*ebp - 1024]
+0xc4,0xe2,0x7e,0x72,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT:        {vex} vcvtneps2bf16y  4064(%ecx), %xmm2
+# INTEL:      {vex} vcvtneps2bf16 xmm2, ymmword ptr [ecx + 4064]
+0xc4,0xe2,0x7e,0x72,0x91,0xe0,0x0f,0x00,0x00
+
+# ATT:        {vex} vcvtneps2bf16y  -4096(%edx), %xmm2
+# INTEL:      {vex} vcvtneps2bf16 xmm2, ymmword ptr [edx - 4096]
+0xc4,0xe2,0x7e,0x72,0x92,0x00,0xf0,0xff,0xff
+

diff  --git a/llvm/test/MC/Disassembler/X86/avx_ne_convert-64.txt b/llvm/test/MC/Disassembler/X86/avx_ne_convert-64.txt
new file mode 100644
index 0000000000000..1eadb6a3454c4
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/avx_ne_convert-64.txt
@@ -0,0 +1,335 @@
+# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s --check-prefixes=ATT
+# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL
+
+# ATT:        vbcstnebf162ps  268435456(%rbp,%r14,8), %xmm2
+# INTEL:      vbcstnebf162ps xmm2, word ptr [rbp + 8*r14 + 268435456]
+0xc4,0xa2,0x7a,0xb1,0x94,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:        vbcstnebf162ps  291(%r8,%rax,4), %xmm2
+# INTEL:      vbcstnebf162ps xmm2, word ptr [r8 + 4*rax + 291]
+0xc4,0xc2,0x7a,0xb1,0x94,0x80,0x23,0x01,0x00,0x00
+
+# ATT:        vbcstnebf162ps  (%rip), %xmm2
+# INTEL:      vbcstnebf162ps xmm2, word ptr [rip]
+0xc4,0xe2,0x7a,0xb1,0x15,0x00,0x00,0x00,0x00
+
+# ATT:        vbcstnebf162ps  -64(,%rbp,2), %xmm2
+# INTEL:      vbcstnebf162ps xmm2, word ptr [2*rbp - 64]
+0xc4,0xe2,0x7a,0xb1,0x14,0x6d,0xc0,0xff,0xff,0xff
+
+# ATT:        vbcstnebf162ps  254(%rcx), %xmm2
+# INTEL:      vbcstnebf162ps xmm2, word ptr [rcx + 254]
+0xc4,0xe2,0x7a,0xb1,0x91,0xfe,0x00,0x00,0x00
+
+# ATT:        vbcstnebf162ps  -256(%rdx), %xmm2
+# INTEL:      vbcstnebf162ps xmm2, word ptr [rdx - 256]
+0xc4,0xe2,0x7a,0xb1,0x92,0x00,0xff,0xff,0xff
+
+# ATT:        vbcstnebf162ps  268435456(%rbp,%r14,8), %ymm2
+# INTEL:      vbcstnebf162ps ymm2, word ptr [rbp + 8*r14 + 268435456]
+0xc4,0xa2,0x7e,0xb1,0x94,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:        vbcstnebf162ps  291(%r8,%rax,4), %ymm2
+# INTEL:      vbcstnebf162ps ymm2, word ptr [r8 + 4*rax + 291]
+0xc4,0xc2,0x7e,0xb1,0x94,0x80,0x23,0x01,0x00,0x00
+
+# ATT:        vbcstnebf162ps  (%rip), %ymm2
+# INTEL:      vbcstnebf162ps ymm2, word ptr [rip]
+0xc4,0xe2,0x7e,0xb1,0x15,0x00,0x00,0x00,0x00
+
+# ATT:        vbcstnebf162ps  -64(,%rbp,2), %ymm2
+# INTEL:      vbcstnebf162ps ymm2, word ptr [2*rbp - 64]
+0xc4,0xe2,0x7e,0xb1,0x14,0x6d,0xc0,0xff,0xff,0xff
+
+# ATT:        vbcstnebf162ps  254(%rcx), %ymm2
+# INTEL:      vbcstnebf162ps ymm2, word ptr [rcx + 254]
+0xc4,0xe2,0x7e,0xb1,0x91,0xfe,0x00,0x00,0x00
+
+# ATT:        vbcstnebf162ps  -256(%rdx), %ymm2
+# INTEL:      vbcstnebf162ps ymm2, word ptr [rdx - 256]
+0xc4,0xe2,0x7e,0xb1,0x92,0x00,0xff,0xff,0xff
+
+# ATT:        vbcstnesh2ps  268435456(%rbp,%r14,8), %xmm2
+# INTEL:      vbcstnesh2ps xmm2, word ptr [rbp + 8*r14 + 268435456]
+0xc4,0xa2,0x79,0xb1,0x94,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:        vbcstnesh2ps  291(%r8,%rax,4), %xmm2
+# INTEL:      vbcstnesh2ps xmm2, word ptr [r8 + 4*rax + 291]
+0xc4,0xc2,0x79,0xb1,0x94,0x80,0x23,0x01,0x00,0x00
+
+# ATT:        vbcstnesh2ps  (%rip), %xmm2
+# INTEL:      vbcstnesh2ps xmm2, word ptr [rip]
+0xc4,0xe2,0x79,0xb1,0x15,0x00,0x00,0x00,0x00
+
+# ATT:        vbcstnesh2ps  -64(,%rbp,2), %xmm2
+# INTEL:      vbcstnesh2ps xmm2, word ptr [2*rbp - 64]
+0xc4,0xe2,0x79,0xb1,0x14,0x6d,0xc0,0xff,0xff,0xff
+
+# ATT:        vbcstnesh2ps  254(%rcx), %xmm2
+# INTEL:      vbcstnesh2ps xmm2, word ptr [rcx + 254]
+0xc4,0xe2,0x79,0xb1,0x91,0xfe,0x00,0x00,0x00
+
+# ATT:        vbcstnesh2ps  -256(%rdx), %xmm2
+# INTEL:      vbcstnesh2ps xmm2, word ptr [rdx - 256]
+0xc4,0xe2,0x79,0xb1,0x92,0x00,0xff,0xff,0xff
+
+# ATT:        vbcstnesh2ps  268435456(%rbp,%r14,8), %ymm2
+# INTEL:      vbcstnesh2ps ymm2, word ptr [rbp + 8*r14 + 268435456]
+0xc4,0xa2,0x7d,0xb1,0x94,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:        vbcstnesh2ps  291(%r8,%rax,4), %ymm2
+# INTEL:      vbcstnesh2ps ymm2, word ptr [r8 + 4*rax + 291]
+0xc4,0xc2,0x7d,0xb1,0x94,0x80,0x23,0x01,0x00,0x00
+
+# ATT:        vbcstnesh2ps  (%rip), %ymm2
+# INTEL:      vbcstnesh2ps ymm2, word ptr [rip]
+0xc4,0xe2,0x7d,0xb1,0x15,0x00,0x00,0x00,0x00
+
+# ATT:        vbcstnesh2ps  -64(,%rbp,2), %ymm2
+# INTEL:      vbcstnesh2ps ymm2, word ptr [2*rbp - 64]
+0xc4,0xe2,0x7d,0xb1,0x14,0x6d,0xc0,0xff,0xff,0xff
+
+# ATT:        vbcstnesh2ps  254(%rcx), %ymm2
+# INTEL:      vbcstnesh2ps ymm2, word ptr [rcx + 254]
+0xc4,0xe2,0x7d,0xb1,0x91,0xfe,0x00,0x00,0x00
+
+# ATT:        vbcstnesh2ps  -256(%rdx), %ymm2
+# INTEL:      vbcstnesh2ps ymm2, word ptr [rdx - 256]
+0xc4,0xe2,0x7d,0xb1,0x92,0x00,0xff,0xff,0xff
+
+# ATT:        vcvtneebf162ps  268435456(%rbp,%r14,8), %xmm2
+# INTEL:      vcvtneebf162ps xmm2, xmmword ptr [rbp + 8*r14 + 268435456]
+0xc4,0xa2,0x7a,0xb0,0x94,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:        vcvtneebf162ps  291(%r8,%rax,4), %xmm2
+# INTEL:      vcvtneebf162ps xmm2, xmmword ptr [r8 + 4*rax + 291]
+0xc4,0xc2,0x7a,0xb0,0x94,0x80,0x23,0x01,0x00,0x00
+
+# ATT:        vcvtneebf162ps  (%rip), %xmm2
+# INTEL:      vcvtneebf162ps xmm2, xmmword ptr [rip]
+0xc4,0xe2,0x7a,0xb0,0x15,0x00,0x00,0x00,0x00
+
+# ATT:        vcvtneebf162ps  -512(,%rbp,2), %xmm2
+# INTEL:      vcvtneebf162ps xmm2, xmmword ptr [2*rbp - 512]
+0xc4,0xe2,0x7a,0xb0,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT:        vcvtneebf162ps  2032(%rcx), %xmm2
+# INTEL:      vcvtneebf162ps xmm2, xmmword ptr [rcx + 2032]
+0xc4,0xe2,0x7a,0xb0,0x91,0xf0,0x07,0x00,0x00
+
+# ATT:        vcvtneebf162ps  -2048(%rdx), %xmm2
+# INTEL:      vcvtneebf162ps xmm2, xmmword ptr [rdx - 2048]
+0xc4,0xe2,0x7a,0xb0,0x92,0x00,0xf8,0xff,0xff
+
+# ATT:        vcvtneebf162ps  268435456(%rbp,%r14,8), %ymm2
+# INTEL:      vcvtneebf162ps ymm2, ymmword ptr [rbp + 8*r14 + 268435456]
+0xc4,0xa2,0x7e,0xb0,0x94,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:        vcvtneebf162ps  291(%r8,%rax,4), %ymm2
+# INTEL:      vcvtneebf162ps ymm2, ymmword ptr [r8 + 4*rax + 291]
+0xc4,0xc2,0x7e,0xb0,0x94,0x80,0x23,0x01,0x00,0x00
+
+# ATT:        vcvtneebf162ps  (%rip), %ymm2
+# INTEL:      vcvtneebf162ps ymm2, ymmword ptr [rip]
+0xc4,0xe2,0x7e,0xb0,0x15,0x00,0x00,0x00,0x00
+
+# ATT:        vcvtneebf162ps  -1024(,%rbp,2), %ymm2
+# INTEL:      vcvtneebf162ps ymm2, ymmword ptr [2*rbp - 1024]
+0xc4,0xe2,0x7e,0xb0,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT:        vcvtneebf162ps  4064(%rcx), %ymm2
+# INTEL:      vcvtneebf162ps ymm2, ymmword ptr [rcx + 4064]
+0xc4,0xe2,0x7e,0xb0,0x91,0xe0,0x0f,0x00,0x00
+
+# ATT:        vcvtneebf162ps  -4096(%rdx), %ymm2
+# INTEL:      vcvtneebf162ps ymm2, ymmword ptr [rdx - 4096]
+0xc4,0xe2,0x7e,0xb0,0x92,0x00,0xf0,0xff,0xff
+
+# ATT:        vcvtneeph2ps  268435456(%rbp,%r14,8), %xmm2
+# INTEL:      vcvtneeph2ps xmm2, xmmword ptr [rbp + 8*r14 + 268435456]
+0xc4,0xa2,0x79,0xb0,0x94,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:        vcvtneeph2ps  291(%r8,%rax,4), %xmm2
+# INTEL:      vcvtneeph2ps xmm2, xmmword ptr [r8 + 4*rax + 291]
+0xc4,0xc2,0x79,0xb0,0x94,0x80,0x23,0x01,0x00,0x00
+
+# ATT:        vcvtneeph2ps  (%rip), %xmm2
+# INTEL:      vcvtneeph2ps xmm2, xmmword ptr [rip]
+0xc4,0xe2,0x79,0xb0,0x15,0x00,0x00,0x00,0x00
+
+# ATT:        vcvtneeph2ps  -512(,%rbp,2), %xmm2
+# INTEL:      vcvtneeph2ps xmm2, xmmword ptr [2*rbp - 512]
+0xc4,0xe2,0x79,0xb0,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT:        vcvtneeph2ps  2032(%rcx), %xmm2
+# INTEL:      vcvtneeph2ps xmm2, xmmword ptr [rcx + 2032]
+0xc4,0xe2,0x79,0xb0,0x91,0xf0,0x07,0x00,0x00
+
+# ATT:        vcvtneeph2ps  -2048(%rdx), %xmm2
+# INTEL:      vcvtneeph2ps xmm2, xmmword ptr [rdx - 2048]
+0xc4,0xe2,0x79,0xb0,0x92,0x00,0xf8,0xff,0xff
+
+# ATT:        vcvtneeph2ps  268435456(%rbp,%r14,8), %ymm2
+# INTEL:      vcvtneeph2ps ymm2, ymmword ptr [rbp + 8*r14 + 268435456]
+0xc4,0xa2,0x7d,0xb0,0x94,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:        vcvtneeph2ps  291(%r8,%rax,4), %ymm2
+# INTEL:      vcvtneeph2ps ymm2, ymmword ptr [r8 + 4*rax + 291]
+0xc4,0xc2,0x7d,0xb0,0x94,0x80,0x23,0x01,0x00,0x00
+
+# ATT:        vcvtneeph2ps  (%rip), %ymm2
+# INTEL:      vcvtneeph2ps ymm2, ymmword ptr [rip]
+0xc4,0xe2,0x7d,0xb0,0x15,0x00,0x00,0x00,0x00
+
+# ATT:        vcvtneeph2ps  -1024(,%rbp,2), %ymm2
+# INTEL:      vcvtneeph2ps ymm2, ymmword ptr [2*rbp - 1024]
+0xc4,0xe2,0x7d,0xb0,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT:        vcvtneeph2ps  4064(%rcx), %ymm2
+# INTEL:      vcvtneeph2ps ymm2, ymmword ptr [rcx + 4064]
+0xc4,0xe2,0x7d,0xb0,0x91,0xe0,0x0f,0x00,0x00
+
+# ATT:        vcvtneeph2ps  -4096(%rdx), %ymm2
+# INTEL:      vcvtneeph2ps ymm2, ymmword ptr [rdx - 4096]
+0xc4,0xe2,0x7d,0xb0,0x92,0x00,0xf0,0xff,0xff
+
+# ATT:        vcvtneobf162ps  268435456(%rbp,%r14,8), %xmm2
+# INTEL:      vcvtneobf162ps xmm2, xmmword ptr [rbp + 8*r14 + 268435456]
+0xc4,0xa2,0x7b,0xb0,0x94,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:        vcvtneobf162ps  291(%r8,%rax,4), %xmm2
+# INTEL:      vcvtneobf162ps xmm2, xmmword ptr [r8 + 4*rax + 291]
+0xc4,0xc2,0x7b,0xb0,0x94,0x80,0x23,0x01,0x00,0x00
+
+# ATT:        vcvtneobf162ps  (%rip), %xmm2
+# INTEL:      vcvtneobf162ps xmm2, xmmword ptr [rip]
+0xc4,0xe2,0x7b,0xb0,0x15,0x00,0x00,0x00,0x00
+
+# ATT:        vcvtneobf162ps  -512(,%rbp,2), %xmm2
+# INTEL:      vcvtneobf162ps xmm2, xmmword ptr [2*rbp - 512]
+0xc4,0xe2,0x7b,0xb0,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT:        vcvtneobf162ps  2032(%rcx), %xmm2
+# INTEL:      vcvtneobf162ps xmm2, xmmword ptr [rcx + 2032]
+0xc4,0xe2,0x7b,0xb0,0x91,0xf0,0x07,0x00,0x00
+
+# ATT:        vcvtneobf162ps  -2048(%rdx), %xmm2
+# INTEL:      vcvtneobf162ps xmm2, xmmword ptr [rdx - 2048]
+0xc4,0xe2,0x7b,0xb0,0x92,0x00,0xf8,0xff,0xff
+
+# ATT:        vcvtneobf162ps  268435456(%rbp,%r14,8), %ymm2
+# INTEL:      vcvtneobf162ps ymm2, ymmword ptr [rbp + 8*r14 + 268435456]
+0xc4,0xa2,0x7f,0xb0,0x94,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:        vcvtneobf162ps  291(%r8,%rax,4), %ymm2
+# INTEL:      vcvtneobf162ps ymm2, ymmword ptr [r8 + 4*rax + 291]
+0xc4,0xc2,0x7f,0xb0,0x94,0x80,0x23,0x01,0x00,0x00
+
+# ATT:        vcvtneobf162ps  (%rip), %ymm2
+# INTEL:      vcvtneobf162ps ymm2, ymmword ptr [rip]
+0xc4,0xe2,0x7f,0xb0,0x15,0x00,0x00,0x00,0x00
+
+# ATT:        vcvtneobf162ps  -1024(,%rbp,2), %ymm2
+# INTEL:      vcvtneobf162ps ymm2, ymmword ptr [2*rbp - 1024]
+0xc4,0xe2,0x7f,0xb0,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT:        vcvtneobf162ps  4064(%rcx), %ymm2
+# INTEL:      vcvtneobf162ps ymm2, ymmword ptr [rcx + 4064]
+0xc4,0xe2,0x7f,0xb0,0x91,0xe0,0x0f,0x00,0x00
+
+# ATT:        vcvtneobf162ps  -4096(%rdx), %ymm2
+# INTEL:      vcvtneobf162ps ymm2, ymmword ptr [rdx - 4096]
+0xc4,0xe2,0x7f,0xb0,0x92,0x00,0xf0,0xff,0xff
+
+# ATT:        vcvtneoph2ps  268435456(%rbp,%r14,8), %xmm2
+# INTEL:      vcvtneoph2ps xmm2, xmmword ptr [rbp + 8*r14 + 268435456]
+0xc4,0xa2,0x78,0xb0,0x94,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:        vcvtneoph2ps  291(%r8,%rax,4), %xmm2
+# INTEL:      vcvtneoph2ps xmm2, xmmword ptr [r8 + 4*rax + 291]
+0xc4,0xc2,0x78,0xb0,0x94,0x80,0x23,0x01,0x00,0x00
+
+# ATT:        vcvtneoph2ps  (%rip), %xmm2
+# INTEL:      vcvtneoph2ps xmm2, xmmword ptr [rip]
+0xc4,0xe2,0x78,0xb0,0x15,0x00,0x00,0x00,0x00
+
+# ATT:        vcvtneoph2ps  -512(,%rbp,2), %xmm2
+# INTEL:      vcvtneoph2ps xmm2, xmmword ptr [2*rbp - 512]
+0xc4,0xe2,0x78,0xb0,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT:        vcvtneoph2ps  2032(%rcx), %xmm2
+# INTEL:      vcvtneoph2ps xmm2, xmmword ptr [rcx + 2032]
+0xc4,0xe2,0x78,0xb0,0x91,0xf0,0x07,0x00,0x00
+
+# ATT:        vcvtneoph2ps  -2048(%rdx), %xmm2
+# INTEL:      vcvtneoph2ps xmm2, xmmword ptr [rdx - 2048]
+0xc4,0xe2,0x78,0xb0,0x92,0x00,0xf8,0xff,0xff
+
+# ATT:        vcvtneoph2ps  268435456(%rbp,%r14,8), %ymm2
+# INTEL:      vcvtneoph2ps ymm2, ymmword ptr [rbp + 8*r14 + 268435456]
+0xc4,0xa2,0x7c,0xb0,0x94,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:        vcvtneoph2ps  291(%r8,%rax,4), %ymm2
+# INTEL:      vcvtneoph2ps ymm2, ymmword ptr [r8 + 4*rax + 291]
+0xc4,0xc2,0x7c,0xb0,0x94,0x80,0x23,0x01,0x00,0x00
+
+# ATT:        vcvtneoph2ps  (%rip), %ymm2
+# INTEL:      vcvtneoph2ps ymm2, ymmword ptr [rip]
+0xc4,0xe2,0x7c,0xb0,0x15,0x00,0x00,0x00,0x00
+
+# ATT:        vcvtneoph2ps  -1024(,%rbp,2), %ymm2
+# INTEL:      vcvtneoph2ps ymm2, ymmword ptr [2*rbp - 1024]
+0xc4,0xe2,0x7c,0xb0,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT:        vcvtneoph2ps  4064(%rcx), %ymm2
+# INTEL:      vcvtneoph2ps ymm2, ymmword ptr [rcx + 4064]
+0xc4,0xe2,0x7c,0xb0,0x91,0xe0,0x0f,0x00,0x00
+
+# ATT:        vcvtneoph2ps  -4096(%rdx), %ymm2
+# INTEL:      vcvtneoph2ps ymm2, ymmword ptr [rdx - 4096]
+0xc4,0xe2,0x7c,0xb0,0x92,0x00,0xf0,0xff,0xff
+
+# ATT:        {vex} vcvtneps2bf16 %xmm3, %xmm2
+# INTEL:      {vex} vcvtneps2bf16 xmm2, xmm3
+0xc4,0xe2,0x7a,0x72,0xd3
+
+# ATT:        {vex} vcvtneps2bf16 %ymm3, %xmm2
+# INTEL:      {vex} vcvtneps2bf16 xmm2, ymm3
+0xc4,0xe2,0x7e,0x72,0xd3
+
+# ATT:        {vex} vcvtneps2bf16x  268435456(%rbp,%r14,8), %xmm2
+# INTEL:      {vex} vcvtneps2bf16 xmm2, xmmword ptr [rbp + 8*r14 + 268435456]
+0xc4,0xa2,0x7a,0x72,0x94,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:        {vex} vcvtneps2bf16x  291(%r8,%rax,4), %xmm2
+# INTEL:      {vex} vcvtneps2bf16 xmm2, xmmword ptr [r8 + 4*rax + 291]
+0xc4,0xc2,0x7a,0x72,0x94,0x80,0x23,0x01,0x00,0x00
+
+# ATT:        {vex} vcvtneps2bf16x  (%rip), %xmm2
+# INTEL:      {vex} vcvtneps2bf16 xmm2, xmmword ptr [rip]
+0xc4,0xe2,0x7a,0x72,0x15,0x00,0x00,0x00,0x00
+
+# ATT:        {vex} vcvtneps2bf16x  -512(,%rbp,2), %xmm2
+# INTEL:      {vex} vcvtneps2bf16 xmm2, xmmword ptr [2*rbp - 512]
+0xc4,0xe2,0x7a,0x72,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT:        {vex} vcvtneps2bf16x  2032(%rcx), %xmm2
+# INTEL:      {vex} vcvtneps2bf16 xmm2, xmmword ptr [rcx + 2032]
+0xc4,0xe2,0x7a,0x72,0x91,0xf0,0x07,0x00,0x00
+
+# ATT:        {vex} vcvtneps2bf16x  -2048(%rdx), %xmm2
+# INTEL:      {vex} vcvtneps2bf16 xmm2, xmmword ptr [rdx - 2048]
+0xc4,0xe2,0x7a,0x72,0x92,0x00,0xf8,0xff,0xff
+
+# ATT:        {vex} vcvtneps2bf16y  -1024(,%rbp,2), %xmm2
+# INTEL:      {vex} vcvtneps2bf16 xmm2, ymmword ptr [2*rbp - 1024]
+0xc4,0xe2,0x7e,0x72,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT:        {vex} vcvtneps2bf16y  4064(%rcx), %xmm2
+# INTEL:      {vex} vcvtneps2bf16 xmm2, ymmword ptr [rcx + 4064]
+0xc4,0xe2,0x7e,0x72,0x91,0xe0,0x0f,0x00,0x00
+
+# ATT:        {vex} vcvtneps2bf16y  -4096(%rdx), %xmm2
+# INTEL:      {vex} vcvtneps2bf16 xmm2, ymmword ptr [rdx - 4096]
+0xc4,0xe2,0x7e,0x72,0x92,0x00,0xf0,0xff,0xff
+

diff  --git a/llvm/test/MC/X86/avx_ne_convert-32-att.s b/llvm/test/MC/X86/avx_ne_convert-32-att.s
new file mode 100644
index 0000000000000..023d9c0e6a1c4
--- /dev/null
+++ b/llvm/test/MC/X86/avx_ne_convert-32-att.s
@@ -0,0 +1,334 @@
+// RUN: llvm-mc -triple i686-unknown-unknown --show-encoding %s | FileCheck %s
+
+// CHECK:      vbcstnebf162ps  268435456(%esp,%esi,8), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x7a,0xb1,0x94,0xf4,0x00,0x00,0x00,0x10]
+               vbcstnebf162ps  268435456(%esp,%esi,8), %xmm2
+
+// CHECK:      vbcstnebf162ps  291(%edi,%eax,4), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x7a,0xb1,0x94,0x87,0x23,0x01,0x00,0x00]
+               vbcstnebf162ps  291(%edi,%eax,4), %xmm2
+
+// CHECK:      vbcstnebf162ps  (%eax), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x7a,0xb1,0x10]
+               vbcstnebf162ps  (%eax), %xmm2
+
+// CHECK:      vbcstnebf162ps  -64(,%ebp,2), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x7a,0xb1,0x14,0x6d,0xc0,0xff,0xff,0xff]
+               vbcstnebf162ps  -64(,%ebp,2), %xmm2
+
+// CHECK:      vbcstnebf162ps  254(%ecx), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x7a,0xb1,0x91,0xfe,0x00,0x00,0x00]
+               vbcstnebf162ps  254(%ecx), %xmm2
+
+// CHECK:      vbcstnebf162ps  -256(%edx), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x7a,0xb1,0x92,0x00,0xff,0xff,0xff]
+               vbcstnebf162ps  -256(%edx), %xmm2
+
+// CHECK:      vbcstnebf162ps  268435456(%esp,%esi,8), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x7e,0xb1,0x94,0xf4,0x00,0x00,0x00,0x10]
+               vbcstnebf162ps  268435456(%esp,%esi,8), %ymm2
+
+// CHECK:      vbcstnebf162ps  291(%edi,%eax,4), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x7e,0xb1,0x94,0x87,0x23,0x01,0x00,0x00]
+               vbcstnebf162ps  291(%edi,%eax,4), %ymm2
+
+// CHECK:      vbcstnebf162ps  (%eax), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x7e,0xb1,0x10]
+               vbcstnebf162ps  (%eax), %ymm2
+
+// CHECK:      vbcstnebf162ps  -64(,%ebp,2), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x7e,0xb1,0x14,0x6d,0xc0,0xff,0xff,0xff]
+               vbcstnebf162ps  -64(,%ebp,2), %ymm2
+
+// CHECK:      vbcstnebf162ps  254(%ecx), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x7e,0xb1,0x91,0xfe,0x00,0x00,0x00]
+               vbcstnebf162ps  254(%ecx), %ymm2
+
+// CHECK:      vbcstnebf162ps  -256(%edx), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x7e,0xb1,0x92,0x00,0xff,0xff,0xff]
+               vbcstnebf162ps  -256(%edx), %ymm2
+
+// CHECK:      vbcstnesh2ps  268435456(%esp,%esi,8), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x79,0xb1,0x94,0xf4,0x00,0x00,0x00,0x10]
+               vbcstnesh2ps  268435456(%esp,%esi,8), %xmm2
+
+// CHECK:      vbcstnesh2ps  291(%edi,%eax,4), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x79,0xb1,0x94,0x87,0x23,0x01,0x00,0x00]
+               vbcstnesh2ps  291(%edi,%eax,4), %xmm2
+
+// CHECK:      vbcstnesh2ps  (%eax), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x79,0xb1,0x10]
+               vbcstnesh2ps  (%eax), %xmm2
+
+// CHECK:      vbcstnesh2ps  -64(,%ebp,2), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x79,0xb1,0x14,0x6d,0xc0,0xff,0xff,0xff]
+               vbcstnesh2ps  -64(,%ebp,2), %xmm2
+
+// CHECK:      vbcstnesh2ps  254(%ecx), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x79,0xb1,0x91,0xfe,0x00,0x00,0x00]
+               vbcstnesh2ps  254(%ecx), %xmm2
+
+// CHECK:      vbcstnesh2ps  -256(%edx), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x79,0xb1,0x92,0x00,0xff,0xff,0xff]
+               vbcstnesh2ps  -256(%edx), %xmm2
+
+// CHECK:      vbcstnesh2ps  268435456(%esp,%esi,8), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x7d,0xb1,0x94,0xf4,0x00,0x00,0x00,0x10]
+               vbcstnesh2ps  268435456(%esp,%esi,8), %ymm2
+
+// CHECK:      vbcstnesh2ps  291(%edi,%eax,4), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x7d,0xb1,0x94,0x87,0x23,0x01,0x00,0x00]
+               vbcstnesh2ps  291(%edi,%eax,4), %ymm2
+
+// CHECK:      vbcstnesh2ps  (%eax), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x7d,0xb1,0x10]
+               vbcstnesh2ps  (%eax), %ymm2
+
+// CHECK:      vbcstnesh2ps  -64(,%ebp,2), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x7d,0xb1,0x14,0x6d,0xc0,0xff,0xff,0xff]
+               vbcstnesh2ps  -64(,%ebp,2), %ymm2
+
+// CHECK:      vbcstnesh2ps  254(%ecx), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x7d,0xb1,0x91,0xfe,0x00,0x00,0x00]
+               vbcstnesh2ps  254(%ecx), %ymm2
+
+// CHECK:      vbcstnesh2ps  -256(%edx), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x7d,0xb1,0x92,0x00,0xff,0xff,0xff]
+               vbcstnesh2ps  -256(%edx), %ymm2
+
+// CHECK:      vcvtneebf162ps  268435456(%esp,%esi,8), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x7a,0xb0,0x94,0xf4,0x00,0x00,0x00,0x10]
+               vcvtneebf162ps  268435456(%esp,%esi,8), %xmm2
+
+// CHECK:      vcvtneebf162ps  291(%edi,%eax,4), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x7a,0xb0,0x94,0x87,0x23,0x01,0x00,0x00]
+               vcvtneebf162ps  291(%edi,%eax,4), %xmm2
+
+// CHECK:      vcvtneebf162ps  (%eax), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x7a,0xb0,0x10]
+               vcvtneebf162ps  (%eax), %xmm2
+
+// CHECK:      vcvtneebf162ps  -512(,%ebp,2), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x7a,0xb0,0x14,0x6d,0x00,0xfe,0xff,0xff]
+               vcvtneebf162ps  -512(,%ebp,2), %xmm2
+
+// CHECK:      vcvtneebf162ps  2032(%ecx), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x7a,0xb0,0x91,0xf0,0x07,0x00,0x00]
+               vcvtneebf162ps  2032(%ecx), %xmm2
+
+// CHECK:      vcvtneebf162ps  -2048(%edx), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x7a,0xb0,0x92,0x00,0xf8,0xff,0xff]
+               vcvtneebf162ps  -2048(%edx), %xmm2
+
+// CHECK:      vcvtneebf162ps  268435456(%esp,%esi,8), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x7e,0xb0,0x94,0xf4,0x00,0x00,0x00,0x10]
+               vcvtneebf162ps  268435456(%esp,%esi,8), %ymm2
+
+// CHECK:      vcvtneebf162ps  291(%edi,%eax,4), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x7e,0xb0,0x94,0x87,0x23,0x01,0x00,0x00]
+               vcvtneebf162ps  291(%edi,%eax,4), %ymm2
+
+// CHECK:      vcvtneebf162ps  (%eax), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x7e,0xb0,0x10]
+               vcvtneebf162ps  (%eax), %ymm2
+
+// CHECK:      vcvtneebf162ps  -1024(,%ebp,2), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x7e,0xb0,0x14,0x6d,0x00,0xfc,0xff,0xff]
+               vcvtneebf162ps  -1024(,%ebp,2), %ymm2
+
+// CHECK:      vcvtneebf162ps  4064(%ecx), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x7e,0xb0,0x91,0xe0,0x0f,0x00,0x00]
+               vcvtneebf162ps  4064(%ecx), %ymm2
+
+// CHECK:      vcvtneebf162ps  -4096(%edx), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x7e,0xb0,0x92,0x00,0xf0,0xff,0xff]
+               vcvtneebf162ps  -4096(%edx), %ymm2
+
+// CHECK:      vcvtneeph2ps  268435456(%esp,%esi,8), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x79,0xb0,0x94,0xf4,0x00,0x00,0x00,0x10]
+               vcvtneeph2ps  268435456(%esp,%esi,8), %xmm2
+
+// CHECK:      vcvtneeph2ps  291(%edi,%eax,4), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x79,0xb0,0x94,0x87,0x23,0x01,0x00,0x00]
+               vcvtneeph2ps  291(%edi,%eax,4), %xmm2
+
+// CHECK:      vcvtneeph2ps  (%eax), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x79,0xb0,0x10]
+               vcvtneeph2ps  (%eax), %xmm2
+
+// CHECK:      vcvtneeph2ps  -512(,%ebp,2), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x79,0xb0,0x14,0x6d,0x00,0xfe,0xff,0xff]
+               vcvtneeph2ps  -512(,%ebp,2), %xmm2
+
+// CHECK:      vcvtneeph2ps  2032(%ecx), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x79,0xb0,0x91,0xf0,0x07,0x00,0x00]
+               vcvtneeph2ps  2032(%ecx), %xmm2
+
+// CHECK:      vcvtneeph2ps  -2048(%edx), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x79,0xb0,0x92,0x00,0xf8,0xff,0xff]
+               vcvtneeph2ps  -2048(%edx), %xmm2
+
+// CHECK:      vcvtneeph2ps  268435456(%esp,%esi,8), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x7d,0xb0,0x94,0xf4,0x00,0x00,0x00,0x10]
+               vcvtneeph2ps  268435456(%esp,%esi,8), %ymm2
+
+// CHECK:      vcvtneeph2ps  291(%edi,%eax,4), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x7d,0xb0,0x94,0x87,0x23,0x01,0x00,0x00]
+               vcvtneeph2ps  291(%edi,%eax,4), %ymm2
+
+// CHECK:      vcvtneeph2ps  (%eax), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x7d,0xb0,0x10]
+               vcvtneeph2ps  (%eax), %ymm2
+
+// CHECK:      vcvtneeph2ps  -1024(,%ebp,2), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x7d,0xb0,0x14,0x6d,0x00,0xfc,0xff,0xff]
+               vcvtneeph2ps  -1024(,%ebp,2), %ymm2
+
+// CHECK:      vcvtneeph2ps  4064(%ecx), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x7d,0xb0,0x91,0xe0,0x0f,0x00,0x00]
+               vcvtneeph2ps  4064(%ecx), %ymm2
+
+// CHECK:      vcvtneeph2ps  -4096(%edx), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x7d,0xb0,0x92,0x00,0xf0,0xff,0xff]
+               vcvtneeph2ps  -4096(%edx), %ymm2
+
+// CHECK:      vcvtneobf162ps  268435456(%esp,%esi,8), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x7b,0xb0,0x94,0xf4,0x00,0x00,0x00,0x10]
+               vcvtneobf162ps  268435456(%esp,%esi,8), %xmm2
+
+// CHECK:      vcvtneobf162ps  291(%edi,%eax,4), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x7b,0xb0,0x94,0x87,0x23,0x01,0x00,0x00]
+               vcvtneobf162ps  291(%edi,%eax,4), %xmm2
+
+// CHECK:      vcvtneobf162ps  (%eax), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x7b,0xb0,0x10]
+               vcvtneobf162ps  (%eax), %xmm2
+
+// CHECK:      vcvtneobf162ps  -512(,%ebp,2), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x7b,0xb0,0x14,0x6d,0x00,0xfe,0xff,0xff]
+               vcvtneobf162ps  -512(,%ebp,2), %xmm2
+
+// CHECK:      vcvtneobf162ps  2032(%ecx), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x7b,0xb0,0x91,0xf0,0x07,0x00,0x00]
+               vcvtneobf162ps  2032(%ecx), %xmm2
+
+// CHECK:      vcvtneobf162ps  -2048(%edx), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x7b,0xb0,0x92,0x00,0xf8,0xff,0xff]
+               vcvtneobf162ps  -2048(%edx), %xmm2
+
+// CHECK:      vcvtneobf162ps  268435456(%esp,%esi,8), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x7f,0xb0,0x94,0xf4,0x00,0x00,0x00,0x10]
+               vcvtneobf162ps  268435456(%esp,%esi,8), %ymm2
+
+// CHECK:      vcvtneobf162ps  291(%edi,%eax,4), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x7f,0xb0,0x94,0x87,0x23,0x01,0x00,0x00]
+               vcvtneobf162ps  291(%edi,%eax,4), %ymm2
+
+// CHECK:      vcvtneobf162ps  (%eax), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x7f,0xb0,0x10]
+               vcvtneobf162ps  (%eax), %ymm2
+
+// CHECK:      vcvtneobf162ps  -1024(,%ebp,2), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x7f,0xb0,0x14,0x6d,0x00,0xfc,0xff,0xff]
+               vcvtneobf162ps  -1024(,%ebp,2), %ymm2
+
+// CHECK:      vcvtneobf162ps  4064(%ecx), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x7f,0xb0,0x91,0xe0,0x0f,0x00,0x00]
+               vcvtneobf162ps  4064(%ecx), %ymm2
+
+// CHECK:      vcvtneobf162ps  -4096(%edx), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x7f,0xb0,0x92,0x00,0xf0,0xff,0xff]
+               vcvtneobf162ps  -4096(%edx), %ymm2
+
+// CHECK:      vcvtneoph2ps  268435456(%esp,%esi,8), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x78,0xb0,0x94,0xf4,0x00,0x00,0x00,0x10]
+               vcvtneoph2ps  268435456(%esp,%esi,8), %xmm2
+
+// CHECK:      vcvtneoph2ps  291(%edi,%eax,4), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x78,0xb0,0x94,0x87,0x23,0x01,0x00,0x00]
+               vcvtneoph2ps  291(%edi,%eax,4), %xmm2
+
+// CHECK:      vcvtneoph2ps  (%eax), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x78,0xb0,0x10]
+               vcvtneoph2ps  (%eax), %xmm2
+
+// CHECK:      vcvtneoph2ps  -512(,%ebp,2), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x78,0xb0,0x14,0x6d,0x00,0xfe,0xff,0xff]
+               vcvtneoph2ps  -512(,%ebp,2), %xmm2
+
+// CHECK:      vcvtneoph2ps  2032(%ecx), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x78,0xb0,0x91,0xf0,0x07,0x00,0x00]
+               vcvtneoph2ps  2032(%ecx), %xmm2
+
+// CHECK:      vcvtneoph2ps  -2048(%edx), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x78,0xb0,0x92,0x00,0xf8,0xff,0xff]
+               vcvtneoph2ps  -2048(%edx), %xmm2
+
+// CHECK:      vcvtneoph2ps  268435456(%esp,%esi,8), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x7c,0xb0,0x94,0xf4,0x00,0x00,0x00,0x10]
+               vcvtneoph2ps  268435456(%esp,%esi,8), %ymm2
+
+// CHECK:      vcvtneoph2ps  291(%edi,%eax,4), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x7c,0xb0,0x94,0x87,0x23,0x01,0x00,0x00]
+               vcvtneoph2ps  291(%edi,%eax,4), %ymm2
+
+// CHECK:      vcvtneoph2ps  (%eax), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x7c,0xb0,0x10]
+               vcvtneoph2ps  (%eax), %ymm2
+
+// CHECK:      vcvtneoph2ps  -1024(,%ebp,2), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x7c,0xb0,0x14,0x6d,0x00,0xfc,0xff,0xff]
+               vcvtneoph2ps  -1024(,%ebp,2), %ymm2
+
+// CHECK:      vcvtneoph2ps  4064(%ecx), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x7c,0xb0,0x91,0xe0,0x0f,0x00,0x00]
+               vcvtneoph2ps  4064(%ecx), %ymm2
+
+// CHECK:      vcvtneoph2ps  -4096(%edx), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x7c,0xb0,0x92,0x00,0xf0,0xff,0xff]
+               vcvtneoph2ps  -4096(%edx), %ymm2
+
+// CHECK:      {vex} vcvtneps2bf16 %xmm3, %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x7a,0x72,0xd3]
+               {vex} vcvtneps2bf16 %xmm3, %xmm2
+
+// CHECK:      {vex} vcvtneps2bf16 %ymm3, %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x7e,0x72,0xd3]
+               {vex} vcvtneps2bf16 %ymm3, %xmm2
+
+// CHECK:      {vex} vcvtneps2bf16x  268435456(%esp,%esi,8), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x7a,0x72,0x94,0xf4,0x00,0x00,0x00,0x10]
+               {vex} vcvtneps2bf16x  268435456(%esp,%esi,8), %xmm2
+
+// CHECK:      {vex} vcvtneps2bf16x  291(%edi,%eax,4), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x7a,0x72,0x94,0x87,0x23,0x01,0x00,0x00]
+               {vex} vcvtneps2bf16x  291(%edi,%eax,4), %xmm2
+
+// CHECK:      {vex} vcvtneps2bf16x  (%eax), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x7a,0x72,0x10]
+               {vex} vcvtneps2bf16x  (%eax), %xmm2
+
+// CHECK:      {vex} vcvtneps2bf16x  -512(,%ebp,2), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x7a,0x72,0x14,0x6d,0x00,0xfe,0xff,0xff]
+               {vex} vcvtneps2bf16x  -512(,%ebp,2), %xmm2
+
+// CHECK:      {vex} vcvtneps2bf16x  2032(%ecx), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x7a,0x72,0x91,0xf0,0x07,0x00,0x00]
+               {vex} vcvtneps2bf16x  2032(%ecx), %xmm2
+
+// CHECK:      {vex} vcvtneps2bf16x  -2048(%edx), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x7a,0x72,0x92,0x00,0xf8,0xff,0xff]
+               {vex} vcvtneps2bf16x  -2048(%edx), %xmm2
+
+// CHECK:      {vex} vcvtneps2bf16y  -1024(,%ebp,2), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x7e,0x72,0x14,0x6d,0x00,0xfc,0xff,0xff]
+               {vex} vcvtneps2bf16y  -1024(,%ebp,2), %xmm2
+
+// CHECK:      {vex} vcvtneps2bf16y  4064(%ecx), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x7e,0x72,0x91,0xe0,0x0f,0x00,0x00]
+               {vex} vcvtneps2bf16y  4064(%ecx), %xmm2
+
+// CHECK:      {vex} vcvtneps2bf16y  -4096(%edx), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x7e,0x72,0x92,0x00,0xf0,0xff,0xff]
+               {vex} vcvtneps2bf16y  -4096(%edx), %xmm2
+

diff  --git a/llvm/test/MC/X86/avx_ne_convert-32-intel.s b/llvm/test/MC/X86/avx_ne_convert-32-intel.s
new file mode 100644
index 0000000000000..f92ce59750234
--- /dev/null
+++ b/llvm/test/MC/X86/avx_ne_convert-32-intel.s
@@ -0,0 +1,334 @@
+// RUN: llvm-mc -triple i686-unknown-unknown -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+
+// CHECK:      vbcstnebf162ps xmm2, word ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0xc4,0xe2,0x7a,0xb1,0x94,0xf4,0x00,0x00,0x00,0x10]
+               vbcstnebf162ps xmm2, word ptr [esp + 8*esi + 268435456]
+
+// CHECK:      vbcstnebf162ps xmm2, word ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0xc4,0xe2,0x7a,0xb1,0x94,0x87,0x23,0x01,0x00,0x00]
+               vbcstnebf162ps xmm2, word ptr [edi + 4*eax + 291]
+
+// CHECK:      vbcstnebf162ps xmm2, word ptr [eax]
+// CHECK: encoding: [0xc4,0xe2,0x7a,0xb1,0x10]
+               vbcstnebf162ps xmm2, word ptr [eax]
+
+// CHECK:      vbcstnebf162ps xmm2, word ptr [2*ebp - 64]
+// CHECK: encoding: [0xc4,0xe2,0x7a,0xb1,0x14,0x6d,0xc0,0xff,0xff,0xff]
+               vbcstnebf162ps xmm2, word ptr [2*ebp - 64]
+
+// CHECK:      vbcstnebf162ps xmm2, word ptr [ecx + 254]
+// CHECK: encoding: [0xc4,0xe2,0x7a,0xb1,0x91,0xfe,0x00,0x00,0x00]
+               vbcstnebf162ps xmm2, word ptr [ecx + 254]
+
+// CHECK:      vbcstnebf162ps xmm2, word ptr [edx - 256]
+// CHECK: encoding: [0xc4,0xe2,0x7a,0xb1,0x92,0x00,0xff,0xff,0xff]
+               vbcstnebf162ps xmm2, word ptr [edx - 256]
+
+// CHECK:      vbcstnebf162ps ymm2, word ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0xc4,0xe2,0x7e,0xb1,0x94,0xf4,0x00,0x00,0x00,0x10]
+               vbcstnebf162ps ymm2, word ptr [esp + 8*esi + 268435456]
+
+// CHECK:      vbcstnebf162ps ymm2, word ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0xc4,0xe2,0x7e,0xb1,0x94,0x87,0x23,0x01,0x00,0x00]
+               vbcstnebf162ps ymm2, word ptr [edi + 4*eax + 291]
+
+// CHECK:      vbcstnebf162ps ymm2, word ptr [eax]
+// CHECK: encoding: [0xc4,0xe2,0x7e,0xb1,0x10]
+               vbcstnebf162ps ymm2, word ptr [eax]
+
+// CHECK:      vbcstnebf162ps ymm2, word ptr [2*ebp - 64]
+// CHECK: encoding: [0xc4,0xe2,0x7e,0xb1,0x14,0x6d,0xc0,0xff,0xff,0xff]
+               vbcstnebf162ps ymm2, word ptr [2*ebp - 64]
+
+// CHECK:      vbcstnebf162ps ymm2, word ptr [ecx + 254]
+// CHECK: encoding: [0xc4,0xe2,0x7e,0xb1,0x91,0xfe,0x00,0x00,0x00]
+               vbcstnebf162ps ymm2, word ptr [ecx + 254]
+
+// CHECK:      vbcstnebf162ps ymm2, word ptr [edx - 256]
+// CHECK: encoding: [0xc4,0xe2,0x7e,0xb1,0x92,0x00,0xff,0xff,0xff]
+               vbcstnebf162ps ymm2, word ptr [edx - 256]
+
+// CHECK:      vbcstnesh2ps xmm2, word ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0xc4,0xe2,0x79,0xb1,0x94,0xf4,0x00,0x00,0x00,0x10]
+               vbcstnesh2ps xmm2, word ptr [esp + 8*esi + 268435456]
+
+// CHECK:      vbcstnesh2ps xmm2, word ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0xc4,0xe2,0x79,0xb1,0x94,0x87,0x23,0x01,0x00,0x00]
+               vbcstnesh2ps xmm2, word ptr [edi + 4*eax + 291]
+
+// CHECK:      vbcstnesh2ps xmm2, word ptr [eax]
+// CHECK: encoding: [0xc4,0xe2,0x79,0xb1,0x10]
+               vbcstnesh2ps xmm2, word ptr [eax]
+
+// CHECK:      vbcstnesh2ps xmm2, word ptr [2*ebp - 64]
+// CHECK: encoding: [0xc4,0xe2,0x79,0xb1,0x14,0x6d,0xc0,0xff,0xff,0xff]
+               vbcstnesh2ps xmm2, word ptr [2*ebp - 64]
+
+// CHECK:      vbcstnesh2ps xmm2, word ptr [ecx + 254]
+// CHECK: encoding: [0xc4,0xe2,0x79,0xb1,0x91,0xfe,0x00,0x00,0x00]
+               vbcstnesh2ps xmm2, word ptr [ecx + 254]
+
+// CHECK:      vbcstnesh2ps xmm2, word ptr [edx - 256]
+// CHECK: encoding: [0xc4,0xe2,0x79,0xb1,0x92,0x00,0xff,0xff,0xff]
+               vbcstnesh2ps xmm2, word ptr [edx - 256]
+
+// CHECK:      vbcstnesh2ps ymm2, word ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0xc4,0xe2,0x7d,0xb1,0x94,0xf4,0x00,0x00,0x00,0x10]
+               vbcstnesh2ps ymm2, word ptr [esp + 8*esi + 268435456]
+
+// CHECK:      vbcstnesh2ps ymm2, word ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0xc4,0xe2,0x7d,0xb1,0x94,0x87,0x23,0x01,0x00,0x00]
+               vbcstnesh2ps ymm2, word ptr [edi + 4*eax + 291]
+
+// CHECK:      vbcstnesh2ps ymm2, word ptr [eax]
+// CHECK: encoding: [0xc4,0xe2,0x7d,0xb1,0x10]
+               vbcstnesh2ps ymm2, word ptr [eax]
+
+// CHECK:      vbcstnesh2ps ymm2, word ptr [2*ebp - 64]
+// CHECK: encoding: [0xc4,0xe2,0x7d,0xb1,0x14,0x6d,0xc0,0xff,0xff,0xff]
+               vbcstnesh2ps ymm2, word ptr [2*ebp - 64]
+
+// CHECK:      vbcstnesh2ps ymm2, word ptr [ecx + 254]
+// CHECK: encoding: [0xc4,0xe2,0x7d,0xb1,0x91,0xfe,0x00,0x00,0x00]
+               vbcstnesh2ps ymm2, word ptr [ecx + 254]
+
+// CHECK:      vbcstnesh2ps ymm2, word ptr [edx - 256]
+// CHECK: encoding: [0xc4,0xe2,0x7d,0xb1,0x92,0x00,0xff,0xff,0xff]
+               vbcstnesh2ps ymm2, word ptr [edx - 256]
+
+// CHECK:      vcvtneebf162ps xmm2, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0xc4,0xe2,0x7a,0xb0,0x94,0xf4,0x00,0x00,0x00,0x10]
+               vcvtneebf162ps xmm2, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK:      vcvtneebf162ps xmm2, xmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0xc4,0xe2,0x7a,0xb0,0x94,0x87,0x23,0x01,0x00,0x00]
+               vcvtneebf162ps xmm2, xmmword ptr [edi + 4*eax + 291]
+
+// CHECK:      vcvtneebf162ps xmm2, xmmword ptr [eax]
+// CHECK: encoding: [0xc4,0xe2,0x7a,0xb0,0x10]
+               vcvtneebf162ps xmm2, xmmword ptr [eax]
+
+// CHECK:      vcvtneebf162ps xmm2, xmmword ptr [2*ebp - 512]
+// CHECK: encoding: [0xc4,0xe2,0x7a,0xb0,0x14,0x6d,0x00,0xfe,0xff,0xff]
+               vcvtneebf162ps xmm2, xmmword ptr [2*ebp - 512]
+
+// CHECK:      vcvtneebf162ps xmm2, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0xc4,0xe2,0x7a,0xb0,0x91,0xf0,0x07,0x00,0x00]
+               vcvtneebf162ps xmm2, xmmword ptr [ecx + 2032]
+
+// CHECK:      vcvtneebf162ps xmm2, xmmword ptr [edx - 2048]
+// CHECK: encoding: [0xc4,0xe2,0x7a,0xb0,0x92,0x00,0xf8,0xff,0xff]
+               vcvtneebf162ps xmm2, xmmword ptr [edx - 2048]
+
+// CHECK:      vcvtneebf162ps ymm2, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0xc4,0xe2,0x7e,0xb0,0x94,0xf4,0x00,0x00,0x00,0x10]
+               vcvtneebf162ps ymm2, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK:      vcvtneebf162ps ymm2, ymmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0xc4,0xe2,0x7e,0xb0,0x94,0x87,0x23,0x01,0x00,0x00]
+               vcvtneebf162ps ymm2, ymmword ptr [edi + 4*eax + 291]
+
+// CHECK:      vcvtneebf162ps ymm2, ymmword ptr [eax]
+// CHECK: encoding: [0xc4,0xe2,0x7e,0xb0,0x10]
+               vcvtneebf162ps ymm2, ymmword ptr [eax]
+
+// CHECK:      vcvtneebf162ps ymm2, ymmword ptr [2*ebp - 1024]
+// CHECK: encoding: [0xc4,0xe2,0x7e,0xb0,0x14,0x6d,0x00,0xfc,0xff,0xff]
+               vcvtneebf162ps ymm2, ymmword ptr [2*ebp - 1024]
+
+// CHECK:      vcvtneebf162ps ymm2, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0xc4,0xe2,0x7e,0xb0,0x91,0xe0,0x0f,0x00,0x00]
+               vcvtneebf162ps ymm2, ymmword ptr [ecx + 4064]
+
+// CHECK:      vcvtneebf162ps ymm2, ymmword ptr [edx - 4096]
+// CHECK: encoding: [0xc4,0xe2,0x7e,0xb0,0x92,0x00,0xf0,0xff,0xff]
+               vcvtneebf162ps ymm2, ymmword ptr [edx - 4096]
+
+// CHECK:      vcvtneeph2ps xmm2, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0xc4,0xe2,0x79,0xb0,0x94,0xf4,0x00,0x00,0x00,0x10]
+               vcvtneeph2ps xmm2, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK:      vcvtneeph2ps xmm2, xmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0xc4,0xe2,0x79,0xb0,0x94,0x87,0x23,0x01,0x00,0x00]
+               vcvtneeph2ps xmm2, xmmword ptr [edi + 4*eax + 291]
+
+// CHECK:      vcvtneeph2ps xmm2, xmmword ptr [eax]
+// CHECK: encoding: [0xc4,0xe2,0x79,0xb0,0x10]
+               vcvtneeph2ps xmm2, xmmword ptr [eax]
+
+// CHECK:      vcvtneeph2ps xmm2, xmmword ptr [2*ebp - 512]
+// CHECK: encoding: [0xc4,0xe2,0x79,0xb0,0x14,0x6d,0x00,0xfe,0xff,0xff]
+               vcvtneeph2ps xmm2, xmmword ptr [2*ebp - 512]
+
+// CHECK:      vcvtneeph2ps xmm2, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0xc4,0xe2,0x79,0xb0,0x91,0xf0,0x07,0x00,0x00]
+               vcvtneeph2ps xmm2, xmmword ptr [ecx + 2032]
+
+// CHECK:      vcvtneeph2ps xmm2, xmmword ptr [edx - 2048]
+// CHECK: encoding: [0xc4,0xe2,0x79,0xb0,0x92,0x00,0xf8,0xff,0xff]
+               vcvtneeph2ps xmm2, xmmword ptr [edx - 2048]
+
+// CHECK:      vcvtneeph2ps ymm2, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0xc4,0xe2,0x7d,0xb0,0x94,0xf4,0x00,0x00,0x00,0x10]
+               vcvtneeph2ps ymm2, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK:      vcvtneeph2ps ymm2, ymmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0xc4,0xe2,0x7d,0xb0,0x94,0x87,0x23,0x01,0x00,0x00]
+               vcvtneeph2ps ymm2, ymmword ptr [edi + 4*eax + 291]
+
+// CHECK:      vcvtneeph2ps ymm2, ymmword ptr [eax]
+// CHECK: encoding: [0xc4,0xe2,0x7d,0xb0,0x10]
+               vcvtneeph2ps ymm2, ymmword ptr [eax]
+
+// CHECK:      vcvtneeph2ps ymm2, ymmword ptr [2*ebp - 1024]
+// CHECK: encoding: [0xc4,0xe2,0x7d,0xb0,0x14,0x6d,0x00,0xfc,0xff,0xff]
+               vcvtneeph2ps ymm2, ymmword ptr [2*ebp - 1024]
+
+// CHECK:      vcvtneeph2ps ymm2, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0xc4,0xe2,0x7d,0xb0,0x91,0xe0,0x0f,0x00,0x00]
+               vcvtneeph2ps ymm2, ymmword ptr [ecx + 4064]
+
+// CHECK:      vcvtneeph2ps ymm2, ymmword ptr [edx - 4096]
+// CHECK: encoding: [0xc4,0xe2,0x7d,0xb0,0x92,0x00,0xf0,0xff,0xff]
+               vcvtneeph2ps ymm2, ymmword ptr [edx - 4096]
+
+// CHECK:      vcvtneobf162ps xmm2, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0xc4,0xe2,0x7b,0xb0,0x94,0xf4,0x00,0x00,0x00,0x10]
+               vcvtneobf162ps xmm2, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK:      vcvtneobf162ps xmm2, xmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0xc4,0xe2,0x7b,0xb0,0x94,0x87,0x23,0x01,0x00,0x00]
+               vcvtneobf162ps xmm2, xmmword ptr [edi + 4*eax + 291]
+
+// CHECK:      vcvtneobf162ps xmm2, xmmword ptr [eax]
+// CHECK: encoding: [0xc4,0xe2,0x7b,0xb0,0x10]
+               vcvtneobf162ps xmm2, xmmword ptr [eax]
+
+// CHECK:      vcvtneobf162ps xmm2, xmmword ptr [2*ebp - 512]
+// CHECK: encoding: [0xc4,0xe2,0x7b,0xb0,0x14,0x6d,0x00,0xfe,0xff,0xff]
+               vcvtneobf162ps xmm2, xmmword ptr [2*ebp - 512]
+
+// CHECK:      vcvtneobf162ps xmm2, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0xc4,0xe2,0x7b,0xb0,0x91,0xf0,0x07,0x00,0x00]
+               vcvtneobf162ps xmm2, xmmword ptr [ecx + 2032]
+
+// CHECK:      vcvtneobf162ps xmm2, xmmword ptr [edx - 2048]
+// CHECK: encoding: [0xc4,0xe2,0x7b,0xb0,0x92,0x00,0xf8,0xff,0xff]
+               vcvtneobf162ps xmm2, xmmword ptr [edx - 2048]
+
+// CHECK:      vcvtneobf162ps ymm2, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0xc4,0xe2,0x7f,0xb0,0x94,0xf4,0x00,0x00,0x00,0x10]
+               vcvtneobf162ps ymm2, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK:      vcvtneobf162ps ymm2, ymmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0xc4,0xe2,0x7f,0xb0,0x94,0x87,0x23,0x01,0x00,0x00]
+               vcvtneobf162ps ymm2, ymmword ptr [edi + 4*eax + 291]
+
+// CHECK:      vcvtneobf162ps ymm2, ymmword ptr [eax]
+// CHECK: encoding: [0xc4,0xe2,0x7f,0xb0,0x10]
+               vcvtneobf162ps ymm2, ymmword ptr [eax]
+
+// CHECK:      vcvtneobf162ps ymm2, ymmword ptr [2*ebp - 1024]
+// CHECK: encoding: [0xc4,0xe2,0x7f,0xb0,0x14,0x6d,0x00,0xfc,0xff,0xff]
+               vcvtneobf162ps ymm2, ymmword ptr [2*ebp - 1024]
+
+// CHECK:      vcvtneobf162ps ymm2, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0xc4,0xe2,0x7f,0xb0,0x91,0xe0,0x0f,0x00,0x00]
+               vcvtneobf162ps ymm2, ymmword ptr [ecx + 4064]
+
+// CHECK:      vcvtneobf162ps ymm2, ymmword ptr [edx - 4096]
+// CHECK: encoding: [0xc4,0xe2,0x7f,0xb0,0x92,0x00,0xf0,0xff,0xff]
+               vcvtneobf162ps ymm2, ymmword ptr [edx - 4096]
+
+// CHECK:      vcvtneoph2ps xmm2, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0xc4,0xe2,0x78,0xb0,0x94,0xf4,0x00,0x00,0x00,0x10]
+               vcvtneoph2ps xmm2, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK:      vcvtneoph2ps xmm2, xmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0xc4,0xe2,0x78,0xb0,0x94,0x87,0x23,0x01,0x00,0x00]
+               vcvtneoph2ps xmm2, xmmword ptr [edi + 4*eax + 291]
+
+// CHECK:      vcvtneoph2ps xmm2, xmmword ptr [eax]
+// CHECK: encoding: [0xc4,0xe2,0x78,0xb0,0x10]
+               vcvtneoph2ps xmm2, xmmword ptr [eax]
+
+// CHECK:      vcvtneoph2ps xmm2, xmmword ptr [2*ebp - 512]
+// CHECK: encoding: [0xc4,0xe2,0x78,0xb0,0x14,0x6d,0x00,0xfe,0xff,0xff]
+               vcvtneoph2ps xmm2, xmmword ptr [2*ebp - 512]
+
+// CHECK:      vcvtneoph2ps xmm2, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0xc4,0xe2,0x78,0xb0,0x91,0xf0,0x07,0x00,0x00]
+               vcvtneoph2ps xmm2, xmmword ptr [ecx + 2032]
+
+// CHECK:      vcvtneoph2ps xmm2, xmmword ptr [edx - 2048]
+// CHECK: encoding: [0xc4,0xe2,0x78,0xb0,0x92,0x00,0xf8,0xff,0xff]
+               vcvtneoph2ps xmm2, xmmword ptr [edx - 2048]
+
+// CHECK:      vcvtneoph2ps ymm2, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0xc4,0xe2,0x7c,0xb0,0x94,0xf4,0x00,0x00,0x00,0x10]
+               vcvtneoph2ps ymm2, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK:      vcvtneoph2ps ymm2, ymmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0xc4,0xe2,0x7c,0xb0,0x94,0x87,0x23,0x01,0x00,0x00]
+               vcvtneoph2ps ymm2, ymmword ptr [edi + 4*eax + 291]
+
+// CHECK:      vcvtneoph2ps ymm2, ymmword ptr [eax]
+// CHECK: encoding: [0xc4,0xe2,0x7c,0xb0,0x10]
+               vcvtneoph2ps ymm2, ymmword ptr [eax]
+
+// CHECK:      vcvtneoph2ps ymm2, ymmword ptr [2*ebp - 1024]
+// CHECK: encoding: [0xc4,0xe2,0x7c,0xb0,0x14,0x6d,0x00,0xfc,0xff,0xff]
+               vcvtneoph2ps ymm2, ymmword ptr [2*ebp - 1024]
+
+// CHECK:      vcvtneoph2ps ymm2, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0xc4,0xe2,0x7c,0xb0,0x91,0xe0,0x0f,0x00,0x00]
+               vcvtneoph2ps ymm2, ymmword ptr [ecx + 4064]
+
+// CHECK:      vcvtneoph2ps ymm2, ymmword ptr [edx - 4096]
+// CHECK: encoding: [0xc4,0xe2,0x7c,0xb0,0x92,0x00,0xf0,0xff,0xff]
+               vcvtneoph2ps ymm2, ymmword ptr [edx - 4096]
+
+// CHECK:      {vex} vcvtneps2bf16 xmm2, xmm3
+// CHECK: encoding: [0xc4,0xe2,0x7a,0x72,0xd3]
+               {vex} vcvtneps2bf16 xmm2, xmm3
+
+// CHECK:      {vex} vcvtneps2bf16 xmm2, ymm3
+// CHECK: encoding: [0xc4,0xe2,0x7e,0x72,0xd3]
+               {vex} vcvtneps2bf16 xmm2, ymm3
+
+// CHECK:      {vex} vcvtneps2bf16 xmm2, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0xc4,0xe2,0x7a,0x72,0x94,0xf4,0x00,0x00,0x00,0x10]
+               {vex} vcvtneps2bf16 xmm2, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK:      {vex} vcvtneps2bf16 xmm2, xmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0xc4,0xe2,0x7a,0x72,0x94,0x87,0x23,0x01,0x00,0x00]
+               {vex} vcvtneps2bf16 xmm2, xmmword ptr [edi + 4*eax + 291]
+
+// CHECK:      {vex} vcvtneps2bf16 xmm2, xmmword ptr [eax]
+// CHECK: encoding: [0xc4,0xe2,0x7a,0x72,0x10]
+               {vex} vcvtneps2bf16 xmm2, xmmword ptr [eax]
+
+// CHECK:      {vex} vcvtneps2bf16 xmm2, xmmword ptr [2*ebp - 512]
+// CHECK: encoding: [0xc4,0xe2,0x7a,0x72,0x14,0x6d,0x00,0xfe,0xff,0xff]
+               {vex} vcvtneps2bf16 xmm2, xmmword ptr [2*ebp - 512]
+
+// CHECK:      {vex} vcvtneps2bf16 xmm2, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0xc4,0xe2,0x7a,0x72,0x91,0xf0,0x07,0x00,0x00]
+               {vex} vcvtneps2bf16 xmm2, xmmword ptr [ecx + 2032]
+
+// CHECK:      {vex} vcvtneps2bf16 xmm2, xmmword ptr [edx - 2048]
+// CHECK: encoding: [0xc4,0xe2,0x7a,0x72,0x92,0x00,0xf8,0xff,0xff]
+               {vex} vcvtneps2bf16 xmm2, xmmword ptr [edx - 2048]
+
+// CHECK:      {vex} vcvtneps2bf16 xmm2, ymmword ptr [2*ebp - 1024]
+// CHECK: encoding: [0xc4,0xe2,0x7e,0x72,0x14,0x6d,0x00,0xfc,0xff,0xff]
+               {vex} vcvtneps2bf16 xmm2, ymmword ptr [2*ebp - 1024]
+
+// CHECK:      {vex} vcvtneps2bf16 xmm2, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0xc4,0xe2,0x7e,0x72,0x91,0xe0,0x0f,0x00,0x00]
+               {vex} vcvtneps2bf16 xmm2, ymmword ptr [ecx + 4064]
+
+// CHECK:      {vex} vcvtneps2bf16 xmm2, ymmword ptr [edx - 4096]
+// CHECK: encoding: [0xc4,0xe2,0x7e,0x72,0x92,0x00,0xf0,0xff,0xff]
+               {vex} vcvtneps2bf16 xmm2, ymmword ptr [edx - 4096]
+

diff  --git a/llvm/test/MC/X86/avx_ne_convert-64-att.s b/llvm/test/MC/X86/avx_ne_convert-64-att.s
new file mode 100644
index 0000000000000..22cd9fedae825
--- /dev/null
+++ b/llvm/test/MC/X86/avx_ne_convert-64-att.s
@@ -0,0 +1,334 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s
+
+// CHECK:      vbcstnebf162ps  268435456(%rbp,%r14,8), %xmm2
+// CHECK: encoding: [0xc4,0xa2,0x7a,0xb1,0x94,0xf5,0x00,0x00,0x00,0x10]
+               vbcstnebf162ps  268435456(%rbp,%r14,8), %xmm2
+
+// CHECK:      vbcstnebf162ps  291(%r8,%rax,4), %xmm2
+// CHECK: encoding: [0xc4,0xc2,0x7a,0xb1,0x94,0x80,0x23,0x01,0x00,0x00]
+               vbcstnebf162ps  291(%r8,%rax,4), %xmm2
+
+// CHECK:      vbcstnebf162ps  (%rip), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x7a,0xb1,0x15,0x00,0x00,0x00,0x00]
+               vbcstnebf162ps  (%rip), %xmm2
+
+// CHECK:      vbcstnebf162ps  -64(,%rbp,2), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x7a,0xb1,0x14,0x6d,0xc0,0xff,0xff,0xff]
+               vbcstnebf162ps  -64(,%rbp,2), %xmm2
+
+// CHECK:      vbcstnebf162ps  254(%rcx), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x7a,0xb1,0x91,0xfe,0x00,0x00,0x00]
+               vbcstnebf162ps  254(%rcx), %xmm2
+
+// CHECK:      vbcstnebf162ps  -256(%rdx), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x7a,0xb1,0x92,0x00,0xff,0xff,0xff]
+               vbcstnebf162ps  -256(%rdx), %xmm2
+
+// CHECK:      vbcstnebf162ps  268435456(%rbp,%r14,8), %ymm2
+// CHECK: encoding: [0xc4,0xa2,0x7e,0xb1,0x94,0xf5,0x00,0x00,0x00,0x10]
+               vbcstnebf162ps  268435456(%rbp,%r14,8), %ymm2
+
+// CHECK:      vbcstnebf162ps  291(%r8,%rax,4), %ymm2
+// CHECK: encoding: [0xc4,0xc2,0x7e,0xb1,0x94,0x80,0x23,0x01,0x00,0x00]
+               vbcstnebf162ps  291(%r8,%rax,4), %ymm2
+
+// CHECK:      vbcstnebf162ps  (%rip), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x7e,0xb1,0x15,0x00,0x00,0x00,0x00]
+               vbcstnebf162ps  (%rip), %ymm2
+
+// CHECK:      vbcstnebf162ps  -64(,%rbp,2), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x7e,0xb1,0x14,0x6d,0xc0,0xff,0xff,0xff]
+               vbcstnebf162ps  -64(,%rbp,2), %ymm2
+
+// CHECK:      vbcstnebf162ps  254(%rcx), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x7e,0xb1,0x91,0xfe,0x00,0x00,0x00]
+               vbcstnebf162ps  254(%rcx), %ymm2
+
+// CHECK:      vbcstnebf162ps  -256(%rdx), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x7e,0xb1,0x92,0x00,0xff,0xff,0xff]
+               vbcstnebf162ps  -256(%rdx), %ymm2
+
+// CHECK:      vbcstnesh2ps  268435456(%rbp,%r14,8), %xmm2
+// CHECK: encoding: [0xc4,0xa2,0x79,0xb1,0x94,0xf5,0x00,0x00,0x00,0x10]
+               vbcstnesh2ps  268435456(%rbp,%r14,8), %xmm2
+
+// CHECK:      vbcstnesh2ps  291(%r8,%rax,4), %xmm2
+// CHECK: encoding: [0xc4,0xc2,0x79,0xb1,0x94,0x80,0x23,0x01,0x00,0x00]
+               vbcstnesh2ps  291(%r8,%rax,4), %xmm2
+
+// CHECK:      vbcstnesh2ps  (%rip), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x79,0xb1,0x15,0x00,0x00,0x00,0x00]
+               vbcstnesh2ps  (%rip), %xmm2
+
+// CHECK:      vbcstnesh2ps  -64(,%rbp,2), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x79,0xb1,0x14,0x6d,0xc0,0xff,0xff,0xff]
+               vbcstnesh2ps  -64(,%rbp,2), %xmm2
+
+// CHECK:      vbcstnesh2ps  254(%rcx), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x79,0xb1,0x91,0xfe,0x00,0x00,0x00]
+               vbcstnesh2ps  254(%rcx), %xmm2
+
+// CHECK:      vbcstnesh2ps  -256(%rdx), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x79,0xb1,0x92,0x00,0xff,0xff,0xff]
+               vbcstnesh2ps  -256(%rdx), %xmm2
+
+// CHECK:      vbcstnesh2ps  268435456(%rbp,%r14,8), %ymm2
+// CHECK: encoding: [0xc4,0xa2,0x7d,0xb1,0x94,0xf5,0x00,0x00,0x00,0x10]
+               vbcstnesh2ps  268435456(%rbp,%r14,8), %ymm2
+
+// CHECK:      vbcstnesh2ps  291(%r8,%rax,4), %ymm2
+// CHECK: encoding: [0xc4,0xc2,0x7d,0xb1,0x94,0x80,0x23,0x01,0x00,0x00]
+               vbcstnesh2ps  291(%r8,%rax,4), %ymm2
+
+// CHECK:      vbcstnesh2ps  (%rip), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x7d,0xb1,0x15,0x00,0x00,0x00,0x00]
+               vbcstnesh2ps  (%rip), %ymm2
+
+// CHECK:      vbcstnesh2ps  -64(,%rbp,2), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x7d,0xb1,0x14,0x6d,0xc0,0xff,0xff,0xff]
+               vbcstnesh2ps  -64(,%rbp,2), %ymm2
+
+// CHECK:      vbcstnesh2ps  254(%rcx), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x7d,0xb1,0x91,0xfe,0x00,0x00,0x00]
+               vbcstnesh2ps  254(%rcx), %ymm2
+
+// CHECK:      vbcstnesh2ps  -256(%rdx), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x7d,0xb1,0x92,0x00,0xff,0xff,0xff]
+               vbcstnesh2ps  -256(%rdx), %ymm2
+
+// CHECK:      vcvtneebf162ps  268435456(%rbp,%r14,8), %xmm2
+// CHECK: encoding: [0xc4,0xa2,0x7a,0xb0,0x94,0xf5,0x00,0x00,0x00,0x10]
+               vcvtneebf162ps  268435456(%rbp,%r14,8), %xmm2
+
+// CHECK:      vcvtneebf162ps  291(%r8,%rax,4), %xmm2
+// CHECK: encoding: [0xc4,0xc2,0x7a,0xb0,0x94,0x80,0x23,0x01,0x00,0x00]
+               vcvtneebf162ps  291(%r8,%rax,4), %xmm2
+
+// CHECK:      vcvtneebf162ps  (%rip), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x7a,0xb0,0x15,0x00,0x00,0x00,0x00]
+               vcvtneebf162ps  (%rip), %xmm2
+
+// CHECK:      vcvtneebf162ps  -512(,%rbp,2), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x7a,0xb0,0x14,0x6d,0x00,0xfe,0xff,0xff]
+               vcvtneebf162ps  -512(,%rbp,2), %xmm2
+
+// CHECK:      vcvtneebf162ps  2032(%rcx), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x7a,0xb0,0x91,0xf0,0x07,0x00,0x00]
+               vcvtneebf162ps  2032(%rcx), %xmm2
+
+// CHECK:      vcvtneebf162ps  -2048(%rdx), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x7a,0xb0,0x92,0x00,0xf8,0xff,0xff]
+               vcvtneebf162ps  -2048(%rdx), %xmm2
+
+// CHECK:      vcvtneebf162ps  268435456(%rbp,%r14,8), %ymm2
+// CHECK: encoding: [0xc4,0xa2,0x7e,0xb0,0x94,0xf5,0x00,0x00,0x00,0x10]
+               vcvtneebf162ps  268435456(%rbp,%r14,8), %ymm2
+
+// CHECK:      vcvtneebf162ps  291(%r8,%rax,4), %ymm2
+// CHECK: encoding: [0xc4,0xc2,0x7e,0xb0,0x94,0x80,0x23,0x01,0x00,0x00]
+               vcvtneebf162ps  291(%r8,%rax,4), %ymm2
+
+// CHECK:      vcvtneebf162ps  (%rip), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x7e,0xb0,0x15,0x00,0x00,0x00,0x00]
+               vcvtneebf162ps  (%rip), %ymm2
+
+// CHECK:      vcvtneebf162ps  -1024(,%rbp,2), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x7e,0xb0,0x14,0x6d,0x00,0xfc,0xff,0xff]
+               vcvtneebf162ps  -1024(,%rbp,2), %ymm2
+
+// CHECK:      vcvtneebf162ps  4064(%rcx), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x7e,0xb0,0x91,0xe0,0x0f,0x00,0x00]
+               vcvtneebf162ps  4064(%rcx), %ymm2
+
+// CHECK:      vcvtneebf162ps  -4096(%rdx), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x7e,0xb0,0x92,0x00,0xf0,0xff,0xff]
+               vcvtneebf162ps  -4096(%rdx), %ymm2
+
+// CHECK:      vcvtneeph2ps  268435456(%rbp,%r14,8), %xmm2
+// CHECK: encoding: [0xc4,0xa2,0x79,0xb0,0x94,0xf5,0x00,0x00,0x00,0x10]
+               vcvtneeph2ps  268435456(%rbp,%r14,8), %xmm2
+
+// CHECK:      vcvtneeph2ps  291(%r8,%rax,4), %xmm2
+// CHECK: encoding: [0xc4,0xc2,0x79,0xb0,0x94,0x80,0x23,0x01,0x00,0x00]
+               vcvtneeph2ps  291(%r8,%rax,4), %xmm2
+
+// CHECK:      vcvtneeph2ps  (%rip), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x79,0xb0,0x15,0x00,0x00,0x00,0x00]
+               vcvtneeph2ps  (%rip), %xmm2
+
+// CHECK:      vcvtneeph2ps  -512(,%rbp,2), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x79,0xb0,0x14,0x6d,0x00,0xfe,0xff,0xff]
+               vcvtneeph2ps  -512(,%rbp,2), %xmm2
+
+// CHECK:      vcvtneeph2ps  2032(%rcx), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x79,0xb0,0x91,0xf0,0x07,0x00,0x00]
+               vcvtneeph2ps  2032(%rcx), %xmm2
+
+// CHECK:      vcvtneeph2ps  -2048(%rdx), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x79,0xb0,0x92,0x00,0xf8,0xff,0xff]
+               vcvtneeph2ps  -2048(%rdx), %xmm2
+
+// CHECK:      vcvtneeph2ps  268435456(%rbp,%r14,8), %ymm2
+// CHECK: encoding: [0xc4,0xa2,0x7d,0xb0,0x94,0xf5,0x00,0x00,0x00,0x10]
+               vcvtneeph2ps  268435456(%rbp,%r14,8), %ymm2
+
+// CHECK:      vcvtneeph2ps  291(%r8,%rax,4), %ymm2
+// CHECK: encoding: [0xc4,0xc2,0x7d,0xb0,0x94,0x80,0x23,0x01,0x00,0x00]
+               vcvtneeph2ps  291(%r8,%rax,4), %ymm2
+
+// CHECK:      vcvtneeph2ps  (%rip), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x7d,0xb0,0x15,0x00,0x00,0x00,0x00]
+               vcvtneeph2ps  (%rip), %ymm2
+
+// CHECK:      vcvtneeph2ps  -1024(,%rbp,2), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x7d,0xb0,0x14,0x6d,0x00,0xfc,0xff,0xff]
+               vcvtneeph2ps  -1024(,%rbp,2), %ymm2
+
+// CHECK:      vcvtneeph2ps  4064(%rcx), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x7d,0xb0,0x91,0xe0,0x0f,0x00,0x00]
+               vcvtneeph2ps  4064(%rcx), %ymm2
+
+// CHECK:      vcvtneeph2ps  -4096(%rdx), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x7d,0xb0,0x92,0x00,0xf0,0xff,0xff]
+               vcvtneeph2ps  -4096(%rdx), %ymm2
+
+// CHECK:      vcvtneobf162ps  268435456(%rbp,%r14,8), %xmm2
+// CHECK: encoding: [0xc4,0xa2,0x7b,0xb0,0x94,0xf5,0x00,0x00,0x00,0x10]
+               vcvtneobf162ps  268435456(%rbp,%r14,8), %xmm2
+
+// CHECK:      vcvtneobf162ps  291(%r8,%rax,4), %xmm2
+// CHECK: encoding: [0xc4,0xc2,0x7b,0xb0,0x94,0x80,0x23,0x01,0x00,0x00]
+               vcvtneobf162ps  291(%r8,%rax,4), %xmm2
+
+// CHECK:      vcvtneobf162ps  (%rip), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x7b,0xb0,0x15,0x00,0x00,0x00,0x00]
+               vcvtneobf162ps  (%rip), %xmm2
+
+// CHECK:      vcvtneobf162ps  -512(,%rbp,2), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x7b,0xb0,0x14,0x6d,0x00,0xfe,0xff,0xff]
+               vcvtneobf162ps  -512(,%rbp,2), %xmm2
+
+// CHECK:      vcvtneobf162ps  2032(%rcx), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x7b,0xb0,0x91,0xf0,0x07,0x00,0x00]
+               vcvtneobf162ps  2032(%rcx), %xmm2
+
+// CHECK:      vcvtneobf162ps  -2048(%rdx), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x7b,0xb0,0x92,0x00,0xf8,0xff,0xff]
+               vcvtneobf162ps  -2048(%rdx), %xmm2
+
+// CHECK:      vcvtneobf162ps  268435456(%rbp,%r14,8), %ymm2
+// CHECK: encoding: [0xc4,0xa2,0x7f,0xb0,0x94,0xf5,0x00,0x00,0x00,0x10]
+               vcvtneobf162ps  268435456(%rbp,%r14,8), %ymm2
+
+// CHECK:      vcvtneobf162ps  291(%r8,%rax,4), %ymm2
+// CHECK: encoding: [0xc4,0xc2,0x7f,0xb0,0x94,0x80,0x23,0x01,0x00,0x00]
+               vcvtneobf162ps  291(%r8,%rax,4), %ymm2
+
+// CHECK:      vcvtneobf162ps  (%rip), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x7f,0xb0,0x15,0x00,0x00,0x00,0x00]
+               vcvtneobf162ps  (%rip), %ymm2
+
+// CHECK:      vcvtneobf162ps  -1024(,%rbp,2), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x7f,0xb0,0x14,0x6d,0x00,0xfc,0xff,0xff]
+               vcvtneobf162ps  -1024(,%rbp,2), %ymm2
+
+// CHECK:      vcvtneobf162ps  4064(%rcx), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x7f,0xb0,0x91,0xe0,0x0f,0x00,0x00]
+               vcvtneobf162ps  4064(%rcx), %ymm2
+
+// CHECK:      vcvtneobf162ps  -4096(%rdx), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x7f,0xb0,0x92,0x00,0xf0,0xff,0xff]
+               vcvtneobf162ps  -4096(%rdx), %ymm2
+
+// CHECK:      vcvtneoph2ps  268435456(%rbp,%r14,8), %xmm2
+// CHECK: encoding: [0xc4,0xa2,0x78,0xb0,0x94,0xf5,0x00,0x00,0x00,0x10]
+               vcvtneoph2ps  268435456(%rbp,%r14,8), %xmm2
+
+// CHECK:      vcvtneoph2ps  291(%r8,%rax,4), %xmm2
+// CHECK: encoding: [0xc4,0xc2,0x78,0xb0,0x94,0x80,0x23,0x01,0x00,0x00]
+               vcvtneoph2ps  291(%r8,%rax,4), %xmm2
+
+// CHECK:      vcvtneoph2ps  (%rip), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x78,0xb0,0x15,0x00,0x00,0x00,0x00]
+               vcvtneoph2ps  (%rip), %xmm2
+
+// CHECK:      vcvtneoph2ps  -512(,%rbp,2), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x78,0xb0,0x14,0x6d,0x00,0xfe,0xff,0xff]
+               vcvtneoph2ps  -512(,%rbp,2), %xmm2
+
+// CHECK:      vcvtneoph2ps  2032(%rcx), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x78,0xb0,0x91,0xf0,0x07,0x00,0x00]
+               vcvtneoph2ps  2032(%rcx), %xmm2
+
+// CHECK:      vcvtneoph2ps  -2048(%rdx), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x78,0xb0,0x92,0x00,0xf8,0xff,0xff]
+               vcvtneoph2ps  -2048(%rdx), %xmm2
+
+// CHECK:      vcvtneoph2ps  268435456(%rbp,%r14,8), %ymm2
+// CHECK: encoding: [0xc4,0xa2,0x7c,0xb0,0x94,0xf5,0x00,0x00,0x00,0x10]
+               vcvtneoph2ps  268435456(%rbp,%r14,8), %ymm2
+
+// CHECK:      vcvtneoph2ps  291(%r8,%rax,4), %ymm2
+// CHECK: encoding: [0xc4,0xc2,0x7c,0xb0,0x94,0x80,0x23,0x01,0x00,0x00]
+               vcvtneoph2ps  291(%r8,%rax,4), %ymm2
+
+// CHECK:      vcvtneoph2ps  (%rip), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x7c,0xb0,0x15,0x00,0x00,0x00,0x00]
+               vcvtneoph2ps  (%rip), %ymm2
+
+// CHECK:      vcvtneoph2ps  -1024(,%rbp,2), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x7c,0xb0,0x14,0x6d,0x00,0xfc,0xff,0xff]
+               vcvtneoph2ps  -1024(,%rbp,2), %ymm2
+
+// CHECK:      vcvtneoph2ps  4064(%rcx), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x7c,0xb0,0x91,0xe0,0x0f,0x00,0x00]
+               vcvtneoph2ps  4064(%rcx), %ymm2
+
+// CHECK:      vcvtneoph2ps  -4096(%rdx), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x7c,0xb0,0x92,0x00,0xf0,0xff,0xff]
+               vcvtneoph2ps  -4096(%rdx), %ymm2
+
+// CHECK:      {vex} vcvtneps2bf16 %xmm3, %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x7a,0x72,0xd3]
+               {vex} vcvtneps2bf16 %xmm3, %xmm2
+
+// CHECK:      {vex} vcvtneps2bf16 %ymm3, %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x7e,0x72,0xd3]
+               {vex} vcvtneps2bf16 %ymm3, %xmm2
+
+// CHECK:      {vex} vcvtneps2bf16x  268435456(%rbp,%r14,8), %xmm2
+// CHECK: encoding: [0xc4,0xa2,0x7a,0x72,0x94,0xf5,0x00,0x00,0x00,0x10]
+               {vex} vcvtneps2bf16x  268435456(%rbp,%r14,8), %xmm2
+
+// CHECK:      {vex} vcvtneps2bf16x  291(%r8,%rax,4), %xmm2
+// CHECK: encoding: [0xc4,0xc2,0x7a,0x72,0x94,0x80,0x23,0x01,0x00,0x00]
+               {vex} vcvtneps2bf16x  291(%r8,%rax,4), %xmm2
+
+// CHECK:      {vex} vcvtneps2bf16x  (%rip), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x7a,0x72,0x15,0x00,0x00,0x00,0x00]
+               {vex} vcvtneps2bf16x  (%rip), %xmm2
+
+// CHECK:      {vex} vcvtneps2bf16x  -512(,%rbp,2), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x7a,0x72,0x14,0x6d,0x00,0xfe,0xff,0xff]
+               {vex} vcvtneps2bf16x  -512(,%rbp,2), %xmm2
+
+// CHECK:      {vex} vcvtneps2bf16x  2032(%rcx), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x7a,0x72,0x91,0xf0,0x07,0x00,0x00]
+               {vex} vcvtneps2bf16x  2032(%rcx), %xmm2
+
+// CHECK:      {vex} vcvtneps2bf16x  -2048(%rdx), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x7a,0x72,0x92,0x00,0xf8,0xff,0xff]
+               {vex} vcvtneps2bf16x  -2048(%rdx), %xmm2
+
+// CHECK:      {vex} vcvtneps2bf16y  -1024(,%rbp,2), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x7e,0x72,0x14,0x6d,0x00,0xfc,0xff,0xff]
+               {vex} vcvtneps2bf16y  -1024(,%rbp,2), %xmm2
+
+// CHECK:      {vex} vcvtneps2bf16y  4064(%rcx), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x7e,0x72,0x91,0xe0,0x0f,0x00,0x00]
+               {vex} vcvtneps2bf16y  4064(%rcx), %xmm2
+
+// CHECK:      {vex} vcvtneps2bf16y  -4096(%rdx), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x7e,0x72,0x92,0x00,0xf0,0xff,0xff]
+               {vex} vcvtneps2bf16y  -4096(%rdx), %xmm2
+

diff  --git a/llvm/test/MC/X86/avx_ne_convert-64-intel.s b/llvm/test/MC/X86/avx_ne_convert-64-intel.s
new file mode 100644
index 0000000000000..fbe2488485af5
--- /dev/null
+++ b/llvm/test/MC/X86/avx_ne_convert-64-intel.s
@@ -0,0 +1,334 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+
+// CHECK:      vbcstnebf162ps xmm2, word ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0xc4,0xa2,0x7a,0xb1,0x94,0xf5,0x00,0x00,0x00,0x10]
+               vbcstnebf162ps xmm2, word ptr [rbp + 8*r14 + 268435456]
+
+// CHECK:      vbcstnebf162ps xmm2, word ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0xc4,0xc2,0x7a,0xb1,0x94,0x80,0x23,0x01,0x00,0x00]
+               vbcstnebf162ps xmm2, word ptr [r8 + 4*rax + 291]
+
+// CHECK:      vbcstnebf162ps xmm2, word ptr [rip]
+// CHECK: encoding: [0xc4,0xe2,0x7a,0xb1,0x15,0x00,0x00,0x00,0x00]
+               vbcstnebf162ps xmm2, word ptr [rip]
+
+// CHECK:      vbcstnebf162ps xmm2, word ptr [2*rbp - 64]
+// CHECK: encoding: [0xc4,0xe2,0x7a,0xb1,0x14,0x6d,0xc0,0xff,0xff,0xff]
+               vbcstnebf162ps xmm2, word ptr [2*rbp - 64]
+
+// CHECK:      vbcstnebf162ps xmm2, word ptr [rcx + 254]
+// CHECK: encoding: [0xc4,0xe2,0x7a,0xb1,0x91,0xfe,0x00,0x00,0x00]
+               vbcstnebf162ps xmm2, word ptr [rcx + 254]
+
+// CHECK:      vbcstnebf162ps xmm2, word ptr [rdx - 256]
+// CHECK: encoding: [0xc4,0xe2,0x7a,0xb1,0x92,0x00,0xff,0xff,0xff]
+               vbcstnebf162ps xmm2, word ptr [rdx - 256]
+
+// CHECK:      vbcstnebf162ps ymm2, word ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0xc4,0xa2,0x7e,0xb1,0x94,0xf5,0x00,0x00,0x00,0x10]
+               vbcstnebf162ps ymm2, word ptr [rbp + 8*r14 + 268435456]
+
+// CHECK:      vbcstnebf162ps ymm2, word ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0xc4,0xc2,0x7e,0xb1,0x94,0x80,0x23,0x01,0x00,0x00]
+               vbcstnebf162ps ymm2, word ptr [r8 + 4*rax + 291]
+
+// CHECK:      vbcstnebf162ps ymm2, word ptr [rip]
+// CHECK: encoding: [0xc4,0xe2,0x7e,0xb1,0x15,0x00,0x00,0x00,0x00]
+               vbcstnebf162ps ymm2, word ptr [rip]
+
+// CHECK:      vbcstnebf162ps ymm2, word ptr [2*rbp - 64]
+// CHECK: encoding: [0xc4,0xe2,0x7e,0xb1,0x14,0x6d,0xc0,0xff,0xff,0xff]
+               vbcstnebf162ps ymm2, word ptr [2*rbp - 64]
+
+// CHECK:      vbcstnebf162ps ymm2, word ptr [rcx + 254]
+// CHECK: encoding: [0xc4,0xe2,0x7e,0xb1,0x91,0xfe,0x00,0x00,0x00]
+               vbcstnebf162ps ymm2, word ptr [rcx + 254]
+
+// CHECK:      vbcstnebf162ps ymm2, word ptr [rdx - 256]
+// CHECK: encoding: [0xc4,0xe2,0x7e,0xb1,0x92,0x00,0xff,0xff,0xff]
+               vbcstnebf162ps ymm2, word ptr [rdx - 256]
+
+// CHECK:      vbcstnesh2ps xmm2, word ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0xc4,0xa2,0x79,0xb1,0x94,0xf5,0x00,0x00,0x00,0x10]
+               vbcstnesh2ps xmm2, word ptr [rbp + 8*r14 + 268435456]
+
+// CHECK:      vbcstnesh2ps xmm2, word ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0xc4,0xc2,0x79,0xb1,0x94,0x80,0x23,0x01,0x00,0x00]
+               vbcstnesh2ps xmm2, word ptr [r8 + 4*rax + 291]
+
+// CHECK:      vbcstnesh2ps xmm2, word ptr [rip]
+// CHECK: encoding: [0xc4,0xe2,0x79,0xb1,0x15,0x00,0x00,0x00,0x00]
+               vbcstnesh2ps xmm2, word ptr [rip]
+
+// CHECK:      vbcstnesh2ps xmm2, word ptr [2*rbp - 64]
+// CHECK: encoding: [0xc4,0xe2,0x79,0xb1,0x14,0x6d,0xc0,0xff,0xff,0xff]
+               vbcstnesh2ps xmm2, word ptr [2*rbp - 64]
+
+// CHECK:      vbcstnesh2ps xmm2, word ptr [rcx + 254]
+// CHECK: encoding: [0xc4,0xe2,0x79,0xb1,0x91,0xfe,0x00,0x00,0x00]
+               vbcstnesh2ps xmm2, word ptr [rcx + 254]
+
+// CHECK:      vbcstnesh2ps xmm2, word ptr [rdx - 256]
+// CHECK: encoding: [0xc4,0xe2,0x79,0xb1,0x92,0x00,0xff,0xff,0xff]
+               vbcstnesh2ps xmm2, word ptr [rdx - 256]
+
+// CHECK:      vbcstnesh2ps ymm2, word ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0xc4,0xa2,0x7d,0xb1,0x94,0xf5,0x00,0x00,0x00,0x10]
+               vbcstnesh2ps ymm2, word ptr [rbp + 8*r14 + 268435456]
+
+// CHECK:      vbcstnesh2ps ymm2, word ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0xc4,0xc2,0x7d,0xb1,0x94,0x80,0x23,0x01,0x00,0x00]
+               vbcstnesh2ps ymm2, word ptr [r8 + 4*rax + 291]
+
+// CHECK:      vbcstnesh2ps ymm2, word ptr [rip]
+// CHECK: encoding: [0xc4,0xe2,0x7d,0xb1,0x15,0x00,0x00,0x00,0x00]
+               vbcstnesh2ps ymm2, word ptr [rip]
+
+// CHECK:      vbcstnesh2ps ymm2, word ptr [2*rbp - 64]
+// CHECK: encoding: [0xc4,0xe2,0x7d,0xb1,0x14,0x6d,0xc0,0xff,0xff,0xff]
+               vbcstnesh2ps ymm2, word ptr [2*rbp - 64]
+
+// CHECK:      vbcstnesh2ps ymm2, word ptr [rcx + 254]
+// CHECK: encoding: [0xc4,0xe2,0x7d,0xb1,0x91,0xfe,0x00,0x00,0x00]
+               vbcstnesh2ps ymm2, word ptr [rcx + 254]
+
+// CHECK:      vbcstnesh2ps ymm2, word ptr [rdx - 256]
+// CHECK: encoding: [0xc4,0xe2,0x7d,0xb1,0x92,0x00,0xff,0xff,0xff]
+               vbcstnesh2ps ymm2, word ptr [rdx - 256]
+
+// CHECK:      vcvtneebf162ps xmm2, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0xc4,0xa2,0x7a,0xb0,0x94,0xf5,0x00,0x00,0x00,0x10]
+               vcvtneebf162ps xmm2, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK:      vcvtneebf162ps xmm2, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0xc4,0xc2,0x7a,0xb0,0x94,0x80,0x23,0x01,0x00,0x00]
+               vcvtneebf162ps xmm2, xmmword ptr [r8 + 4*rax + 291]
+
+// CHECK:      vcvtneebf162ps xmm2, xmmword ptr [rip]
+// CHECK: encoding: [0xc4,0xe2,0x7a,0xb0,0x15,0x00,0x00,0x00,0x00]
+               vcvtneebf162ps xmm2, xmmword ptr [rip]
+
+// CHECK:      vcvtneebf162ps xmm2, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0xc4,0xe2,0x7a,0xb0,0x14,0x6d,0x00,0xfe,0xff,0xff]
+               vcvtneebf162ps xmm2, xmmword ptr [2*rbp - 512]
+
+// CHECK:      vcvtneebf162ps xmm2, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0xc4,0xe2,0x7a,0xb0,0x91,0xf0,0x07,0x00,0x00]
+               vcvtneebf162ps xmm2, xmmword ptr [rcx + 2032]
+
+// CHECK:      vcvtneebf162ps xmm2, xmmword ptr [rdx - 2048]
+// CHECK: encoding: [0xc4,0xe2,0x7a,0xb0,0x92,0x00,0xf8,0xff,0xff]
+               vcvtneebf162ps xmm2, xmmword ptr [rdx - 2048]
+
+// CHECK:      vcvtneebf162ps ymm2, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0xc4,0xa2,0x7e,0xb0,0x94,0xf5,0x00,0x00,0x00,0x10]
+               vcvtneebf162ps ymm2, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK:      vcvtneebf162ps ymm2, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0xc4,0xc2,0x7e,0xb0,0x94,0x80,0x23,0x01,0x00,0x00]
+               vcvtneebf162ps ymm2, ymmword ptr [r8 + 4*rax + 291]
+
+// CHECK:      vcvtneebf162ps ymm2, ymmword ptr [rip]
+// CHECK: encoding: [0xc4,0xe2,0x7e,0xb0,0x15,0x00,0x00,0x00,0x00]
+               vcvtneebf162ps ymm2, ymmword ptr [rip]
+
+// CHECK:      vcvtneebf162ps ymm2, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0xc4,0xe2,0x7e,0xb0,0x14,0x6d,0x00,0xfc,0xff,0xff]
+               vcvtneebf162ps ymm2, ymmword ptr [2*rbp - 1024]
+
+// CHECK:      vcvtneebf162ps ymm2, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0xc4,0xe2,0x7e,0xb0,0x91,0xe0,0x0f,0x00,0x00]
+               vcvtneebf162ps ymm2, ymmword ptr [rcx + 4064]
+
+// CHECK:      vcvtneebf162ps ymm2, ymmword ptr [rdx - 4096]
+// CHECK: encoding: [0xc4,0xe2,0x7e,0xb0,0x92,0x00,0xf0,0xff,0xff]
+               vcvtneebf162ps ymm2, ymmword ptr [rdx - 4096]
+
+// CHECK:      vcvtneeph2ps xmm2, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0xc4,0xa2,0x79,0xb0,0x94,0xf5,0x00,0x00,0x00,0x10]
+               vcvtneeph2ps xmm2, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK:      vcvtneeph2ps xmm2, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0xc4,0xc2,0x79,0xb0,0x94,0x80,0x23,0x01,0x00,0x00]
+               vcvtneeph2ps xmm2, xmmword ptr [r8 + 4*rax + 291]
+
+// CHECK:      vcvtneeph2ps xmm2, xmmword ptr [rip]
+// CHECK: encoding: [0xc4,0xe2,0x79,0xb0,0x15,0x00,0x00,0x00,0x00]
+               vcvtneeph2ps xmm2, xmmword ptr [rip]
+
+// CHECK:      vcvtneeph2ps xmm2, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0xc4,0xe2,0x79,0xb0,0x14,0x6d,0x00,0xfe,0xff,0xff]
+               vcvtneeph2ps xmm2, xmmword ptr [2*rbp - 512]
+
+// CHECK:      vcvtneeph2ps xmm2, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0xc4,0xe2,0x79,0xb0,0x91,0xf0,0x07,0x00,0x00]
+               vcvtneeph2ps xmm2, xmmword ptr [rcx + 2032]
+
+// CHECK:      vcvtneeph2ps xmm2, xmmword ptr [rdx - 2048]
+// CHECK: encoding: [0xc4,0xe2,0x79,0xb0,0x92,0x00,0xf8,0xff,0xff]
+               vcvtneeph2ps xmm2, xmmword ptr [rdx - 2048]
+
+// CHECK:      vcvtneeph2ps ymm2, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0xc4,0xa2,0x7d,0xb0,0x94,0xf5,0x00,0x00,0x00,0x10]
+               vcvtneeph2ps ymm2, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK:      vcvtneeph2ps ymm2, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0xc4,0xc2,0x7d,0xb0,0x94,0x80,0x23,0x01,0x00,0x00]
+               vcvtneeph2ps ymm2, ymmword ptr [r8 + 4*rax + 291]
+
+// CHECK:      vcvtneeph2ps ymm2, ymmword ptr [rip]
+// CHECK: encoding: [0xc4,0xe2,0x7d,0xb0,0x15,0x00,0x00,0x00,0x00]
+               vcvtneeph2ps ymm2, ymmword ptr [rip]
+
+// CHECK:      vcvtneeph2ps ymm2, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0xc4,0xe2,0x7d,0xb0,0x14,0x6d,0x00,0xfc,0xff,0xff]
+               vcvtneeph2ps ymm2, ymmword ptr [2*rbp - 1024]
+
+// CHECK:      vcvtneeph2ps ymm2, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0xc4,0xe2,0x7d,0xb0,0x91,0xe0,0x0f,0x00,0x00]
+               vcvtneeph2ps ymm2, ymmword ptr [rcx + 4064]
+
+// CHECK:      vcvtneeph2ps ymm2, ymmword ptr [rdx - 4096]
+// CHECK: encoding: [0xc4,0xe2,0x7d,0xb0,0x92,0x00,0xf0,0xff,0xff]
+               vcvtneeph2ps ymm2, ymmword ptr [rdx - 4096]
+
+// CHECK:      vcvtneobf162ps xmm2, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0xc4,0xa2,0x7b,0xb0,0x94,0xf5,0x00,0x00,0x00,0x10]
+               vcvtneobf162ps xmm2, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK:      vcvtneobf162ps xmm2, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0xc4,0xc2,0x7b,0xb0,0x94,0x80,0x23,0x01,0x00,0x00]
+               vcvtneobf162ps xmm2, xmmword ptr [r8 + 4*rax + 291]
+
+// CHECK:      vcvtneobf162ps xmm2, xmmword ptr [rip]
+// CHECK: encoding: [0xc4,0xe2,0x7b,0xb0,0x15,0x00,0x00,0x00,0x00]
+               vcvtneobf162ps xmm2, xmmword ptr [rip]
+
+// CHECK:      vcvtneobf162ps xmm2, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0xc4,0xe2,0x7b,0xb0,0x14,0x6d,0x00,0xfe,0xff,0xff]
+               vcvtneobf162ps xmm2, xmmword ptr [2*rbp - 512]
+
+// CHECK:      vcvtneobf162ps xmm2, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0xc4,0xe2,0x7b,0xb0,0x91,0xf0,0x07,0x00,0x00]
+               vcvtneobf162ps xmm2, xmmword ptr [rcx + 2032]
+
+// CHECK:      vcvtneobf162ps xmm2, xmmword ptr [rdx - 2048]
+// CHECK: encoding: [0xc4,0xe2,0x7b,0xb0,0x92,0x00,0xf8,0xff,0xff]
+               vcvtneobf162ps xmm2, xmmword ptr [rdx - 2048]
+
+// CHECK:      vcvtneobf162ps ymm2, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0xc4,0xa2,0x7f,0xb0,0x94,0xf5,0x00,0x00,0x00,0x10]
+               vcvtneobf162ps ymm2, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK:      vcvtneobf162ps ymm2, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0xc4,0xc2,0x7f,0xb0,0x94,0x80,0x23,0x01,0x00,0x00]
+               vcvtneobf162ps ymm2, ymmword ptr [r8 + 4*rax + 291]
+
+// CHECK:      vcvtneobf162ps ymm2, ymmword ptr [rip]
+// CHECK: encoding: [0xc4,0xe2,0x7f,0xb0,0x15,0x00,0x00,0x00,0x00]
+               vcvtneobf162ps ymm2, ymmword ptr [rip]
+
+// CHECK:      vcvtneobf162ps ymm2, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0xc4,0xe2,0x7f,0xb0,0x14,0x6d,0x00,0xfc,0xff,0xff]
+               vcvtneobf162ps ymm2, ymmword ptr [2*rbp - 1024]
+
+// CHECK:      vcvtneobf162ps ymm2, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0xc4,0xe2,0x7f,0xb0,0x91,0xe0,0x0f,0x00,0x00]
+               vcvtneobf162ps ymm2, ymmword ptr [rcx + 4064]
+
+// CHECK:      vcvtneobf162ps ymm2, ymmword ptr [rdx - 4096]
+// CHECK: encoding: [0xc4,0xe2,0x7f,0xb0,0x92,0x00,0xf0,0xff,0xff]
+               vcvtneobf162ps ymm2, ymmword ptr [rdx - 4096]
+
+// CHECK:      vcvtneoph2ps xmm2, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0xc4,0xa2,0x78,0xb0,0x94,0xf5,0x00,0x00,0x00,0x10]
+               vcvtneoph2ps xmm2, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK:      vcvtneoph2ps xmm2, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0xc4,0xc2,0x78,0xb0,0x94,0x80,0x23,0x01,0x00,0x00]
+               vcvtneoph2ps xmm2, xmmword ptr [r8 + 4*rax + 291]
+
+// CHECK:      vcvtneoph2ps xmm2, xmmword ptr [rip]
+// CHECK: encoding: [0xc4,0xe2,0x78,0xb0,0x15,0x00,0x00,0x00,0x00]
+               vcvtneoph2ps xmm2, xmmword ptr [rip]
+
+// CHECK:      vcvtneoph2ps xmm2, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0xc4,0xe2,0x78,0xb0,0x14,0x6d,0x00,0xfe,0xff,0xff]
+               vcvtneoph2ps xmm2, xmmword ptr [2*rbp - 512]
+
+// CHECK:      vcvtneoph2ps xmm2, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0xc4,0xe2,0x78,0xb0,0x91,0xf0,0x07,0x00,0x00]
+               vcvtneoph2ps xmm2, xmmword ptr [rcx + 2032]
+
+// CHECK:      vcvtneoph2ps xmm2, xmmword ptr [rdx - 2048]
+// CHECK: encoding: [0xc4,0xe2,0x78,0xb0,0x92,0x00,0xf8,0xff,0xff]
+               vcvtneoph2ps xmm2, xmmword ptr [rdx - 2048]
+
+// CHECK:      vcvtneoph2ps ymm2, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0xc4,0xa2,0x7c,0xb0,0x94,0xf5,0x00,0x00,0x00,0x10]
+               vcvtneoph2ps ymm2, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK:      vcvtneoph2ps ymm2, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0xc4,0xc2,0x7c,0xb0,0x94,0x80,0x23,0x01,0x00,0x00]
+               vcvtneoph2ps ymm2, ymmword ptr [r8 + 4*rax + 291]
+
+// CHECK:      vcvtneoph2ps ymm2, ymmword ptr [rip]
+// CHECK: encoding: [0xc4,0xe2,0x7c,0xb0,0x15,0x00,0x00,0x00,0x00]
+               vcvtneoph2ps ymm2, ymmword ptr [rip]
+
+// CHECK:      vcvtneoph2ps ymm2, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0xc4,0xe2,0x7c,0xb0,0x14,0x6d,0x00,0xfc,0xff,0xff]
+               vcvtneoph2ps ymm2, ymmword ptr [2*rbp - 1024]
+
+// CHECK:      vcvtneoph2ps ymm2, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0xc4,0xe2,0x7c,0xb0,0x91,0xe0,0x0f,0x00,0x00]
+               vcvtneoph2ps ymm2, ymmword ptr [rcx + 4064]
+
+// CHECK:      vcvtneoph2ps ymm2, ymmword ptr [rdx - 4096]
+// CHECK: encoding: [0xc4,0xe2,0x7c,0xb0,0x92,0x00,0xf0,0xff,0xff]
+               vcvtneoph2ps ymm2, ymmword ptr [rdx - 4096]
+
+// CHECK:      {vex} vcvtneps2bf16 xmm2, xmm3
+// CHECK: encoding: [0xc4,0xe2,0x7a,0x72,0xd3]
+               {vex} vcvtneps2bf16 xmm2, xmm3
+
+// CHECK:      {vex} vcvtneps2bf16 xmm2, ymm3
+// CHECK: encoding: [0xc4,0xe2,0x7e,0x72,0xd3]
+               {vex} vcvtneps2bf16 xmm2, ymm3
+
+// CHECK:      {vex} vcvtneps2bf16 xmm2, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0xc4,0xa2,0x7a,0x72,0x94,0xf5,0x00,0x00,0x00,0x10]
+               {vex} vcvtneps2bf16 xmm2, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK:      {vex} vcvtneps2bf16 xmm2, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0xc4,0xc2,0x7a,0x72,0x94,0x80,0x23,0x01,0x00,0x00]
+               {vex} vcvtneps2bf16 xmm2, xmmword ptr [r8 + 4*rax + 291]
+
+// CHECK:      {vex} vcvtneps2bf16 xmm2, xmmword ptr [rip]
+// CHECK: encoding: [0xc4,0xe2,0x7a,0x72,0x15,0x00,0x00,0x00,0x00]
+               {vex} vcvtneps2bf16 xmm2, xmmword ptr [rip]
+
+// CHECK:      {vex} vcvtneps2bf16 xmm2, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0xc4,0xe2,0x7a,0x72,0x14,0x6d,0x00,0xfe,0xff,0xff]
+               {vex} vcvtneps2bf16 xmm2, xmmword ptr [2*rbp - 512]
+
+// CHECK:      {vex} vcvtneps2bf16 xmm2, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0xc4,0xe2,0x7a,0x72,0x91,0xf0,0x07,0x00,0x00]
+               {vex} vcvtneps2bf16 xmm2, xmmword ptr [rcx + 2032]
+
+// CHECK:      {vex} vcvtneps2bf16 xmm2, xmmword ptr [rdx - 2048]
+// CHECK: encoding: [0xc4,0xe2,0x7a,0x72,0x92,0x00,0xf8,0xff,0xff]
+               {vex} vcvtneps2bf16 xmm2, xmmword ptr [rdx - 2048]
+
+// CHECK:      {vex} vcvtneps2bf16 xmm2, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0xc4,0xe2,0x7e,0x72,0x14,0x6d,0x00,0xfc,0xff,0xff]
+               {vex} vcvtneps2bf16 xmm2, ymmword ptr [2*rbp - 1024]
+
+// CHECK:      {vex} vcvtneps2bf16 xmm2, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0xc4,0xe2,0x7e,0x72,0x91,0xe0,0x0f,0x00,0x00]
+               {vex} vcvtneps2bf16 xmm2, ymmword ptr [rcx + 4064]
+
+// CHECK:      {vex} vcvtneps2bf16 xmm2, ymmword ptr [rdx - 4096]
+// CHECK: encoding: [0xc4,0xe2,0x7e,0x72,0x92,0x00,0xf0,0xff,0xff]
+               {vex} vcvtneps2bf16 xmm2, ymmword ptr [rdx - 4096]
+


        


More information about the cfe-commits mailing list