r333509 - [X86] Remove masking from the AVX512VNNI builtins. Use a select in IR instead.
Craig Topper via cfe-commits
cfe-commits at lists.llvm.org
Tue May 29 22:26:04 PDT 2018
Author: ctopper
Date: Tue May 29 22:26:04 2018
New Revision: 333509
URL: http://llvm.org/viewvc/llvm-project?rev=333509&view=rev
Log:
[X86] Remove masking from the AVX512VNNI builtins. Use a select in IR instead.
Modified:
cfe/trunk/include/clang/Basic/BuiltinsX86.def
cfe/trunk/lib/Headers/avx512vlvnniintrin.h
cfe/trunk/lib/Headers/avx512vnniintrin.h
cfe/trunk/test/CodeGen/avx512vlvnni-builtins.c
cfe/trunk/test/CodeGen/avx512vnni-builtins.c
Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=333509&r1=333508&r2=333509&view=diff
==============================================================================
--- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Tue May 29 22:26:04 2018
@@ -970,30 +970,18 @@ TARGET_BUILTIN(__builtin_ia32_storeapd51
TARGET_BUILTIN(__builtin_ia32_storeups512_mask, "vf*V16fUs", "n", "avx512f")
TARGET_BUILTIN(__builtin_ia32_storeaps512_mask, "vV16f*V16fUs", "n", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vpdpbusd128_mask, "V4iV4iV4iV4iUc", "nc", "avx512vl,avx512vnni")
-TARGET_BUILTIN(__builtin_ia32_vpdpbusd256_mask, "V8iV8iV8iV8iUc", "nc", "avx512vl,avx512vnni")
-TARGET_BUILTIN(__builtin_ia32_vpdpbusd512_mask, "V16iV16iV16iV16iUs", "nc", "avx512vnni")
-TARGET_BUILTIN(__builtin_ia32_vpdpbusds128_mask, "V4iV4iV4iV4iUc", "nc", "avx512vl,avx512vnni")
-TARGET_BUILTIN(__builtin_ia32_vpdpbusds256_mask, "V8iV8iV8iV8iUc", "nc", "avx512vl,avx512vnni")
-TARGET_BUILTIN(__builtin_ia32_vpdpbusds512_mask, "V16iV16iV16iV16iUs", "nc", "avx512vnni")
-TARGET_BUILTIN(__builtin_ia32_vpdpwssd128_mask, "V4iV4iV4iV4iUc", "nc", "avx512vl,avx512vnni")
-TARGET_BUILTIN(__builtin_ia32_vpdpwssd256_mask, "V8iV8iV8iV8iUc", "nc", "avx512vl,avx512vnni")
-TARGET_BUILTIN(__builtin_ia32_vpdpwssd512_mask, "V16iV16iV16iV16iUs", "nc", "avx512vnni")
-TARGET_BUILTIN(__builtin_ia32_vpdpwssds128_mask, "V4iV4iV4iV4iUc", "nc", "avx512vl,avx512vnni")
-TARGET_BUILTIN(__builtin_ia32_vpdpwssds256_mask, "V8iV8iV8iV8iUc", "nc", "avx512vl,avx512vnni")
-TARGET_BUILTIN(__builtin_ia32_vpdpwssds512_mask, "V16iV16iV16iV16iUs", "nc", "avx512vnni")
-TARGET_BUILTIN(__builtin_ia32_vpdpbusd128_maskz, "V4iV4iV4iV4iUc", "nc", "avx512vl,avx512vnni")
-TARGET_BUILTIN(__builtin_ia32_vpdpbusd256_maskz, "V8iV8iV8iV8iUc", "nc", "avx512vl,avx512vnni")
-TARGET_BUILTIN(__builtin_ia32_vpdpbusd512_maskz, "V16iV16iV16iV16iUs", "nc", "avx512vnni")
-TARGET_BUILTIN(__builtin_ia32_vpdpbusds128_maskz, "V4iV4iV4iV4iUc", "nc", "avx512vl,avx512vnni")
-TARGET_BUILTIN(__builtin_ia32_vpdpbusds256_maskz, "V8iV8iV8iV8iUc", "nc", "avx512vl,avx512vnni")
-TARGET_BUILTIN(__builtin_ia32_vpdpbusds512_maskz, "V16iV16iV16iV16iUs", "nc", "avx512vnni")
-TARGET_BUILTIN(__builtin_ia32_vpdpwssd128_maskz, "V4iV4iV4iV4iUc", "nc", "avx512vl,avx512vnni")
-TARGET_BUILTIN(__builtin_ia32_vpdpwssd256_maskz, "V8iV8iV8iV8iUc", "nc", "avx512vl,avx512vnni")
-TARGET_BUILTIN(__builtin_ia32_vpdpwssd512_maskz, "V16iV16iV16iV16iUs", "nc", "avx512vnni")
-TARGET_BUILTIN(__builtin_ia32_vpdpwssds128_maskz, "V4iV4iV4iV4iUc", "nc", "avx512vl,avx512vnni")
-TARGET_BUILTIN(__builtin_ia32_vpdpwssds256_maskz, "V8iV8iV8iV8iUc", "nc", "avx512vl,avx512vnni")
-TARGET_BUILTIN(__builtin_ia32_vpdpwssds512_maskz, "V16iV16iV16iV16iUs", "nc", "avx512vnni")
+TARGET_BUILTIN(__builtin_ia32_vpdpbusd128, "V4iV4iV4iV4i", "nc", "avx512vl,avx512vnni")
+TARGET_BUILTIN(__builtin_ia32_vpdpbusd256, "V8iV8iV8iV8i", "nc", "avx512vl,avx512vnni")
+TARGET_BUILTIN(__builtin_ia32_vpdpbusd512, "V16iV16iV16iV16i", "nc", "avx512vnni")
+TARGET_BUILTIN(__builtin_ia32_vpdpbusds128, "V4iV4iV4iV4i", "nc", "avx512vl,avx512vnni")
+TARGET_BUILTIN(__builtin_ia32_vpdpbusds256, "V8iV8iV8iV8i", "nc", "avx512vl,avx512vnni")
+TARGET_BUILTIN(__builtin_ia32_vpdpbusds512, "V16iV16iV16iV16i", "nc", "avx512vnni")
+TARGET_BUILTIN(__builtin_ia32_vpdpwssd128, "V4iV4iV4iV4i", "nc", "avx512vl,avx512vnni")
+TARGET_BUILTIN(__builtin_ia32_vpdpwssd256, "V8iV8iV8iV8i", "nc", "avx512vl,avx512vnni")
+TARGET_BUILTIN(__builtin_ia32_vpdpwssd512, "V16iV16iV16iV16i", "nc", "avx512vnni")
+TARGET_BUILTIN(__builtin_ia32_vpdpwssds128, "V4iV4iV4iV4i", "nc", "avx512vl,avx512vnni")
+TARGET_BUILTIN(__builtin_ia32_vpdpwssds256, "V8iV8iV8iV8i", "nc", "avx512vl,avx512vnni")
+TARGET_BUILTIN(__builtin_ia32_vpdpwssds512, "V16iV16iV16iV16i", "nc", "avx512vnni")
TARGET_BUILTIN(__builtin_ia32_gather3div2df, "V2dV2ddC*V2LLiUcIi", "n", "avx512vl")
TARGET_BUILTIN(__builtin_ia32_gather3div2di, "V2LLiV2LLiLLiC*V2LLiUcIi", "n", "avx512vl")
Modified: cfe/trunk/lib/Headers/avx512vlvnniintrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512vlvnniintrin.h?rev=333509&r1=333508&r2=333509&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/avx512vlvnniintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512vlvnniintrin.h Tue May 29 22:26:04 2018
@@ -33,222 +33,189 @@
static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_dpbusd_epi32(__m256i __S, __m256i __A, __m256i __B)
+{
+ return (__m256i)__builtin_ia32_vpdpbusd256((__v8si)__S, (__v8si)__A,
+ (__v8si)__B);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_dpbusd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
{
- return (__m256i) __builtin_ia32_vpdpbusd256_mask ((__v8si) __S,
- (__v8si) __A,
- (__v8si) __B,
- (__mmask8) __U);
+ return (__m256i)__builtin_ia32_selectd_256(__U,
+ (__v8si)_mm256_dpbusd_epi32(__S, __A, __B),
+ (__v8si)__S);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskz_dpbusd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
{
- return (__m256i) __builtin_ia32_vpdpbusd256_maskz ((__v8si) __S,
- (__v8si) __A,
- (__v8si) __B,
- (__mmask8) __U);
+ return (__m256i)__builtin_ia32_selectd_256(__U,
+ (__v8si)_mm256_dpbusd_epi32(__S, __A, __B),
+ (__v8si)_mm256_setzero_si256());
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_dpbusd_epi32(__m256i __S, __m256i __A, __m256i __B)
+_mm256_dpbusds_epi32(__m256i __S, __m256i __A, __m256i __B)
{
- return (__m256i) __builtin_ia32_vpdpbusd256_mask ((__v8si) __S,
- (__v8si) __A,
- (__v8si) __B,
- (__mmask8) -1);
+ return (__m256i)__builtin_ia32_vpdpbusds256((__v8si)__S, (__v8si)__A,
+ (__v8si)__B);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_dpbusds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
{
- return (__m256i) __builtin_ia32_vpdpbusds256_mask ((__v8si) __S,
- (__v8si) __A,
- (__v8si) __B,
- (__mmask8) __U);
+ return (__m256i)__builtin_ia32_selectd_256(__U,
+ (__v8si)_mm256_dpbusds_epi32(__S, __A, __B),
+ (__v8si)__S);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskz_dpbusds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
{
- return (__m256i) __builtin_ia32_vpdpbusds256_maskz ((__v8si) __S,
- (__v8si) __A,
- (__v8si) __B,
- (__mmask8) __U);
-}
+ return (__m256i)__builtin_ia32_selectd_256(__U,
+ (__v8si)_mm256_dpbusds_epi32(__S, __A, __B),
+ (__v8si)_mm256_setzero_si256());
+
static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_dpbusds_epi32(__m256i __S, __m256i __A, __m256i __B)
+_mm256_dpwssd_epi32(__m256i __S, __m256i __A, __m256i __B)
{
- return (__m256i) __builtin_ia32_vpdpbusds256_mask ((__v8si) __S,
- (__v8si) __A,
- (__v8si) __B,
- (__mmask8) -1);
+ return (__m256i)__builtin_ia32_vpdpwssd256((__v8si)__S, (__v8si)__A,
+ (__v8si)__B);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_dpwssd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
{
- return (__m256i) __builtin_ia32_vpdpwssd256_mask ((__v8si) __S,
- (__v8si) __A,
- (__v8si) __B,
- (__mmask8) __U);
+ return (__m256i)__builtin_ia32_selectd_256(__U,
+ (__v8si)_mm256_dpwssd_epi32(__S, __A, __B),
+ (__v8si)__S);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskz_dpwssd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
{
- return (__m256i) __builtin_ia32_vpdpwssd256_maskz ((__v8si) __S,
- (__v8si) __A,
- (__v8si) __B,
- (__mmask8) __U);
+ return (__m256i)__builtin_ia32_selectd_256(__U,
+ (__v8si)_mm256_dpwssd_epi32(__S, __A, __B),
+ (__v8si)_mm256_setzero_si256());
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_dpwssd_epi32(__m256i __S, __m256i __A, __m256i __B)
+_mm256_dpwssds_epi32(__m256i __S, __m256i __A, __m256i __B)
{
- return (__m256i) __builtin_ia32_vpdpwssd256_mask ((__v8si) __S,
- (__v8si) __A,
- (__v8si) __B,
- (__mmask8) -1);
+ return (__m256i)__builtin_ia32_vpdpwssds256((__v8si)__S, (__v8si)__A,
+ (__v8si)__B);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_dpwssds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
{
- return (__m256i) __builtin_ia32_vpdpwssds256_mask ((__v8si) __S,
- (__v8si) __A,
- (__v8si) __B,
- (__mmask8) __U);
+ return (__m256i)__builtin_ia32_selectd_256(__U,
+ (__v8si)_mm256_dpwssds_epi32(__S, __A, __B),
+ (__v8si)__S);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskz_dpwssds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
{
- return (__m256i) __builtin_ia32_vpdpwssds256_maskz ((__v8si) __S,
- (__v8si) __A,
- (__v8si) __B,
- (__mmask8) __U);
+ return (__m256i)__builtin_ia32_selectd_256(__U,
+ (__v8si)_mm256_dpwssds_epi32(__S, __A, __B),
+ (__v8si)_mm256_setzero_si256());
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_dpwssds_epi32(__m256i __S, __m256i __A, __m256i __B)
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_dpbusd_epi32(__m128i __S, __m128i __A, __m128i __B)
{
- return (__m256i) __builtin_ia32_vpdpwssds256_mask ((__v8si) __S,
- (__v8si) __A,
- (__v8si) __B,
- (__mmask8) -1);
+ return (__m128i)__builtin_ia32_vpdpbusd128((__v4si)__S, (__v4si)__A,
+ (__v4si)__B);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_mask_dpbusd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
{
- return (__m128i) __builtin_ia32_vpdpbusd128_mask ((__v4si) __S,
- (__v4si) __A,
- (__v4si) __B,
- (__mmask8) __U);
+ return (__m128i)__builtin_ia32_selectd_128(__U,
+ (__v4si)_mm_dpbusd_epi32(__S, __A, __B),
+ (__v4si)__S);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maskz_dpbusd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
{
- return (__m128i) __builtin_ia32_vpdpbusd128_maskz ((__v4si) __S,
- (__v4si) __A,
- (__v4si) __B,
- (__mmask8) __U);
+ return (__m128i)__builtin_ia32_selectd_128(__U,
+ (__v4si)_mm_dpbusd_epi32(__S, __A, __B),
+ (__v4si)_mm_setzero_si128());
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_dpbusd_epi32(__m128i __S, __m128i __A, __m128i __B)
+_mm_dpbusds_epi32(__m128i __S, __m128i __A, __m128i __B)
{
- return (__m128i) __builtin_ia32_vpdpbusd128_mask ((__v4si) __S,
- (__v4si) __A,
- (__v4si) __B,
- (__mmask8) -1);
+ return (__m128i)__builtin_ia32_vpdpbusds128((__v4si)__S, (__v4si)__A,
+ (__v4si)__B);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_mask_dpbusds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
{
- return (__m128i) __builtin_ia32_vpdpbusds128_mask ((__v4si) __S,
- (__v4si) __A,
- (__v4si) __B,
- (__mmask8) __U);
+ return (__m128i)__builtin_ia32_selectd_128(__U,
+ (__v4si)_mm_dpbusds_epi32(__S, __A, __B),
+ (__v4si)__S);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maskz_dpbusds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
{
- return (__m128i) __builtin_ia32_vpdpbusds128_maskz ((__v4si) __S,
- (__v4si) __A,
- (__v4si) __B,
- (__mmask8) __U);
+ return (__m128i)__builtin_ia32_selectd_128(__U,
+ (__v4si)_mm_dpbusds_epi32(__S, __A, __B),
+ (__v4si)_mm_setzero_si128());
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_dpbusds_epi32(__m128i __S, __m128i __A, __m128i __B)
+_mm_dpwssd_epi32(__m128i __S, __m128i __A, __m128i __B)
{
- return (__m128i) __builtin_ia32_vpdpbusds128_mask ((__v4si) __S,
- (__v4si) __A,
- (__v4si) __B,
- (__mmask8) -1);
+ return (__m128i)__builtin_ia32_vpdpwssd128((__v4si)__S, (__v4si)__A,
+ (__v4si)__B);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_mask_dpwssd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
{
- return (__m128i) __builtin_ia32_vpdpwssd128_mask ((__v4si) __S,
- (__v4si) __A,
- (__v4si) __B,
- (__mmask8) __U);
+ return (__m128i)__builtin_ia32_selectd_128(__U,
+ (__v4si)_mm_dpwssd_epi32(__S, __A, __B),
+ (__v4si)__S);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maskz_dpwssd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
{
- return (__m128i) __builtin_ia32_vpdpwssd128_maskz ((__v4si) __S,
- (__v4si) __A,
- (__v4si) __B,
- (__mmask8) __U);
+ return (__m128i)__builtin_ia32_selectd_128(__U,
+ (__v4si)_mm_dpwssd_epi32(__S, __A, __B),
+ (__v4si)_mm_setzero_si128());
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_dpwssd_epi32(__m128i __S, __m128i __A, __m128i __B)
+_mm_dpwssds_epi32(__m128i __S, __m128i __A, __m128i __B)
{
- return (__m128i) __builtin_ia32_vpdpwssd128_mask ((__v4si) __S,
- (__v4si) __A,
- (__v4si) __B,
- (__mmask8) -1);
+ return (__m128i)__builtin_ia32_vpdpwssds128((__v4si)__S, (__v4si)__A,
+ (__v4si)__B);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_mask_dpwssds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
{
- return (__m128i) __builtin_ia32_vpdpwssds128_mask ((__v4si) __S,
- (__v4si) __A,
- (__v4si) __B,
- (__mmask8) __U);
+ return (__m128i)__builtin_ia32_selectd_128(__U,
+ (__v4si)_mm_dpwssds_epi32(__S, __A, __B),
+ (__v4si)__S);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maskz_dpwssds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
{
- return (__m128i) __builtin_ia32_vpdpwssds128_maskz ((__v4si) __S,
- (__v4si) __A,
- (__v4si) __B,
- (__mmask8) __U);
+ return (__m128i)__builtin_ia32_selectd_128(__U,
+ (__v4si)_mm_dpwssds_epi32(__S, __A, __B),
+ (__v4si)_mm_setzero_si128());
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_dpwssds_epi32(__m128i __S, __m128i __A, __m128i __B)
-{
- return (__m128i) __builtin_ia32_vpdpwssds128_mask ((__v4si) __S,
- (__v4si) __A,
- (__v4si) __B,
- (__mmask8) -1);
-}
-
-
#undef __DEFAULT_FN_ATTRS
#endif
Modified: cfe/trunk/lib/Headers/avx512vnniintrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512vnniintrin.h?rev=333509&r1=333508&r2=333509&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/avx512vnniintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512vnniintrin.h Tue May 29 22:26:04 2018
@@ -33,114 +33,97 @@
static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_dpbusd_epi32(__m512i __S, __m512i __A, __m512i __B)
+{
+ return (__m512i)__builtin_ia32_vpdpbusd512((__v16si)__S, (__v16si)__A,
+ (__v16si)__B);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_dpbusd_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B)
{
- return (__m512i) __builtin_ia32_vpdpbusd512_mask ((__v16si) __S,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
+ return (__m512i)__builtin_ia32_selectd_512(__U,
+ (__v16si)_mm512_dpbusd_epi32(__S, __A, __B),
+ (__v16si)__S);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_dpbusd_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
{
- return (__m512i) __builtin_ia32_vpdpbusd512_maskz ((__v16si) __S,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
+ return (__m512i)__builtin_ia32_selectd_512(__U,
+ (__v16si)_mm512_dpbusd_epi32(__S, __A, __B),
+ (__v16si)_mm512_setzero_si512());
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_dpbusd_epi32(__m512i __S, __m512i __A, __m512i __B)
+_mm512_dpbusds_epi32(__m512i __S, __m512i __A, __m512i __B)
{
- return (__m512i) __builtin_ia32_vpdpbusd512_mask ((__v16si) __S,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) -1);
+ return (__m512i)__builtin_ia32_vpdpbusds512((__v16si)__S, (__v16si)__A,
+ (__v16si)__B);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_dpbusds_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B)
{
- return (__m512i) __builtin_ia32_vpdpbusds512_mask ((__v16si) __S,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
+ return (__m512i)__builtin_ia32_selectd_512(__U,
+ (__v16si)_mm512_dpbusds_epi32(__S, __A, __B),
+ (__v16si)__S);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_dpbusds_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
{
- return (__m512i) __builtin_ia32_vpdpbusds512_maskz ((__v16si) __S,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
+ return (__m512i)__builtin_ia32_selectd_512(__U,
+ (__v16si)_mm512_dpbusds_epi32(__S, __A, __B),
+ (__v16si)_mm512_setzero_si512());
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_dpbusds_epi32(__m512i __S, __m512i __A, __m512i __B)
+_mm512_dpwssd_epi32(__m512i __S, __m512i __A, __m512i __B)
{
- return (__m512i) __builtin_ia32_vpdpbusds512_mask ((__v16si) __S,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) -1);
+ return (__m512i)__builtin_ia32_vpdpwssd512((__v16si)__S, (__v16si)__A,
+ (__v16si)__B);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_dpwssd_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B)
{
- return (__m512i) __builtin_ia32_vpdpwssd512_mask ((__v16si) __S,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
+ return (__m512i)__builtin_ia32_selectd_512(__U,
+ (__v16si)_mm512_dpwssd_epi32(__S, __A, __B),
+ (__v16si)__S);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_dpwssd_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
{
- return (__m512i) __builtin_ia32_vpdpwssd512_maskz ((__v16si) __S,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
+ return (__m512i)__builtin_ia32_selectd_512(__U,
+ (__v16si)_mm512_dpwssd_epi32(__S, __A, __B),
+ (__v16si)_mm512_setzero_si512());
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_dpwssd_epi32(__m512i __S, __m512i __A, __m512i __B)
+_mm512_dpwssds_epi32(__m512i __S, __m512i __A, __m512i __B)
{
- return (__m512i) __builtin_ia32_vpdpwssd512_mask ((__v16si) __S,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) -1);
+ return (__m512i)__builtin_ia32_vpdpwssds512((__v16si)__S, (__v16si)__A,
+ (__v16si)__B);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_dpwssds_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B)
{
- return (__m512i) __builtin_ia32_vpdpwssds512_mask ((__v16si) __S,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
+ return (__m512i)__builtin_ia32_selectd_512(__U,
+ (__v16si)_mm512_dpwssds_epi32(__S, __A, __B),
+ (__v16si)__S);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_dpwssds_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
{
- return (__m512i) __builtin_ia32_vpdpwssds512_maskz ((__v16si) __S,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
-}
-
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_dpwssds_epi32(__m512i __S, __m512i __A, __m512i __B)
-{
- return (__m512i) __builtin_ia32_vpdpwssds512_mask ((__v16si) __S,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) -1);
+ return (__m512i)__builtin_ia32_selectd_512(__U,
+ (__v16si)_mm512_dpwssds_epi32(__S, __A, __B),
+ (__v16si)_mm512_setzero_si512());
}
-
#undef __DEFAULT_FN_ATTRS
#endif
Modified: cfe/trunk/test/CodeGen/avx512vlvnni-builtins.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512vlvnni-builtins.c?rev=333509&r1=333508&r2=333509&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/avx512vlvnni-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx512vlvnni-builtins.c Tue May 29 22:26:04 2018
@@ -4,145 +4,161 @@
__m256i test_mm256_mask_dpbusd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: @test_mm256_mask_dpbusd_epi32
- // CHECK: @llvm.x86.avx512.mask.vpdpbusd.256
+ // CHECK: @llvm.x86.avx512.vpdpbusd.256
+ // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_mask_dpbusd_epi32(__S, __U, __A, __B);
}
__m256i test_mm256_maskz_dpbusd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) {
// CHECK-LABEL: @test_mm256_maskz_dpbusd_epi32
- // CHECK: @llvm.x86.avx512.maskz.vpdpbusd.256
+ // CHECK: @llvm.x86.avx512.vpdpbusd.256
+ // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_maskz_dpbusd_epi32(__U, __S, __A, __B);
}
__m256i test_mm256_dpbusd_epi32(__m256i __S, __m256i __A, __m256i __B) {
// CHECK-LABEL: @test_mm256_dpbusd_epi32
- // CHECK: @llvm.x86.avx512.mask.vpdpbusd.256
+ // CHECK: @llvm.x86.avx512.vpdpbusd.256
return _mm256_dpbusd_epi32(__S, __A, __B);
}
__m256i test_mm256_mask_dpbusds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: @test_mm256_mask_dpbusds_epi32
- // CHECK: @llvm.x86.avx512.mask.vpdpbusds.256
+ // CHECK: @llvm.x86.avx512.vpdpbusds.256
+ // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_mask_dpbusds_epi32(__S, __U, __A, __B);
}
__m256i test_mm256_maskz_dpbusds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) {
// CHECK-LABEL: @test_mm256_maskz_dpbusds_epi32
- // CHECK: @llvm.x86.avx512.maskz.vpdpbusds.256
+ // CHECK: @llvm.x86.avx512.vpdpbusds.256
+ // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_maskz_dpbusds_epi32(__U, __S, __A, __B);
}
__m256i test_mm256_dpbusds_epi32(__m256i __S, __m256i __A, __m256i __B) {
// CHECK-LABEL: @test_mm256_dpbusds_epi32
- // CHECK: @llvm.x86.avx512.mask.vpdpbusds.256
+ // CHECK: @llvm.x86.avx512.vpdpbusds.256
return _mm256_dpbusds_epi32(__S, __A, __B);
}
__m256i test_mm256_mask_dpwssd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: @test_mm256_mask_dpwssd_epi32
- // CHECK: @llvm.x86.avx512.mask.vpdpwssd.256
+ // CHECK: @llvm.x86.avx512.vpdpwssd.256
+ // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_mask_dpwssd_epi32(__S, __U, __A, __B);
}
__m256i test_mm256_maskz_dpwssd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) {
// CHECK-LABEL: @test_mm256_maskz_dpwssd_epi32
- // CHECK: @llvm.x86.avx512.maskz.vpdpwssd.256
+ // CHECK: @llvm.x86.avx512.vpdpwssd.256
+ // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_maskz_dpwssd_epi32(__U, __S, __A, __B);
}
__m256i test_mm256_dpwssd_epi32(__m256i __S, __m256i __A, __m256i __B) {
// CHECK-LABEL: @test_mm256_dpwssd_epi32
- // CHECK: @llvm.x86.avx512.mask.vpdpwssd.256
+ // CHECK: @llvm.x86.avx512.vpdpwssd.256
return _mm256_dpwssd_epi32(__S, __A, __B);
}
__m256i test_mm256_mask_dpwssds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: @test_mm256_mask_dpwssds_epi32
- // CHECK: @llvm.x86.avx512.mask.vpdpwssds.256
+ // CHECK: @llvm.x86.avx512.vpdpwssds.256
+ // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_mask_dpwssds_epi32(__S, __U, __A, __B);
}
__m256i test_mm256_maskz_dpwssds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) {
// CHECK-LABEL: @test_mm256_maskz_dpwssds_epi32
- // CHECK: @llvm.x86.avx512.maskz.vpdpwssds.256
+ // CHECK: @llvm.x86.avx512.vpdpwssds.256
+ // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_maskz_dpwssds_epi32(__U, __S, __A, __B);
}
__m256i test_mm256_dpwssds_epi32(__m256i __S, __m256i __A, __m256i __B) {
// CHECK-LABEL: @test_mm256_dpwssds_epi32
- // CHECK: @llvm.x86.avx512.mask.vpdpwssds.256
+ // CHECK: @llvm.x86.avx512.vpdpwssds.256
return _mm256_dpwssds_epi32(__S, __A, __B);
}
__m128i test_mm_mask_dpbusd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: @test_mm_mask_dpbusd_epi32
- // CHECK: @llvm.x86.avx512.mask.vpdpbusd.128
+ // CHECK: @llvm.x86.avx512.vpdpbusd.128
+ // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm_mask_dpbusd_epi32(__S, __U, __A, __B);
}
__m128i test_mm_maskz_dpbusd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) {
// CHECK-LABEL: @test_mm_maskz_dpbusd_epi32
- // CHECK: @llvm.x86.avx512.maskz.vpdpbusd.128
+ // CHECK: @llvm.x86.avx512.vpdpbusd.128
+ // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm_maskz_dpbusd_epi32(__U, __S, __A, __B);
}
__m128i test_mm_dpbusd_epi32(__m128i __S, __m128i __A, __m128i __B) {
// CHECK-LABEL: @test_mm_dpbusd_epi32
- // CHECK: @llvm.x86.avx512.mask.vpdpbusd.128
+ // CHECK: @llvm.x86.avx512.vpdpbusd.128
return _mm_dpbusd_epi32(__S, __A, __B);
}
__m128i test_mm_mask_dpbusds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: @test_mm_mask_dpbusds_epi32
- // CHECK: @llvm.x86.avx512.mask.vpdpbusds.128
+ // CHECK: @llvm.x86.avx512.vpdpbusds.128
+ // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm_mask_dpbusds_epi32(__S, __U, __A, __B);
}
__m128i test_mm_maskz_dpbusds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) {
// CHECK-LABEL: @test_mm_maskz_dpbusds_epi32
- // CHECK: @llvm.x86.avx512.maskz.vpdpbusds.128
+ // CHECK: @llvm.x86.avx512.vpdpbusds.128
+ // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm_maskz_dpbusds_epi32(__U, __S, __A, __B);
}
__m128i test_mm_dpbusds_epi32(__m128i __S, __m128i __A, __m128i __B) {
// CHECK-LABEL: @test_mm_dpbusds_epi32
- // CHECK: @llvm.x86.avx512.mask.vpdpbusds.128
+ // CHECK: @llvm.x86.avx512.vpdpbusds.128
return _mm_dpbusds_epi32(__S, __A, __B);
}
__m128i test_mm_mask_dpwssd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: @test_mm_mask_dpwssd_epi32
- // CHECK: @llvm.x86.avx512.mask.vpdpwssd.128
+ // CHECK: @llvm.x86.avx512.vpdpwssd.128
+ // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm_mask_dpwssd_epi32(__S, __U, __A, __B);
}
__m128i test_mm_maskz_dpwssd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) {
// CHECK-LABEL: @test_mm_maskz_dpwssd_epi32
- // CHECK: @llvm.x86.avx512.maskz.vpdpwssd.128
+ // CHECK: @llvm.x86.avx512.vpdpwssd.128
+ // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm_maskz_dpwssd_epi32(__U, __S, __A, __B);
}
__m128i test_mm_dpwssd_epi32(__m128i __S, __m128i __A, __m128i __B) {
// CHECK-LABEL: @test_mm_dpwssd_epi32
- // CHECK: @llvm.x86.avx512.mask.vpdpwssd.128
+ // CHECK: @llvm.x86.avx512.vpdpwssd.128
return _mm_dpwssd_epi32(__S, __A, __B);
}
__m128i test_mm_mask_dpwssds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: @test_mm_mask_dpwssds_epi32
- // CHECK: @llvm.x86.avx512.mask.vpdpwssds.128
+ // CHECK: @llvm.x86.avx512.vpdpwssds.128
+ // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm_mask_dpwssds_epi32(__S, __U, __A, __B);
}
__m128i test_mm_maskz_dpwssds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) {
// CHECK-LABEL: @test_mm_maskz_dpwssds_epi32
- // CHECK: @llvm.x86.avx512.maskz.vpdpwssds.128
+ // CHECK: @llvm.x86.avx512.vpdpwssds.128
+ // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm_maskz_dpwssds_epi32(__U, __S, __A, __B);
}
__m128i test_mm_dpwssds_epi32(__m128i __S, __m128i __A, __m128i __B) {
// CHECK-LABEL: @test_mm_dpwssds_epi32
- // CHECK: @llvm.x86.avx512.mask.vpdpwssds.128
+ // CHECK: @llvm.x86.avx512.vpdpwssds.128
return _mm_dpwssds_epi32(__S, __A, __B);
}
Modified: cfe/trunk/test/CodeGen/avx512vnni-builtins.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512vnni-builtins.c?rev=333509&r1=333508&r2=333509&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/avx512vnni-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx512vnni-builtins.c Tue May 29 22:26:04 2018
@@ -4,73 +4,81 @@
__m512i test_mm512_mask_dpbusd_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: @test_mm512_mask_dpbusd_epi32
- // CHECK: @llvm.x86.avx512.mask.vpdpbusd.512
+ // CHECK: @llvm.x86.avx512.vpdpbusd.512
+ // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_mask_dpbusd_epi32(__S, __U, __A, __B);
}
__m512i test_mm512_maskz_dpbusd_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) {
// CHECK-LABEL: @test_mm512_maskz_dpbusd_epi32
- // CHECK: @llvm.x86.avx512.maskz.vpdpbusd.512
+ // CHECK: @llvm.x86.avx512.vpdpbusd.512
+ // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_maskz_dpbusd_epi32(__U, __S, __A, __B);
}
__m512i test_mm512_dpbusd_epi32(__m512i __S, __m512i __A, __m512i __B) {
// CHECK-LABEL: @test_mm512_dpbusd_epi32
- // CHECK: @llvm.x86.avx512.mask.vpdpbusd.512
+ // CHECK: @llvm.x86.avx512.vpdpbusd.512
return _mm512_dpbusd_epi32(__S, __A, __B);
}
__m512i test_mm512_mask_dpbusds_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: @test_mm512_mask_dpbusds_epi32
- // CHECK: @llvm.x86.avx512.mask.vpdpbusds.512
+ // CHECK: @llvm.x86.avx512.vpdpbusds.51
+ // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_mask_dpbusds_epi32(__S, __U, __A, __B);
}
__m512i test_mm512_maskz_dpbusds_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) {
// CHECK-LABEL: @test_mm512_maskz_dpbusds_epi32
- // CHECK: @llvm.x86.avx512.maskz.vpdpbusds.512
+ // CHECK: @llvm.x86.avx512.vpdpbusds.512
+ // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_maskz_dpbusds_epi32(__U, __S, __A, __B);
}
__m512i test_mm512_dpbusds_epi32(__m512i __S, __m512i __A, __m512i __B) {
// CHECK-LABEL: @test_mm512_dpbusds_epi32
- // CHECK: @llvm.x86.avx512.mask.vpdpbusds.512
+ // CHECK: @llvm.x86.avx512.vpdpbusds.512
return _mm512_dpbusds_epi32(__S, __A, __B);
}
__m512i test_mm512_mask_dpwssd_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: @test_mm512_mask_dpwssd_epi32
- // CHECK: @llvm.x86.avx512.mask.vpdpwssd.512
+ // CHECK: @llvm.x86.avx512.vpdpwssd.512
+ // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_mask_dpwssd_epi32(__S, __U, __A, __B);
}
__m512i test_mm512_maskz_dpwssd_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) {
// CHECK-LABEL: @test_mm512_maskz_dpwssd_epi32
- // CHECK: @llvm.x86.avx512.maskz.vpdpwssd.512
+ // CHECK: @llvm.x86.avx512.vpdpwssd.512
+ // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_maskz_dpwssd_epi32(__U, __S, __A, __B);
}
__m512i test_mm512_dpwssd_epi32(__m512i __S, __m512i __A, __m512i __B) {
// CHECK-LABEL: @test_mm512_dpwssd_epi32
- // CHECK: @llvm.x86.avx512.mask.vpdpwssd.512
+ // CHECK: @llvm.x86.avx512.vpdpwssd.512
return _mm512_dpwssd_epi32(__S, __A, __B);
}
__m512i test_mm512_mask_dpwssds_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: @test_mm512_mask_dpwssds_epi32
- // CHECK: @llvm.x86.avx512.mask.vpdpwssds.512
+ // CHECK: @llvm.x86.avx512.vpdpwssds.512
+ // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_mask_dpwssds_epi32(__S, __U, __A, __B);
}
__m512i test_mm512_maskz_dpwssds_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) {
// CHECK-LABEL: @test_mm512_maskz_dpwssds_epi32
- // CHECK: @llvm.x86.avx512.maskz.vpdpwssds.512
+ // CHECK: @llvm.x86.avx512.vpdpwssds.512
+ // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_maskz_dpwssds_epi32(__U, __S, __A, __B);
}
__m512i test_mm512_dpwssds_epi32(__m512i __S, __m512i __A, __m512i __B) {
// CHECK-LABEL: @test_mm512_dpwssds_epi32
- // CHECK: @llvm.x86.avx512.mask.vpdpwssds.512
+ // CHECK: @llvm.x86.avx512.vpdpwssds.512
return _mm512_dpwssds_epi32(__S, __A, __B);
}
More information about the cfe-commits
mailing list