r280197 - [AVX-512] Implement masked floating point logical operations with native IR and remove the builtins.
Craig Topper via cfe-commits
cfe-commits at lists.llvm.org
Tue Aug 30 22:38:59 PDT 2016
Author: ctopper
Date: Wed Aug 31 00:38:58 2016
New Revision: 280197
URL: http://llvm.org/viewvc/llvm-project?rev=280197&view=rev
Log:
[AVX-512] Implement masked floating point logical operations with native IR and remove the builtins.
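The change replaces the dedicated masked builtins with the generic per-element select builtins the headers already use elsewhere: the bitwise operation is expressed with native vector operators, and the masking becomes an ordinary select. A minimal before/after sketch of the pattern (illustrative only; the types, casts, and builtin signatures mirror the header changes below):

/* Before: one target-specific masked builtin per op/width/type.  */
__m512d old_mask_xor_pd(__m512d W, __mmask8 U, __m512d A, __m512d B) {
  return (__m512d)__builtin_ia32_xorpd512_mask((__v8df)A, (__v8df)B,
                                               (__v8df)W, (__mmask8)U);
}

/* After: native IR for the bitwise op, generic select for the mask.  */
__m512d new_mask_xor_pd(__m512d W, __mmask8 U, __m512d A, __m512d B) {
  return (__m512d)__builtin_ia32_selectpd_512(
      (__mmask8)U,
      (__v8df)_mm512_xor_pd(A, B),   /* emitted as: xor <8 x i64> */
      (__v8df)W);                    /* passthrough for zeroed mask bits */
}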
Modified:
cfe/trunk/include/clang/Basic/BuiltinsX86.def
cfe/trunk/lib/Headers/avx512dqintrin.h
cfe/trunk/lib/Headers/avx512vldqintrin.h
cfe/trunk/test/CodeGen/avx512dq-builtins.c
cfe/trunk/test/CodeGen/avx512vldq-builtins.c
Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=280197&r1=280196&r2=280197&view=diff
==============================================================================
--- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Wed Aug 31 00:38:58 2016
@@ -1131,32 +1131,8 @@ TARGET_BUILTIN(__builtin_ia32_psubd512_m
TARGET_BUILTIN(__builtin_ia32_pmulld512_mask, "V16iV16iV16iV16iUs", "", "avx512f")
TARGET_BUILTIN(__builtin_ia32_pmullq512_mask, "V8LLiV8LLiV8LLiV8LLiUc", "", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_xorpd512_mask, "V8dV8dV8dV8dUc", "", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_xorps512_mask, "V16fV16fV16fV16fUs", "", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_orpd512_mask, "V8dV8dV8dV8dUc", "", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_orps512_mask, "V16fV16fV16fV16fUs", "", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_andpd512_mask, "V8dV8dV8dV8dUc", "", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_andps512_mask, "V16fV16fV16fV16fUs", "", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_andnpd512_mask, "V8dV8dV8dV8dUc", "", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_andnps512_mask, "V16fV16fV16fV16fUs", "", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_pmullq256_mask, "V4LLiV4LLiV4LLiV4LLiUc", "", "avx512vl,avx512dq")
TARGET_BUILTIN(__builtin_ia32_pmullq128_mask, "V2LLiV2LLiV2LLiV2LLiUc", "", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_andnpd256_mask, "V4dV4dV4dV4dUc", "", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_andnpd128_mask, "V2dV2dV2dV2dUc", "", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_andnps256_mask, "V8fV8fV8fV8fUc", "", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_andnps128_mask, "V4fV4fV4fV4fUc", "", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_andpd256_mask, "V4dV4dV4dV4dUc", "", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_andpd128_mask, "V2dV2dV2dV2dUc", "", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_andps256_mask, "V8fV8fV8fV8fUc", "", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_andps128_mask, "V4fV4fV4fV4fUc", "", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_xorpd256_mask, "V4dV4dV4dV4dUc", "", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_xorpd128_mask, "V2dV2dV2dV2dUc", "", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_xorps256_mask, "V8fV8fV8fV8fUc", "", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_xorps128_mask, "V4fV4fV4fV4fUc", "", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_orpd256_mask, "V4dV4dV4dV4dUc", "", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_orpd128_mask, "V2dV2dV2dV2dUc", "", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_orps256_mask, "V8fV8fV8fV8fUc", "", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_orps128_mask, "V4fV4fV4fV4fUc", "", "avx512vl,avx512dq")
TARGET_BUILTIN(__builtin_ia32_pabsb512_mask, "V64cV64cV64cULLi", "", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_pabsw512_mask, "V32sV32sV32sUi", "", "avx512bw")
Modified: cfe/trunk/lib/Headers/avx512dqintrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512dqintrin.h?rev=280197&r1=280196&r2=280197&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/avx512dqintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512dqintrin.h Wed Aug 31 00:38:58 2016
@@ -54,179 +54,155 @@ _mm512_maskz_mullo_epi64 (__mmask8 __U,
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
-_mm512_xor_pd (__m512d __A, __m512d __B) {
- return (__m512d) ((__v8du) __A ^ (__v8du) __B);
+_mm512_xor_pd(__m512d __A, __m512d __B) {
+ return (__m512d)((__v8du)__A ^ (__v8du)__B);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
-_mm512_mask_xor_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
- return (__m512d) __builtin_ia32_xorpd512_mask ((__v8df) __A,
- (__v8df) __B,
- (__v8df) __W,
- (__mmask8) __U);
+_mm512_mask_xor_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
+ return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
+ (__v8df)_mm512_xor_pd(__A, __B),
+ (__v8df)__W);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
-_mm512_maskz_xor_pd (__mmask8 __U, __m512d __A, __m512d __B) {
- return (__m512d) __builtin_ia32_xorpd512_mask ((__v8df) __A,
- (__v8df) __B,
- (__v8df)
- _mm512_setzero_pd (),
- (__mmask8) __U);
+_mm512_maskz_xor_pd(__mmask8 __U, __m512d __A, __m512d __B) {
+ return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
+ (__v8df)_mm512_xor_pd(__A, __B),
+ (__v8df)_mm512_setzero_pd());
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_xor_ps (__m512 __A, __m512 __B) {
- return (__m512) ((__v16su) __A ^ (__v16su) __B);
+ return (__m512)((__v16su)__A ^ (__v16su)__B);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
-_mm512_mask_xor_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
- return (__m512) __builtin_ia32_xorps512_mask ((__v16sf) __A,
- (__v16sf) __B,
- (__v16sf) __W,
- (__mmask16) __U);
+_mm512_mask_xor_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
+ return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
+ (__v16sf)_mm512_xor_ps(__A, __B),
+ (__v16sf)__W);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
-_mm512_maskz_xor_ps (__mmask16 __U, __m512 __A, __m512 __B) {
- return (__m512) __builtin_ia32_xorps512_mask ((__v16sf) __A,
- (__v16sf) __B,
- (__v16sf)
- _mm512_setzero_ps (),
- (__mmask16) __U);
+_mm512_maskz_xor_ps(__mmask16 __U, __m512 __A, __m512 __B) {
+ return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
+ (__v16sf)_mm512_xor_ps(__A, __B),
+ (__v16sf)_mm512_setzero_ps());
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
-_mm512_or_pd (__m512d __A, __m512d __B) {
- return (__m512d) ((__v8du) __A | (__v8du) __B);
+_mm512_or_pd(__m512d __A, __m512d __B) {
+ return (__m512d)((__v8du)__A | (__v8du)__B);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
-_mm512_mask_or_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
- return (__m512d) __builtin_ia32_orpd512_mask ((__v8df) __A,
- (__v8df) __B,
- (__v8df) __W,
- (__mmask8) __U);
+_mm512_mask_or_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
+ return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
+ (__v8df)_mm512_or_pd(__A, __B),
+ (__v8df)__W);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
-_mm512_maskz_or_pd (__mmask8 __U, __m512d __A, __m512d __B) {
- return (__m512d) __builtin_ia32_orpd512_mask ((__v8df) __A,
- (__v8df) __B,
- (__v8df)
- _mm512_setzero_pd (),
- (__mmask8) __U);
+_mm512_maskz_or_pd(__mmask8 __U, __m512d __A, __m512d __B) {
+ return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
+ (__v8df)_mm512_or_pd(__A, __B),
+ (__v8df)_mm512_setzero_pd());
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
-_mm512_or_ps (__m512 __A, __m512 __B) {
- return (__m512) ((__v16su) __A | (__v16su) __B);
+_mm512_or_ps(__m512 __A, __m512 __B) {
+ return (__m512)((__v16su)__A | (__v16su)__B);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
-_mm512_mask_or_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
- return (__m512) __builtin_ia32_orps512_mask ((__v16sf) __A,
- (__v16sf) __B,
- (__v16sf) __W,
- (__mmask16) __U);
+_mm512_mask_or_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
+ return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
+ (__v16sf)_mm512_or_ps(__A, __B),
+ (__v16sf)__W);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
-_mm512_maskz_or_ps (__mmask16 __U, __m512 __A, __m512 __B) {
- return (__m512) __builtin_ia32_orps512_mask ((__v16sf) __A,
- (__v16sf) __B,
- (__v16sf)
- _mm512_setzero_ps (),
- (__mmask16) __U);
+_mm512_maskz_or_ps(__mmask16 __U, __m512 __A, __m512 __B) {
+ return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
+ (__v16sf)_mm512_or_ps(__A, __B),
+ (__v16sf)_mm512_setzero_ps());
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
-_mm512_and_pd (__m512d __A, __m512d __B) {
- return (__m512d) ((__v8du) __A & (__v8du) __B);
+_mm512_and_pd(__m512d __A, __m512d __B) {
+ return (__m512d)((__v8du)__A & (__v8du)__B);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
-_mm512_mask_and_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
- return (__m512d) __builtin_ia32_andpd512_mask ((__v8df) __A,
- (__v8df) __B,
- (__v8df) __W,
- (__mmask8) __U);
+_mm512_mask_and_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
+ return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
+ (__v8df)_mm512_and_pd(__A, __B),
+ (__v8df)__W);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
-_mm512_maskz_and_pd (__mmask8 __U, __m512d __A, __m512d __B) {
- return (__m512d) __builtin_ia32_andpd512_mask ((__v8df) __A,
- (__v8df) __B,
- (__v8df)
- _mm512_setzero_pd (),
- (__mmask8) __U);
+_mm512_maskz_and_pd(__mmask8 __U, __m512d __A, __m512d __B) {
+ return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
+ (__v8df)_mm512_and_pd(__A, __B),
+ (__v8df)_mm512_setzero_pd());
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
-_mm512_and_ps (__m512 __A, __m512 __B) {
- return (__m512) ((__v16su) __A & (__v16su) __B);
+_mm512_and_ps(__m512 __A, __m512 __B) {
+ return (__m512)((__v16su)__A & (__v16su)__B);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
-_mm512_mask_and_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
- return (__m512) __builtin_ia32_andps512_mask ((__v16sf) __A,
- (__v16sf) __B,
- (__v16sf) __W,
- (__mmask16) __U);
+_mm512_mask_and_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
+ return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
+ (__v16sf)_mm512_and_ps(__A, __B),
+ (__v16sf)__W);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
-_mm512_maskz_and_ps (__mmask16 __U, __m512 __A, __m512 __B) {
- return (__m512) __builtin_ia32_andps512_mask ((__v16sf) __A,
- (__v16sf) __B,
- (__v16sf)
- _mm512_setzero_ps (),
- (__mmask16) __U);
+_mm512_maskz_and_ps(__mmask16 __U, __m512 __A, __m512 __B) {
+ return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
+ (__v16sf)_mm512_and_ps(__A, __B),
+ (__v16sf)_mm512_setzero_ps());
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
-_mm512_andnot_pd (__m512d __A, __m512d __B) {
+_mm512_andnot_pd(__m512d __A, __m512d __B) {
return (__m512d)(~(__v8du)__A & (__v8du)__B);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
-_mm512_mask_andnot_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
- return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A,
- (__v8df) __B,
- (__v8df) __W,
- (__mmask8) __U);
+_mm512_mask_andnot_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
+ return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
+ (__v8df)_mm512_andnot_pd(__A, __B),
+ (__v8df)__W);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
-_mm512_maskz_andnot_pd (__mmask8 __U, __m512d __A, __m512d __B) {
- return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A,
- (__v8df) __B,
- (__v8df)
- _mm512_setzero_pd (),
- (__mmask8) __U);
+_mm512_maskz_andnot_pd(__mmask8 __U, __m512d __A, __m512d __B) {
+ return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
+ (__v8df)_mm512_andnot_pd(__A, __B),
+ (__v8df)_mm512_setzero_pd());
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
-_mm512_andnot_ps (__m512 __A, __m512 __B) {
+_mm512_andnot_ps(__m512 __A, __m512 __B) {
return (__m512)(~(__v16su)__A & (__v16su)__B);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
-_mm512_mask_andnot_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
- return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A,
- (__v16sf) __B,
- (__v16sf) __W,
- (__mmask16) __U);
+_mm512_mask_andnot_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
+ return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
+ (__v16sf)_mm512_andnot_ps(__A, __B),
+ (__v16sf)__W);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
-_mm512_maskz_andnot_ps (__mmask16 __U, __m512 __A, __m512 __B) {
- return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A,
- (__v16sf) __B,
- (__v16sf)
- _mm512_setzero_ps (),
- (__mmask16) __U);
+_mm512_maskz_andnot_ps(__mmask16 __U, __m512 __A, __m512 __B) {
+ return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
+ (__v16sf)_mm512_andnot_ps(__A, __B),
+ (__v16sf)_mm512_setzero_ps());
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
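The intrinsics' observable semantics are unchanged by this rewrite; only the lowering differs. A hedged usage sketch (the helper name is hypothetical; building it requires an AVX512DQ-enabled target): lanes whose mask bit is set receive the bitwise result, the rest keep the passthrough value.

#include <immintrin.h>

/* 0x55 = 0b01010101: even lanes (0, 2, 4, 6) get A ^ B,
   odd lanes keep the corresponding element of W.  */
static __m512d xor_even_lanes(__m512d W, __m512d A, __m512d B) {
  return _mm512_mask_xor_pd(W, (__mmask8)0x55, A, B);
}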
Modified: cfe/trunk/lib/Headers/avx512vldqintrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512vldqintrin.h?rev=280197&r1=280196&r2=280197&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/avx512vldqintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512vldqintrin.h Wed Aug 31 00:38:58 2016
@@ -76,276 +76,227 @@ _mm_maskz_mullo_epi64 (__mmask8 __U, __m
}
static __inline__ __m256d __DEFAULT_FN_ATTRS
-_mm256_mask_andnot_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
- return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A,
- (__v4df) __B,
- (__v4df) __W,
- (__mmask8) __U);
+_mm256_mask_andnot_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
+ return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
+ (__v4df)_mm256_andnot_pd(__A, __B),
+ (__v4df)__W);
}
static __inline__ __m256d __DEFAULT_FN_ATTRS
-_mm256_maskz_andnot_pd (__mmask8 __U, __m256d __A, __m256d __B) {
- return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A,
- (__v4df) __B,
- (__v4df)
- _mm256_setzero_pd (),
- (__mmask8) __U);
+_mm256_maskz_andnot_pd(__mmask8 __U, __m256d __A, __m256d __B) {
+ return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
+ (__v4df)_mm256_andnot_pd(__A, __B),
+ (__v4df)_mm256_setzero_pd());
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
-_mm_mask_andnot_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
- return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A,
- (__v2df) __B,
- (__v2df) __W,
- (__mmask8) __U);
+_mm_mask_andnot_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
+ return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
+ (__v2df)_mm_andnot_pd(__A, __B),
+ (__v2df)__W);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
-_mm_maskz_andnot_pd (__mmask8 __U, __m128d __A, __m128d __B) {
- return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A,
- (__v2df) __B,
- (__v2df)
- _mm_setzero_pd (),
- (__mmask8) __U);
+_mm_maskz_andnot_pd(__mmask8 __U, __m128d __A, __m128d __B) {
+ return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
+ (__v2df)_mm_andnot_pd(__A, __B),
+ (__v2df)_mm_setzero_pd());
}
static __inline__ __m256 __DEFAULT_FN_ATTRS
-_mm256_mask_andnot_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
- return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A,
- (__v8sf) __B,
- (__v8sf) __W,
- (__mmask8) __U);
+_mm256_mask_andnot_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
+ return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
+ (__v8sf)_mm256_andnot_ps(__A, __B),
+ (__v8sf)__W);
}
static __inline__ __m256 __DEFAULT_FN_ATTRS
-_mm256_maskz_andnot_ps (__mmask8 __U, __m256 __A, __m256 __B) {
- return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A,
- (__v8sf) __B,
- (__v8sf)
- _mm256_setzero_ps (),
- (__mmask8) __U);
+_mm256_maskz_andnot_ps(__mmask8 __U, __m256 __A, __m256 __B) {
+ return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
+ (__v8sf)_mm256_andnot_ps(__A, __B),
+ (__v8sf)_mm256_setzero_ps());
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
-_mm_mask_andnot_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
- return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A,
- (__v4sf) __B,
- (__v4sf) __W,
- (__mmask8) __U);
+_mm_mask_andnot_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
+ return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
+ (__v4sf)_mm_andnot_ps(__A, __B),
+ (__v4sf)__W);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
-_mm_maskz_andnot_ps (__mmask8 __U, __m128 __A, __m128 __B) {
- return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A,
- (__v4sf) __B,
- (__v4sf)
- _mm_setzero_ps (),
- (__mmask8) __U);
+_mm_maskz_andnot_ps(__mmask8 __U, __m128 __A, __m128 __B) {
+ return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
+ (__v4sf)_mm_andnot_ps(__A, __B),
+ (__v4sf)_mm_setzero_ps());
}
static __inline__ __m256d __DEFAULT_FN_ATTRS
-_mm256_mask_and_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
- return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A,
- (__v4df) __B,
- (__v4df) __W,
- (__mmask8) __U);
+_mm256_mask_and_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
+ return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
+ (__v4df)_mm256_and_pd(__A, __B),
+ (__v4df)__W);
}
static __inline__ __m256d __DEFAULT_FN_ATTRS
-_mm256_maskz_and_pd (__mmask8 __U, __m256d __A, __m256d __B) {
- return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A,
- (__v4df) __B,
- (__v4df)
- _mm256_setzero_pd (),
- (__mmask8) __U);
+_mm256_maskz_and_pd(__mmask8 __U, __m256d __A, __m256d __B) {
+ return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
+ (__v4df)_mm256_and_pd(__A, __B),
+ (__v4df)_mm256_setzero_pd());
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
-_mm_mask_and_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
- return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A,
- (__v2df) __B,
- (__v2df) __W,
- (__mmask8) __U);
+_mm_mask_and_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
+ return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
+ (__v2df)_mm_and_pd(__A, __B),
+ (__v2df)__W);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
-_mm_maskz_and_pd (__mmask8 __U, __m128d __A, __m128d __B) {
- return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A,
- (__v2df) __B,
- (__v2df)
- _mm_setzero_pd (),
- (__mmask8) __U);
+_mm_maskz_and_pd(__mmask8 __U, __m128d __A, __m128d __B) {
+ return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
+ (__v2df)_mm_and_pd(__A, __B),
+ (__v2df)_mm_setzero_pd());
}
static __inline__ __m256 __DEFAULT_FN_ATTRS
-_mm256_mask_and_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
- return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A,
- (__v8sf) __B,
- (__v8sf) __W,
- (__mmask8) __U);
+_mm256_mask_and_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
+ return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
+ (__v8sf)_mm256_and_ps(__A, __B),
+ (__v8sf)__W);
}
static __inline__ __m256 __DEFAULT_FN_ATTRS
-_mm256_maskz_and_ps (__mmask8 __U, __m256 __A, __m256 __B) {
- return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A,
- (__v8sf) __B,
- (__v8sf)
- _mm256_setzero_ps (),
- (__mmask8) __U);
+_mm256_maskz_and_ps(__mmask8 __U, __m256 __A, __m256 __B) {
+ return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
+ (__v8sf)_mm256_and_ps(__A, __B),
+ (__v8sf)_mm256_setzero_ps());
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
-_mm_mask_and_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
- return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A,
- (__v4sf) __B,
- (__v4sf) __W,
- (__mmask8) __U);
+_mm_mask_and_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
+ return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
+ (__v4sf)_mm_and_ps(__A, __B),
+ (__v4sf)__W);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
-_mm_maskz_and_ps (__mmask8 __U, __m128 __A, __m128 __B) {
- return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A,
- (__v4sf) __B,
- (__v4sf)
- _mm_setzero_ps (),
- (__mmask8) __U);
+_mm_maskz_and_ps(__mmask8 __U, __m128 __A, __m128 __B) {
+ return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
+ (__v4sf)_mm_and_ps(__A, __B),
+ (__v4sf)_mm_setzero_ps());
}
static __inline__ __m256d __DEFAULT_FN_ATTRS
-_mm256_mask_xor_pd (__m256d __W, __mmask8 __U, __m256d __A,
- __m256d __B) {
- return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A,
- (__v4df) __B,
- (__v4df) __W,
- (__mmask8) __U);
+_mm256_mask_xor_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
+ return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
+ (__v4df)_mm256_xor_pd(__A, __B),
+ (__v4df)__W);
}
static __inline__ __m256d __DEFAULT_FN_ATTRS
-_mm256_maskz_xor_pd (__mmask8 __U, __m256d __A, __m256d __B) {
- return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A,
- (__v4df) __B,
- (__v4df)
- _mm256_setzero_pd (),
- (__mmask8) __U);
+_mm256_maskz_xor_pd(__mmask8 __U, __m256d __A, __m256d __B) {
+ return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
+ (__v4df)_mm256_xor_pd(__A, __B),
+ (__v4df)_mm256_setzero_pd());
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
-_mm_mask_xor_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
- return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A,
- (__v2df) __B,
- (__v2df) __W,
- (__mmask8) __U);
+_mm_mask_xor_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
+ return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
+ (__v2df)_mm_xor_pd(__A, __B),
+ (__v2df)__W);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_xor_pd (__mmask8 __U, __m128d __A, __m128d __B) {
- return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A,
- (__v2df) __B,
- (__v2df)
- _mm_setzero_pd (),
- (__mmask8) __U);
+ return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
+ (__v2df)_mm_xor_pd(__A, __B),
+ (__v2df)_mm_setzero_pd());
}
static __inline__ __m256 __DEFAULT_FN_ATTRS
-_mm256_mask_xor_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
- return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A,
- (__v8sf) __B,
- (__v8sf) __W,
- (__mmask8) __U);
+_mm256_mask_xor_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
+ return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
+ (__v8sf)_mm256_xor_ps(__A, __B),
+ (__v8sf)__W);
}
static __inline__ __m256 __DEFAULT_FN_ATTRS
-_mm256_maskz_xor_ps (__mmask8 __U, __m256 __A, __m256 __B) {
- return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A,
- (__v8sf) __B,
- (__v8sf)
- _mm256_setzero_ps (),
- (__mmask8) __U);
+_mm256_maskz_xor_ps(__mmask8 __U, __m256 __A, __m256 __B) {
+ return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
+ (__v8sf)_mm256_xor_ps(__A, __B),
+ (__v8sf)_mm256_setzero_ps());
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
-_mm_mask_xor_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
- return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A,
- (__v4sf) __B,
- (__v4sf) __W,
- (__mmask8) __U);
+_mm_mask_xor_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
+ return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
+ (__v4sf)_mm_xor_ps(__A, __B),
+ (__v4sf)__W);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
-_mm_maskz_xor_ps (__mmask8 __U, __m128 __A, __m128 __B) {
- return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A,
- (__v4sf) __B,
- (__v4sf)
- _mm_setzero_ps (),
- (__mmask8) __U);
+_mm_maskz_xor_ps(__mmask8 __U, __m128 __A, __m128 __B) {
+ return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
+ (__v4sf)_mm_xor_ps(__A, __B),
+ (__v4sf)_mm_setzero_ps());
}
static __inline__ __m256d __DEFAULT_FN_ATTRS
-_mm256_mask_or_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
- return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A,
- (__v4df) __B,
- (__v4df) __W,
- (__mmask8) __U);
+_mm256_mask_or_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
+ return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
+ (__v4df)_mm256_or_pd(__A, __B),
+ (__v4df)__W);
}
static __inline__ __m256d __DEFAULT_FN_ATTRS
-_mm256_maskz_or_pd (__mmask8 __U, __m256d __A, __m256d __B) {
- return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A,
- (__v4df) __B,
- (__v4df)
- _mm256_setzero_pd (),
- (__mmask8) __U);
+_mm256_maskz_or_pd(__mmask8 __U, __m256d __A, __m256d __B) {
+ return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
+ (__v4df)_mm256_or_pd(__A, __B),
+ (__v4df)_mm256_setzero_pd());
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
-_mm_mask_or_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
- return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A,
- (__v2df) __B,
- (__v2df) __W,
- (__mmask8) __U);
+_mm_mask_or_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
+ return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
+ (__v2df)_mm_or_pd(__A, __B),
+ (__v2df)__W);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
-_mm_maskz_or_pd (__mmask8 __U, __m128d __A, __m128d __B) {
- return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A,
- (__v2df) __B,
- (__v2df)
- _mm_setzero_pd (),
- (__mmask8) __U);
+_mm_maskz_or_pd(__mmask8 __U, __m128d __A, __m128d __B) {
+ return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
+ (__v2df)_mm_or_pd(__A, __B),
+ (__v2df)_mm_setzero_pd());
}
static __inline__ __m256 __DEFAULT_FN_ATTRS
-_mm256_mask_or_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
- return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A,
- (__v8sf) __B,
- (__v8sf) __W,
- (__mmask8) __U);
+_mm256_mask_or_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
+ return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
+ (__v8sf)_mm256_or_ps(__A, __B),
+ (__v8sf)__W);
}
static __inline__ __m256 __DEFAULT_FN_ATTRS
-_mm256_maskz_or_ps (__mmask8 __U, __m256 __A, __m256 __B) {
- return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A,
- (__v8sf) __B,
- (__v8sf)
- _mm256_setzero_ps (),
- (__mmask8) __U);
+_mm256_maskz_or_ps(__mmask8 __U, __m256 __A, __m256 __B) {
+ return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
+ (__v8sf)_mm256_or_ps(__A, __B),
+ (__v8sf)_mm256_setzero_ps());
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
-_mm_mask_or_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
- return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A,
- (__v4sf) __B,
- (__v4sf) __W,
- (__mmask8) __U);
+_mm_mask_or_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
+ return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
+ (__v4sf)_mm_or_ps(__A, __B),
+ (__v4sf)__W);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
-_mm_maskz_or_ps (__mmask8 __U, __m128 __A, __m128 __B) {
- return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A,
- (__v4sf) __B,
- (__v4sf)
- _mm_setzero_ps (),
- (__mmask8) __U);
+_mm_maskz_or_ps(__mmask8 __U, __m128 __A, __m128 __B) {
+ return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
+ (__v4sf)_mm_or_ps(__A, __B),
+ (__v4sf)_mm_setzero_ps());
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
Modified: cfe/trunk/test/CodeGen/avx512dq-builtins.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512dq-builtins.c?rev=280197&r1=280196&r2=280197&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/avx512dq-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx512dq-builtins.c Wed Aug 31 00:38:58 2016
@@ -31,13 +31,17 @@ __m512d test_mm512_xor_pd (__m512d __A,
__m512d test_mm512_mask_xor_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
// CHECK-LABEL: @test_mm512_mask_xor_pd
- // CHECK: @llvm.x86.avx512.mask.xor.pd.512
+ // CHECK: xor <8 x i64>
+ // CHECK: %[[MASK:.*]] = bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK: select <8 x i1> %[[MASK]], <8 x double> %{{.*}}, <8 x double> %{{.*}}
return (__m512d) _mm512_mask_xor_pd(__W, __U, __A, __B);
}
__m512d test_mm512_maskz_xor_pd (__mmask8 __U, __m512d __A, __m512d __B) {
// CHECK-LABEL: @test_mm512_maskz_xor_pd
- // CHECK: @llvm.x86.avx512.mask.xor.pd.512
+ // CHECK: xor <8 x i64>
+ // CHECK: %[[MASK:.*]] = bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK: select <8 x i1> %[[MASK]], <8 x double> %{{.*}}, <8 x double> %{{.*}}
return (__m512d) _mm512_maskz_xor_pd(__U, __A, __B);
}
@@ -49,13 +53,17 @@ __m512 test_mm512_xor_ps (__m512 __A, __
__m512 test_mm512_mask_xor_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
// CHECK-LABEL: @test_mm512_mask_xor_ps
- // CHECK: @llvm.x86.avx512.mask.xor.ps.512
+ // CHECK: xor <16 x i32>
+ // CHECK: %[[MASK:.*]] = bitcast i16 %{{.*}} to <16 x i1>
+ // CHECK: select <16 x i1> %[[MASK]], <16 x float> %{{.*}}, <16 x float> %{{.*}}
return (__m512) _mm512_mask_xor_ps(__W, __U, __A, __B);
}
__m512 test_mm512_maskz_xor_ps (__mmask16 __U, __m512 __A, __m512 __B) {
// CHECK-LABEL: @test_mm512_maskz_xor_ps
- // CHECK: @llvm.x86.avx512.mask.xor.ps.512
+ // CHECK: xor <16 x i32>
+ // CHECK: %[[MASK:.*]] = bitcast i16 %{{.*}} to <16 x i1>
+ // CHECK: select <16 x i1> %[[MASK]], <16 x float> %{{.*}}, <16 x float> %{{.*}}
return (__m512) _mm512_maskz_xor_ps(__U, __A, __B);
}
@@ -67,13 +75,17 @@ __m512d test_mm512_or_pd (__m512d __A, _
__m512d test_mm512_mask_or_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
// CHECK-LABEL: @test_mm512_mask_or_pd
- // CHECK: @llvm.x86.avx512.mask.or.pd.512
+ // CHECK: or <8 x i64>
+ // CHECK: %[[MASK:.*]] = bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK: select <8 x i1> %[[MASK]], <8 x double> %{{.*}}, <8 x double> %{{.*}}
return (__m512d) _mm512_mask_or_pd(__W, __U, __A, __B);
}
__m512d test_mm512_maskz_or_pd (__mmask8 __U, __m512d __A, __m512d __B) {
// CHECK-LABEL: @test_mm512_maskz_or_pd
- // CHECK: @llvm.x86.avx512.mask.or.pd.512
+ // CHECK: or <8 x i64>
+ // CHECK: %[[MASK:.*]] = bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK: select <8 x i1> %[[MASK]], <8 x double> %{{.*}}, <8 x double> %{{.*}}
return (__m512d) _mm512_maskz_or_pd(__U, __A, __B);
}
@@ -85,13 +97,17 @@ __m512 test_mm512_or_ps (__m512 __A, __m
__m512 test_mm512_mask_or_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
// CHECK-LABEL: @test_mm512_mask_or_ps
- // CHECK: @llvm.x86.avx512.mask.or.ps.512
+ // CHECK: or <16 x i32>
+ // CHECK: %[[MASK:.*]] = bitcast i16 %{{.*}} to <16 x i1>
+ // CHECK: select <16 x i1> %[[MASK]], <16 x float> %{{.*}}, <16 x float> %{{.*}}
return (__m512) _mm512_mask_or_ps(__W, __U, __A, __B);
}
__m512 test_mm512_maskz_or_ps (__mmask16 __U, __m512 __A, __m512 __B) {
// CHECK-LABEL: @test_mm512_maskz_or_ps
- // CHECK: @llvm.x86.avx512.mask.or.ps.512
+ // CHECK: or <16 x i32>
+ // CHECK: %[[MASK:.*]] = bitcast i16 %{{.*}} to <16 x i1>
+ // CHECK: select <16 x i1> %[[MASK]], <16 x float> %{{.*}}, <16 x float> %{{.*}}
return (__m512) _mm512_maskz_or_ps(__U, __A, __B);
}
@@ -103,13 +119,17 @@ __m512d test_mm512_and_pd (__m512d __A,
__m512d test_mm512_mask_and_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
// CHECK-LABEL: @test_mm512_mask_and_pd
- // CHECK: @llvm.x86.avx512.mask.and.pd.512
+ // CHECK: and <8 x i64>
+ // CHECK: %[[MASK:.*]] = bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK: select <8 x i1> %[[MASK]], <8 x double> %{{.*}}, <8 x double> %{{.*}}
return (__m512d) _mm512_mask_and_pd(__W, __U, __A, __B);
}
__m512d test_mm512_maskz_and_pd (__mmask8 __U, __m512d __A, __m512d __B) {
// CHECK-LABEL: @test_mm512_maskz_and_pd
- // CHECK: @llvm.x86.avx512.mask.and.pd.512
+ // CHECK: and <8 x i64>
+ // CHECK: %[[MASK:.*]] = bitcast i8 %{{.*}} to <8 x i1>
+ // CHECK: select <8 x i1> %[[MASK]], <8 x double> %{{.*}}, <8 x double> %{{.*}}
return (__m512d) _mm512_maskz_and_pd(__U, __A, __B);
}
@@ -121,13 +141,17 @@ __m512 test_mm512_and_ps (__m512 __A, __
__m512 test_mm512_mask_and_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
// CHECK-LABEL: @test_mm512_mask_and_ps
- // CHECK: @llvm.x86.avx512.mask.and.ps.512
+ // CHECK: and <16 x i32>
+ // CHECK: %[[MASK:.*]] = bitcast i16 %{{.*}} to <16 x i1>
+ // CHECK: select <16 x i1> %[[MASK]], <16 x float> %{{.*}}, <16 x float> %{{.*}}
return (__m512) _mm512_mask_and_ps(__W, __U, __A, __B);
}
__m512 test_mm512_maskz_and_ps (__mmask16 __U, __m512 __A, __m512 __B) {
// CHECK-LABEL: @test_mm512_maskz_and_ps
- // CHECK: @llvm.x86.avx512.mask.and.ps.512
+ // CHECK: and <16 x i32>
+ // CHECK: %[[MASK:.*]] = bitcast i16 %{{.*}} to <16 x i1>
+ // CHECK: select <16 x i1> %[[MASK]], <16 x float> %{{.*}}, <16 x float> %{{.*}}
return (__m512) _mm512_maskz_and_ps(__U, __A, __B);
}
@@ -140,13 +164,17 @@ __m512d test_mm512_andnot_pd (__m512d __
__m512d test_mm512_mask_andnot_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
// CHECK-LABEL: @test_mm512_mask_andnot_pd
- // CHECK: @llvm.x86.avx512.mask.andn.pd.512
+ // CHECK: xor <8 x i64> %{{.*}}, <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>
+ // CHECK: and <8 x i64> %{{.*}}, %{{.*}}
+ // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
return (__m512d) _mm512_mask_andnot_pd(__W, __U, __A, __B);
}
__m512d test_mm512_maskz_andnot_pd (__mmask8 __U, __m512d __A, __m512d __B) {
// CHECK-LABEL: @test_mm512_maskz_andnot_pd
- // CHECK: @llvm.x86.avx512.mask.andn.pd.512
+ // CHECK: xor <8 x i64> %{{.*}}, <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>
+ // CHECK: and <8 x i64> %{{.*}}, %{{.*}}
+ // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
return (__m512d) _mm512_maskz_andnot_pd(__U, __A, __B);
}
@@ -159,13 +187,17 @@ __m512 test_mm512_andnot_ps (__m512 __A,
__m512 test_mm512_mask_andnot_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
// CHECK-LABEL: @test_mm512_mask_andnot_ps
- // CHECK: @llvm.x86.avx512.mask.andn.ps.512
+ // CHECK: xor <16 x i32> %{{.*}}, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+ // CHECK: and <16 x i32> %{{.*}}, %{{.*}}
+ // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
return (__m512) _mm512_mask_andnot_ps(__W, __U, __A, __B);
}
__m512 test_mm512_maskz_andnot_ps (__mmask16 __U, __m512 __A, __m512 __B) {
// CHECK-LABEL: @test_mm512_maskz_andnot_ps
- // CHECK: @llvm.x86.avx512.mask.andn.ps.512
+ // CHECK: xor <16 x i32> %{{.*}}, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+ // CHECK: and <16 x i32> %{{.*}}, %{{.*}}
+ // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
return (__m512) _mm512_maskz_andnot_ps(__U, __A, __B);
}
Modified: cfe/trunk/test/CodeGen/avx512vldq-builtins.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512vldq-builtins.c?rev=280197&r1=280196&r2=280197&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/avx512vldq-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx512vldq-builtins.c Wed Aug 31 00:38:58 2016
@@ -43,193 +43,233 @@ __m128i test_mm_maskz_mullo_epi64 (__mma
__m256d test_mm256_mask_andnot_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
// CHECK-LABEL: @test_mm256_mask_andnot_pd
- // CHECK: @llvm.x86.avx512.mask.andn.pd.256
+ // CHECK: xor <4 x i64> %{{.*}}, <i64 -1, i64 -1, i64 -1, i64 -1>
+ // CHECK: and <4 x i64> %{{.*}}, %{{.*}}
+ // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
return (__m256d) _mm256_mask_andnot_pd ( __W, __U, __A, __B);
}
__m256d test_mm256_maskz_andnot_pd (__mmask8 __U, __m256d __A, __m256d __B) {
// CHECK-LABEL: @test_mm256_maskz_andnot_pd
- // CHECK: @llvm.x86.avx512.mask.andn.pd.256
+ // CHECK: xor <4 x i64> %{{.*}}, <i64 -1, i64 -1, i64 -1, i64 -1>
+ // CHECK: and <4 x i64> %{{.*}}, %{{.*}}
+ // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
return (__m256d) _mm256_maskz_andnot_pd (__U, __A, __B);
}
__m128d test_mm_mask_andnot_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
// CHECK-LABEL: @test_mm_mask_andnot_pd
- // CHECK: @llvm.x86.avx512.mask.andn.pd.128
+ // CHECK: xor <2 x i64> %{{.*}}, <i64 -1, i64 -1>
+ // CHECK: and <2 x i64> %{{.*}}, %{{.*}}
+ // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
return (__m128d) _mm_mask_andnot_pd ( __W, __U, __A, __B);
}
__m128d test_mm_maskz_andnot_pd (__mmask8 __U, __m128d __A, __m128d __B) {
// CHECK-LABEL: @test_mm_maskz_andnot_pd
- // CHECK: @llvm.x86.avx512.mask.andn.pd.128
+ // CHECK: xor <2 x i64> %{{.*}}, <i64 -1, i64 -1>
+ // CHECK: and <2 x i64> %{{.*}}, %{{.*}}
+ // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
return (__m128d) _mm_maskz_andnot_pd (__U, __A, __B);
}
__m256 test_mm256_mask_andnot_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
// CHECK-LABEL: @test_mm256_mask_andnot_ps
- // CHECK: @llvm.x86.avx512.mask.andn.ps.256
+ // CHECK: xor <8 x i32> %{{.*}}, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+ // CHECK: and <8 x i32> %{{.*}}, %{{.*}}
+ // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return (__m256) _mm256_mask_andnot_ps ( __W, __U, __A, __B);
}
__m256 test_mm256_maskz_andnot_ps (__mmask8 __U, __m256 __A, __m256 __B) {
// CHECK-LABEL: @test_mm256_maskz_andnot_ps
- // CHECK: @llvm.x86.avx512.mask.andn.ps.256
+ // CHECK: xor <8 x i32> %{{.*}}, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+ // CHECK: and <8 x i32> %{{.*}}, %{{.*}}
+ // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return (__m256) _mm256_maskz_andnot_ps (__U, __A, __B);
}
__m128 test_mm_mask_andnot_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
// CHECK-LABEL: @test_mm_mask_andnot_ps
- // CHECK: @llvm.x86.avx512.mask.andn.ps.128
+ // CHECK: xor <4 x i32> %{{.*}}, <i32 -1, i32 -1, i32 -1, i32 -1>
+ // CHECK: and <4 x i32> %{{.*}}, %{{.*}}
+ // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
return (__m128) _mm_mask_andnot_ps ( __W, __U, __A, __B);
}
__m128 test_mm_maskz_andnot_ps (__mmask8 __U, __m128 __A, __m128 __B) {
// CHECK-LABEL: @test_mm_maskz_andnot_ps
- // CHECK: @llvm.x86.avx512.mask.andn.ps.128
+ // CHECK: xor <4 x i32> %{{.*}}, <i32 -1, i32 -1, i32 -1, i32 -1>
+ // CHECK: and <4 x i32> %{{.*}}, %{{.*}}
+ // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
return (__m128) _mm_maskz_andnot_ps (__U, __A, __B);
}
__m256d test_mm256_mask_and_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
// CHECK-LABEL: @test_mm256_mask_and_pd
- // CHECK: @llvm.x86.avx512.mask.and.pd.256
+ // CHECK: and <4 x i64> %{{.*}}, %{{.*}}
+ // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
return (__m256d) _mm256_mask_and_pd ( __W, __U, __A, __B);
}
__m256d test_mm256_maskz_and_pd (__mmask8 __U, __m256d __A, __m256d __B) {
// CHECK-LABEL: @test_mm256_maskz_and_pd
- // CHECK: @llvm.x86.avx512.mask.and.pd.256
+ // CHECK: and <4 x i64> %{{.*}}, %{{.*}}
+ // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
return (__m256d) _mm256_maskz_and_pd (__U, __A, __B);
}
__m128d test_mm_mask_and_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
// CHECK-LABEL: @test_mm_mask_and_pd
- // CHECK: @llvm.x86.avx512.mask.and.pd.128
+ // CHECK: and <2 x i64> %{{.*}}, %{{.*}}
+ // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
return (__m128d) _mm_mask_and_pd ( __W, __U, __A, __B);
}
__m128d test_mm_maskz_and_pd (__mmask8 __U, __m128d __A, __m128d __B) {
// CHECK-LABEL: @test_mm_maskz_and_pd
- // CHECK: @llvm.x86.avx512.mask.and.pd.128
+ // CHECK: and <2 x i64> %{{.*}}, %{{.*}}
+ // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
return (__m128d) _mm_maskz_and_pd (__U, __A, __B);
}
__m256 test_mm256_mask_and_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
// CHECK-LABEL: @test_mm256_mask_and_ps
- // CHECK: @llvm.x86.avx512.mask.and.ps.256
+ // CHECK: and <8 x i32> %{{.*}}, %{{.*}}
+ // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return (__m256) _mm256_mask_and_ps ( __W, __U, __A, __B);
}
__m256 test_mm256_maskz_and_ps (__mmask8 __U, __m256 __A, __m256 __B) {
// CHECK-LABEL: @test_mm256_maskz_and_ps
- // CHECK: @llvm.x86.avx512.mask.and.ps.256
+ // CHECK: and <8 x i32> %{{.*}}, %{{.*}}
+ // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return (__m256) _mm256_maskz_and_ps (__U, __A, __B);
}
__m128 test_mm_mask_and_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
// CHECK-LABEL: @test_mm_mask_and_ps
- // CHECK: @llvm.x86.avx512.mask.and.ps.128
+ // CHECK: and <4 x i32> %{{.*}}, %{{.*}}
+ // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
return (__m128) _mm_mask_and_ps ( __W, __U, __A, __B);
}
__m128 test_mm_maskz_and_ps (__mmask8 __U, __m128 __A, __m128 __B) {
// CHECK-LABEL: @test_mm_maskz_and_ps
- // CHECK: @llvm.x86.avx512.mask.and.ps.128
+ // CHECK: and <4 x i32> %{{.*}}, %{{.*}}
+ // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
return (__m128) _mm_maskz_and_ps (__U, __A, __B);
}
__m256d test_mm256_mask_xor_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
// CHECK-LABEL: @test_mm256_mask_xor_pd
- // CHECK: @llvm.x86.avx512.mask.xor.pd.256
+ // CHECK: xor <4 x i64> %{{.*}}, %{{.*}}
+ // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
return (__m256d) _mm256_mask_xor_pd ( __W, __U, __A, __B);
}
__m256d test_mm256_maskz_xor_pd (__mmask8 __U, __m256d __A, __m256d __B) {
// CHECK-LABEL: @test_mm256_maskz_xor_pd
- // CHECK: @llvm.x86.avx512.mask.xor.pd.256
+ // CHECK: xor <4 x i64> %{{.*}}, %{{.*}}
+ // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
return (__m256d) _mm256_maskz_xor_pd (__U, __A, __B);
}
__m128d test_mm_mask_xor_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
// CHECK-LABEL: @test_mm_mask_xor_pd
- // CHECK: @llvm.x86.avx512.mask.xor.pd.128
+ // CHECK: xor <2 x i64> %{{.*}}, %{{.*}}
+ // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
return (__m128d) _mm_mask_xor_pd ( __W, __U, __A, __B);
}
__m128d test_mm_maskz_xor_pd (__mmask8 __U, __m128d __A, __m128d __B) {
// CHECK-LABEL: @test_mm_maskz_xor_pd
- // CHECK: @llvm.x86.avx512.mask.xor.pd.128
+ // CHECK: xor <2 x i64> %{{.*}}, %{{.*}}
+ // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
return (__m128d) _mm_maskz_xor_pd (__U, __A, __B);
}
__m256 test_mm256_mask_xor_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
// CHECK-LABEL: @test_mm256_mask_xor_ps
- // CHECK: @llvm.x86.avx512.mask.xor.ps.256
+ // CHECK: xor <8 x i32> %{{.*}}, %{{.*}}
+ // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return (__m256) _mm256_mask_xor_ps ( __W, __U, __A, __B);
}
__m256 test_mm256_maskz_xor_ps (__mmask8 __U, __m256 __A, __m256 __B) {
// CHECK-LABEL: @test_mm256_maskz_xor_ps
- // CHECK: @llvm.x86.avx512.mask.xor.ps.256
+ // CHECK: xor <8 x i32> %{{.*}}, %{{.*}}
+ // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return (__m256) _mm256_maskz_xor_ps (__U, __A, __B);
}
__m128 test_mm_mask_xor_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
// CHECK-LABEL: @test_mm_mask_xor_ps
- // CHECK: @llvm.x86.avx512.mask.xor.ps.128
- return (__m128) _mm_mask_xor_ps ( __W, __U, __A, __B);
+ // CHECK: xor <4 x i32> %{{.*}}, %{{.*}}
+ // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
+ return (__m128) _mm_mask_xor_ps ( __W, __U, __A, __B);
}
__m128 test_mm_maskz_xor_ps (__mmask8 __U, __m128 __A, __m128 __B) {
// CHECK-LABEL: @test_mm_maskz_xor_ps
- // CHECK: @llvm.x86.avx512.mask.xor.ps.128
+ // CHECK: xor <4 x i32> %{{.*}}, %{{.*}}
+ // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
return (__m128) _mm_maskz_xor_ps (__U, __A, __B);
}
__m256d test_mm256_mask_or_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
// CHECK-LABEL: @test_mm256_mask_or_pd
- // CHECK: @llvm.x86.avx512.mask.or.pd.256
+ // CHECK: or <4 x i64> %{{.*}}, %{{.*}}
+ // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
return (__m256d) _mm256_mask_or_pd ( __W, __U, __A, __B);
}
__m256d test_mm256_maskz_or_pd (__mmask8 __U, __m256d __A, __m256d __B) {
// CHECK-LABEL: @test_mm256_maskz_or_pd
- // CHECK: @llvm.x86.avx512.mask.or.pd.256
+ // CHECK: or <4 x i64> %{{.*}}, %{{.*}}
+ // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
return (__m256d) _mm256_maskz_or_pd (__U, __A, __B);
}
__m128d test_mm_mask_or_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
// CHECK-LABEL: @test_mm_mask_or_pd
- // CHECK: @llvm.x86.avx512.mask.or.pd.128
+ // CHECK: or <2 x i64> %{{.*}}, %{{.*}}
+ // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
return (__m128d) _mm_mask_or_pd ( __W, __U, __A, __B);
}
__m128d test_mm_maskz_or_pd (__mmask8 __U, __m128d __A, __m128d __B) {
// CHECK-LABEL: @test_mm_maskz_or_pd
- // CHECK: @llvm.x86.avx512.mask.or.pd.128
+ // CHECK: or <2 x i64> %{{.*}}, %{{.*}}
+ // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
return (__m128d) _mm_maskz_or_pd (__U, __A, __B);
}
__m256 test_mm256_mask_or_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
// CHECK-LABEL: @test_mm256_mask_or_ps
- // CHECK: @llvm.x86.avx512.mask.or.ps.256
+ // CHECK: or <8 x i32> %{{.*}}, %{{.*}}
+ // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return (__m256) _mm256_mask_or_ps ( __W, __U, __A, __B);
}
__m256 test_mm256_maskz_or_ps (__mmask8 __U, __m256 __A, __m256 __B) {
// CHECK-LABEL: @test_mm256_maskz_or_ps
- // CHECK: @llvm.x86.avx512.mask.or.ps.256
+ // CHECK: or <8 x i32> %{{.*}}, %{{.*}}
+ // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return (__m256) _mm256_maskz_or_ps (__U, __A, __B);
}
__m128 test_mm_mask_or_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
// CHECK-LABEL: @test_mm_mask_or_ps
- // CHECK: @llvm.x86.avx512.mask.or.ps.128
+ // CHECK: or <4 x i32> %{{.*}}, %{{.*}}
+ // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
return (__m128) _mm_mask_or_ps ( __W, __U, __A, __B);
}
__m128 test_mm_maskz_or_ps (__mmask8 __U, __m128 __A, __m128 __B) {
// CHECK-LABEL: @test_mm_maskz_or_ps
- // CHECK: @llvm.x86.avx512.mask.or.ps.128
+ // CHECK: or <4 x i32> %{{.*}}, %{{.*}}
+ // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
return (__m128) _mm_maskz_or_ps(__U, __A, __B);
}