r334330 - [X86] Fold masking into subvector extract builtins.
Craig Topper via cfe-commits
cfe-commits at lists.llvm.org
Fri Jun 8 14:50:07 PDT 2018
Author: ctopper
Date: Fri Jun 8 14:50:07 2018
New Revision: 334330
URL: http://llvm.org/viewvc/llvm-project?rev=334330&view=rev
Log:
[X86] Fold masking into subvector extract builtins.
I'm looking into making the select builtins require avx512f, avx512bw, or avx512vl since masking operations generally require those features.
The extract builtins are funny because the 512-bit versions return a 128 or 256 bit vector with masking even when avx512vl is not supported.
Modified:
cfe/trunk/include/clang/Basic/BuiltinsX86.def
cfe/trunk/lib/CodeGen/CGBuiltin.cpp
cfe/trunk/lib/Headers/avx512dqintrin.h
cfe/trunk/lib/Headers/avx512fintrin.h
cfe/trunk/lib/Headers/avx512vldqintrin.h
cfe/trunk/lib/Headers/avx512vlintrin.h
cfe/trunk/lib/Sema/SemaChecking.cpp
Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=334330&r1=334329&r2=334330&view=diff
==============================================================================
--- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Fri Jun 8 14:50:07 2018
@@ -947,8 +947,8 @@ TARGET_BUILTIN(__builtin_ia32_alignd128,
TARGET_BUILTIN(__builtin_ia32_alignd256, "V8iV8iV8iIi", "nc", "avx512vl")
TARGET_BUILTIN(__builtin_ia32_alignq128, "V2LLiV2LLiV2LLiIi", "nc", "avx512vl")
TARGET_BUILTIN(__builtin_ia32_alignq256, "V4LLiV4LLiV4LLiIi", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_extractf64x4, "V4dV8dIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_extractf32x4, "V4fV16fIi", "nc", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_extractf64x4_mask, "V4dV8dIiV4dUc", "nc", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_extractf32x4_mask, "V4fV16fIiV4fUc", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_vpdpbusd128, "V4iV4iV4iV4i", "nc", "avx512vl,avx512vnni")
TARGET_BUILTIN(__builtin_ia32_vpdpbusd256, "V8iV8iV8iV8i", "nc", "avx512vl,avx512vnni")
@@ -1672,16 +1672,16 @@ TARGET_BUILTIN(__builtin_ia32_pmovqw128_
TARGET_BUILTIN(__builtin_ia32_pmovqw128mem_mask, "vV8s*V2LLiUc", "n", "avx512vl")
TARGET_BUILTIN(__builtin_ia32_pmovqw256_mask, "V8sV4LLiV8sUc", "nc", "avx512vl")
TARGET_BUILTIN(__builtin_ia32_pmovqw256mem_mask, "vV8s*V4LLiUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_extractf32x8, "V8fV16fIi", "nc", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_extractf64x2_512, "V2dV8dIi", "nc", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_extracti32x8, "V8iV16iIi", "nc", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_extracti64x2_512, "V2LLiV8LLiIi", "nc", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_extracti32x4, "V4iV16iIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_extracti64x4, "V4LLiV8LLiIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_extractf64x2_256, "V2dV4dIi", "nc", "avx512dq,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_extracti64x2_256, "V2LLiV4LLiIi", "nc", "avx512dq,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_extractf32x4_256, "V4fV8fIi", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_extracti32x4_256, "V4iV8iIi", "nc", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_extractf32x8_mask, "V8fV16fIiV8fUc", "nc", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_extractf64x2_512_mask, "V2dV8dIiV2dUc", "nc", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_extracti32x8_mask, "V8iV16iIiV8iUc", "nc", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_extracti64x2_512_mask, "V2LLiV8LLiIiV2LLiUc", "nc", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_extracti32x4_mask, "V4iV16iIiV4iUc", "nc", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_extracti64x4_mask, "V4LLiV8LLiIiV4LLiUc", "nc", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_extractf64x2_256_mask, "V2dV4dIiV2dUc", "nc", "avx512dq,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_extracti64x2_256_mask, "V2LLiV4LLiIiV2LLiUc", "nc", "avx512dq,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_extractf32x4_256_mask, "V4fV8fIiV4fUc", "nc", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_extracti32x4_256_mask, "V4iV8iIiV4iUc", "nc", "avx512vl")
TARGET_BUILTIN(__builtin_ia32_insertf32x8, "V16fV16fV8fIi", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_insertf64x2_512, "V8dV8dV2dIi", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_inserti32x8, "V16iV16iV8iIi", "nc", "avx512dq")
Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=334330&r1=334329&r2=334330&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Fri Jun 8 14:50:07 2018
@@ -9239,18 +9239,18 @@ Value *CodeGenFunction::EmitX86BuiltinEx
case X86::BI__builtin_ia32_vextractf128_ps256:
case X86::BI__builtin_ia32_vextractf128_si256:
case X86::BI__builtin_ia32_extract128i256:
- case X86::BI__builtin_ia32_extractf64x4:
- case X86::BI__builtin_ia32_extractf32x4:
- case X86::BI__builtin_ia32_extracti64x4:
- case X86::BI__builtin_ia32_extracti32x4:
- case X86::BI__builtin_ia32_extractf32x8:
- case X86::BI__builtin_ia32_extracti32x8:
- case X86::BI__builtin_ia32_extractf32x4_256:
- case X86::BI__builtin_ia32_extracti32x4_256:
- case X86::BI__builtin_ia32_extractf64x2_256:
- case X86::BI__builtin_ia32_extracti64x2_256:
- case X86::BI__builtin_ia32_extractf64x2_512:
- case X86::BI__builtin_ia32_extracti64x2_512: {
+ case X86::BI__builtin_ia32_extractf64x4_mask:
+ case X86::BI__builtin_ia32_extractf32x4_mask:
+ case X86::BI__builtin_ia32_extracti64x4_mask:
+ case X86::BI__builtin_ia32_extracti32x4_mask:
+ case X86::BI__builtin_ia32_extractf32x8_mask:
+ case X86::BI__builtin_ia32_extracti32x8_mask:
+ case X86::BI__builtin_ia32_extractf32x4_256_mask:
+ case X86::BI__builtin_ia32_extracti32x4_256_mask:
+ case X86::BI__builtin_ia32_extractf64x2_256_mask:
+ case X86::BI__builtin_ia32_extracti64x2_256_mask:
+ case X86::BI__builtin_ia32_extractf64x2_512_mask:
+ case X86::BI__builtin_ia32_extracti64x2_512_mask: {
llvm::Type *DstTy = ConvertType(E->getType());
unsigned NumElts = DstTy->getVectorNumElements();
unsigned Index = cast<ConstantInt>(Ops[1])->getZExtValue() * NumElts;
@@ -9259,10 +9259,15 @@ Value *CodeGenFunction::EmitX86BuiltinEx
for (unsigned i = 0; i != NumElts; ++i)
Indices[i] = i + Index;
- return Builder.CreateShuffleVector(Ops[0],
- UndefValue::get(Ops[0]->getType()),
- makeArrayRef(Indices, NumElts),
- "extract");
+ Value *Res = Builder.CreateShuffleVector(Ops[0],
+ UndefValue::get(Ops[0]->getType()),
+ makeArrayRef(Indices, NumElts),
+ "extract");
+
+ if (Ops.size() == 4)
+ Res = EmitX86Select(*this, Ops[3], Res, Ops[2]);
+
+ return Res;
}
case X86::BI__builtin_ia32_vinsertf128_pd256:
case X86::BI__builtin_ia32_vinsertf128_ps256:
Modified: cfe/trunk/lib/Headers/avx512dqintrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512dqintrin.h?rev=334330&r1=334329&r2=334330&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/avx512dqintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512dqintrin.h Fri Jun 8 14:50:07 2018
@@ -1103,56 +1103,70 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __
}
#define _mm512_extractf32x8_ps(A, imm) \
- (__m256)__builtin_ia32_extractf32x8((__v16sf)(__m512)(A), (int)(imm))
+ (__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \
+ (__v8sf)_mm256_undefined_ps(), \
+ (__mmask8)-1)
#define _mm512_mask_extractf32x8_ps(W, U, A, imm) \
- (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
- (__v8sf)_mm512_extractf32x8_ps((A), (imm)), \
- (__v8sf)(__m256)(W))
+ (__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \
+ (__v8sf)(__m256)(W), \
+ (__mmask8)(U))
#define _mm512_maskz_extractf32x8_ps(U, A, imm) \
- (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
- (__v8sf)_mm512_extractf32x8_ps((A), (imm)), \
- (__v8sf)_mm256_setzero_ps())
+ (__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \
+ (__v8sf)_mm256_setzero_ps(), \
+ (__mmask8)(U))
#define _mm512_extractf64x2_pd(A, imm) \
- (__m128d)__builtin_ia32_extractf64x2_512((__v8df)(__m512d)(A), (int)(imm))
+ (__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \
+ (int)(imm), \
+ (__v2df)_mm_undefined_pd(), \
+ (__mmask8)-1)
#define _mm512_mask_extractf64x2_pd(W, U, A, imm) \
- (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
- (__v2df)_mm512_extractf64x2_pd((A), (imm)), \
- (__v2df)(__m128d)(W))
+ (__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \
+ (int)(imm), \
+ (__v2df)(__m128d)(W), \
+ (__mmask8)(U))
#define _mm512_maskz_extractf64x2_pd(U, A, imm) \
- (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
- (__v2df)_mm512_extractf64x2_pd((A), (imm)), \
- (__v2df)_mm_setzero_pd())
+ (__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \
+ (int)(imm), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)(U))
#define _mm512_extracti32x8_epi32(A, imm) \
- (__m256i)__builtin_ia32_extracti32x8((__v16si)(__m512i)(A), (int)(imm))
+ (__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \
+ (__v8si)_mm256_undefined_si256(), \
+ (__mmask8)-1)
#define _mm512_mask_extracti32x8_epi32(W, U, A, imm) \
- (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
- (__v8si)_mm512_extracti32x8_epi32((A), (imm)), \
- (__v8si)(__m256i)(W))
+ (__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \
+ (__v8si)(__m256i)(W), \
+ (__mmask8)(U))
#define _mm512_maskz_extracti32x8_epi32(U, A, imm) \
- (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
- (__v8si)_mm512_extracti32x8_epi32((A), (imm)), \
- (__v8si)_mm256_setzero_si256())
+ (__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \
+ (__v8si)_mm256_setzero_si256(), \
+ (__mmask8)(U))
#define _mm512_extracti64x2_epi64(A, imm) \
- (__m128i)__builtin_ia32_extracti64x2_512((__v8di)(__m512i)(A), (int)(imm))
+ (__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \
+ (int)(imm), \
+ (__v2di)_mm_undefined_si128(), \
+ (__mmask8)-1)
#define _mm512_mask_extracti64x2_epi64(W, U, A, imm) \
- (__m128d)__builtin_ia32_selectq_128((__mmask8)(U), \
- (__v2di)_mm512_extracti64x2_epi64((A), (imm)), \
- (__v2di)(__m128i)(W))
+ (__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \
+ (int)(imm), \
+ (__v2di)(__m128i)(W), \
+ (__mmask8)(U))
#define _mm512_maskz_extracti64x2_epi64(U, A, imm) \
- (__m128d)__builtin_ia32_selectq_128((__mmask8)(U), \
- (__v2di)_mm512_extracti64x2_epi64((A), (imm)), \
- (__v2di)_mm_setzero_si128())
+ (__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \
+ (int)(imm), \
+ (__v2di)_mm_setzero_si128(), \
+ (__mmask8)(U))
#define _mm512_insertf32x8(A, B, imm) \
(__m512)__builtin_ia32_insertf32x8((__v16sf)(__m512)(A), \
Modified: cfe/trunk/lib/Headers/avx512fintrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=334330&r1=334329&r2=334330&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/avx512fintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512fintrin.h Fri Jun 8 14:50:07 2018
@@ -3494,30 +3494,34 @@ _mm512_maskz_permutex2var_epi64(__mmask8
/* Vector Extract */
#define _mm512_extractf64x4_pd(A, I) \
- (__m256d)__builtin_ia32_extractf64x4((__v8df)(__m512d)(A), (int)(I))
+ (__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(I), \
+ (__v4df)_mm256_undefined_pd(), \
+ (__mmask8)-1)
#define _mm512_mask_extractf64x4_pd(W, U, A, imm) \
- (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
- (__v4df)_mm512_extractf64x4_pd((A), (imm)), \
- (__v4df)(__m256d)(W))
+ (__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \
+ (__v4df)(__m256d)(W), \
+ (__mmask8)(U))
#define _mm512_maskz_extractf64x4_pd(U, A, imm) \
- (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
- (__v4df)_mm512_extractf64x4_pd((A), (imm)), \
- (__v4df)_mm256_setzero_pd())
+ (__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \
+ (__v4df)_mm256_setzero_pd(), \
+ (__mmask8)(U))
#define _mm512_extractf32x4_ps(A, I) \
- (__m128)__builtin_ia32_extractf32x4((__v16sf)(__m512)(A), (int)(I))
+ (__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(I), \
+ (__v4sf)_mm_undefined_ps(), \
+ (__mmask8)-1)
#define _mm512_mask_extractf32x4_ps(W, U, A, imm) \
- (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
- (__v4sf)_mm512_extractf32x4_ps((A), (imm)), \
- (__v4sf)(__m128)(W))
+ (__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \
+ (__v4sf)(__m128)(W), \
+ (__mmask8)(U))
#define _mm512_maskz_extractf32x4_ps(U, A, imm) \
- (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
- (__v4sf)_mm512_extractf32x4_ps((A), (imm)), \
- (__v4sf)_mm_setzero_ps())
+ (__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)(U))
/* Vector Blend */
@@ -7534,30 +7538,34 @@ _mm512_mask_cvtepi64_storeu_epi16 (void
}
#define _mm512_extracti32x4_epi32(A, imm) \
- (__m128i)__builtin_ia32_extracti32x4((__v16si)(__m512i)(A), (int)(imm))
+ (__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
+ (__v4si)_mm_undefined_si128(), \
+ (__mmask8)-1)
#define _mm512_mask_extracti32x4_epi32(W, U, A, imm) \
- (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
- (__v4si)_mm512_extracti32x4_epi32((A), (imm)), \
- (__v4si)(__m128i)(W))
+ (__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
+ (__v4si)(__m128i)(W), \
+ (__mmask8)(U))
#define _mm512_maskz_extracti32x4_epi32(U, A, imm) \
- (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
- (__v4si)_mm512_extracti32x4_epi32((A), (imm)), \
- (__v4si)_mm_setzero_si128())
+ (__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
+ (__v4si)_mm_setzero_si128(), \
+ (__mmask8)(U))
#define _mm512_extracti64x4_epi64(A, imm) \
- (__m256i)__builtin_ia32_extracti64x4((__v8di)(__m512i)(A), (int)(imm))
+ (__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
+ (__v4di)_mm256_undefined_si256(), \
+ (__mmask8)-1)
#define _mm512_mask_extracti64x4_epi64(W, U, A, imm) \
- (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
- (__v4di)_mm512_extracti64x4_epi64((A), (imm)), \
- (__v4di)(__m256i)(W))
+ (__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
+ (__v4di)(__m256i)(W), \
+ (__mmask8)(U))
#define _mm512_maskz_extracti64x4_epi64(U, A, imm) \
- (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
- (__v4di)_mm512_extracti64x4_epi64((A), (imm)), \
- (__v4di)_mm256_setzero_si256())
+ (__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
+ (__v4di)_mm256_setzero_si256(), \
+ (__mmask8)(U))
#define _mm512_insertf64x4(A, B, imm) \
(__m512d)__builtin_ia32_insertf64x4((__v8df)(__m512d)(A), \
Modified: cfe/trunk/lib/Headers/avx512vldqintrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512vldqintrin.h?rev=334330&r1=334329&r2=334330&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/avx512vldqintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512vldqintrin.h Fri Jun 8 14:50:07 2018
@@ -1083,30 +1083,40 @@ _mm256_maskz_broadcast_i64x2 (__mmask8 _
}
#define _mm256_extractf64x2_pd(A, imm) \
- (__m128d)__builtin_ia32_extractf64x2_256((__v4df)(__m256d)(A), (int)(imm))
+ (__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \
+ (int)(imm), \
+ (__v2df)_mm_undefined_pd(), \
+ (__mmask8)-1)
#define _mm256_mask_extractf64x2_pd(W, U, A, imm) \
- (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
- (__v2df)_mm256_extractf64x2_pd((A), (imm)), \
- (__v2df)(__m128d)(W))
+ (__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \
+ (int)(imm), \
+ (__v2df)(__m128d)(W), \
+ (__mmask8)(U))
#define _mm256_maskz_extractf64x2_pd(U, A, imm) \
- (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
- (__v2df)_mm256_extractf64x2_pd((A), (imm)), \
- (__v2df)_mm_setzero_pd())
+ (__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \
+ (int)(imm), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)(U))
#define _mm256_extracti64x2_epi64(A, imm) \
- (__m128i)__builtin_ia32_extracti64x2_256((__v4di)(__m256i)(A), (int)(imm))
+ (__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \
+ (int)(imm), \
+ (__v2di)_mm_undefined_si128(), \
+ (__mmask8)-1)
#define _mm256_mask_extracti64x2_epi64(W, U, A, imm) \
- (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
- (__v2di)_mm256_extracti64x2_epi64((A), (imm)), \
- (__v2di)(__m128i)(W))
+ (__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \
+ (int)(imm), \
+ (__v2di)(__m128i)(W), \
+ (__mmask8)(U))
#define _mm256_maskz_extracti64x2_epi64(U, A, imm) \
- (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
- (__v2di)_mm256_extracti64x2_epi64((A), (imm)), \
- (__v2di)_mm_setzero_si128())
+ (__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \
+ (int)(imm), \
+ (__v2di)_mm_setzero_si128(), \
+ (__mmask8)(U))
#define _mm256_insertf64x2(A, B, imm) \
(__m256d)__builtin_ia32_insertf64x2_256((__v4df)(__m256d)(A), \
Modified: cfe/trunk/lib/Headers/avx512vlintrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512vlintrin.h?rev=334330&r1=334329&r2=334330&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/avx512vlintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512vlintrin.h Fri Jun 8 14:50:07 2018
@@ -7699,30 +7699,40 @@ _mm256_mask_cvtepi64_storeu_epi16 (void
}
#define _mm256_extractf32x4_ps(A, imm) \
- (__m128)__builtin_ia32_extractf32x4_256((__v8sf)(__m256)(A), (int)(imm))
+ (__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \
+ (int)(imm), \
+ (__v4sf)_mm_undefined_ps(), \
+ (__mmask8)-1)
#define _mm256_mask_extractf32x4_ps(W, U, A, imm) \
- (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
- (__v4sf)_mm256_extractf32x4_ps((A), (imm)), \
- (__v4sf)(__m128)(W))
+ (__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \
+ (int)(imm), \
+ (__v4sf)(__m128)(W), \
+ (__mmask8)(U))
#define _mm256_maskz_extractf32x4_ps(U, A, imm) \
- (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
- (__v4sf)_mm256_extractf32x4_ps((A), (imm)), \
- (__v4sf)_mm_setzero_ps())
+ (__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \
+ (int)(imm), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)(U))
#define _mm256_extracti32x4_epi32(A, imm) \
- (__m128i)__builtin_ia32_extracti32x4_256((__v8si)(__m256i)(A), (int)(imm))
+ (__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \
+ (int)(imm), \
+ (__v4si)_mm_undefined_si128(), \
+ (__mmask8)-1)
#define _mm256_mask_extracti32x4_epi32(W, U, A, imm) \
- (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
- (__v4si)_mm256_extracti32x4_epi32((A), (imm)), \
- (__v4si)(__m128i)(W))
+ (__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \
+ (int)(imm), \
+ (__v4si)(__m128i)(W), \
+ (__mmask8)(U))
#define _mm256_maskz_extracti32x4_epi32(U, A, imm) \
- (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
- (__v4si)_mm256_extracti32x4_epi32((A), (imm)), \
- (__v4si)_mm_setzero_si128())
+ (__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \
+ (int)(imm), \
+ (__v4si)_mm_setzero_si128(), \
+ (__mmask8)(U))
#define _mm256_insertf32x4(A, B, imm) \
(__m256)__builtin_ia32_insertf32x4_256((__v8sf)(__m256)(A), \
Modified: cfe/trunk/lib/Sema/SemaChecking.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaChecking.cpp?rev=334330&r1=334329&r2=334330&view=diff
==============================================================================
--- cfe/trunk/lib/Sema/SemaChecking.cpp (original)
+++ cfe/trunk/lib/Sema/SemaChecking.cpp Fri Jun 8 14:50:07 2018
@@ -2611,14 +2611,14 @@ bool Sema::CheckX86BuiltinFunctionCall(u
case X86::BI__builtin_ia32_vextractf128_ps256:
case X86::BI__builtin_ia32_vextractf128_si256:
case X86::BI__builtin_ia32_extract128i256:
- case X86::BI__builtin_ia32_extractf64x4:
- case X86::BI__builtin_ia32_extracti64x4:
- case X86::BI__builtin_ia32_extractf32x8:
- case X86::BI__builtin_ia32_extracti32x8:
- case X86::BI__builtin_ia32_extractf64x2_256:
- case X86::BI__builtin_ia32_extracti64x2_256:
- case X86::BI__builtin_ia32_extractf32x4_256:
- case X86::BI__builtin_ia32_extracti32x4_256:
+ case X86::BI__builtin_ia32_extractf64x4_mask:
+ case X86::BI__builtin_ia32_extracti64x4_mask:
+ case X86::BI__builtin_ia32_extractf32x8_mask:
+ case X86::BI__builtin_ia32_extracti32x8_mask:
+ case X86::BI__builtin_ia32_extractf64x2_256_mask:
+ case X86::BI__builtin_ia32_extracti64x2_256_mask:
+ case X86::BI__builtin_ia32_extractf32x4_256_mask:
+ case X86::BI__builtin_ia32_extracti32x4_256_mask:
i = 1; l = 0; u = 1;
break;
case X86::BI__builtin_ia32_vec_set_v2di:
@@ -2641,10 +2641,10 @@ bool Sema::CheckX86BuiltinFunctionCall(u
case X86::BI__builtin_ia32_vec_ext_v4si:
case X86::BI__builtin_ia32_vec_ext_v4sf:
case X86::BI__builtin_ia32_vec_ext_v4di:
- case X86::BI__builtin_ia32_extractf32x4:
- case X86::BI__builtin_ia32_extracti32x4:
- case X86::BI__builtin_ia32_extractf64x2_512:
- case X86::BI__builtin_ia32_extracti64x2_512:
+ case X86::BI__builtin_ia32_extractf32x4_mask:
+ case X86::BI__builtin_ia32_extracti32x4_mask:
+ case X86::BI__builtin_ia32_extractf64x2_512_mask:
+ case X86::BI__builtin_ia32_extracti64x2_512_mask:
i = 1; l = 0; u = 3;
break;
case X86::BI_mm_prefetch:
More information about the cfe-commits
mailing list