r333853 - [X86] Replace __builtin_ia32_vbroadcastf128_pd256 and __builtin_ia32_vbroadcastf128_ps256 with an unaligned load intrinsics and a __builtin_shufflevector call.
Craig Topper via cfe-commits
cfe-commits at lists.llvm.org
Sun Jun 3 12:42:59 PDT 2018
Author: ctopper
Date: Sun Jun 3 12:42:59 2018
New Revision: 333853
URL: http://llvm.org/viewvc/llvm-project?rev=333853&view=rev
Log:
[X86] Replace __builtin_ia32_vbroadcastf128_pd256 and __builtin_ia32_vbroadcastf128_ps256 with an unaligned load intrinsics and a __builtin_shufflevector call.
Modified:
cfe/trunk/include/clang/Basic/BuiltinsX86.def
cfe/trunk/lib/CodeGen/CGBuiltin.cpp
cfe/trunk/lib/Headers/avxintrin.h
cfe/trunk/test/CodeGen/builtins-x86.c
Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=333853&r1=333852&r2=333853&view=diff
==============================================================================
--- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Sun Jun 3 12:42:59 2018
@@ -510,8 +510,6 @@ TARGET_BUILTIN(__builtin_ia32_movmskpd25
TARGET_BUILTIN(__builtin_ia32_movmskps256, "iV8f", "nc", "avx")
TARGET_BUILTIN(__builtin_ia32_vzeroall, "v", "n", "avx")
TARGET_BUILTIN(__builtin_ia32_vzeroupper, "v", "n", "avx")
-TARGET_BUILTIN(__builtin_ia32_vbroadcastf128_pd256, "V4dV2dC*", "n", "avx")
-TARGET_BUILTIN(__builtin_ia32_vbroadcastf128_ps256, "V8fV4fC*", "n", "avx")
TARGET_BUILTIN(__builtin_ia32_lddqu256, "V32ccC*", "n", "avx")
TARGET_BUILTIN(__builtin_ia32_maskloadpd, "V2dV2dC*V2LLi", "n", "avx")
TARGET_BUILTIN(__builtin_ia32_maskloadps, "V4fV4fC*V4i", "n", "avx")
Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=333853&r1=333852&r2=333853&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Sun Jun 3 12:42:59 2018
@@ -8300,26 +8300,6 @@ static Value *EmitX86MaskLogic(CodeGenFu
CGF.Builder.getIntNTy(std::max(NumElts, 8U)));
}
-static Value *EmitX86SubVectorBroadcast(CodeGenFunction &CGF,
- ArrayRef<Value *> Ops,
- llvm::Type *DstTy,
- unsigned SrcSizeInBits,
- unsigned Align) {
- // Load the subvector.
- Value *SubVec = CGF.Builder.CreateAlignedLoad(Ops[0], Align);
-
- // Create broadcast mask.
- unsigned NumDstElts = DstTy->getVectorNumElements();
- unsigned NumSrcElts = SrcSizeInBits / DstTy->getScalarSizeInBits();
-
- SmallVector<uint32_t, 8> Mask;
- for (unsigned i = 0; i != NumDstElts; i += NumSrcElts)
- for (unsigned j = 0; j != NumSrcElts; ++j)
- Mask.push_back(j);
-
- return CGF.Builder.CreateShuffleVector(SubVec, SubVec, Mask, "subvecbcst");
-}
-
static Value *EmitX86Select(CodeGenFunction &CGF,
Value *Mask, Value *Op0, Value *Op1) {
@@ -8959,12 +8939,6 @@ Value *CodeGenFunction::EmitX86BuiltinEx
case X86::BI__builtin_ia32_movdqa64load512_mask:
return EmitX86MaskedLoad(*this, Ops, 64);
- case X86::BI__builtin_ia32_vbroadcastf128_pd256:
- case X86::BI__builtin_ia32_vbroadcastf128_ps256: {
- llvm::Type *DstTy = ConvertType(E->getType());
- return EmitX86SubVectorBroadcast(*this, Ops, DstTy, 128, 1);
- }
-
case X86::BI__builtin_ia32_storehps:
case X86::BI__builtin_ia32_storelps: {
llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty);
Modified: cfe/trunk/lib/Headers/avxintrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avxintrin.h?rev=333853&r1=333852&r2=333853&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/avxintrin.h (original)
+++ cfe/trunk/lib/Headers/avxintrin.h Sun Jun 3 12:42:59 2018
@@ -3111,7 +3111,9 @@ _mm256_broadcast_ss(float const *__a)
static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_broadcast_pd(__m128d const *__a)
{
- return (__m256d)__builtin_ia32_vbroadcastf128_pd256((__v2df const *)__a);
+ __m128d __b = _mm_loadu_pd((const double *)__a);
+ return (__m256d)__builtin_shufflevector((__v2df)__b, (__v2df)__b,
+ 0, 1, 0, 1);
}
/// Loads the data from a 128-bit vector of [4 x float] from the
@@ -3129,7 +3131,9 @@ _mm256_broadcast_pd(__m128d const *__a)
static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_broadcast_ps(__m128 const *__a)
{
- return (__m256)__builtin_ia32_vbroadcastf128_ps256((__v4sf const *)__a);
+ __m128 __b = _mm_loadu_ps((const float *)__a);
+ return (__m256)__builtin_shufflevector((__v4sf)__b, (__v4sf)__b,
+ 0, 1, 2, 3, 0, 1, 2, 3);
}
/* SIMD load ops */
Modified: cfe/trunk/test/CodeGen/builtins-x86.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/builtins-x86.c?rev=333853&r1=333852&r2=333853&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/builtins-x86.c (original)
+++ cfe/trunk/test/CodeGen/builtins-x86.c Sun Jun 3 12:42:59 2018
@@ -466,8 +466,6 @@ void f0() {
tmp_i = __builtin_ia32_movmskps256(tmp_V8f);
__builtin_ia32_vzeroall();
__builtin_ia32_vzeroupper();
- tmp_V4d = __builtin_ia32_vbroadcastf128_pd256(tmp_V2dCp);
- tmp_V8f = __builtin_ia32_vbroadcastf128_ps256(tmp_V4fCp);
tmp_V32c = __builtin_ia32_lddqu256(tmp_cCp);
tmp_V2d = __builtin_ia32_maskloadpd(tmp_V2dCp, tmp_V2LLi);
tmp_V4f = __builtin_ia32_maskloadps(tmp_V4fCp, tmp_V4i);
More information about the cfe-commits
mailing list