r276417 - [X86][AVX] Added support for lowering to VBROADCASTF128/VBROADCASTI128 with generic IR
Simon Pilgrim via cfe-commits
cfe-commits at lists.llvm.org
Fri Jul 22 06:58:56 PDT 2016
Author: rksimon
Date: Fri Jul 22 08:58:56 2016
New Revision: 276417
URL: http://llvm.org/viewvc/llvm-project?rev=276417&view=rev
Log:
[X86][AVX] Added support for lowering to VBROADCASTF128/VBROADCASTI128 with generic IR
As discussed on D22460, I've updated the vbroadcastf128 pd256/ps256 builtins to map directly to generic IR - load+splat a 128-bit vector to both lanes of a 256-bit vector.
Fix for PR28657.
Modified:
cfe/trunk/lib/CodeGen/CGBuiltin.cpp
cfe/trunk/test/CodeGen/avx-builtins.c
Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=276417&r1=276416&r2=276417&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Fri Jul 22 08:58:56 2016
@@ -6619,6 +6619,26 @@ static Value *EmitX86MaskedLoad(CodeGenF
return CGF.Builder.CreateMaskedLoad(Ops[0], Align, MaskVec, Ops[1]);
}
+static Value *EmitX86SubVectorBroadcast(CodeGenFunction &CGF,
+ SmallVectorImpl<Value *> &Ops,
+ llvm::Type *DstTy,
+ unsigned SrcSizeInBits,
+ unsigned Align) {
+ // Load the subvector.
+ Ops[0] = CGF.Builder.CreateAlignedLoad(Ops[0], Align);
+
+ // Create broadcast mask.
+ unsigned NumDstElts = DstTy->getVectorNumElements();
+ unsigned NumSrcElts = SrcSizeInBits / DstTy->getScalarSizeInBits();
+
+ SmallVector<uint32_t, 8> Mask;
+ for (unsigned i = 0; i != NumDstElts; i += NumSrcElts)
+ for (unsigned j = 0; j != NumSrcElts; ++j)
+ Mask.push_back(j);
+
+ return CGF.Builder.CreateShuffleVector(Ops[0], Ops[0], Mask, "subvecbcst");
+}
+
static Value *EmitX86Select(CodeGenFunction &CGF,
Value *Mask, Value *Op0, Value *Op1) {
@@ -6995,6 +7015,13 @@ Value *CodeGenFunction::EmitX86BuiltinEx
getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity();
return EmitX86MaskedLoad(*this, Ops, Align);
}
+
+ case X86::BI__builtin_ia32_vbroadcastf128_pd256:
+ case X86::BI__builtin_ia32_vbroadcastf128_ps256: {
+ llvm::Type *DstTy = ConvertType(E->getType());
+ return EmitX86SubVectorBroadcast(*this, Ops, DstTy, 128, 16);
+ }
+
case X86::BI__builtin_ia32_storehps:
case X86::BI__builtin_ia32_storelps: {
llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty);
Modified: cfe/trunk/test/CodeGen/avx-builtins.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx-builtins.c?rev=276417&r1=276416&r2=276417&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/avx-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx-builtins.c Fri Jul 22 08:58:56 2016
@@ -84,13 +84,15 @@ __m256 test_mm256_blendv_ps(__m256 V1, _
__m256d test_mm256_broadcast_pd(__m128d* A) {
// CHECK-LABEL: test_mm256_broadcast_pd
- // CHECK: call <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8* %{{.*}})
+ // CHECK: load <2 x double>, <2 x double>* %{{.*}}, align 16
+ // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
return _mm256_broadcast_pd(A);
}
__m256 test_mm256_broadcast_ps(__m128* A) {
// CHECK-LABEL: test_mm256_broadcast_ps
- // CHECK: call <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8* %{{.*}})
+ // CHECK: load <4 x float>, <4 x float>* %{{.*}}, align 16
+ // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
return _mm256_broadcast_ps(A);
}
More information about the cfe-commits
mailing list