[llvm-commits] [llvm] r129472 - in /llvm/trunk: lib/Transforms/Scalar/ScalarReplAggregates.cpp test/Transforms/ScalarRepl/vector_promote.ll

Wed Apr 13 20:58:29 PDT 2011

Hi,

That sounds like a good idea.  I'll do that.

  -- Mon Ping

On Apr 13, 2011, at 5:18 PM, Eli Friedman wrote:

> On Wed, Apr 13, 2011 at 2:40 PM, Mon P Wang <wangmp at apple.com> wrote:
>> Author: wangmp
>> Date: Wed Apr 13 16:40:02 2011
>> New Revision: 129472
>> 
>> URL: http://llvm.org/viewvc/llvm-project?rev=129472&view=rev
>> Log:
>> Vectors with different numbers of elements of the same element type can have
>> the same allocation size but different primitive sizes (e.g., <3 x i32> and
>> <4 x i32>).  When ScalarRepl promotes them, it can't use a bit cast but
>> should use a shuffle vector instead.
>> 
>> Modified:
>>    llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp
>>    llvm/trunk/test/Transforms/ScalarRepl/vector_promote.ll
>> 
>> Modified: llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp?rev=129472&r1=129471&r2=129472&view=diff
>> ==============================================================================
>> --- llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp (original)
>> +++ llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp Wed Apr 13 16:40:02 2011
>> @@ -690,15 +690,45 @@
>>  ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType,
>>                            uint64_t Offset, IRBuilder<> &Builder) {
>>   // If the load is of the whole new alloca, no conversion is needed.
>> -  if (FromVal->getType() == ToType && Offset == 0)
>> +  const Type *FromType = FromVal->getType();
>> +  if (FromType == ToType && Offset == 0)
>>     return FromVal;
>> 
>>   // If the result alloca is a vector type, this is either an element
>>   // access or a bitcast to another vector type of the same size.
>> -  if (const VectorType *VTy = dyn_cast<VectorType>(FromVal->getType())) {
>> +  if (const VectorType *VTy = dyn_cast<VectorType>(FromType)) {
>>     unsigned ToTypeSize = TD.getTypeAllocSize(ToType);
>> -    if (ToTypeSize == AllocaSize)
>> -      return Builder.CreateBitCast(FromVal, ToType, "tmp");
>> +    if (ToTypeSize == AllocaSize) {
>> +      if (FromType->getPrimitiveSizeInBits() ==
>> +          ToType->getPrimitiveSizeInBits())
>> +        return Builder.CreateBitCast(FromVal, ToType, "tmp");
>> +      else {
>> +        // Vectors with the same element type can have the same allocation
>> +        // size but different primitive sizes (e.g., <3 x i32> and <4 x i32>)
>> +        // In this case, use a shuffle vector instead of a bit cast.
>> +        const VectorType *ToVTy = dyn_cast<VectorType>(ToType);
>> +        assert(ToVTy && (ToVTy->getElementType() == VTy->getElementType()) &&
>> +               "Vectors must have the same element type");
>> +        LLVMContext &Context = FromVal->getContext();
>> +        Value *UnV = UndefValue::get(FromType);
>> +        unsigned numEltsFrom = VTy->getNumElements();
>> +        unsigned numEltsTo = ToVTy->getNumElements();
>> +
>> +        SmallVector<Constant*, 3> Args;
>> +        unsigned minNumElts = std::min(numEltsFrom, numEltsTo);
>> +        unsigned i;
>> +        for (i=0; i != minNumElts; ++i)
>> +          Args.push_back(ConstantInt::get(Type::getInt32Ty(Context), i));
>> +
>> +        if (i < numEltsTo) {
>> +          Constant* UnC = UndefValue::get(Type::getInt32Ty(Context));
>> +          for (; i != numEltsTo; ++i)
>> +            Args.push_back(UnC);
>> +        }
>> +        Constant *Mask = ConstantVector::get(Args);
>> +        return Builder.CreateShuffleVector(FromVal, UnV, Mask, "tmpV");
>> +      }
>> +    }
>> 
>>     if (ToType->isVectorTy()) {
>>       assert(isPowerOf2_64(AllocaSize / ToTypeSize) &&
>> @@ -837,8 +867,36 @@
>> 
>>     // Changing the whole vector with memset or with an access of a different
>>     // vector type?
>> -    if (ValSize == VecSize)
>> -      return Builder.CreateBitCast(SV, AllocaType, "tmp");
>> +    if (ValSize == VecSize) {
>> +      if (VTy->getPrimitiveSizeInBits() ==
>> +          SV->getType()->getPrimitiveSizeInBits())
>> +        return Builder.CreateBitCast(SV, AllocaType, "tmp");
>> +      else {
>> +        // Vectors with the same element type can have the same allocation
>> +        // size but different primitive sizes (e.g., <3 x i32> and <4 x i32>)
>> +        // In this case, use a shuffle vector instead of a bit cast.
>> +        const VectorType *SVVTy = dyn_cast<VectorType>(SV->getType());
>> +        assert(SVVTy && (SVVTy->getElementType() == VTy->getElementType()) &&
>> +               "Vectors must have the same element type");
>> +        Value *UnV = UndefValue::get(SVVTy);
>> +        unsigned numEltsFrom = SVVTy->getNumElements();
>> +        unsigned numEltsTo = VTy->getNumElements();
>> +
>> +        SmallVector<Constant*, 3> Args;
>> +        unsigned minNumElts = std::min(numEltsFrom, numEltsTo);
>> +        unsigned i;
>> +        for (i=0; i != minNumElts; ++i)
>> +          Args.push_back(ConstantInt::get(Type::getInt32Ty(Context), i));
>> +
>> +        if (i < numEltsTo) {
>> +          Constant* UnC = UndefValue::get(Type::getInt32Ty(Context));
>> +          for (; i != numEltsTo; ++i)
>> +            Args.push_back(UnC);
>> +        }
>> +        Constant *Mask = ConstantVector::get(Args);
>> +        return Builder.CreateShuffleVector(SV, UnV, Mask, "tmpV");
>> +      }
>> +    }
>> 
>>     if (SV->getType()->isVectorTy() && isPowerOf2_64(VecSize / ValSize)) {
>>       assert(Offset == 0 && "Can't insert a value of a smaller vector type at "
> 
> Can you refactor the duplicated code into a helper routine?
> 
> -Eli
> 
>> Modified: llvm/trunk/test/Transforms/ScalarRepl/vector_promote.ll
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ScalarRepl/vector_promote.ll?rev=129472&r1=129471&r2=129472&view=diff
>> ==============================================================================
>> --- llvm/trunk/test/Transforms/ScalarRepl/vector_promote.ll (original)
>> +++ llvm/trunk/test/Transforms/ScalarRepl/vector_promote.ll Wed Apr 13 16:40:02 2011
>> @@ -202,3 +202,49 @@
>>  ; CHECK-NOT: alloca
>>  ; CHECK: bitcast <4 x float> %x to i128
>>  }
>> +
>> +define <3 x float> @test14(<3 x float> %x)  {
>> +entry:
>> +  %x.addr = alloca <3 x float>, align 16
>> +  %r = alloca <3 x i32>, align 16
>> +  %extractVec = shufflevector <3 x float> %x, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
>> +  %storetmp = bitcast <3 x float>* %x.addr to <4 x float>*
>> +  store <4 x float> %extractVec, <4 x float>* %storetmp, align 16
>> +  %tmp = load <3 x float>* %x.addr, align 16
>> +  %cmp = fcmp une <3 x float> %tmp, zeroinitializer
>> +  %sext = sext <3 x i1> %cmp to <3 x i32>
>> +  %and = and <3 x i32> <i32 1065353216, i32 1065353216, i32 1065353216>, %sext
>> +  %extractVec1 = shufflevector <3 x i32> %and, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
>> +  %storetmp2 = bitcast <3 x i32>* %r to <4 x i32>*
>> +  store <4 x i32> %extractVec1, <4 x i32>* %storetmp2, align 16
>> +  %tmp3 = load <3 x i32>* %r, align 16
>> +  %0 = bitcast <3 x i32> %tmp3 to <3 x float>
>> +  %tmp4 = load <3 x float>* %x.addr, align 16
>> +  ret <3 x float> %tmp4
>> +; CHECK: @test14
>> +; CHECK-NOT: alloca
>> +; CHECK: shufflevector <4 x i32> %extractVec1, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
>> +}
>> +
>> +define void @test15(<3 x i64>* sret %agg.result, <3 x i64> %x, <3 x i64> %min) {
>> +entry:
>> +  %x.addr = alloca <3 x i64>, align 32
>> +  %min.addr = alloca <3 x i64>, align 32
>> +  %extractVec = shufflevector <3 x i64> %x, <3 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
>> +  %storetmp = bitcast <3 x i64>* %x.addr to <4 x i64>*
>> +  store <4 x i64> %extractVec, <4 x i64>* %storetmp, align 32
>> +  %extractVec1 = shufflevector <3 x i64> %min, <3 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
>> +  %storetmp2 = bitcast <3 x i64>* %min.addr to <4 x i64>*
>> +  store <4 x i64> %extractVec1, <4 x i64>* %storetmp2, align 32
>> +  %tmp = load <3 x i64>* %x.addr
>> +  %tmp5 = extractelement <3 x i64> %tmp, i32 0
>> +  %tmp11 = insertelement <3 x i64> %tmp, i64 %tmp5, i32 0
>> +  store <3 x i64> %tmp11, <3 x i64>* %x.addr
>> +  %tmp30 = load <3 x i64>* %x.addr, align 32
>> +  store <3 x i64> %tmp30, <3 x i64>* %agg.result
>> +  ret void
>> +; CHECK: @test15
>> +; CHECK-NOT: alloca
>> +; CHECK: shufflevector <4 x i64> %tmpV2, <4 x i64> undef, <3 x i32> <i32 0, i32 1, i32 2>
>> +}
>> +
>> 
>> 
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at cs.uiuc.edu
>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>>