[llvm-commits] [llvm] r127317 - in /llvm/trunk: lib/Transforms/Scalar/ScalarReplAggregates.cpp test/Transforms/ScalarRepl/vector_promote.ll
Cameron Zwarich
zwarich at apple.com
Tue Mar 8 21:43:05 PST 2011
Author: zwarich
Date: Tue Mar 8 23:43:05 2011
New Revision: 127317
URL: http://llvm.org/viewvc/llvm-project?rev=127317&view=rev
Log:
Add support to scalar replacement for partial vector accesses of an alloca, e.g.
a union of a float, <2 x float>, and <4 x float>. This mostly comes up with the
use of vector intrinsics, especially in NEON when programmers know the layout of
the register file. This enables codegen to eliminate a lot of the subregister
traffic it would otherwise generate.
This commit only enables this for a small number of floating-point cases, but a
lot more integer cases. I assume this is okay for all ports, but I did not do
extensive testing of the quality of code involving i512 vectors and the like. If
there is a use case where this generates worse code than before, let me know and
we can scale it back.
This fixes <rdar://problem/9036264>.
Modified:
llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp
llvm/trunk/test/Transforms/ScalarRepl/vector_promote.ll
Modified: llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp?rev=127317&r1=127316&r2=127317&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp Tue Mar 8 23:43:05 2011
@@ -295,12 +295,16 @@
/// MergeInType - Add the 'In' type to the accumulated vector type (VectorTy)
/// so far at the offset specified by Offset (which is specified in bytes).
///
-/// There are two cases we handle here:
+/// There are three cases we handle here:
/// 1) A union of vector types of the same size and potentially its elements.
/// Here we turn element accesses into insert/extract element operations.
/// This promotes a <4 x float> with a store of float to the third element
/// into a <4 x float> that uses insert element.
-/// 2) A fully general blob of memory, which we turn into some (potentially
+/// 2) A union of vector types with power-of-2 size differences, e.g. a float,
+/// <2 x float> and <4 x float>. Here we turn element accesses into insert
+/// and extract element operations, and <2 x float> accesses into a cast to
+/// <2 x double>, an extract, and a cast back to <2 x float>.
+/// 3) A fully general blob of memory, which we turn into some (potentially
/// large) integer type with extract and insert operations where the loads
/// and stores would mutate the memory. We mark this by setting VectorTy
/// to VoidTy.
@@ -346,18 +350,68 @@
// Remember if we saw a vector type.
HadAVector = true;
- if (VInTy->getBitWidth()/8 == AllocaSize && Offset == 0) {
- // If we're storing/loading a vector of the right size, allow it as a
- // vector. If this the first vector we see, remember the type so that
- // we know the element size. If this is a subsequent access, ignore it
- // even if it is a differing type but the same size. Worst case we can
- // bitcast the resultant vectors.
- if (VectorTy == 0)
- VectorTy = VInTy;
+ // TODO: Support nonzero offsets?
+ if (Offset != 0)
+ return false;
+
+ // Only allow vectors that are a power-of-2 away from the size of the alloca.
+ // Reject vectors narrower than one byte first: their byte width truncates to
+ // zero, which would otherwise be used as a divisor.
+ unsigned InByteWidth = VInTy->getBitWidth() / 8;
+ if (InByteWidth == 0 || !isPowerOf2_64(AllocaSize / InByteWidth))
+ return false;
+
+ // If this is the first vector we see, remember the type so that we know the
+ // element size.
+ if (!VectorTy) {
+ VectorTy = VInTy;
return true;
}
- return false;
+ unsigned BitWidth = cast<VectorType>(VectorTy)->getBitWidth();
+ unsigned InBitWidth = VInTy->getBitWidth();
+
+ // Vectors of the same size can be converted using a simple bitcast.
+ if (InBitWidth == BitWidth && AllocaSize == (InBitWidth / 8))
+ return true;
+
+ // ElementTy describes the vector type accumulated so far; InElementTy
+ // describes the incoming vector, so it must be taken from VInTy.
+ const Type *ElementTy = cast<VectorType>(VectorTy)->getElementType();
+ const Type *InElementTy = VInTy->getElementType();
+
+ // Do not allow mixed integer and floating-point accesses from vectors of
+ // different sizes.
+ if (ElementTy->isFloatingPointTy() != InElementTy->isFloatingPointTy())
+ return false;
+
+ if (ElementTy->isFloatingPointTy()) {
+ // Only allow floating-point vectors of different sizes if they have the
+ // same element type.
+ // TODO: This could be loosened a bit, but would anything benefit?
+ if (ElementTy != InElementTy)
+ return false;
+
+ // There are no arbitrary-precision floating-point types, which limits the
+ // number of legal vector types with larger element types that we can form
+ // to bitcast and extract a subvector.
+ // TODO: We could support some more cases with mixed fp128 and double here.
+ if (!(BitWidth == 64 || BitWidth == 128) ||
+ !(InBitWidth == 64 || InBitWidth == 128))
+ return false;
+ } else {
+ assert(ElementTy->isIntegerTy() && "Vector elements must be either integer "
+ "or floating-point.");
+ // Element widths get their own names so they do not shadow the whole-vector
+ // widths (BitWidth / InBitWidth) compared at the end of the function.
+ unsigned ElementBits = ElementTy->getPrimitiveSizeInBits();
+ unsigned InElementBits = InElementTy->getPrimitiveSizeInBits();
+
+ // Do not allow integer types smaller than a byte or types whose widths are
+ // not a multiple of a byte.
+ if (ElementBits < 8 || InElementBits < 8 ||
+ ElementBits % 8 != 0 || InElementBits % 8 != 0)
+ return false;
+ }
+
+ // Pick the largest of the two vector types.
+ if (InBitWidth > BitWidth)
+ VectorTy = VInTy;
+
+ return true;
}
/// CanConvertToScalar - V is a pointer. If we can convert the pointee and all
@@ -586,6 +640,26 @@
}
}
+/// getScaledElementType - Compute the element type used when a partial vector
+/// access of an alloca is widened by a factor of Scale. OldTy must be an
+/// integer or float type. Integers map to an integer type Scale times as wide;
+/// a float maps to float or double when the scaled width is 32 or 64 bits.
+static const Type *getScaledElementType(const Type *OldTy, unsigned Scale) {
+  const bool IsInteger = OldTy->isIntegerTy();
+  assert((IsInteger || OldTy->isFloatTy()) && "Partial vector "
+         "accesses must be scaled from integer or float elements.");
+
+  LLVMContext &Ctx = OldTy->getContext();
+  const unsigned ScaledBits = OldTy->getPrimitiveSizeInBits() * Scale;
+
+  // Integers can be formed at any width, so no further checking is needed.
+  if (IsInteger)
+    return Type::getIntNTy(Ctx, ScaledBits);
+
+  // Only two floating-point widths are handled here.
+  switch (ScaledBits) {
+  case 32:
+    return Type::getFloatTy(Ctx);
+  case 64:
+    return Type::getDoubleTy(Ctx);
+  default:
+    break;
+  }
+
+  llvm_unreachable("Invalid type for a partial vector access of an alloca!");
+}
+
/// ConvertScalar_ExtractValue - Extract a value of type ToType from an integer
/// or vector value FromVal, extracting the bits from the offset specified by
/// Offset. This returns the value, which is of type ToType.
@@ -606,8 +680,27 @@
// If the result alloca is a vector type, this is either an element
// access or a bitcast to another vector type of the same size.
if (const VectorType *VTy = dyn_cast<VectorType>(FromVal->getType())) {
- if (ToType->isVectorTy())
+ if (ToType->isVectorTy()) {
+ if (isPowerOf2_64(AllocaSize / TD.getTypeAllocSize(ToType))) {
+ assert(Offset == 0 && "Can't extract a value of a smaller vector type "
+ "from a nonzero offset.");
+
+ const Type *ToElementTy = cast<VectorType>(ToType)->getElementType();
+ unsigned Scale = AllocaSize / TD.getTypeAllocSize(ToType);
+ const Type *CastElementTy = getScaledElementType(ToElementTy, Scale);
+ unsigned NumCastVectorElements = VTy->getNumElements() / Scale;
+
+ LLVMContext &Context = FromVal->getContext();
+ const Type *CastTy = VectorType::get(CastElementTy,
+ NumCastVectorElements);
+ Value *Cast = Builder.CreateBitCast(FromVal, CastTy, "tmp");
+ Value *Extract = Builder.CreateExtractElement(Cast, ConstantInt::get(
+ Type::getInt32Ty(Context), 0), "tmp");
+ return Builder.CreateBitCast(Extract, ToType, "tmp");
+ }
+
return Builder.CreateBitCast(FromVal, ToType, "tmp");
+ }
// Otherwise it must be an element access.
unsigned Elt = 0;
@@ -728,6 +821,28 @@
if (ValSize == VecSize)
return Builder.CreateBitCast(SV, AllocaType, "tmp");
+ if (SV->getType()->isVectorTy() && isPowerOf2_64(VecSize / ValSize)) {
+ assert(Offset == 0 && "Can't insert a value of a smaller vector type at "
+ "a nonzero offset.");
+
+ const Type *ToElementTy =
+ cast<VectorType>(SV->getType())->getElementType();
+ unsigned Scale = VecSize / ValSize;
+ const Type *CastElementTy = getScaledElementType(ToElementTy, Scale);
+ unsigned NumCastVectorElements = VTy->getNumElements() / Scale;
+
+ LLVMContext &Context = SV->getContext();
+ const Type *OldCastTy = VectorType::get(CastElementTy,
+ NumCastVectorElements);
+ Value *OldCast = Builder.CreateBitCast(Old, OldCastTy, "tmp");
+
+ Value *SVCast = Builder.CreateBitCast(SV, CastElementTy, "tmp");
+ Value *Insert =
+ Builder.CreateInsertElement(OldCast, SVCast, ConstantInt::get(
+ Type::getInt32Ty(Context), 0), "tmp");
+ return Builder.CreateBitCast(Insert, AllocaType, "tmp");
+ }
+
uint64_t EltSize = TD.getTypeAllocSizeInBits(VTy->getElementType());
// Must be an element insertion.
Modified: llvm/trunk/test/Transforms/ScalarRepl/vector_promote.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ScalarRepl/vector_promote.ll?rev=127317&r1=127316&r2=127317&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/ScalarRepl/vector_promote.ll (original)
+++ llvm/trunk/test/Transforms/ScalarRepl/vector_promote.ll Tue Mar 8 23:43:05 2011
@@ -98,3 +98,62 @@
; CHECK: ret i64
}
+; test7: a <4 x float> alloca is written as a whole vector, then read both as
+; a <2 x float> (through a bitcast pointer at offset 0) and as the float at
+; element index 2. The FileCheck directives below require that no alloca
+; remains and that the <2 x float> read becomes a bitcast to <2 x double>, an
+; extractelement, and a bitcast back to <2 x float>, followed by an
+; extractelement on the <4 x float> for the scalar read.
+define float @test7(<4 x float> %x) {
+ %a = alloca <4 x float>
+ store <4 x float> %x, <4 x float>* %a
+ %p = bitcast <4 x float>* %a to <2 x float>*
+ %b = load <2 x float>* %p
+ %q = getelementptr <4 x float>* %a, i32 0, i32 2
+ %c = load float* %q
+ ret float %c
+; CHECK: @test7
+; CHECK-NOT: alloca
+; CHECK: bitcast <4 x float> %x to <2 x double>
+; CHECK-NEXT: extractelement <2 x double>
+; CHECK-NEXT: bitcast double %tmp4 to <2 x float>
+; CHECK-NEXT: extractelement <4 x float>
+}
+
+; test8: a <4 x float> alloca is written as a whole vector and then partially
+; overwritten with a <2 x float> stored through a bitcast pointer at offset 0.
+; The FileCheck directives below require that no alloca remains and that the
+; partial store becomes: bitcast the old value to <2 x double>, bitcast %y to
+; double, insertelement, and bitcast the result back to <4 x float>.
+define void @test8(<4 x float> %x, <2 x float> %y) {
+ %a = alloca <4 x float>
+ store <4 x float> %x, <4 x float>* %a
+ %p = bitcast <4 x float>* %a to <2 x float>*
+ store <2 x float> %y, <2 x float>* %p
+ ret void
+; CHECK: @test8
+; CHECK-NOT: alloca
+; CHECK: bitcast <4 x float> %x to <2 x double>
+; CHECK-NEXT: bitcast <2 x float> %y to double
+; CHECK-NEXT: insertelement <2 x double>
+; CHECK-NEXT: bitcast <2 x double> %tmp2 to <4 x float>
+}
+
+; test9: integer counterpart of test7 using very wide elements. A <4 x i256>
+; alloca is read as a <2 x i256> (through a bitcast pointer at offset 0) and
+; as the i256 at element index 2. The FileCheck directives below require that
+; no alloca remains and that the partial read goes through a <2 x i512>
+; bitcast, an extractelement, and a bitcast from i512 back to <2 x i256>.
+define i256 @test9(<4 x i256> %x) {
+ %a = alloca <4 x i256>
+ store <4 x i256> %x, <4 x i256>* %a
+ %p = bitcast <4 x i256>* %a to <2 x i256>*
+ %b = load <2 x i256>* %p
+ %q = getelementptr <4 x i256>* %a, i32 0, i32 2
+ %c = load i256* %q
+ ret i256 %c
+; CHECK: @test9
+; CHECK-NOT: alloca
+; CHECK: bitcast <4 x i256> %x to <2 x i512>
+; CHECK-NEXT: extractelement <2 x i512>
+; CHECK-NEXT: bitcast i512 %tmp4 to <2 x i256>
+; CHECK-NEXT: extractelement <4 x i256>
+}
+
+; test10: integer counterpart of test8 using very wide elements. A <4 x i256>
+; alloca is written as a whole vector and then partially overwritten with a
+; <2 x i256> stored through a bitcast pointer at offset 0. The FileCheck
+; directives below require that no alloca remains and that the partial store
+; becomes: bitcast the old value to <2 x i512>, bitcast %y to i512,
+; insertelement, and bitcast the result back to <4 x i256>.
+define void @test10(<4 x i256> %x, <2 x i256> %y) {
+ %a = alloca <4 x i256>
+ store <4 x i256> %x, <4 x i256>* %a
+ %p = bitcast <4 x i256>* %a to <2 x i256>*
+ store <2 x i256> %y, <2 x i256>* %p
+ ret void
+; CHECK: @test10
+; CHECK-NOT: alloca
+; CHECK: bitcast <4 x i256> %x to <2 x i512>
+; CHECK-NEXT: bitcast <2 x i256> %y to i512
+; CHECK-NEXT: insertelement <2 x i512>
+; CHECK-NEXT: bitcast <2 x i512> %tmp2 to <4 x i256>
+}
More information about the llvm-commits
mailing list