[llvm] r308784 - X86InterleaveAccess: A fix for bug33826
Farhana Aleen via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 21 14:35:00 PDT 2017
Author: faaleen
Date: Fri Jul 21 14:35:00 2017
New Revision: 308784
URL: http://llvm.org/viewvc/llvm-project?rev=308784&view=rev
Log:
X86InterleaveAccess: A fix for bug33826
Reviewers: DavidKreitzer
Differential Revision: https://reviews.llvm.org/D35638
Modified:
llvm/trunk/lib/Target/X86/X86InterleavedAccess.cpp
llvm/trunk/test/Transforms/InterleavedAccess/X86/interleaved-accesses-64bits-avx.ll
Modified: llvm/trunk/lib/Target/X86/X86InterleavedAccess.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InterleavedAccess.cpp?rev=308784&r1=308783&r2=308784&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InterleavedAccess.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InterleavedAccess.cpp Fri Jul 21 14:35:00 2017
@@ -98,18 +98,22 @@ public:
bool X86InterleavedAccessGroup::isSupported() const {
VectorType *ShuffleVecTy = Shuffles[0]->getType();
- uint64_t ShuffleVecSize = DL.getTypeSizeInBits(ShuffleVecTy);
Type *ShuffleEltTy = ShuffleVecTy->getVectorElementType();
+ unsigned ShuffleElemSize = DL.getTypeSizeInBits(ShuffleEltTy);
+ unsigned SupportedNumElem = 4;
+ unsigned WideInstSize;
// Currently, lowering is supported for 4-element vectors of 64 bits on AVX.
- uint64_t ExpectedShuffleVecSize;
- if (isa<LoadInst>(Inst))
- ExpectedShuffleVecSize = 256;
- else
- ExpectedShuffleVecSize = 1024;
+ if (isa<LoadInst>(Inst)) {
+ if (DL.getTypeSizeInBits(ShuffleVecTy) != SupportedNumElem * ShuffleElemSize)
+ return false;
+
+ WideInstSize = DL.getTypeSizeInBits(Inst->getType());
+ } else
+ WideInstSize = DL.getTypeSizeInBits(Shuffles[0]->getType());
- if (!Subtarget.hasAVX() || ShuffleVecSize != ExpectedShuffleVecSize ||
- DL.getTypeSizeInBits(ShuffleEltTy) != 64 || Factor != 4)
+ if (!Subtarget.hasAVX() || Factor != 4 || ShuffleElemSize != 64 ||
+ WideInstSize != (Factor * ShuffleElemSize * SupportedNumElem))
return false;
return true;
@@ -137,8 +141,9 @@ void X86InterleavedAccessGroup::decompos
for (unsigned i = 0; i < NumSubVectors; ++i)
DecomposedVectors.push_back(
cast<ShuffleVectorInst>(Builder.CreateShuffleVector(
- Op0, Op1, createSequentialMask(Builder, Indices[i],
- SubVecTy->getVectorNumElements(), 0))));
+ Op0, Op1,
+ createSequentialMask(Builder, Indices[i],
+ SubVecTy->getVectorNumElements(), 0))));
return;
}
@@ -219,8 +224,8 @@ bool X86InterleavedAccessGroup::lowerInt
// Lower the interleaved stores:
// 1. Decompose the interleaved wide shuffle into individual shuffle
// vectors.
- decompose(Shuffles[0], Factor,
- VectorType::get(ShuffleEltTy, NumSubVecElems), DecomposedVectors);
+ decompose(Shuffles[0], Factor, VectorType::get(ShuffleEltTy, NumSubVecElems),
+ DecomposedVectors);
// 2. Transpose the interleaved-vectors into vectors of contiguous
// elements.
Modified: llvm/trunk/test/Transforms/InterleavedAccess/X86/interleaved-accesses-64bits-avx.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InterleavedAccess/X86/interleaved-accesses-64bits-avx.ll?rev=308784&r1=308783&r2=308784&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InterleavedAccess/X86/interleaved-accesses-64bits-avx.ll (original)
+++ llvm/trunk/test/Transforms/InterleavedAccess/X86/interleaved-accesses-64bits-avx.ll Fri Jul 21 14:35:00 2017
@@ -217,3 +217,20 @@ define void @store_factorf64_4_arbitrary
store <16 x double> %interleaved.vec, <16 x double>* %ptr, align 16
ret void
}
+
+; This verifies that the test passes and does not hit any assertions.
+; Today, X86InterleavedAccess could be extended to handle this case and
+; generate a transposed sequence by creating dummy vectors of undef.
+; However, it deliberately does not optimize cases where the load size is
+; less than Factor * NumberOfElements, because a better sequence can easily
+; be generated by CG.
+
+@a = local_unnamed_addr global <4 x double> zeroinitializer, align 32
+; Function Attrs: norecurse nounwind readonly uwtable
+define <4 x double> @test_unhandled(<4 x double> %b) {
+entry:
+ %0 = load <4 x double>, <4 x double>* @a, align 32
+ %1 = shufflevector <4 x double> %0, <4 x double> undef, <4 x i32> <i32 3, i32 undef, i32 undef, i32 undef>
+ %shuffle = shufflevector <4 x double> %1, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 0, i32 0>
+ ret <4 x double> %shuffle
+}
More information about the llvm-commits
mailing list