[PATCH] D35638: A fix for bug33826
Farhana Aleen via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 20 15:15:03 PDT 2017
Farhana updated this revision to Diff 107591.
https://reviews.llvm.org/D35638
Files:
lib/Target/X86/X86InterleavedAccess.cpp
test/Transforms/InterleavedAccess/X86/interleaved-accesses-64bits-avx.ll
Index: lib/Target/X86/X86InterleavedAccess.cpp
===================================================================
--- lib/Target/X86/X86InterleavedAccess.cpp
+++ lib/Target/X86/X86InterleavedAccess.cpp
@@ -98,18 +98,22 @@
bool X86InterleavedAccessGroup::isSupported() const {
VectorType *ShuffleVecTy = Shuffles[0]->getType();
- uint64_t ShuffleVecSize = DL.getTypeSizeInBits(ShuffleVecTy);
Type *ShuffleEltTy = ShuffleVecTy->getVectorElementType();
+ unsigned ShuffleElemSize = DL.getTypeSizeInBits(ShuffleEltTy);
+ unsigned SupportedNumElem = 4;
+ unsigned WideInstSize;
// Currently, lowering is supported for 4-element vectors of 64 bits on AVX.
- uint64_t ExpectedShuffleVecSize;
- if (isa<LoadInst>(Inst))
- ExpectedShuffleVecSize = 256;
- else
- ExpectedShuffleVecSize = 1024;
+ if (isa<LoadInst>(Inst)) {
+ if (DL.getTypeSizeInBits(ShuffleVecTy) != SupportedNumElem * ShuffleElemSize)
+ return false;
+
+ WideInstSize = DL.getTypeSizeInBits(Inst->getType());
+ } else
+ WideInstSize = DL.getTypeSizeInBits(Shuffles[0]->getType());
- if (!Subtarget.hasAVX() || ShuffleVecSize != ExpectedShuffleVecSize ||
- DL.getTypeSizeInBits(ShuffleEltTy) != 64 || Factor != 4)
+ if (!Subtarget.hasAVX() || Factor != 4 || ShuffleElemSize != 64 ||
+ WideInstSize != (Factor * ShuffleElemSize * SupportedNumElem))
return false;
return true;
@@ -137,8 +141,9 @@
for (unsigned i = 0; i < NumSubVectors; ++i)
DecomposedVectors.push_back(
cast<ShuffleVectorInst>(Builder.CreateShuffleVector(
- Op0, Op1, createSequentialMask(Builder, Indices[i],
- SubVecTy->getVectorNumElements(), 0))));
+ Op0, Op1,
+ createSequentialMask(Builder, Indices[i],
+ SubVecTy->getVectorNumElements(), 0))));
return;
}
@@ -219,8 +224,8 @@
// Lower the interleaved stores:
// 1. Decompose the interleaved wide shuffle into individual shuffle
// vectors.
- decompose(Shuffles[0], Factor,
- VectorType::get(ShuffleEltTy, NumSubVecElems), DecomposedVectors);
+ decompose(Shuffles[0], Factor, VectorType::get(ShuffleEltTy, NumSubVecElems),
+ DecomposedVectors);
// 2. Transpose the interleaved-vectors into vectors of contiguous
// elements.
Index: test/Transforms/InterleavedAccess/X86/interleaved-accesses-64bits-avx.ll
===================================================================
--- test/Transforms/InterleavedAccess/X86/interleaved-accesses-64bits-avx.ll
+++ test/Transforms/InterleavedAccess/X86/interleaved-accesses-64bits-avx.ll
@@ -217,3 +217,20 @@
store <16 x double> %interleaved.vec, <16 x double>* %ptr, align 16
ret void
}
+
+; This verifies that the test passes and does not hit any assertions.
+; Today, X86InterleavedAccess could handle this case and generate a
+; transposed sequence by extending the current implementation to create
+; dummy vectors of undef. However, it deliberately does not optimize cases
+; where the load size is less than Factor * NumberOfElements, because a
+; better sequence can easily be generated by CG.
+
+@a = local_unnamed_addr global <4 x double> zeroinitializer, align 32
+; Function Attrs: norecurse nounwind readonly uwtable
+define <4 x double> @test_unhandled(<4 x double> %b) {
+entry:
+ %0 = load <4 x double>, <4 x double>* @a, align 32
+ %1 = shufflevector <4 x double> %0, <4 x double> undef, <4 x i32> <i32 3, i32 undef, i32 undef, i32 undef>
+ %shuffle = shufflevector <4 x double> %1, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 0, i32 0>
+ ret <4 x double> %shuffle
+}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D35638.107591.patch
Type: text/x-patch
Size: 3700 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20170720/0cdad64e/attachment.bin>
More information about the llvm-commits
mailing list