[llvm] r293673 - InterleaveAccessPass: Avoid constructing invalid shuffle masks
Matthias Braun via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 31 10:37:54 PST 2017
Author: matze
Date: Tue Jan 31 12:37:53 2017
New Revision: 293673
URL: http://llvm.org/viewvc/llvm-project?rev=293673&view=rev
Log:
InterleaveAccessPass: Avoid constructing invalid shuffle masks
Fix a bug where we would construct shufflevector instructions addressing
invalid elements.
Differential Revision: https://reviews.llvm.org/D29313
Modified:
llvm/trunk/lib/CodeGen/InterleavedAccessPass.cpp
llvm/trunk/test/Transforms/InterleavedAccess/AArch64/interleaved-accesses.ll
llvm/trunk/test/Transforms/InterleavedAccess/ARM/interleaved-accesses.ll
Modified: llvm/trunk/lib/CodeGen/InterleavedAccessPass.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/InterleavedAccessPass.cpp?rev=293673&r1=293672&r2=293673&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/InterleavedAccessPass.cpp (original)
+++ llvm/trunk/lib/CodeGen/InterleavedAccessPass.cpp Tue Jan 31 12:37:53 2017
@@ -174,7 +174,7 @@ static bool isDeInterleaveMask(ArrayRef<
/// I.e. <0, LaneLen, ... , LaneLen*(Factor - 1), 1, LaneLen + 1, ...>
/// E.g. For a Factor of 2 (LaneLen=4): <0, 4, 1, 5, 2, 6, 3, 7>
static bool isReInterleaveMask(ArrayRef<int> Mask, unsigned &Factor,
- unsigned MaxFactor) {
+ unsigned MaxFactor, unsigned OpNumElts) {
unsigned NumElts = Mask.size();
if (NumElts < 4)
return false;
@@ -246,6 +246,9 @@ static bool isReInterleaveMask(ArrayRef<
if (StartMask < 0)
break;
+ // We must stay within the vectors; This case can happen with undefs.
+ if (StartMask + LaneLen > OpNumElts*2)
+ break;
}
// Found an interleaved mask of current factor.
@@ -406,7 +409,8 @@ bool InterleavedAccess::lowerInterleaved
// Check if the shufflevector is RE-interleave shuffle.
unsigned Factor;
- if (!isReInterleaveMask(SVI->getShuffleMask(), Factor, MaxFactor))
+ unsigned OpNumElts = SVI->getOperand(0)->getType()->getVectorNumElements();
+ if (!isReInterleaveMask(SVI->getShuffleMask(), Factor, MaxFactor, OpNumElts))
return false;
DEBUG(dbgs() << "IA: Found an interleaved store: " << *SI << "\n");
Modified: llvm/trunk/test/Transforms/InterleavedAccess/AArch64/interleaved-accesses.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InterleavedAccess/AArch64/interleaved-accesses.ll?rev=293673&r1=293672&r2=293673&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InterleavedAccess/AArch64/interleaved-accesses.ll (original)
+++ llvm/trunk/test/Transforms/InterleavedAccess/AArch64/interleaved-accesses.ll Tue Jan 31 12:37:53 2017
@@ -547,3 +547,21 @@ define void @store_general_mask_factor3_
store <12 x i32> %interleaved.vec, <12 x i32>* %ptr, align 4
ret void
}
+
+@g = external global <4 x float>
+
+; The following does not give a valid interleaved store
+; NEON-LABEL: define void @no_interleave
+; NEON-NOT: call void @llvm.aarch64.neon.st2
+; NEON: shufflevector
+; NEON: store
+; NEON: ret void
+; NO_NEON-LABEL: define void @no_interleave
+; NO_NEON: shufflevector
+; NO_NEON: store
+; NO_NEON: ret void
+define void @no_interleave(<4 x float> %a0) {
+ %v0 = shufflevector <4 x float> %a0, <4 x float> %a0, <4 x i32> <i32 0, i32 3, i32 7, i32 undef>
+ store <4 x float> %v0, <4 x float>* @g, align 16
+ ret void
+}
Modified: llvm/trunk/test/Transforms/InterleavedAccess/ARM/interleaved-accesses.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InterleavedAccess/ARM/interleaved-accesses.ll?rev=293673&r1=293672&r2=293673&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InterleavedAccess/ARM/interleaved-accesses.ll (original)
+++ llvm/trunk/test/Transforms/InterleavedAccess/ARM/interleaved-accesses.ll Tue Jan 31 12:37:53 2017
@@ -626,3 +626,21 @@ define void @store_general_mask_factor3_
store <12 x i32> %interleaved.vec, <12 x i32>* %ptr, align 4
ret void
}
+
+@g = external global <4 x float>
+
+; The following does not give a valid interleaved store
+; NEON-LABEL: define void @no_interleave
+; NEON-NOT: call void @llvm.arm.neon.vst2
+; NEON: shufflevector
+; NEON: store
+; NEON: ret void
+; NO_NEON-LABEL: define void @no_interleave
+; NO_NEON: shufflevector
+; NO_NEON: store
+; NO_NEON: ret void
+define void @no_interleave(<4 x float> %a0) {
+ %v0 = shufflevector <4 x float> %a0, <4 x float> %a0, <4 x i32> <i32 0, i32 7, i32 1, i32 undef>
+ store <4 x float> %v0, <4 x float>* @g, align 16
+ ret void
+}
More information about the llvm-commits
mailing list