[llvm] r357212 - [InterleavedAccessPass] Don't increase the number of bytes loaded.
Eli Friedman via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 28 13:44:50 PDT 2019
Author: efriedma
Date: Thu Mar 28 13:44:50 2019
New Revision: 357212
URL: http://llvm.org/viewvc/llvm-project?rev=357212&view=rev
Log:
[InterleavedAccessPass] Don't increase the number of bytes loaded.
Even if the interleaving transform would otherwise be legal, we shouldn't
introduce an interleaved load that is wider than the original load: the wider
load could access memory beyond what the original load read, which might be
undefined behavior.
It might be possible to perform some sort of mask-narrowing transform in
some cases (using a narrower interleaved load, then extending the
results using shufflevectors). But I haven't tried to implement that,
at least for now.
Fixes https://bugs.llvm.org/show_bug.cgi?id=41245 .
Differential Revision: https://reviews.llvm.org/D59954
Modified:
llvm/trunk/lib/CodeGen/InterleavedAccessPass.cpp
llvm/trunk/test/Transforms/InterleavedAccess/ARM/interleaved-accesses.ll
Modified: llvm/trunk/lib/CodeGen/InterleavedAccessPass.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/InterleavedAccessPass.cpp?rev=357212&r1=357211&r2=357212&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/InterleavedAccessPass.cpp (original)
+++ llvm/trunk/lib/CodeGen/InterleavedAccessPass.cpp Thu Mar 28 13:44:50 2019
@@ -163,14 +163,19 @@ static bool isDeInterleaveMaskOfFactor(A
/// <0, 2, 4, 6> (mask of index 0 to extract even elements)
/// <1, 3, 5, 7> (mask of index 1 to extract odd elements)
static bool isDeInterleaveMask(ArrayRef<int> Mask, unsigned &Factor,
- unsigned &Index, unsigned MaxFactor) {
+ unsigned &Index, unsigned MaxFactor,
+ unsigned NumLoadElements) {
if (Mask.size() < 2)
return false;
// Check potential Factors.
- for (Factor = 2; Factor <= MaxFactor; Factor++)
+ for (Factor = 2; Factor <= MaxFactor; Factor++) {
+ // Make sure we don't produce a load wider than the input load.
+ if (Mask.size() * Factor > NumLoadElements)
+ return false;
if (isDeInterleaveMaskOfFactor(Mask, Factor, Index))
return true;
+ }
return false;
}
@@ -302,9 +307,10 @@ bool InterleavedAccess::lowerInterleaved
unsigned Factor, Index;
+ unsigned NumLoadElements = LI->getType()->getVectorNumElements();
// Check if the first shufflevector is DE-interleave shuffle.
if (!isDeInterleaveMask(Shuffles[0]->getShuffleMask(), Factor, Index,
- MaxFactor))
+ MaxFactor, NumLoadElements))
return false;
// Holds the corresponding index for each DE-interleave shuffle.
Modified: llvm/trunk/test/Transforms/InterleavedAccess/ARM/interleaved-accesses.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InterleavedAccess/ARM/interleaved-accesses.ll?rev=357212&r1=357211&r2=357212&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InterleavedAccess/ARM/interleaved-accesses.ll (original)
+++ llvm/trunk/test/Transforms/InterleavedAccess/ARM/interleaved-accesses.ll Thu Mar 28 13:44:50 2019
@@ -352,9 +352,9 @@ define void @store_undef_mask_factor4(<1
ret void
}
-define void @load_address_space(<4 x i32> addrspace(1)* %ptr) {
+define void @load_address_space(<8 x i32> addrspace(1)* %ptr) {
; NEON-LABEL: @load_address_space(
-; NEON-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> addrspace(1)* %ptr to i8 addrspace(1)*
+; NEON-NEXT: [[TMP1:%.*]] = bitcast <8 x i32> addrspace(1)* %ptr to i8 addrspace(1)*
; NEON-NEXT: [[VLDN:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3.v2i32.p1i8(i8 addrspace(1)* [[TMP1]], i32 0)
; NEON-NEXT: [[TMP2:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLDN]], 2
; NEON-NEXT: [[TMP3:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLDN]], 1
@@ -364,10 +364,10 @@ define void @load_address_space(<4 x i32
; NO_NEON-NOT: @llvm.arm.neon
; NO_NEON: ret void
;
- %interleaved.vec = load <4 x i32>, <4 x i32> addrspace(1)* %ptr
- %v0 = shufflevector <4 x i32> %interleaved.vec, <4 x i32> undef, <2 x i32> <i32 0, i32 3>
- %v1 = shufflevector <4 x i32> %interleaved.vec, <4 x i32> undef, <2 x i32> <i32 1, i32 4>
- %v2 = shufflevector <4 x i32> %interleaved.vec, <4 x i32> undef, <2 x i32> <i32 2, i32 5>
+ %interleaved.vec = load <8 x i32>, <8 x i32> addrspace(1)* %ptr
+ %v0 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> undef, <2 x i32> <i32 0, i32 3>
+ %v1 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> undef, <2 x i32> <i32 1, i32 4>
+ %v2 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> undef, <2 x i32> <i32 2, i32 5>
ret void
}
@@ -883,3 +883,16 @@ define void @load_factor2_wide_pointer(<
%v1 = shufflevector <16 x i32*> %interleaved.vec, <16 x i32*> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
ret void
}
+
+; This would be a candidate for interleaving, except that load doesn't
+; actually load enough elements to satisfy the shuffle masks. (It would be
+; possible to produce a vld2.v2i32, but that currently isn't implemented.)
+define void @load_out_of_range(<4 x i32>* %ptr) {
+; ALL-LABEL: @load_out_of_range(
+; ALL-NOT: @llvm.arm.neon
+; ALL: ret void
+ %interleaved.vec = load <4 x i32>, <4 x i32>* %ptr, align 4
+ %v0 = shufflevector <4 x i32> %interleaved.vec, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
+ %v1 = shufflevector <4 x i32> %interleaved.vec, <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
+ ret void
+}
More information about the llvm-commits mailing list