[llvm] r343931 - [IAI, LV] Avoid creating interleave-groups for predicated accesses
Dorit Nuzman via llvm-commits
llvm-commits at lists.llvm.org
Sat Oct 6 23:57:26 PDT 2018
Author: dorit
Date: Sat Oct 6 23:57:25 2018
New Revision: 343931
URL: http://llvm.org/viewvc/llvm-project?rev=343931&view=rev
Log:
[IAI,LV] Avoid creating interleave-groups for predicated accesses
This patch fixes PR39099.
When strided loads are predicated, each of them will form an interleaved-group
(with gaps). However, subsequent stages of vectorization (planning and
transformation) assume that if a load is part of an Interleave-Group it is not
predicated, resulting in wrong code - unmasked wide loads are created.
The Interleaving Analysis does take care not to have conditional interleave
groups of size > 1, but until we extend the planning and transformation stages
to support masked-interleave-groups we should also avoid having them for
size == 1.
Reviewers: Ayal, hsaito, dcaballe, fhahn
Reviewed By: Ayal
Differential Revision: https://reviews.llvm.org/D52682
Added:
llvm/trunk/test/Transforms/LoopVectorize/X86/x86-pr39099.ll
llvm/trunk/test/Transforms/LoopVectorize/pr39099.ll
Modified:
llvm/trunk/lib/Analysis/VectorUtils.cpp
Modified: llvm/trunk/lib/Analysis/VectorUtils.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/VectorUtils.cpp?rev=343931&r1=343930&r2=343931&view=diff
==============================================================================
--- llvm/trunk/lib/Analysis/VectorUtils.cpp (original)
+++ llvm/trunk/lib/Analysis/VectorUtils.cpp Sat Oct 6 23:57:25 2018
@@ -712,7 +712,9 @@ void InterleavedAccessInfo::analyzeInter
// create a group for B, we continue with the bottom-up algorithm to ensure
// we don't break any of B's dependences.
InterleaveGroup *Group = nullptr;
- if (isStrided(DesB.Stride)) {
+ // TODO: Ignore B if it is in a predicated block. This restriction can be
+ // relaxed in the future once we handle masked interleaved groups.
+ if (isStrided(DesB.Stride) && !isPredicated(B->getParent())) {
Group = getInterleaveGroup(B);
if (!Group) {
LLVM_DEBUG(dbgs() << "LV: Creating an interleave group with:" << *B
Added: llvm/trunk/test/Transforms/LoopVectorize/X86/x86-pr39099.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/X86/x86-pr39099.ll?rev=343931&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/X86/x86-pr39099.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/X86/x86-pr39099.ll Sat Oct 6 23:57:25 2018
@@ -0,0 +1,60 @@
+; RUN: opt -mcpu=skx -S -loop-vectorize -force-vector-width=8 -force-vector-interleave=1 -enable-interleaved-mem-accesses < %s | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
+
+; This test checks the fix for PR39099.
+;
+; Check that the predicated load is not vectorized as an
+; interleaved-group (which requires proper masking, currently unsupported)
+; but rather as a scalarized accesses.
+; (For SKX, Gather is not supported by the compiler for chars, therefore
+; the only remaining alternative is to scalarize).
+;
+; void masked_strided(const unsigned char* restrict p,
+; unsigned char* restrict q,
+; unsigned char guard) {
+; for(ix=0; ix < 1024; ++ix) {
+; if (ix > guard) {
+; char t = p[2*ix];
+; q[ix] = t;
+; }
+; }
+; }
+
+;CHECK-LABEL: @masked_strided(
+;CHECK: vector.body:
+;CHECK-NEXT: %index = phi i32
+;CHECK-NEXT: %[[VECIND:.+]] = phi <8 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+;CHECK-NEXT: %[[VMASK:.+]] = icmp ugt <8 x i32> %[[VECIND]], %{{broadcast.splat*}}
+;CHECK-NEXT: %{{.*}} = shl nuw nsw <8 x i32> %[[VECIND]], <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+;CHECK-NEXT: %[[M:.+]] = extractelement <8 x i1> %[[VMASK]], i32 0
+;CHECK-NEXT: br i1 %[[M]], label %pred.load.if, label %pred.load.continue
+;CHECK-NOT: %[[WIDEVEC:.+]] = load <16 x i8>, <16 x i8>* %{{.*}}, align 1
+;CHECK-NOT: %{{.*}} = shufflevector <16 x i8> %[[WIDEVEC]], <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+
+define dso_local void @masked_strided(i8* noalias nocapture readonly %p, i8* noalias nocapture %q, i8 zeroext %guard) local_unnamed_addr {
+entry:
+ %conv = zext i8 %guard to i32
+ br label %for.body
+
+for.body:
+ %ix.09 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+ %cmp1 = icmp ugt i32 %ix.09, %conv
+ br i1 %cmp1, label %if.then, label %for.inc
+
+if.then:
+ %mul = shl nuw nsw i32 %ix.09, 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %mul
+ %0 = load i8, i8* %arrayidx, align 1
+ %arrayidx3 = getelementptr inbounds i8, i8* %q, i32 %ix.09
+ store i8 %0, i8* %arrayidx3, align 1
+ br label %for.inc
+
+for.inc:
+ %inc = add nuw nsw i32 %ix.09, 1
+ %exitcond = icmp eq i32 %inc, 1024
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
Added: llvm/trunk/test/Transforms/LoopVectorize/pr39099.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/pr39099.ll?rev=343931&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/pr39099.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/pr39099.ll Sat Oct 6 23:57:25 2018
@@ -0,0 +1,42 @@
+; REQUIRES: asserts
+; RUN: opt -S -loop-vectorize -force-vector-width=8 -force-vector-interleave=1 -enable-interleaved-mem-accesses -debug-only=loop-vectorize,vectorutils -disable-output < %s 2>&1 | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
+
+; Ensure that we don't create interleave groups for predicated
+; strided accesses.
+
+; CHECK: LV: Checking a loop in "masked_strided"
+; CHECK: LV: Analyzing interleaved accesses...
+; CHECK-NOT: LV: Creating an interleave group
+
+define dso_local void @masked_strided(i8* noalias nocapture readonly %p, i8* noalias nocapture %q, i8 zeroext %guard) local_unnamed_addr {
+entry:
+ %conv = zext i8 %guard to i32
+ br label %for.body
+
+for.body:
+ %ix.017 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+ %cmp1 = icmp ugt i32 %ix.017, %conv
+ br i1 %cmp1, label %if.then, label %for.inc
+
+if.then:
+ %mul = shl nuw nsw i32 %ix.017, 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %mul
+ %0 = load i8, i8* %arrayidx, align 1
+ %arrayidx4 = getelementptr inbounds i8, i8* %q, i32 %mul
+ store i8 %0, i8* %arrayidx4, align 1
+ %sub = sub i8 0, %0
+ %add = or i32 %mul, 1
+ %arrayidx8 = getelementptr inbounds i8, i8* %q, i32 %add
+ store i8 %sub, i8* %arrayidx8, align 1
+ br label %for.inc
+
+for.inc:
+ %inc = add nuw nsw i32 %ix.017, 1
+ %exitcond = icmp eq i32 %inc, 1024
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
More information about the llvm-commits
mailing list