[PATCH] D52685: [LoopVectorizer] Adjust heuristics for a truncated load

Tue Oct 9 08:25:56 PDT 2018

jonpa updated this revision to Diff 168809.
jonpa added a comment.

Ping!

Added a test case that serves as a good example: the loop body has an i64 load whose value is truncated to i32 and then stored. With VF=2, which is the maxVF on trunk since 128/64 is 2, the vectorized loop becomes:

  vl      %v0, 0(%r14)
  vpkg    %v0, %v0, %v0
  vsteg   %v0, 0(%r13), 0
  la      %r3, 2(%r3)
  la      %r13, 8(%r13)
  la      %r14, 16(%r14)
  cgrjlh  %r1, %r3, .LBB0_5

With this patch, VF=4 is also considered; in this case it is the better choice and is selected by the cost heuristics:

  vl      %v0, 16(%r14)
  vl      %v1, 0(%r14)
  vpkg    %v0, %v1, %v0
  vst     %v0, 0(%r13)
  la      %r3, 4(%r3)
  la      %r13, 16(%r13)
  la      %r14, 32(%r14)
  cgrjlh  %r1, %r3, .LBB0_5


https://reviews.llvm.org/D52685

Files:
  lib/Transforms/Vectorize/LoopVectorize.cpp
  test/Transforms/LoopVectorize/SystemZ/maxVF_truncload.ll


Index: test/Transforms/LoopVectorize/SystemZ/maxVF_truncload.ll
===================================================================

--- /dev/null
+++ test/Transforms/LoopVectorize/SystemZ/maxVF_truncload.ll
@@ -0,0 +1,26 @@
+; RUN: opt < %s -mtriple=s390x-unknown-linux -mcpu=z13 -o - -loop-vectorize \
+; RUN:  -debug-only=loop-vectorize 2>&1 | FileCheck %s
+; REQUIRES: asserts
+;
+; CHECK: LV: The Smallest and Widest types: 32 / 32 bits.
+; CHECK: LV: Selecting VF: 4.
+
+define void @fun(i64 %n, i32 %v, i64* %ptr, i32* %dst) {
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %addr = getelementptr inbounds i64, i64* %ptr, i64 %i
+  %l64 = load i64, i64* %addr
+  %conv = trunc i64 %l64 to i32
+  %addr1 = getelementptr inbounds i32, i32* %dst, i64 %i
+  store i32 %conv, i32* %addr1
+  %iv.next = add nuw nsw i64 %i, 1
+  %cmp = icmp slt i64 %iv.next, %n
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  ret void
+}
+
Index: lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- lib/Transforms/Vectorize/LoopVectorize.cpp
+++ lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4692,6 +4692,11 @@
           !isAccessInterleaved(&I) && !isLegalGatherOrScatter(&I))
         continue;
 
+      // If the loaded value is truncated, consider the truncated type.
+      if (isa<LoadInst>(&I) && I.hasOneUse() &&
+         (isa<TruncInst>(*I.user_begin()) || isa<FPTruncInst>(*I.user_begin())))
+        T = (*I.user_begin())->getType();
+
       MinWidth = std::min(MinWidth,
                           (unsigned)DL.getTypeSizeInBits(T->getScalarType()));
       MaxWidth = std::max(MaxWidth,


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D52685.168809.patch
Type: text/x-patch
Size: 1717 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20181009/e4e093c9/attachment.bin>