[PATCH] D13277: [SLP] Don't vectorize loads of non-packed types (like i1, i2).

Michael Zolotukhin via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 29 18:29:49 PDT 2015


mzolotukhin created this revision.
mzolotukhin added reviewers: aschwaighofer, nadav, hfinkel.
mzolotukhin added a subscriber: llvm-commits.

Given an array of i2 elements, 4 consecutive scalar loads will be lowered to
i8-sized loads and thus will access 4 consecutive bytes in memory. If we
vectorize these loads into a single <4 x i2> load, it'll access only 1 byte in
memory. Hence, we should prohibit vectorization in such cases.

http://reviews.llvm.org/D13277

Files:
  lib/Transforms/Vectorize/SLPVectorizer.cpp
  test/Transforms/SLPVectorizer/X86/bad_types.ll

Index: test/Transforms/SLPVectorizer/X86/bad_types.ll
===================================================================
--- test/Transforms/SLPVectorizer/X86/bad_types.ll
+++ test/Transforms/SLPVectorizer/X86/bad_types.ll
@@ -47,4 +47,30 @@
   ret void
 }
 
+define i8 @test3(i8 *%addr) {
+; Check that we do not vectorize types that are padded to bigger ones.
+;
+; CHECK-LABEL: @test3
+; CHECK-NOT:   <4 x i2>
+; CHECK:       ret i8
+entry:
+  %a = bitcast i8* %addr to i2*
+  %a0 = getelementptr inbounds i2, i2* %a, i64 0
+  %a1 = getelementptr inbounds i2, i2* %a, i64 1
+  %a2 = getelementptr inbounds i2, i2* %a, i64 2
+  %a3 = getelementptr inbounds i2, i2* %a, i64 3
+  %l0 = load i2, i2* %a0, align 1
+  %l1 = load i2, i2* %a1, align 1
+  %l2 = load i2, i2* %a2, align 1
+  %l3 = load i2, i2* %a3, align 1
+  br label %bb1
+bb1:                                              ; preds = %entry
+  %p0 = phi i2 [ %l0, %entry ]
+  %p1 = phi i2 [ %l1, %entry ]
+  %p2 = phi i2 [ %l2, %entry ]
+  %p3 = phi i2 [ %l3, %entry ]
+  %r  = zext i2 %p2 to i8
+  ret i8 %r
+}
+
 declare void @f(i64, i64)
Index: lib/Transforms/Vectorize/SLPVectorizer.cpp
===================================================================
--- lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1128,6 +1128,23 @@
       return;
     }
     case Instruction::Load: {
+      // Check that a vectorized load would load the same memory as a scalar
+      // load.
+      // For example, we don't want to vectorize loads smaller than 8 bits.
+      // Even though we have a packed struct {<i2, i2, i2, i2>} LLVM treats
+      // loading/storing it as an i8 struct. If we vectorize loads/stores from
+      // such a struct we read/write packed bits disagreeing with the
+      // unvectorized version.
+      const DataLayout &DL = F->getParent()->getDataLayout();
+      Type *ScalarTy = VL[0]->getType();
+
+      if (DL.getTypeSizeInBits(ScalarTy) !=
+          DL.getTypeAllocSizeInBits(ScalarTy)) {
+        BS.cancelScheduling(VL);
+        newTreeEntry(VL, false);
+        DEBUG(dbgs() << "SLP: Gathering loads of non-packed type.\n");
+        return;
+      }
       // Check if the loads are consecutive or of we need to swizzle them.
       for (unsigned i = 0, e = VL.size() - 1; i < e; ++i) {
         LoadInst *L = cast<LoadInst>(VL[i]);
@@ -1137,7 +1154,7 @@
           DEBUG(dbgs() << "SLP: Gathering non-simple loads.\n");
           return;
         }
-        const DataLayout &DL = F->getParent()->getDataLayout();
+
         if (!isConsecutiveAccess(VL[i], VL[i + 1], DL)) {
           if (VL.size() == 2 && isConsecutiveAccess(VL[1], VL[0], DL)) {
             ++NumLoadsWantToChangeOrder;


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D13277.36059.patch
Type: text/x-patch
Size: 2739 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20150930/a96315d7/attachment.bin>


More information about the llvm-commits mailing list