[PATCH] D20754: [SLP] Pass in correct alignment when query memory access cost

Guozhi Wei via llvm-commits llvm-commits at lists.llvm.org
Fri May 27 14:47:35 PDT 2016


Carrot created this revision.
Carrot added a subscriber: llvm-commits.
Herald added a subscriber: mzolotukhin.

This patch fixes bug https://llvm.org/bugs/show_bug.cgi?id=27897.

When querying memory access cost, the current SLP vectorizer always passes in an alignment value of 1 (unaligned), so it gets a very high cost for scalar memory accesses and wrongly vectorizes the memory loads in the test case.

It can be fixed by simply passing in the correct alignment.

http://reviews.llvm.org/D20754

Files:
  lib/Transforms/Vectorize/SLPVectorizer.cpp
  test/Transforms/SLPVectorizer/PowerPC/lit.local.cfg
  test/Transforms/SLPVectorizer/PowerPC/pr27897.ll

Index: test/Transforms/SLPVectorizer/PowerPC/pr27897.ll
===================================================================
--- test/Transforms/SLPVectorizer/PowerPC/pr27897.ll
+++ test/Transforms/SLPVectorizer/PowerPC/pr27897.ll
@@ -0,0 +1,29 @@
+; RUN: opt -S -mtriple=powerpc64-linux-gnu -mcpu=pwr8 -mattr=+vsx -slp-vectorizer < %s | FileCheck %s
+
+%struct.A = type { i8*, i8* }
+
+define i64 @foo(%struct.A* nocapture readonly %this) {
+entry:
+  %end.i = getelementptr inbounds %struct.A, %struct.A* %this, i64 0, i32 1
+  %0 = bitcast i8** %end.i to i64*
+  %1 = load i64, i64* %0, align 8
+  %2 = bitcast %struct.A* %this to i64*
+  %3 = load i64, i64* %2, align 8
+  %sub.ptr.sub.i = sub i64 %1, %3
+  %cmp = icmp sgt i64 %sub.ptr.sub.i, 9
+  br i1 %cmp, label %return, label %lor.lhs.false
+
+lor.lhs.false:
+  %4 = inttoptr i64 %3 to i8*
+  %5 = inttoptr i64 %1 to i8*
+  %cmp2 = icmp ugt i8* %5, %4
+  %. = select i1 %cmp2, i64 2, i64 -1
+  ret i64 %.
+
+return:
+  ret i64 2
+}
+
+; CHECK: load i64
+; CHECK-NOT: load <2 x i64>
+; CHECK-NOT: extractelement
Index: test/Transforms/SLPVectorizer/PowerPC/lit.local.cfg
===================================================================
--- test/Transforms/SLPVectorizer/PowerPC/lit.local.cfg
+++ test/Transforms/SLPVectorizer/PowerPC/lit.local.cfg
@@ -0,0 +1,2 @@
+if not 'PowerPC' in config.root.targets:
+    config.unsupported = True
Index: lib/Transforms/Vectorize/SLPVectorizer.cpp
===================================================================
--- lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1726,16 +1726,20 @@
     }
     case Instruction::Load: {
       // Cost of wide load - cost of scalar loads.
+      unsigned alignment = dyn_cast<LoadInst>(VL0)->getAlignment();
       int ScalarLdCost = VecTy->getNumElements() *
-      TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0);
-      int VecLdCost = TTI->getMemoryOpCost(Instruction::Load, VecTy, 1, 0);
+            TTI->getMemoryOpCost(Instruction::Load, ScalarTy, alignment, 0);
+      int VecLdCost = TTI->getMemoryOpCost(Instruction::Load,
+                                           VecTy, alignment, 0);
       return VecLdCost - ScalarLdCost;
     }
     case Instruction::Store: {
       // We know that we can merge the stores. Calculate the cost.
+      unsigned alignment = dyn_cast<StoreInst>(VL0)->getAlignment();
       int ScalarStCost = VecTy->getNumElements() *
-      TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1, 0);
-      int VecStCost = TTI->getMemoryOpCost(Instruction::Store, VecTy, 1, 0);
+            TTI->getMemoryOpCost(Instruction::Store, ScalarTy, alignment, 0);
+      int VecStCost = TTI->getMemoryOpCost(Instruction::Store,
+                                           VecTy, alignment, 0);
       return VecStCost - ScalarStCost;
     }
     case Instruction::Call: {


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D20754.58843.patch
Type: text/x-patch
Size: 2892 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20160527/a3729900/attachment.bin>


More information about the llvm-commits mailing list