[PATCH] D26905: [SLP] Vectorize loads of consecutive memory accesses, accessed in non-consecutive (jumbled) way.

Wed Nov 30 13:13:11 PST 2016

RKSimon added inline comments.

================
Comment at: lib/Analysis/LoopAccessAnalysis.cpp:1029
+    unsigned PtrBitWidth = DL.getPointerSizeInBits(AS);
+    Type *Ty = cast<PointerType>(Ptr->getType())->getElementType();
+    APInt Size(PtrBitWidth, DL.getTypeStoreSize(Ty));
----------------
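You are casting to PointerType here, but the result is only ever used as a plain Type - Ptr->getType()->getPointerElementType() would give you the same Type without the cast.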

================
Comment at: lib/Transforms/Vectorize/SLPVectorizer.cpp:1257
       Type *SrcTy = VL0->getOperand(0)->getType();
       for (unsigned i = 0; i < VL.size(); ++i) {
         Type *Ty = cast<Instruction>(VL[i])->getOperand(0)->getType();
----------------
Cache the loop bound instead of calling VL.size() on every iteration:
for (unsigned i = 0, e = VL.size(); i < e; ++i) {

================
Comment at: lib/Transforms/Vectorize/SLPVectorizer.cpp:1352
       // We don't combine GEPs with complicated (nested) indexing.
       for (unsigned j = 0; j < VL.size(); ++j) {
         if (cast<Instruction>(VL[j])->getNumOperands() != 2) {
----------------
Cache the loop bound instead of calling VL.size() on every iteration:
for (unsigned j = 0, e = VL.size(); j < e; ++j) {

================
Comment at: lib/Transforms/Vectorize/SLPVectorizer.cpp:1364
       Type *Ty0 = cast<Instruction>(VL0)->getOperand(0)->getType();
       for (unsigned j = 0; j < VL.size(); ++j) {
         Type *CurTy = cast<Instruction>(VL[j])->getOperand(0)->getType();
----------------
Cache the loop bound instead of calling VL.size() on every iteration:
for (unsigned j = 0, e = VL.size(); j < e; ++j) {

================
Comment at: lib/Transforms/Vectorize/SLPVectorizer.cpp:1375
       // We don't combine GEPs with non-constant indexes.
       for (unsigned j = 0; j < VL.size(); ++j) {
         auto Op = cast<Instruction>(VL[j])->getOperand(1);
----------------
Cache the loop bound instead of calling VL.size() on every iteration:
for (unsigned j = 0, e = VL.size(); j < e; ++j) {

================
Comment at: lib/Transforms/Vectorize/SLPVectorizer.cpp:2567
+        SmallVector<Constant *, 8> Mask;
+        for (unsigned i = 0; i < VecTy->getNumElements(); ++i) {
+          if (ScalarToTreeEntry.count(VL[i])) {
----------------
Cache the loop bound instead of calling VecTy->getNumElements() on every iteration:
for (unsigned i = 0, e = VecTy->getNumElements(); i < e; ++i) {

================
Comment at: test/Transforms/SLPVectorizer/X86/reduction_loads.ll:20
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = mul <8 x i32> <i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42>, [[TMP1]]
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP3:%.*]] = mul <8 x i32> <i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42>, [[TMP2]]
----------------
The patch now emits an extra shufflevector between the load and the mul in this test. What can be done to avoid this regression?

https://reviews.llvm.org/D26905