[PATCH] D26905: [SLP] Vectorize loads of consecutive memory accesses, accessed in non-consecutive (jumbled) way.
Simon Pilgrim via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 30 13:13:11 PST 2016
RKSimon added inline comments.
================
Comment at: lib/Analysis/LoopAccessAnalysis.cpp:1029
+ unsigned PtrBitWidth = DL.getPointerSizeInBits(AS);
+ Type *Ty = cast<PointerType>(Ptr->getType())->getElementType();
+ APInt Size(PtrBitWidth, DL.getTypeStoreSize(Ty));
----------------
You are casting to PointerType and then only using it as a Type.
================
Comment at: lib/Transforms/Vectorize/SLPVectorizer.cpp:1257
Type *SrcTy = VL0->getOperand(0)->getType();
for (unsigned i = 0; i < VL.size(); ++i) {
Type *Ty = cast<Instruction>(VL[i])->getOperand(0)->getType();
----------------
Please cache the loop bound instead of re-evaluating VL.size() on every iteration:
for (unsigned i = 0, e = VL.size(); i < e; ++i) {
================
Comment at: lib/Transforms/Vectorize/SLPVectorizer.cpp:1352
// We don't combine GEPs with complicated (nested) indexing.
for (unsigned j = 0; j < VL.size(); ++j) {
if (cast<Instruction>(VL[j])->getNumOperands() != 2) {
----------------
Please cache the loop bound instead of re-evaluating VL.size() on every iteration:
for (unsigned j = 0, e = VL.size(); j < e; ++j) {
================
Comment at: lib/Transforms/Vectorize/SLPVectorizer.cpp:1364
Type *Ty0 = cast<Instruction>(VL0)->getOperand(0)->getType();
for (unsigned j = 0; j < VL.size(); ++j) {
Type *CurTy = cast<Instruction>(VL[j])->getOperand(0)->getType();
----------------
Please cache the loop bound instead of re-evaluating VL.size() on every iteration:
for (unsigned j = 0, e = VL.size(); j < e; ++j) {
================
Comment at: lib/Transforms/Vectorize/SLPVectorizer.cpp:1375
// We don't combine GEPs with non-constant indexes.
for (unsigned j = 0; j < VL.size(); ++j) {
auto Op = cast<Instruction>(VL[j])->getOperand(1);
----------------
Please cache the loop bound instead of re-evaluating VL.size() on every iteration:
for (unsigned j = 0, e = VL.size(); j < e; ++j) {
================
Comment at: lib/Transforms/Vectorize/SLPVectorizer.cpp:2567
+ SmallVector<Constant *, 8> Mask;
+ for (unsigned i = 0; i < VecTy->getNumElements(); ++i) {
+ if (ScalarToTreeEntry.count(VL[i])) {
----------------
Please cache the loop bound instead of re-evaluating VecTy->getNumElements() on every iteration:
for (unsigned i = 0, e = VecTy->getNumElements(); i < e; ++i) {
================
Comment at: test/Transforms/SLPVectorizer/X86/reduction_loads.ll:20
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4
-; CHECK-NEXT: [[TMP2:%.*]] = mul <8 x i32> <i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42>, [[TMP1]]
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[TMP3:%.*]] = mul <8 x i32> <i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42>, [[TMP2]]
----------------
The patch now emits an extra shufflevector ahead of the mul in this test. What can be done to avoid this regression?
https://reviews.llvm.org/D26905
More information about the llvm-commits mailing list