[PATCH] D20601: [x86] make pointer extractions from a vector more expensive (PR27826)
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Wed May 25 10:34:13 PDT 2016
This revision was automatically updated to reflect the committed changes.
Closed by commit rL270729: [x86] avoid code explosion from LoopVectorizer for gather loop (PR27826) (authored by spatel).
Changed prior to commit:
http://reviews.llvm.org/D20601?vs=58323&id=58454#toc
Repository:
rL LLVM
http://reviews.llvm.org/D20601
Files:
llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp
llvm/trunk/test/Transforms/LoopVectorize/X86/cost-model.ll
Index: llvm/trunk/test/Transforms/LoopVectorize/X86/cost-model.ll
===================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/X86/cost-model.ll
+++ llvm/trunk/test/Transforms/LoopVectorize/X86/cost-model.ll
@@ -39,3 +39,44 @@
for.end: ; preds = %for.body
ret void
}
+
+; This function uses a stride that is generally too big to benefit from vectorization without
+; really good support for a gather load. We were not computing an accurate cost for the
+; vectorization and subsequent scalarization of the pointer induction variables.
+
+define float @PR27826(float* nocapture readonly %a, float* nocapture readonly %b, i32 %n) {
+; CHECK-LABEL: @PR27826(
+; CHECK-NOT: <4 x float>
+; CHECK-NOT: <8 x float>
+; CHECK: ret float %s.0.lcssa
+
+entry:
+ %cmp = icmp sgt i32 %n, 0
+ br i1 %cmp, label %preheader, label %for.end
+
+preheader:
+ %t0 = sext i32 %n to i64
+ br label %for
+
+for:
+ %indvars.iv = phi i64 [ 0, %preheader ], [ %indvars.iv.next, %for ]
+ %s.02 = phi float [ 0.0, %preheader ], [ %add4, %for ]
+ %arrayidx = getelementptr inbounds float, float* %a, i64 %indvars.iv
+ %t1 = load float, float* %arrayidx, align 4
+ %arrayidx3 = getelementptr inbounds float, float* %b, i64 %indvars.iv
+ %t2 = load float, float* %arrayidx3, align 4
+ %add = fadd fast float %t1, %s.02
+ %add4 = fadd fast float %add, %t2
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 8
+ %cmp1 = icmp slt i64 %indvars.iv.next, %t0
+ br i1 %cmp1, label %for, label %loopexit
+
+loopexit:
+ %add4.lcssa = phi float [ %add4, %for ]
+ br label %for.end
+
+for.end:
+ %s.0.lcssa = phi float [ 0.0, %entry ], [ %add4.lcssa, %loopexit ]
+ ret float %s.0.lcssa
+}
+
Index: llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp
+++ llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -963,6 +963,8 @@
int X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
assert(Val->isVectorTy() && "This must be a vector type");
+ Type *ScalarType = Val->getScalarType();
+
if (Index != -1U) {
// Legalize the type.
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Val);
@@ -976,11 +978,17 @@
Index = Index % Width;
// Floating point scalars are already located in index #0.
- if (Val->getScalarType()->isFloatingPointTy() && Index == 0)
+ if (ScalarType->isFloatingPointTy() && Index == 0)
return 0;
}
- return BaseT::getVectorInstrCost(Opcode, Val, Index);
+ // Add to the base cost if we know that the extracted element of a vector is
+ // destined to be moved to and used in the integer register file.
+ int RegisterFileMoveCost = 0;
+ if (Opcode == Instruction::ExtractElement && ScalarType->isPointerTy())
+ RegisterFileMoveCost = 1;
+
+ return BaseT::getVectorInstrCost(Opcode, Val, Index) + RegisterFileMoveCost;
}
int X86TTIImpl::getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D20601.58454.patch
Type: text/x-patch
Size: 3126 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20160525/3d3263a0/attachment.bin>
More information about the llvm-commits mailing list