[llvm] r311285 - Changed basic cost of store operation on X86
Elena Demikhovsky via llvm-commits
llvm-commits at lists.llvm.org
Sun Aug 20 05:34:29 PDT 2017
Author: delena
Date: Sun Aug 20 05:34:29 2017
New Revision: 311285
URL: http://llvm.org/viewvc/llvm-project?rev=311285&view=rev
Log:
Changed basic cost of store operation on X86
A store operation takes 2 uops on X86 processors. The exact cost calculation affects several optimization passes, including loop unrolling.
This change compensates for the performance degradation caused by https://reviews.llvm.org/D34458 and shows improvements on some benchmarks.
Differential Revision: https://reviews.llvm.org/D35888
Added:
llvm/trunk/test/Transforms/LoopUnroll/X86/store_cost.ll
Modified:
llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp
llvm/trunk/lib/Target/X86/X86TargetTransformInfo.h
llvm/trunk/test/Transforms/LoopVectorize/X86/metadata-enable.ll
Modified: llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp?rev=311285&r1=311284&r2=311285&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp Sun Aug 20 05:34:29 2017
@@ -2113,6 +2113,21 @@ int X86TTIImpl::getIntImmCost(Intrinsic:
return X86TTIImpl::getIntImmCost(Imm, Ty);
}
+unsigned X86TTIImpl::getUserCost(const User *U,
+ ArrayRef<const Value *> Operands) {
+ if (isa<StoreInst>(U)) {
+ Value *Ptr = U->getOperand(1);
+ // Store instruction with index and scale costs 2 Uops.
+ // Check the preceding GEP to identify non-const indices.
+ if (auto GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
+ if (!all_of(GEP->indices(), [](Value *V) { return isa<Constant>(V); }))
+ return TTI::TCC_Basic * 2;
+ }
+ return TTI::TCC_Basic;
+ }
+ return BaseT::getUserCost(U, Operands);
+}
+
// Return an average cost of Gather / Scatter instruction, maybe improved later
int X86TTIImpl::getGSVectorCost(unsigned Opcode, Type *SrcVTy, Value *Ptr,
unsigned Alignment, unsigned AddressSpace) {
Modified: llvm/trunk/lib/Target/X86/X86TargetTransformInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86TargetTransformInfo.h?rev=311285&r1=311284&r2=311285&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86TargetTransformInfo.h (original)
+++ llvm/trunk/lib/Target/X86/X86TargetTransformInfo.h Sun Aug 20 05:34:29 2017
@@ -102,6 +102,8 @@ public:
int getIntImmCost(const APInt &Imm, Type *Ty);
+ unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands);
+
int getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty);
int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
Type *Ty);
Added: llvm/trunk/test/Transforms/LoopUnroll/X86/store_cost.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/X86/store_cost.ll?rev=311285&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/X86/store_cost.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/X86/store_cost.ll Sun Aug 20 05:34:29 2017
@@ -0,0 +1,104 @@
+; REQUIRES: asserts
+; RUN: opt -mcpu=core-avx2 -loop-unroll --debug-only=loop-unroll -S -unroll-allow-partial < %s 2>&1 | FileCheck %s
+
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK: Loop Unroll: F[foo] Loop %loop.2.header
+; CHECK: Loop Size = 27
+; CHECK-NOT: UNROLLING loop %loop.2.header
+; CHECK: Loop Unroll: F[foo] Loop %loop.header
+; CHECK: Loop Size = 25
+; CHECK: UNROLLING loop %loop.header by 2
+
+define void @foo(i32 * %out) {
+entry:
+ %0 = alloca [1024 x i32]
+ %x0 = alloca [1024 x i32]
+ %x01 = alloca [1024 x i32]
+ %x02 = alloca [1024 x i32]
+ %x03 = alloca [1024 x i32]
+ %x04 = alloca [1024 x i32]
+ %x05 = alloca [1024 x i32]
+ %x06 = alloca [1024 x i32]
+ br label %loop.header
+
+loop.header:
+ %counter = phi i32 [0, %entry], [%inc, %loop.inc]
+ br label %loop.body
+
+loop.body:
+ %ptr = getelementptr [1024 x i32], [1024 x i32]* %0, i32 0, i32 %counter
+ store i32 %counter, i32* %ptr
+ %val = add i32 %counter, 5
+ %xptr = getelementptr [1024 x i32], [1024 x i32]* %x0, i32 0, i32 %counter
+ store i32 %val, i32* %xptr
+ %val1 = add i32 %counter, 6
+ %xptr1 = getelementptr [1024 x i32], [1024 x i32]* %x01, i32 0, i32 %counter
+ store i32 %val1, i32* %xptr1
+ %val2 = add i32 %counter, 7
+ %xptr2 = getelementptr [1024 x i32], [1024 x i32]* %x02, i32 0, i32 %counter
+ store i32 %val2, i32* %xptr2
+ %val3 = add i32 %counter, 8
+ %xptr3 = getelementptr [1024 x i32], [1024 x i32]* %x03, i32 0, i32 %counter
+ store i32 %val3, i32* %xptr3
+ %val4 = add i32 %counter, 9
+ %xptr4 = getelementptr [1024 x i32], [1024 x i32]* %x04, i32 0, i32 %counter
+ store i32 %val4, i32* %xptr4
+ %val5 = add i32 %counter, 10
+ %xptr5 = getelementptr [1024 x i32], [1024 x i32]* %x05, i32 0, i32 %counter
+ store i32 %val5, i32* %xptr5
+ br label %loop.inc
+
+loop.inc:
+ %inc = add i32 %counter, 2
+ %1 = icmp sge i32 %inc, 1023
+ br i1 %1, label %exit.0, label %loop.header
+
+exit.0:
+ %2 = getelementptr [1024 x i32], [1024 x i32]* %0, i32 0, i32 5
+ %3 = load i32, i32* %2
+ store i32 %3, i32 * %out
+ br label %loop.2.header
+
+
+loop.2.header:
+ %counter.2 = phi i32 [0, %exit.0], [%inc.2, %loop.2.inc]
+ br label %loop.2.body
+
+loop.2.body:
+ %ptr.2 = getelementptr [1024 x i32], [1024 x i32]* %0, i32 0, i32 %counter.2
+ store i32 %counter.2, i32* %ptr.2
+ %val.2 = add i32 %counter.2, 5
+ %xptr.2 = getelementptr [1024 x i32], [1024 x i32]* %x0, i32 0, i32 %counter.2
+ store i32 %val.2, i32* %xptr.2
+ %val1.2 = add i32 %counter.2, 6
+ %xptr1.2 = getelementptr [1024 x i32], [1024 x i32]* %x01, i32 0, i32 %counter.2
+ store i32 %val1, i32* %xptr1.2
+ %val2.2 = add i32 %counter.2, 7
+ %xptr2.2 = getelementptr [1024 x i32], [1024 x i32]* %x02, i32 0, i32 %counter.2
+ store i32 %val2, i32* %xptr2.2
+ %val3.2 = add i32 %counter.2, 8
+ %xptr3.2 = getelementptr [1024 x i32], [1024 x i32]* %x03, i32 0, i32 %counter.2
+ store i32 %val3.2, i32* %xptr3.2
+ %val4.2 = add i32 %counter.2, 9
+ %xptr4.2 = getelementptr [1024 x i32], [1024 x i32]* %x04, i32 0, i32 %counter.2
+ store i32 %val4.2, i32* %xptr4.2
+ %val5.2 = add i32 %counter.2, 10
+ %xptr5.2 = getelementptr [1024 x i32], [1024 x i32]* %x05, i32 0, i32 %counter.2
+ store i32 %val5.2, i32* %xptr5.2
+ %xptr6.2 = getelementptr [1024 x i32], [1024 x i32]* %x06, i32 0, i32 %counter.2
+ store i32 %val5.2, i32* %xptr6.2
+ br label %loop.2.inc
+
+loop.2.inc:
+ %inc.2 = add i32 %counter.2, 2
+ %4 = icmp sge i32 %inc.2, 1023
+ br i1 %4, label %exit.2, label %loop.2.header
+
+exit.2:
+ %x2 = getelementptr [1024 x i32], [1024 x i32]* %0, i32 0, i32 6
+ %x3 = load i32, i32* %x2
+ %out2 = getelementptr i32, i32 * %out, i32 1
+ store i32 %3, i32 * %out2
+ ret void
+}
Modified: llvm/trunk/test/Transforms/LoopVectorize/X86/metadata-enable.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/X86/metadata-enable.ll?rev=311285&r1=311284&r2=311285&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/X86/metadata-enable.ll (original)
+++ llvm/trunk/test/Transforms/LoopVectorize/X86/metadata-enable.ll Sun Aug 20 05:34:29 2017
@@ -172,7 +172,7 @@ for.body:
%arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
store i32 %add, i32* %arrayidx2, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
- %exitcond = icmp eq i64 %indvars.iv.next, 64
+ %exitcond = icmp eq i64 %indvars.iv.next, 48
br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !2
for.end: ; preds = %for.body
More information about the llvm-commits
mailing list