[PATCH] Adjust the cost of vectorized SHL/SRL/SRA
Wei Mi
wmi at google.com
Tue May 26 14:20:22 PDT 2015
I updated the patch.
The cost of a vectorized shift with a uniform scalar shift amount is adjusted. lowerShift already contains the logic to lower such shifts to ISD::VSHL/VSRL/VSRA properly.
It works for the motivating case in PR23582. The LLVM unit tests pass.
REPOSITORY
rL LLVM
http://reviews.llvm.org/D9923
Files:
lib/Target/X86/X86TargetTransformInfo.cpp
lib/Transforms/Vectorize/LoopVectorize.cpp
test/Transforms/LoopVectorize/uniform-shift.ll
Index: lib/Target/X86/X86TargetTransformInfo.cpp
===================================================================
--- lib/Target/X86/X86TargetTransformInfo.cpp
+++ lib/Target/X86/X86TargetTransformInfo.cpp
@@ -223,6 +223,25 @@
return LT.first * SSE2UniformConstCostTable[Idx].Cost;
}
+ static const CostTblEntry<MVT::SimpleValueType> SSE2UniformCostTable[] = {
+ { ISD::SHL, MVT::v8i16, 1 }, // psllw.
+ { ISD::SHL, MVT::v4i32, 1 }, // pslld
+ { ISD::SHL, MVT::v2i64, 1 }, // psllq.
+
+ { ISD::SRL, MVT::v8i16, 1 }, // psrlw.
+ { ISD::SRL, MVT::v4i32, 1 }, // psrld.
+ { ISD::SRL, MVT::v2i64, 1 }, // psrlq.
+
+ { ISD::SRA, MVT::v8i16, 1 }, // psraw.
+ { ISD::SRA, MVT::v4i32, 1 }, // psrad.
+ };
+
+ if (Op2Info == TargetTransformInfo::OK_UniformValue && ST->hasSSE2()) {
+ int Idx = CostTableLookup(SSE2UniformCostTable, ISD, LT.second);
+ if (Idx != -1)
+ return LT.first * SSE2UniformCostTable[Idx].Cost;
+ }
+
if (ISD == ISD::SHL &&
Op2Info == TargetTransformInfo::OK_NonUniformConstantValue) {
EVT VT = LT.second;
Index: lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- lib/Transforms/Vectorize/LoopVectorize.cpp
+++ lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4535,6 +4535,10 @@
Op2VP = TargetTransformInfo::OP_PowerOf2;
Op2VK = TargetTransformInfo::OK_UniformConstantValue;
}
+ } else if (SE->isSCEVable(Op2->getType())) {
+ const SCEV *Op2SCEV = SE->getSCEV(Op2);
+ if (SE->isLoopInvariant(Op2SCEV, TheLoop))
+ Op2VK = TargetTransformInfo::OK_UniformValue;
}
return TTI.getArithmeticInstrCost(I->getOpcode(), VectorTy, Op1VK, Op2VK,
Index: test/Transforms/LoopVectorize/uniform-shift.ll
===================================================================
--- test/Transforms/LoopVectorize/uniform-shift.ll
+++ test/Transforms/LoopVectorize/uniform-shift.ll
@@ -0,0 +1,39 @@
+; PR23582
+; RUN: opt < %s -basicaa -loop-vectorize -force-vector-interleave=1 -dce -instcombine -simplifycfg -S | llc | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@k = common global i32 0, align 4
+@A1 = common global [1024 x i32] zeroinitializer, align 16
+@B1 = common global [1024 x i32] zeroinitializer, align 16
+@C1 = common global [1024 x i32] zeroinitializer, align 16
+
+; This test checks that loop vectorizer will generate uniform vshift.
+; CHECK-LABEL: kernel1:
+; CHECK: [[LOOP:^[a-zA-Z0-9_.]+]]:
+; CHECK: movdqa {{.*}}, [[REG:%xmm[0-7]]]
+; CHECK-NEXT: psrad {{%xmm[0-7]}}, [[REG]]
+; CHECK-NEXT: movdqa [[REG]], {{.*}}
+; CHECK-NEXT: addq $16, {{%[a-z0-9]+}}
+; CHECK-NEXT: jne [[LOOP]]
+
+define void @kernel1() {
+entry:
+ %tmp = load i32, i32* @k, align 4
+ br label %for.body
+
+for.cond.cleanup: ; preds = %for.body
+ ret void
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @B1, i64 0, i64 %indvars.iv
+ %tmp1 = load i32, i32* %arrayidx, align 4
+ %shr = ashr i32 %tmp1, %tmp
+ %arrayidx2 = getelementptr inbounds [1024 x i32], [1024 x i32]* @A1, i64 0, i64 %indvars.iv
+ store i32 %shr, i32* %arrayidx2, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 1024
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
EMAIL PREFERENCES
http://reviews.llvm.org/settings/panel/emailpreferences/
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D9923.26536.patch
Type: text/x-patch
Size: 3591 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20150526/bfbd3ee3/attachment.bin>
More information about the llvm-commits
mailing list