[PATCH] Adjust the cost of vectorized SHL/SRL/SRA

Wei Mi wmi at google.com
Tue May 26 14:20:22 PDT 2015


I updated the patch.

The cost of a vectorized shift with a uniform scalar shift amount is adjusted. lowerShift already contains the logic to lower such shifts to ISD::VSHL/VSRL/VSRA properly.

It works for the motivating case in PR23582. The LLVM unit tests pass.


REPOSITORY
  rL LLVM

http://reviews.llvm.org/D9923

Files:
  lib/Target/X86/X86TargetTransformInfo.cpp
  lib/Transforms/Vectorize/LoopVectorize.cpp
  test/Transforms/LoopVectorize/uniform-shift.ll

Index: lib/Target/X86/X86TargetTransformInfo.cpp
===================================================================
--- lib/Target/X86/X86TargetTransformInfo.cpp
+++ lib/Target/X86/X86TargetTransformInfo.cpp
@@ -223,6 +223,25 @@
       return LT.first * SSE2UniformConstCostTable[Idx].Cost;
   }
 
+  static const CostTblEntry<MVT::SimpleValueType> SSE2UniformCostTable[] = {
+    { ISD::SHL,  MVT::v8i16,  1 }, // psllw.
+    { ISD::SHL,  MVT::v4i32,  1 }, // pslld
+    { ISD::SHL,  MVT::v2i64,  1 }, // psllq.
+
+    { ISD::SRL,  MVT::v8i16,  1 }, // psrlw.
+    { ISD::SRL,  MVT::v4i32,  1 }, // psrld.
+    { ISD::SRL,  MVT::v2i64,  1 }, // psrlq.
+
+    { ISD::SRA,  MVT::v8i16,  1 }, // psraw.
+    { ISD::SRA,  MVT::v4i32,  1 }, // psrad.
+  };
+
+  if (Op2Info == TargetTransformInfo::OK_UniformValue && ST->hasSSE2()) {
+    int Idx = CostTableLookup(SSE2UniformCostTable, ISD, LT.second);
+    if (Idx != -1)
+      return LT.first * SSE2UniformCostTable[Idx].Cost;
+  }
+
   if (ISD == ISD::SHL &&
       Op2Info == TargetTransformInfo::OK_NonUniformConstantValue) {
     EVT VT = LT.second;
Index: lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- lib/Transforms/Vectorize/LoopVectorize.cpp
+++ lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4535,6 +4535,10 @@
           Op2VP = TargetTransformInfo::OP_PowerOf2;
         Op2VK = TargetTransformInfo::OK_UniformConstantValue;
       }
+    } else if (SE->isSCEVable(Op2->getType())) {
+      const SCEV *Op2SCEV = SE->getSCEV(Op2);
+      if (SE->isLoopInvariant(Op2SCEV, TheLoop))
+        Op2VK = TargetTransformInfo::OK_UniformValue;
     }
 
     return TTI.getArithmeticInstrCost(I->getOpcode(), VectorTy, Op1VK, Op2VK,
Index: test/Transforms/LoopVectorize/uniform-shift.ll
===================================================================
--- test/Transforms/LoopVectorize/uniform-shift.ll
+++ test/Transforms/LoopVectorize/uniform-shift.ll
@@ -0,0 +1,39 @@
+; PR23582
+; RUN: opt < %s -basicaa -loop-vectorize -force-vector-interleave=1 -dce -instcombine -simplifycfg -S | llc | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@k = common global i32 0, align 4
+@A1 = common global [1024 x i32] zeroinitializer, align 16
+@B1 = common global [1024 x i32] zeroinitializer, align 16
+@C1 = common global [1024 x i32] zeroinitializer, align 16
+
+; This test checks that the loop vectorizer generates a vector shift with a uniform shift amount.
+; CHECK-LABEL: kernel1:
+; CHECK: [[LOOP:^[a-zA-Z0-9_.]+]]:
+; CHECK: movdqa {{.*}}, [[REG:%xmm[0-7]]]
+; CHECK-NEXT: psrad {{%xmm[0-7]}}, [[REG]]
+; CHECK-NEXT: movdqa [[REG]], {{.*}}
+; CHECK-NEXT: addq $16, {{%[a-z0-9]+}}
+; CHECK-NEXT: jne [[LOOP]]
+
+define void @kernel1() {
+entry:
+  %tmp = load i32, i32* @k, align 4
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body
+  ret void
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @B1, i64 0, i64 %indvars.iv
+  %tmp1 = load i32, i32* %arrayidx, align 4
+  %shr = ashr i32 %tmp1, %tmp
+  %arrayidx2 = getelementptr inbounds [1024 x i32], [1024 x i32]* @A1, i64 0, i64 %indvars.iv
+  store i32 %shr, i32* %arrayidx2, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1024
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}

EMAIL PREFERENCES
  http://reviews.llvm.org/settings/panel/emailpreferences/
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D9923.26536.patch
Type: text/x-patch
Size: 3591 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20150526/bfbd3ee3/attachment.bin>


More information about the llvm-commits mailing list