[llvm] r179975 - SLPVectorize: Add support for vectorization of casts.

Nadav Rotem nrotem at apple.com
Sun Apr 21 01:06:00 PDT 2013


Author: nadav
Date: Sun Apr 21 03:05:59 2013
New Revision: 179975

URL: http://llvm.org/viewvc/llvm-project?rev=179975&view=rev
Log:
SLPVectorize: Add support for vectorization of casts.


Added:
    llvm/trunk/test/Transforms/SLPVectorizer/X86/cast.ll
Modified:
    llvm/trunk/lib/Transforms/Vectorize/VecUtils.cpp

Modified: llvm/trunk/lib/Transforms/Vectorize/VecUtils.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/VecUtils.cpp?rev=179975&r1=179974&r2=179975&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/VecUtils.cpp (original)
+++ llvm/trunk/lib/Transforms/Vectorize/VecUtils.cpp Sun Apr 21 03:05:59 2013
@@ -328,6 +328,18 @@ void BoUpSLP::getTreeUses_rec(ArrayRef<V
   }
 
   switch (Opcode) {
+    case Instruction::ZExt:
+    case Instruction::SExt:
+    case Instruction::FPToUI:
+    case Instruction::FPToSI:
+    case Instruction::FPExt:
+    case Instruction::PtrToInt:
+    case Instruction::IntToPtr:
+    case Instruction::SIToFP:
+    case Instruction::UIToFP:
+    case Instruction::Trunc:
+    case Instruction::FPTrunc:
+    case Instruction::BitCast:
     case Instruction::Add:
     case Instruction::FAdd:
     case Instruction::Sub:
@@ -445,6 +457,41 @@ int BoUpSLP::getTreeCost_rec(ArrayRef<Va
   }
 
   switch (Opcode) {
+  case Instruction::ZExt:
+  case Instruction::SExt:
+  case Instruction::FPToUI:
+  case Instruction::FPToSI:
+  case Instruction::FPExt:
+  case Instruction::PtrToInt:
+  case Instruction::IntToPtr:
+  case Instruction::SIToFP:
+  case Instruction::UIToFP:
+  case Instruction::Trunc:
+  case Instruction::FPTrunc:
+  case Instruction::BitCast: {
+    int Cost = 0;
+    ValueList Operands;
+    Type *SrcTy = VL0->getOperand(0)->getType();
+    // Prepare the operand vector.
+    for (unsigned j = 0; j < VL.size(); ++j) {
+      Operands.push_back(cast<Instruction>(VL[j])->getOperand(0));
+      // Check that the casted type is the same for all users.
+      if (cast<Instruction>(VL[j])->getOperand(0)->getType() != SrcTy)
+        return getScalarizationCost(VecTy);
+    }
+
+    Cost += getTreeCost_rec(Operands, Depth+1);
+    if (Cost >= max_cost) return max_cost;
+
+    // Calculate the cost of this instruction.
+    int ScalarCost = VL.size() * TTI->getCastInstrCost(VL0->getOpcode(),
+                                                       VL0->getType(), SrcTy);
+
+    VectorType *SrcVecTy = VectorType::get(SrcTy, VL.size());
+    int VecCost = TTI->getCastInstrCost(VL0->getOpcode(), VecTy, SrcVecTy);
+    Cost += (VecCost - ScalarCost);
+    return Cost;
+  }
   case Instruction::Add:
   case Instruction::FAdd:
   case Instruction::Sub:
@@ -583,6 +630,28 @@ Value *BoUpSLP::vectorizeTree_rec(ArrayR
   }
 
   switch (Opcode) {
+  case Instruction::ZExt:
+  case Instruction::SExt:
+  case Instruction::FPToUI:
+  case Instruction::FPToSI:
+  case Instruction::FPExt:
+  case Instruction::PtrToInt:
+  case Instruction::IntToPtr:
+  case Instruction::SIToFP:
+  case Instruction::UIToFP:
+  case Instruction::Trunc:
+  case Instruction::FPTrunc:
+  case Instruction::BitCast: {
+    ValueList INVL;
+    for (int i = 0; i < VF; ++i)
+      INVL.push_back(cast<Instruction>(VL[i])->getOperand(0));
+    Value *InVec = vectorizeTree_rec(INVL, VF);
+    IRBuilder<> Builder(GetLastInstr(VL, VF));
+    CastInst *CI = dyn_cast<CastInst>(VL0);
+    Value *V = Builder.CreateCast(CI->getOpcode(), InVec, VecTy);
+    VectorizedValues[VL0] = V;
+    return V;
+  }
   case Instruction::Add:
   case Instruction::FAdd:
   case Instruction::Sub:

Added: llvm/trunk/test/Transforms/SLPVectorizer/X86/cast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/cast.ll?rev=179975&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SLPVectorizer/X86/cast.ll (added)
+++ llvm/trunk/test/Transforms/SLPVectorizer/X86/cast.ll Sun Apr 21 03:05:59 2013
@@ -0,0 +1,38 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+; int foo(int * restrict A, char * restrict B) {
+;     A[0] = B[0];
+;     A[1] = B[1];
+;     A[2] = B[2];
+;     A[3] = B[3];
+; }
+;CHECK: @foo
+;CHECK: load <4 x i8>
+;CHECK: sext
+;CHECK: store <4 x i32>
+define i32 @foo(i32* noalias nocapture %A, i8* noalias nocapture %B) { ; four scalar copies A[i] = (int)B[i], i = 0..3; noalias mirrors `restrict` in the C signature
+entry:
+  %0 = load i8* %B, align 1                             ; lane 0: read B[0] (no GEP needed for index 0)
+  %conv = sext i8 %0 to i32                             ; sign-extend i8 -> i32 (the cast under test)
+  store i32 %conv, i32* %A, align 4                     ; write A[0]
+  %arrayidx2 = getelementptr inbounds i8* %B, i64 1     ; lane 1: &B[1]
+  %1 = load i8* %arrayidx2, align 1                     ; read B[1]
+  %conv3 = sext i8 %1 to i32                            ; same cast opcode and types as lane 0
+  %arrayidx4 = getelementptr inbounds i32* %A, i64 1    ; &A[1]
+  store i32 %conv3, i32* %arrayidx4, align 4            ; write A[1]
+  %arrayidx5 = getelementptr inbounds i8* %B, i64 2     ; lane 2: &B[2]
+  %2 = load i8* %arrayidx5, align 1                     ; read B[2]
+  %conv6 = sext i8 %2 to i32                            ; same cast opcode and types as lane 0
+  %arrayidx7 = getelementptr inbounds i32* %A, i64 2    ; &A[2]
+  store i32 %conv6, i32* %arrayidx7, align 4            ; write A[2]
+  %arrayidx8 = getelementptr inbounds i8* %B, i64 3     ; lane 3: &B[3]
+  %3 = load i8* %arrayidx8, align 1                     ; read B[3]
+  %conv9 = sext i8 %3 to i32                            ; same cast opcode and types as lane 0
+  %arrayidx10 = getelementptr inbounds i32* %A, i64 3   ; &A[3]
+  store i32 %conv9, i32* %arrayidx10, align 4           ; write A[3]; the four stores hit consecutive i32 slots of %A
+  ret i32 undef                                         ; presumably undef because the C source shown above has no return statement
+}
+





More information about the llvm-commits mailing list