[llvm] r205855 - SLPVectorizer: Only vectorize intrinsics whose operands are widened equally
Arnold Schwaighofer
aschwaighofer at apple.com
Wed Apr 9 07:20:47 PDT 2014
Author: arnolds
Date: Wed Apr 9 09:20:47 2014
New Revision: 205855
URL: http://llvm.org/viewvc/llvm-project?rev=205855&view=rev
Log:
SLPVectorizer: Only vectorize intrinsics whose operands are widened equally
The vectorizer only knows how to vectorize intrinsics by widening all operands by
the same factor.
Patch by Tyler Nowicki!
Added:
llvm/trunk/include/llvm/Transforms/Utils/VectorUtils.h
llvm/trunk/test/Transforms/SLPVectorizer/X86/non-vectorizable-intrinsic.ll
Modified:
llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp
Added: llvm/trunk/include/llvm/Transforms/Utils/VectorUtils.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Transforms/Utils/VectorUtils.h?rev=205855&view=auto
==============================================================================
--- llvm/trunk/include/llvm/Transforms/Utils/VectorUtils.h (added)
+++ llvm/trunk/include/llvm/Transforms/Utils/VectorUtils.h Wed Apr 9 09:20:47 2014
@@ -0,0 +1,54 @@
+//===- llvm/Transforms/Utils/VectorUtils.h - Vector utilities -*- C++ -*-=====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines some vectorizer utilities.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_UTILS_VECTORUTILS_H
+#define LLVM_TRANSFORMS_UTILS_VECTORUTILS_H
+
+namespace llvm {
+
+/// \brief Identify if the intrinsic is trivially vectorizable.
+///
+/// This method returns true if the intrinsic's argument types are all
+/// scalars for the scalar form of the intrinsic and all vectors for
+/// the vector form of the intrinsic.
+static inline bool isTriviallyVectorizable(Intrinsic::ID ID) {
+ switch (ID) {
+ case Intrinsic::sqrt:
+ case Intrinsic::sin:
+ case Intrinsic::cos:
+ case Intrinsic::exp:
+ case Intrinsic::exp2:
+ case Intrinsic::log:
+ case Intrinsic::log10:
+ case Intrinsic::log2:
+ case Intrinsic::fabs:
+ case Intrinsic::copysign:
+ case Intrinsic::floor:
+ case Intrinsic::ceil:
+ case Intrinsic::trunc:
+ case Intrinsic::rint:
+ case Intrinsic::nearbyint:
+ case Intrinsic::round:
+ case Intrinsic::ctpop:
+ case Intrinsic::pow:
+ case Intrinsic::fma:
+ case Intrinsic::fmuladd:
+ return true;
+ default:
+ return false;
+ }
+}
+
+} // llvm namespace
+
+#endif
Modified: llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp?rev=205855&r1=205854&r2=205855&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp (original)
+++ llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp Wed Apr 9 09:20:47 2014
@@ -91,6 +91,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/VectorUtils.h"
#include <algorithm>
#include <map>
@@ -2266,32 +2267,12 @@ static Intrinsic::ID
getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI) {
// If we have an intrinsic call, check if it is trivially vectorizable.
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) {
- switch (II->getIntrinsicID()) {
- case Intrinsic::sqrt:
- case Intrinsic::sin:
- case Intrinsic::cos:
- case Intrinsic::exp:
- case Intrinsic::exp2:
- case Intrinsic::log:
- case Intrinsic::log10:
- case Intrinsic::log2:
- case Intrinsic::fabs:
- case Intrinsic::copysign:
- case Intrinsic::floor:
- case Intrinsic::ceil:
- case Intrinsic::trunc:
- case Intrinsic::rint:
- case Intrinsic::nearbyint:
- case Intrinsic::round:
- case Intrinsic::pow:
- case Intrinsic::fma:
- case Intrinsic::fmuladd:
- case Intrinsic::lifetime_start:
- case Intrinsic::lifetime_end:
- return II->getIntrinsicID();
- default:
+ Intrinsic::ID ID = II->getIntrinsicID();
+ if (isTriviallyVectorizable(ID) || ID == Intrinsic::lifetime_start ||
+ ID == Intrinsic::lifetime_end)
+ return ID;
+ else
return Intrinsic::not_intrinsic;
- }
}
if (!TLI)
Modified: llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp?rev=205855&r1=205854&r2=205855&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp (original)
+++ llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp Wed Apr 9 09:20:47 2014
@@ -41,6 +41,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/VectorUtils.h"
#include <algorithm>
#include <map>
@@ -949,7 +950,9 @@ void BoUpSLP::buildTree_rec(ArrayRef<Val
case Instruction::Call: {
// Check if the calls are all to the same vectorizable intrinsic.
IntrinsicInst *II = dyn_cast<IntrinsicInst>(VL[0]);
- if (II==NULL) {
+ Intrinsic::ID ID = II ? II->getIntrinsicID() : Intrinsic::not_intrinsic;
+
+ if (!isTriviallyVectorizable(ID)) {
newTreeEntry(VL, false);
DEBUG(dbgs() << "SLP: Non-vectorizable call.\n");
return;
Added: llvm/trunk/test/Transforms/SLPVectorizer/X86/non-vectorizable-intrinsic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/non-vectorizable-intrinsic.ll?rev=205855&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SLPVectorizer/X86/non-vectorizable-intrinsic.ll (added)
+++ llvm/trunk/test/Transforms/SLPVectorizer/X86/non-vectorizable-intrinsic.ll Wed Apr 9 09:20:47 2014
@@ -0,0 +1,36 @@
+; RUN: opt < %s -slp-vectorizer -o - -S -slp-threshold=-1000
+
+target datalayout = "e-p:32:32-i64:64-v16:16-v32:32-n16:32:64"
+target triple = "nvptx--nvidiacl"
+
+; CTLZ cannot be vectorized currently because the second argument is a scalar
+; for both the scalar and vector forms of the intrinsic. In the future it
+; should be possible to vectorize such functions.
+; Test causes an assert if LLVM tries to vectorize CTLZ.
+
+define <2 x i8> @cltz_test(<2 x i8> %x) #0 {
+entry:
+ %0 = extractelement <2 x i8> %x, i32 0
+ %call.i = call i8 @llvm.ctlz.i8(i8 %0, i1 false)
+ %vecinit = insertelement <2 x i8> undef, i8 %call.i, i32 0
+ %1 = extractelement <2 x i8> %x, i32 1
+ %call.i4 = call i8 @llvm.ctlz.i8(i8 %1, i1 false)
+ %vecinit2 = insertelement <2 x i8> %vecinit, i8 %call.i4, i32 1
+ ret <2 x i8> %vecinit2
+}
+
+define <2 x i8> @cltz_test2(<2 x i8> %x) #1 {
+entry:
+ %0 = extractelement <2 x i8> %x, i32 0
+ %1 = extractelement <2 x i8> %x, i32 1
+ %call.i = call i8 @llvm.ctlz.i8(i8 %0, i1 false)
+ %call.i4 = call i8 @llvm.ctlz.i8(i8 %1, i1 false)
+ %vecinit = insertelement <2 x i8> undef, i8 %call.i, i32 0
+ %vecinit2 = insertelement <2 x i8> %vecinit, i8 %call.i4, i32 1
+ ret <2 x i8> %vecinit2
+}
+
+declare i8 @llvm.ctlz.i8(i8, i1) #3
+
+attributes #0 = { alwaysinline nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
More information about the llvm-commits
mailing list