[llvm] r232531 - TLI: Add addVectorizableFunctionsFromVecLib.
Michael Zolotukhin
mzolotukhin at apple.com
Tue Mar 17 12:50:56 PDT 2015
Author: mzolotukhin
Date: Tue Mar 17 14:50:55 2015
New Revision: 232531
URL: http://llvm.org/viewvc/llvm-project?rev=232531&view=rev
Log:
TLI: Add addVectorizableFunctionsFromVecLib.
Also, add several entries to vectorizable functions table, and
corresponding tests. The table isn't complete, it'll be populated later.
Review: http://reviews.llvm.org/D8131
Added:
llvm/trunk/test/Transforms/LoopVectorize/X86/veclib-calls.ll
Modified:
llvm/trunk/include/llvm/Analysis/TargetLibraryInfo.h
llvm/trunk/lib/Analysis/TargetLibraryInfo.cpp
Modified: llvm/trunk/include/llvm/Analysis/TargetLibraryInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/TargetLibraryInfo.h?rev=232531&r1=232530&r2=232531&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Analysis/TargetLibraryInfo.h (original)
+++ llvm/trunk/include/llvm/Analysis/TargetLibraryInfo.h Tue Mar 17 14:50:55 2015
@@ -71,6 +71,18 @@ class TargetLibraryInfoImpl {
std::vector<VecDesc> ScalarDescs;
public:
+ /// \brief List of known vector-functions libraries.
+ ///
+ /// The vector-functions library defines, which functions are vectorizable
+ /// and with which factor. The library can be specified by either frontend,
+ /// or a commandline option, and then used by
+ /// addVectorizableFunctionsFromVecLib for filling up the tables of
+ /// vectorizable functions.
+ enum VectorLibrary {
+ NoLibrary, // Don't use any vector library.
+ Accelerate // Use Accelerate framework.
+ };
+
TargetLibraryInfoImpl();
explicit TargetLibraryInfoImpl(const Triple &T);
@@ -117,6 +129,10 @@ public:
/// queryable via getVectorizedFunction and getScalarizedFunction.
void addVectorizableFunctions(ArrayRef<VecDesc> Fns);
+ /// Calls addVectorizableFunctions with a known preset of functions for the
+ /// given vector library.
+ void addVectorizableFunctionsFromVecLib(enum VectorLibrary VecLib);
+
/// isFunctionVectorizable - Return true if the function F has a
/// vector equivalent with vectorization factor VF.
bool isFunctionVectorizable(StringRef F, unsigned VF) const {
Modified: llvm/trunk/lib/Analysis/TargetLibraryInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/TargetLibraryInfo.cpp?rev=232531&r1=232530&r2=232531&view=diff
==============================================================================
--- llvm/trunk/lib/Analysis/TargetLibraryInfo.cpp (original)
+++ llvm/trunk/lib/Analysis/TargetLibraryInfo.cpp Tue Mar 17 14:50:55 2015
@@ -13,8 +13,18 @@
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/Support/CommandLine.h"
using namespace llvm;
+static cl::opt<TargetLibraryInfoImpl::VectorLibrary> ClVectorLibrary(
+ "vector-library", cl::Hidden, cl::desc("Vector functions library"),
+ cl::init(TargetLibraryInfoImpl::NoLibrary),
+ cl::values(clEnumValN(TargetLibraryInfoImpl::NoLibrary, "none",
+ "No vector functions library"),
+ clEnumValN(TargetLibraryInfoImpl::Accelerate, "Accelerate",
+ "Accelerate framework"),
+ clEnumValEnd));
+
const char *const TargetLibraryInfoImpl::StandardNames[LibFunc::NumLibFuncs] = {
#define TLI_DEFINE_STRING
#include "llvm/Analysis/TargetLibraryInfo.def"
@@ -345,6 +355,8 @@ static void initialize(TargetLibraryInfo
TLI.setUnavailable(LibFunc::statvfs64);
TLI.setUnavailable(LibFunc::tmpfile64);
}
+
+ TLI.addVectorizableFunctionsFromVecLib(ClVectorLibrary);
}
TargetLibraryInfoImpl::TargetLibraryInfoImpl() {
@@ -452,6 +464,28 @@ void TargetLibraryInfoImpl::addVectoriza
std::sort(ScalarDescs.begin(), ScalarDescs.end(), compareByVectorFnName);
}
+void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib(
+ enum VectorLibrary VecLib) {
+ switch (VecLib) {
+ case Accelerate: {
+ const VecDesc VecFuncs[] = {
+ {"expf", "vexpf", 4},
+ {"llvm.exp.f32", "vexpf", 4},
+ {"logf", "vlogf", 4},
+ {"llvm.log.f32", "vlogf", 4},
+ {"sqrtf", "vsqrtf", 4},
+ {"llvm.sqrt.f32", "vsqrtf", 4},
+ {"fabsf", "vfabsf", 4},
+ {"llvm.fabs.f32", "vfabsf", 4},
+ };
+ addVectorizableFunctions(VecFuncs);
+ break;
+ }
+ case NoLibrary:
+ break;
+ }
+}
+
bool TargetLibraryInfoImpl::isFunctionVectorizable(StringRef funcName) const {
funcName = sanitizeFunctionName(funcName);
if (funcName.empty())
Added: llvm/trunk/test/Transforms/LoopVectorize/X86/veclib-calls.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/X86/veclib-calls.ll?rev=232531&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/X86/veclib-calls.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/X86/veclib-calls.ll Tue Mar 17 14:50:55 2015
@@ -0,0 +1,182 @@
+; RUN: opt < %s -vector-library=Accelerate -loop-vectorize -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+;CHECK-LABEL: @sqrt_f32(
+;CHECK: vsqrtf{{.*}}<4 x float>
+;CHECK: ret void
+declare float @sqrtf(float) nounwind readnone
+define void @sqrt_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+entry:
+ %cmp6 = icmp sgt i32 %n, 0
+ br i1 %cmp6, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
+ %0 = load float, float* %arrayidx, align 4
+ %call = tail call float @sqrtf(float %0) nounwind readnone
+ %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
+ store float %call, float* %arrayidx2, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+;CHECK-LABEL: @exp_f32(
+;CHECK: vexpf{{.*}}<4 x float>
+;CHECK: ret void
+declare float @expf(float) nounwind readnone
+define void @exp_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+entry:
+ %cmp6 = icmp sgt i32 %n, 0
+ br i1 %cmp6, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
+ %0 = load float, float* %arrayidx, align 4
+ %call = tail call float @expf(float %0) nounwind readnone
+ %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
+ store float %call, float* %arrayidx2, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+;CHECK-LABEL: @log_f32(
+;CHECK: vlogf{{.*}}<4 x float>
+;CHECK: ret void
+declare float @logf(float) nounwind readnone
+define void @log_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+entry:
+ %cmp6 = icmp sgt i32 %n, 0
+ br i1 %cmp6, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
+ %0 = load float, float* %arrayidx, align 4
+ %call = tail call float @logf(float %0) nounwind readnone
+ %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
+ store float %call, float* %arrayidx2, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+; For abs instruction we'll generate vector intrinsic, as it's cheaper than a lib call.
+;CHECK-LABEL: @fabs_f32(
+;CHECK: fabs{{.*}}<4 x float>
+;CHECK: ret void
+declare float @fabsf(float) nounwind readnone
+define void @fabs_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+entry:
+ %cmp6 = icmp sgt i32 %n, 0
+ br i1 %cmp6, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
+ %0 = load float, float* %arrayidx, align 4
+ %call = tail call float @fabsf(float %0) nounwind readnone
+ %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
+ store float %call, float* %arrayidx2, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+; Test that we can vectorize an intrinsic into a vector call.
+;CHECK-LABEL: @exp_f32_intrin(
+;CHECK: vexpf{{.*}}<4 x float>
+;CHECK: ret void
+declare float @llvm.exp.f32(float) nounwind readnone
+define void @exp_f32_intrin(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+entry:
+ %cmp6 = icmp sgt i32 %n, 0
+ br i1 %cmp6, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
+ %0 = load float, float* %arrayidx, align 4
+ %call = tail call float @llvm.exp.f32(float %0) nounwind readnone
+ %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
+ store float %call, float* %arrayidx2, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+; Test that we don't vectorize arbitrary functions.
+;CHECK-LABEL: @foo_f32(
+;CHECK-NOT: foo{{.*}}<4 x float>
+;CHECK: ret void
+declare float @foo(float) nounwind readnone
+define void @foo_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+entry:
+ %cmp6 = icmp sgt i32 %n, 0
+ br i1 %cmp6, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
+ %0 = load float, float* %arrayidx, align 4
+ %call = tail call float @foo(float %0) nounwind readnone
+ %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
+ store float %call, float* %arrayidx2, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+; Test that we don't vectorize calls with nobuiltin attribute.
+;CHECK-LABEL: @sqrt_f32_nobuiltin(
+;CHECK-NOT: vsqrtf{{.*}}<4 x float>
+;CHECK: ret void
+define void @sqrt_f32_nobuiltin(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+entry:
+ %cmp6 = icmp sgt i32 %n, 0
+ br i1 %cmp6, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
+ %0 = load float, float* %arrayidx, align 4
+ %call = tail call float @sqrtf(float %0) nounwind readnone nobuiltin
+ %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
+ store float %call, float* %arrayidx2, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
More information about the llvm-commits
mailing list