[llvm] r259357 - Reapply commit r258404 with fix.
Matthew Simpson via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 1 11:15:49 PST 2016
Test case added in r259385. Thanks again for reducing the test case!
-- Matt
From: Matthew Simpson [mailto:mssimpso at codeaurora.org]
Sent: Monday, February 01, 2016 1:42 PM
To: 'David Majnemer'
Cc: 'llvm-commits'
Subject: RE: [llvm] r259357 - Reapply commit r258404 with fix.
Will do. Thanks, David.
-- Matt
From: David Majnemer [mailto:david.majnemer at gmail.com]
Sent: Monday, February 01, 2016 1:17 PM
To: Matthew Simpson
Cc: llvm-commits
Subject: Re: [llvm] r259357 - Reapply commit r258404 with fix.
I don't believe you have added a test case to make sure we do not regress and re-trigger PR26364. Please add one.
On Mon, Feb 1, 2016 at 5:38 AM, Matthew Simpson via llvm-commits <llvm-commits at lists.llvm.org> wrote:
Author: mssimpso
Date: Mon Feb 1 07:38:29 2016
New Revision: 259357
URL: http://llvm.org/viewvc/llvm-project?rev=259357&view=rev
Log:
Reapply commit r258404 with fix.
The previous patch caused PR26364. The fix is to ensure that we don't enter a
cycle when iterating over use-def chains.
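For illustration only (this sketch is not part of the patch): the cycle guard works because collectValuesToDemote refuses to recurse into any value with more than one use, so a value reached a second time through a phi cycle necessarily has an extra in-cycle use and is rejected, and computeMinimumValueSizes additionally requires that the tree root's single user lie outside the vectorizable tree. The standalone C++ sketch below models that idea with hypothetical names (Node, collectDemotable); it is a simplified model of the single-use check, not the LLVM API.

  // Simplified standalone model of the use-def walk (hypothetical names).
  #include <iostream>
  #include <string>
  #include <vector>

  struct Node {
    std::string Name;
    std::vector<Node *> Operands; // the value's defs (its use-def chain)
    unsigned NumUses;             // how many users the value has
  };

  // Walk the use-def chain the way collectValuesToDemote does. Rejecting any
  // multi-use value terminates the recursion before it can revisit a node
  // through a phi cycle, since a value reached again from inside the cycle
  // must have a second use there.
  static bool collectDemotable(Node *N, std::vector<Node *> &ToDemote) {
    if (N->NumUses != 1)
      return false;
    for (Node *Op : N->Operands)
      if (!collectDemotable(Op, ToDemote))
        return false;
    ToDemote.push_back(N);
    return true;
  }

  int main() {
    // A hypothetical two-node cycle: a phi and an add feeding each other,
    // with the phi also used by some external value.
    Node Phi{"phi", {}, 2}; // used by the add and by an external value
    Node Add{"add", {}, 1}; // used only by the phi
    Phi.Operands = {&Add};
    Add.Operands = {&Phi};

    std::vector<Node *> ToDemote;
    bool Ok = collectDemotable(&Add, ToDemote);
    std::cout << (Ok ? "demotable" : "rejected") << "\n"; // prints "rejected"
    return 0;
  }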
Modified:
llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp
llvm/trunk/test/Transforms/SLPVectorizer/AArch64/gather-reduce.ll
Modified: llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp?rev=259357&r1=259356&r2=259357&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp (original)
+++ llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp Mon Feb 1 07:38:29 2016
@@ -15,22 +15,24 @@
// "Loop-Aware SLP in GCC" by Ira Rosen, Dorit Nuzman, Ayal Zaks.
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Vectorize.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/DemandedBits.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
@@ -45,7 +47,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Analysis/VectorUtils.h"
+#include "llvm/Transforms/Vectorize.h"
#include <algorithm>
#include <map>
#include <memory>
@@ -364,9 +366,9 @@ public:
BoUpSLP(Function *Func, ScalarEvolution *Se, TargetTransformInfo *Tti,
TargetLibraryInfo *TLi, AliasAnalysis *Aa, LoopInfo *Li,
- DominatorTree *Dt, AssumptionCache *AC)
+ DominatorTree *Dt, AssumptionCache *AC, DemandedBits *DB)
: NumLoadsWantToKeepOrder(0), NumLoadsWantToChangeOrder(0), F(Func),
- SE(Se), TTI(Tti), TLI(TLi), AA(Aa), LI(Li), DT(Dt),
+ SE(Se), TTI(Tti), TLI(TLi), AA(Aa), LI(Li), DT(Dt), AC(AC), DB(DB),
Builder(Se->getContext()) {
CodeMetrics::collectEphemeralValues(F, AC, EphValues);
}
@@ -400,6 +402,7 @@ public:
BlockScheduling *BS = Iter.second.get();
BS->clear();
}
+ MinBWs.clear();
}
/// \brief Perform LICM and CSE on the newly generated gather sequences.
@@ -417,6 +420,10 @@ public:
/// vectorization factors.
unsigned getVectorElementSize(Value *V);
+ /// Compute the minimum type sizes required to represent the entries in a
+ /// vectorizable tree.
+ void computeMinimumValueSizes();
+
private:
struct TreeEntry;
@@ -914,8 +921,14 @@ private:
AliasAnalysis *AA;
LoopInfo *LI;
DominatorTree *DT;
+ AssumptionCache *AC;
+ DemandedBits *DB;
/// Instruction builder to construct the vectorized tree.
IRBuilder<> Builder;
+
+ /// A map of scalar integer values to the smallest bit width with which they
+ /// can legally be represented.
+ MapVector<Value *, uint64_t> MinBWs;
};
#ifndef NDEBUG
@@ -1471,6 +1484,12 @@ int BoUpSLP::getEntryCost(TreeEntry *E)
ScalarTy = SI->getValueOperand()->getType();
VectorType *VecTy = VectorType::get(ScalarTy, VL.size());
+ // If we have computed a smaller type for the expression, update VecTy so
+ // that the costs will be accurate.
+ if (MinBWs.count(VL[0]))
+ VecTy = VectorType::get(IntegerType::get(F->getContext(), MinBWs[VL[0]]),
+ VL.size());
+
if (E->NeedToGather) {
if (allConstant(VL))
return 0;
@@ -1799,9 +1818,19 @@ int BoUpSLP::getTreeCost() {
if (EphValues.count(EU.User))
continue;
- VectorType *VecTy = VectorType::get(EU.Scalar->getType(), BundleWidth);
- ExtractCost += TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy,
- EU.Lane);
+ // If we plan to rewrite the tree in a smaller type, we will need to sign
+ // extend the extracted value back to the original type. Here, we account
+ // for the extract and the added cost of the sign extend if needed.
+ auto *VecTy = VectorType::get(EU.Scalar->getType(), BundleWidth);
+ auto *ScalarRoot = VectorizableTree[0].Scalars[0];
+ if (MinBWs.count(ScalarRoot)) {
+ auto *MinTy = IntegerType::get(F->getContext(), MinBWs[ScalarRoot]);
+ VecTy = VectorType::get(MinTy, BundleWidth);
+ ExtractCost +=
+ TTI->getCastInstrCost(Instruction::SExt, EU.Scalar->getType(), MinTy);
+ }
+ ExtractCost +=
+ TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy, EU.Lane);
}
Cost += getSpillCost();
@@ -2499,7 +2528,21 @@ Value *BoUpSLP::vectorizeTree() {
}
Builder.SetInsertPoint(&F->getEntryBlock().front());
- vectorizeTree(&VectorizableTree[0]);
+ auto *VectorRoot = vectorizeTree(&VectorizableTree[0]);
+
+ // If the vectorized tree can be rewritten in a smaller type, we truncate the
+ // vectorized root. InstCombine will then rewrite the entire expression. We
+ // sign extend the extracted values below.
+ auto *ScalarRoot = VectorizableTree[0].Scalars[0];
+ if (MinBWs.count(ScalarRoot)) {
+ if (auto *I = dyn_cast<Instruction>(VectorRoot))
+ Builder.SetInsertPoint(&*++BasicBlock::iterator(I));
+ auto BundleWidth = VectorizableTree[0].Scalars.size();
+ auto *MinTy = IntegerType::get(F->getContext(), MinBWs[ScalarRoot]);
+ auto *VecTy = VectorType::get(MinTy, BundleWidth);
+ auto *Trunc = Builder.CreateTrunc(VectorRoot, VecTy);
+ VectorizableTree[0].VectorizedValue = Trunc;
+ }
DEBUG(dbgs() << "SLP: Extracting " << ExternalUses.size() << " values .\n");
@@ -2532,6 +2575,8 @@ Value *BoUpSLP::vectorizeTree() {
if (PH->getIncomingValue(i) == Scalar) {
Builder.SetInsertPoint(PH->getIncomingBlock(i)->getTerminator());
Value *Ex = Builder.CreateExtractElement(Vec, Lane);
+ if (MinBWs.count(ScalarRoot))
+ Ex = Builder.CreateSExt(Ex, Scalar->getType());
CSEBlocks.insert(PH->getIncomingBlock(i));
PH->setOperand(i, Ex);
}
@@ -2539,12 +2584,16 @@ Value *BoUpSLP::vectorizeTree() {
} else {
Builder.SetInsertPoint(cast<Instruction>(User));
Value *Ex = Builder.CreateExtractElement(Vec, Lane);
+ if (MinBWs.count(ScalarRoot))
+ Ex = Builder.CreateSExt(Ex, Scalar->getType());
CSEBlocks.insert(cast<Instruction>(User)->getParent());
User->replaceUsesOfWith(Scalar, Ex);
}
} else {
Builder.SetInsertPoint(&F->getEntryBlock().front());
Value *Ex = Builder.CreateExtractElement(Vec, Lane);
+ if (MinBWs.count(ScalarRoot))
+ Ex = Builder.CreateSExt(Ex, Scalar->getType());
CSEBlocks.insert(&F->getEntryBlock());
User->replaceUsesOfWith(Scalar, Ex);
}
@@ -3113,7 +3162,7 @@ unsigned BoUpSLP::getVectorElementSize(V
// If the current instruction is a load, update MaxWidth to reflect the
// width of the loaded value.
else if (isa<LoadInst>(I))
- MaxWidth = std::max(MaxWidth, (unsigned)DL.getTypeSizeInBits(Ty));
+ MaxWidth = std::max<unsigned>(MaxWidth, DL.getTypeSizeInBits(Ty));
// Otherwise, we need to visit the operands of the instruction. We only
// handle the interesting cases from buildTree here. If an operand is an
@@ -3140,6 +3189,171 @@ unsigned BoUpSLP::getVectorElementSize(V
return MaxWidth;
}
+// Determine if a value V in a vectorizable expression Expr can be demoted to a
+// smaller type with a truncation. We collect the values that will be demoted
+// in ToDemote and additional roots that require investigating in Roots.
+static bool collectValuesToDemote(Value *V, SmallPtrSetImpl<Value *> &Expr,
+ SmallVectorImpl<Value *> &ToDemote,
+ SmallVectorImpl<Value *> &Roots) {
+
+ // We can always demote constants.
+ if (isa<Constant>(V)) {
+ ToDemote.push_back(V);
+ return true;
+ }
+
+ // If the value is not an instruction in the expression with only one use, it
+ // cannot be demoted.
+ auto *I = dyn_cast<Instruction>(V);
+ if (!I || !I->hasOneUse() || !Expr.count(I))
+ return false;
+
+ switch (I->getOpcode()) {
+
+ // We can always demote truncations and extensions. Since truncations can
+ // seed additional demotion, we save the truncated value.
+ case Instruction::Trunc:
+ Roots.push_back(I->getOperand(0));
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ break;
+
+ // We can demote certain binary operations if we can demote both of their
+ // operands.
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ if (!collectValuesToDemote(I->getOperand(0), Expr, ToDemote, Roots) ||
+ !collectValuesToDemote(I->getOperand(1), Expr, ToDemote, Roots))
+ return false;
+ break;
+
+ // We can demote selects if we can demote their true and false values.
+ case Instruction::Select: {
+ SelectInst *SI = cast<SelectInst>(I);
+ if (!collectValuesToDemote(SI->getTrueValue(), Expr, ToDemote, Roots) ||
+ !collectValuesToDemote(SI->getFalseValue(), Expr, ToDemote, Roots))
+ return false;
+ break;
+ }
+
+ // We can demote phis if we can demote all their incoming operands. Note that
+ // we don't need to worry about cycles since we ensure single use above.
+ case Instruction::PHI: {
+ PHINode *PN = cast<PHINode>(I);
+ for (Value *IncValue : PN->incoming_values())
+ if (!collectValuesToDemote(IncValue, Expr, ToDemote, Roots))
+ return false;
+ break;
+ }
+
+ // Otherwise, conservatively give up.
+ default:
+ return false;
+ }
+
+ // Record the value that we can demote.
+ ToDemote.push_back(V);
+ return true;
+}
+
+void BoUpSLP::computeMinimumValueSizes() {
+ auto &DL = F->getParent()->getDataLayout();
+
+ // If there are no external uses, the expression tree must be rooted by a
+ // store. We can't demote in-memory values, so there is nothing to do here.
+ if (ExternalUses.empty())
+ return;
+
+ // We only attempt to truncate integer expressions.
+ auto &TreeRoot = VectorizableTree[0].Scalars;
+ auto *TreeRootIT = dyn_cast<IntegerType>(TreeRoot[0]->getType());
+ if (!TreeRootIT)
+ return;
+
+ // If the expression is not rooted by a store, these roots should have
+ // external uses. We will rely on InstCombine to rewrite the expression in
+ // the narrower type. However, InstCombine only rewrites single-use values.
+ // This means that if a tree entry other than a root is used externally, it
+ // must have multiple uses and InstCombine will not rewrite it. The code
+ // below ensures that only the roots are used externally.
+ SmallPtrSet<Value *, 16> Expr(TreeRoot.begin(), TreeRoot.end());
+ for (auto &EU : ExternalUses)
+ if (!Expr.erase(EU.Scalar))
+ return;
+ if (!Expr.empty())
+ return;
+
+ // Collect the scalar values in one lane of the vectorizable expression. We
+ // will use this context to determine which values can be demoted. If we see
+ // a truncation, we mark it as seeding another demotion.
+ for (auto &Entry : VectorizableTree)
+ Expr.insert(Entry.Scalars[0]);
+
+ // Ensure the root of the vectorizable tree doesn't form a cycle. It must
+ // have a single external user that is not in the vectorizable tree.
+ if (!TreeRoot[0]->hasOneUse() || Expr.count(*TreeRoot[0]->user_begin()))
+ return;
+
+ // Conservatively determine if we can actually truncate the root of the
+ // expression. Collect the values that can be demoted in ToDemote and
+ // additional roots that require investigating in Roots.
+ SmallVector<Value *, 32> ToDemote;
+ SmallVector<Value *, 2> Roots;
+ if (!collectValuesToDemote(TreeRoot[0], Expr, ToDemote, Roots))
+ return;
+
+ // The maximum bit width required to represent all the values that can be
+ // demoted without loss of precision. It would be safe to truncate the root
+ // of the expression to this width.
+ auto MaxBitWidth = 8u;
+
+ // We first check if all the bits of the root are demanded. If they're not,
+ // we can truncate the root to this narrower type.
+ auto Mask = DB->getDemandedBits(cast<Instruction>(TreeRoot[0]));
+ if (Mask.countLeadingZeros() > 0)
+ MaxBitWidth = std::max<unsigned>(
+ Mask.getBitWidth() - Mask.countLeadingZeros(), MaxBitWidth);
+
+ // If all the bits of the root are demanded, we can try a little harder to
+ // compute a narrower type. This can happen, for example, if the roots are
+ // getelementptr indices. InstCombine promotes these indices to the pointer
+ // width. Thus, all their bits are technically demanded even though the
+ // address computation might be vectorized in a smaller type.
+ //
+ // We start by looking at each entry that can be demoted. We compute the
+ // maximum bit width required to store the scalar by using ValueTracking to
+ // compute the number of high-order bits we can truncate.
+ else
+ for (auto *Scalar : ToDemote) {
+ auto NumSignBits = ComputeNumSignBits(Scalar, DL, 0, AC, 0, DT);
+ auto NumTypeBits = DL.getTypeSizeInBits(Scalar->getType());
+ MaxBitWidth = std::max<unsigned>(NumTypeBits - NumSignBits, MaxBitWidth);
+ }
+
+ // Round MaxBitWidth up to the next power-of-two.
+ if (!isPowerOf2_64(MaxBitWidth))
+ MaxBitWidth = NextPowerOf2(MaxBitWidth);
+
+ // If the maximum bit width we compute is less than the width of the roots'
+ // type, we can proceed with the narrowing. Otherwise, do nothing.
+ if (MaxBitWidth >= TreeRootIT->getBitWidth())
+ return;
+
+ // If we can truncate the root, we must collect additional values that might
+ // be demoted as a result. That is, those seeded by truncations we will
+ // modify.
+ while (!Roots.empty())
+ collectValuesToDemote(Roots.pop_back_val(), Expr, ToDemote, Roots);
+
+ // Finally, map the values we can demote to the maximum bit width we computed.
+ for (auto *Scalar : ToDemote)
+ MinBWs[Scalar] = MaxBitWidth;
+}
+
/// The SLPVectorizer Pass.
struct SLPVectorizer : public FunctionPass {
typedef SmallVector<StoreInst *, 8> StoreList;
@@ -3161,6 +3375,7 @@ struct SLPVectorizer : public FunctionPa
LoopInfo *LI;
DominatorTree *DT;
AssumptionCache *AC;
+ DemandedBits *DB;
bool runOnFunction(Function &F) override {
if (skipOptnoneFunction(F))
@@ -3174,6 +3389,7 @@ struct SLPVectorizer : public FunctionPa
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ DB = &getAnalysis<DemandedBits>();
Stores.clear();
GEPs.clear();
@@ -3203,7 +3419,7 @@ struct SLPVectorizer : public FunctionPa
// Use the bottom up slp vectorizer to construct chains that start with
// store instructions.
- BoUpSLP R(&F, SE, TTI, TLI, AA, LI, DT, AC);
+ BoUpSLP R(&F, SE, TTI, TLI, AA, LI, DT, AC, DB);
// A general note: the vectorizer must use BoUpSLP::eraseInstruction() to
// delete instructions.
@@ -3246,6 +3462,7 @@ struct SLPVectorizer : public FunctionPa
AU.addRequired<TargetTransformInfoWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<DemandedBits>();
AU.addPreserved<LoopInfoWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addPreserved<AAResultsWrapperPass>();
@@ -3350,6 +3567,7 @@ bool SLPVectorizer::vectorizeStoreChain(
ArrayRef<Value *> Operands = Chain.slice(i, VF);
R.buildTree(Operands);
+ R.computeMinimumValueSizes();
int Cost = R.getTreeCost();
@@ -3549,6 +3767,7 @@ bool SLPVectorizer::tryToVectorizeList(A
Value *ReorderedOps[] = { Ops[1], Ops[0] };
R.buildTree(ReorderedOps, None);
}
+ R.computeMinimumValueSizes();
int Cost = R.getTreeCost();
if (Cost < -SLPCostThreshold) {
@@ -3815,6 +4034,7 @@ public:
for (; i < NumReducedVals - ReduxWidth + 1; i += ReduxWidth) {
V.buildTree(makeArrayRef(&ReducedVals[i], ReduxWidth), ReductionOps);
+ V.computeMinimumValueSizes();
// Estimate cost.
int Cost = V.getTreeCost() + getReductionCost(TTI, ReducedVals[i]);
Modified: llvm/trunk/test/Transforms/SLPVectorizer/AArch64/gather-reduce.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/AArch64/gather-reduce.ll?rev=259357&r1=259356&r2=259357&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/SLPVectorizer/AArch64/gather-reduce.ll (original)
+++ llvm/trunk/test/Transforms/SLPVectorizer/AArch64/gather-reduce.ll Mon Feb 1 07:38:29 2016
@@ -1,4 +1,5 @@
-; RUN: opt -S -slp-vectorizer -dce -instcombine < %s | FileCheck %s
+; RUN: opt -S -slp-vectorizer -dce -instcombine < %s | FileCheck %s --check-prefix=PROFITABLE
+; RUN: opt -S -slp-vectorizer -slp-threshold=-12 -dce -instcombine < %s | FileCheck %s --check-prefix=UNPROFITABLE
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
target triple = "aarch64--linux-gnu"
@@ -18,13 +19,13 @@ target triple = "aarch64--linux-gnu"
; return sum;
; }
-; CHECK-LABEL: @gather_reduce_8x16_i32
+; PROFITABLE-LABEL: @gather_reduce_8x16_i32
;
-; CHECK: [[L:%[a-zA-Z0-9.]+]] = load <8 x i16>
-; CHECK: zext <8 x i16> [[L]] to <8 x i32>
-; CHECK: [[S:%[a-zA-Z0-9.]+]] = sub nsw <8 x i32>
-; CHECK: [[X:%[a-zA-Z0-9.]+]] = extractelement <8 x i32> [[S]]
-; CHECK: sext i32 [[X]] to i64
+; PROFITABLE: [[L:%[a-zA-Z0-9.]+]] = load <8 x i16>
+; PROFITABLE: zext <8 x i16> [[L]] to <8 x i32>
+; PROFITABLE: [[S:%[a-zA-Z0-9.]+]] = sub nsw <8 x i32>
+; PROFITABLE: [[X:%[a-zA-Z0-9.]+]] = extractelement <8 x i32> [[S]]
+; PROFITABLE: sext i32 [[X]] to i64
;
define i32 @gather_reduce_8x16_i32(i16* nocapture readonly %a, i16* nocapture readonly %b, i16* nocapture readonly %g, i32 %n) {
entry:
@@ -137,14 +138,18 @@ for.body:
br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
}
-; CHECK-LABEL: @gather_reduce_8x16_i64
+; UNPROFITABLE-LABEL: @gather_reduce_8x16_i64
;
-; CHECK-NOT: load <8 x i16>
-;
-; FIXME: We are currently unable to vectorize the case with i64 subtraction
-; because the zero extensions are too expensive. The solution here is to
-; convert the i64 subtractions to i32 subtractions during vectorization.
-; This would then match the case above.
+; UNPROFITABLE: [[L:%[a-zA-Z0-9.]+]] = load <8 x i16>
+; UNPROFITABLE: zext <8 x i16> [[L]] to <8 x i32>
+; UNPROFITABLE: [[S:%[a-zA-Z0-9.]+]] = sub nsw <8 x i32>
+; UNPROFITABLE: [[X:%[a-zA-Z0-9.]+]] = extractelement <8 x i32> [[S]]
+; UNPROFITABLE: sext i32 [[X]] to i64
+;
+; TODO: Although we can now vectorize this case while converting the i64
+; subtractions to i32, the cost model currently finds vectorization to be
+; unprofitable. The cost model is penalizing the sign and zero
+; extensions in the vectorized version, but they are actually free.
;
define i32 @gather_reduce_8x16_i64(i16* nocapture readonly %a, i16* nocapture readonly %b, i16* nocapture readonly %g, i32 %n) {
entry:
_______________________________________________
llvm-commits mailing list
llvm-commits at lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits