[llvm-branch-commits] [llvm-branch] r322675 - Merging r321870, r321872, and r321994:
Hans Wennborg via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Wed Jan 17 08:04:05 PST 2018
Author: hans
Date: Wed Jan 17 08:04:05 2018
New Revision: 322675
URL: http://llvm.org/viewvc/llvm-project?rev=322675&view=rev
Log:
Merging r321870, r321872, and r321994:
------------------------------------------------------------------------
r321870 | abataev | 2018-01-05 07:20:40 -0800 (Fri, 05 Jan 2018) | 1 line
[SLP] Update test checks, NFC.
------------------------------------------------------------------------
------------------------------------------------------------------------
r321872 | abataev | 2018-01-05 08:15:17 -0800 (Fri, 05 Jan 2018) | 1 line
[SLP] Update more test checks, NFC.
------------------------------------------------------------------------
------------------------------------------------------------------------
r321994 | abataev | 2018-01-08 06:43:06 -0800 (Mon, 08 Jan 2018) | 13 lines
[SLP] Fix PR35777: Incorrect handling of aggregate values.
Summary:
Fixes the bug with incorrect handling of InsertValue|InsertElement
instructions in SLP vectorizer. Currently, we may use incorrect
ExtractElement instructions as the operands of the original
InsertValue|InsertElement instructions.
Reviewers: mkuper, hfinkel, RKSimon, spatel
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D41767
------------------------------------------------------------------------
Added:
llvm/branches/release_60/test/Transforms/SLPVectorizer/X86/PR35777.ll
- copied unchanged from r321994, llvm/trunk/test/Transforms/SLPVectorizer/X86/PR35777.ll
Modified:
llvm/branches/release_60/ (props changed)
llvm/branches/release_60/include/llvm/Transforms/Vectorize/SLPVectorizer.h
llvm/branches/release_60/lib/Transforms/Vectorize/SLPVectorizer.cpp
llvm/branches/release_60/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll
llvm/branches/release_60/test/Transforms/SLPVectorizer/X86/insertvalue.ll
llvm/branches/release_60/test/Transforms/SLPVectorizer/X86/value-bug.ll
Propchange: llvm/branches/release_60/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Wed Jan 17 08:04:05 2018
@@ -1,3 +1,3 @@
/llvm/branches/Apple/Pertwee:110850,110961
/llvm/branches/type-system-rewrite:133420-134817
-/llvm/trunk:155241,321789,321791,321862,321980,321991,321993,322056,322103,322473,322623
+/llvm/trunk:155241,321789,321791,321862,321870,321872,321980,321991,321993-321994,322056,322103,322473,322623
Modified: llvm/branches/release_60/include/llvm/Transforms/Vectorize/SLPVectorizer.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_60/include/llvm/Transforms/Vectorize/SLPVectorizer.h?rev=322675&r1=322674&r2=322675&view=diff
==============================================================================
--- llvm/branches/release_60/include/llvm/Transforms/Vectorize/SLPVectorizer.h (original)
+++ llvm/branches/release_60/include/llvm/Transforms/Vectorize/SLPVectorizer.h Wed Jan 17 08:04:05 2018
@@ -95,14 +95,9 @@ private:
bool tryToVectorizePair(Value *A, Value *B, slpvectorizer::BoUpSLP &R);
/// \brief Try to vectorize a list of operands.
- /// \@param BuildVector A list of users to ignore for the purpose of
- /// scheduling and cost estimation when NeedExtraction
- /// is false.
/// \returns true if a value was vectorized.
bool tryToVectorizeList(ArrayRef<Value *> VL, slpvectorizer::BoUpSLP &R,
- ArrayRef<Value *> BuildVector = None,
- bool AllowReorder = false,
- bool NeedExtraction = false);
+ bool AllowReorder = false);
/// \brief Try to vectorize a chain that may start at the operands of \p I.
bool tryToVectorize(Instruction *I, slpvectorizer::BoUpSLP &R);
Modified: llvm/branches/release_60/lib/Transforms/Vectorize/SLPVectorizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_60/lib/Transforms/Vectorize/SLPVectorizer.cpp?rev=322675&r1=322674&r2=322675&view=diff
==============================================================================
--- llvm/branches/release_60/lib/Transforms/Vectorize/SLPVectorizer.cpp (original)
+++ llvm/branches/release_60/lib/Transforms/Vectorize/SLPVectorizer.cpp Wed Jan 17 08:04:05 2018
@@ -4416,13 +4416,11 @@ bool SLPVectorizerPass::tryToVectorizePa
if (!A || !B)
return false;
Value *VL[] = { A, B };
- return tryToVectorizeList(VL, R, None, true);
+ return tryToVectorizeList(VL, R, true);
}
bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
- ArrayRef<Value *> BuildVector,
- bool AllowReorder,
- bool NeedExtraction) {
+ bool AllowReorder) {
if (VL.size() < 2)
return false;
@@ -4516,12 +4514,7 @@ bool SLPVectorizerPass::tryToVectorizeLi
<< "\n");
ArrayRef<Value *> Ops = VL.slice(I, OpsWidth);
- ArrayRef<Value *> EmptyArray;
- ArrayRef<Value *> BuildVectorSlice;
- if (!BuildVector.empty())
- BuildVectorSlice = BuildVector.slice(I, OpsWidth);
-
- R.buildTree(Ops, NeedExtraction ? EmptyArray : BuildVectorSlice);
+ R.buildTree(Ops);
// TODO: check if we can allow reordering for more cases.
if (AllowReorder && R.shouldReorder()) {
// Conceptually, there is nothing actually preventing us from trying to
@@ -4529,7 +4522,6 @@ bool SLPVectorizerPass::tryToVectorizeLi
// reductions. However, at this point, we only expect to get here when
// there are exactly two operations.
assert(Ops.size() == 2);
- assert(BuildVectorSlice.empty());
Value *ReorderedOps[] = {Ops[1], Ops[0]};
R.buildTree(ReorderedOps, None);
}
@@ -4549,31 +4541,7 @@ bool SLPVectorizerPass::tryToVectorizeLi
<< " and with tree size "
<< ore::NV("TreeSize", R.getTreeSize()));
- Value *VectorizedRoot = R.vectorizeTree();
-
- // Reconstruct the build vector by extracting the vectorized root. This
- // way we handle the case where some elements of the vector are
- // undefined.
- // (return (inserelt <4 xi32> (insertelt undef (opd0) 0) (opd1) 2))
- if (!BuildVectorSlice.empty()) {
- // The insert point is the last build vector instruction. The
- // vectorized root will precede it. This guarantees that we get an
- // instruction. The vectorized tree could have been constant folded.
- Instruction *InsertAfter = cast<Instruction>(BuildVectorSlice.back());
- unsigned VecIdx = 0;
- for (auto &V : BuildVectorSlice) {
- IRBuilder<NoFolder> Builder(InsertAfter->getParent(),
- ++BasicBlock::iterator(InsertAfter));
- Instruction *I = cast<Instruction>(V);
- assert(isa<InsertElementInst>(I) || isa<InsertValueInst>(I));
- Instruction *Extract =
- cast<Instruction>(Builder.CreateExtractElement(
- VectorizedRoot, Builder.getInt32(VecIdx++)));
- I->setOperand(1, Extract);
- I->moveAfter(Extract);
- InsertAfter = I;
- }
- }
+ R.vectorizeTree();
// Move to the next bundle.
I += VF - 1;
NextInst = I + 1;
@@ -5494,11 +5462,9 @@ private:
///
/// Returns true if it matches
static bool findBuildVector(InsertElementInst *LastInsertElem,
- SmallVectorImpl<Value *> &BuildVector,
SmallVectorImpl<Value *> &BuildVectorOpds) {
Value *V = nullptr;
do {
- BuildVector.push_back(LastInsertElem);
BuildVectorOpds.push_back(LastInsertElem->getOperand(1));
V = LastInsertElem->getOperand(0);
if (isa<UndefValue>(V))
@@ -5507,7 +5473,6 @@ static bool findBuildVector(InsertElemen
if (!LastInsertElem || !LastInsertElem->hasOneUse())
return false;
} while (true);
- std::reverse(BuildVector.begin(), BuildVector.end());
std::reverse(BuildVectorOpds.begin(), BuildVectorOpds.end());
return true;
}
@@ -5516,11 +5481,9 @@ static bool findBuildVector(InsertElemen
///
/// \return true if it matches.
static bool findBuildAggregate(InsertValueInst *IV,
- SmallVectorImpl<Value *> &BuildVector,
SmallVectorImpl<Value *> &BuildVectorOpds) {
Value *V;
do {
- BuildVector.push_back(IV);
BuildVectorOpds.push_back(IV->getInsertedValueOperand());
V = IV->getAggregateOperand();
if (isa<UndefValue>(V))
@@ -5529,7 +5492,6 @@ static bool findBuildAggregate(InsertVal
if (!IV || !IV->hasOneUse())
return false;
} while (true);
- std::reverse(BuildVector.begin(), BuildVector.end());
std::reverse(BuildVectorOpds.begin(), BuildVectorOpds.end());
return true;
}
@@ -5705,27 +5667,25 @@ bool SLPVectorizerPass::vectorizeInsertV
if (!R.canMapToVector(IVI->getType(), DL))
return false;
- SmallVector<Value *, 16> BuildVector;
SmallVector<Value *, 16> BuildVectorOpds;
- if (!findBuildAggregate(IVI, BuildVector, BuildVectorOpds))
+ if (!findBuildAggregate(IVI, BuildVectorOpds))
return false;
DEBUG(dbgs() << "SLP: array mappable to vector: " << *IVI << "\n");
// Aggregate value is unlikely to be processed in vector register, we need to
// extract scalars into scalar registers, so NeedExtraction is set true.
- return tryToVectorizeList(BuildVectorOpds, R, BuildVector, false, true);
+ return tryToVectorizeList(BuildVectorOpds, R);
}
bool SLPVectorizerPass::vectorizeInsertElementInst(InsertElementInst *IEI,
BasicBlock *BB, BoUpSLP &R) {
- SmallVector<Value *, 16> BuildVector;
SmallVector<Value *, 16> BuildVectorOpds;
- if (!findBuildVector(IEI, BuildVector, BuildVectorOpds))
+ if (!findBuildVector(IEI, BuildVectorOpds))
return false;
// Vectorize starting with the build vector operands ignoring the BuildVector
// instructions for the purpose of scheduling and user extraction.
- return tryToVectorizeList(BuildVectorOpds, R, BuildVector);
+ return tryToVectorizeList(BuildVectorOpds, R);
}
bool SLPVectorizerPass::vectorizeCmpInst(CmpInst *CI, BasicBlock *BB,
@@ -5803,8 +5763,8 @@ bool SLPVectorizerPass::vectorizeChainsI
// is done when there are exactly two elements since tryToVectorizeList
// asserts that there are only two values when AllowReorder is true.
bool AllowReorder = NumElts == 2;
- if (NumElts > 1 && tryToVectorizeList(makeArrayRef(IncIt, NumElts), R,
- None, AllowReorder)) {
+ if (NumElts > 1 &&
+ tryToVectorizeList(makeArrayRef(IncIt, NumElts), R, AllowReorder)) {
// Success start over because instructions might have been changed.
HaveVectorizedPhiNodes = true;
Changed = true;
Modified: llvm/branches/release_60/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_60/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll?rev=322675&r1=322674&r2=322675&view=diff
==============================================================================
--- llvm/branches/release_60/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll (original)
+++ llvm/branches/release_60/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll Wed Jan 17 08:04:05 2018
@@ -7,8 +7,8 @@ target triple = "x86_64-apple-macosx10.8
define <4 x float> @simple_select(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
; CHECK-LABEL: @simple_select(
-; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> %c, zeroinitializer
-; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> %a, <4 x float> %b
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> [[C:%.*]], zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
; CHECK-NEXT: [[RA:%.*]] = insertelement <4 x float> undef, float [[TMP3]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP2]], i32 1
@@ -20,8 +20,8 @@ define <4 x float> @simple_select(<4 x f
; CHECK-NEXT: ret <4 x float> [[RD]]
;
; ZEROTHRESH-LABEL: @simple_select(
-; ZEROTHRESH-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> %c, zeroinitializer
-; ZEROTHRESH-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> %a, <4 x float> %b
+; ZEROTHRESH-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> [[C:%.*]], zeroinitializer
+; ZEROTHRESH-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]
; ZEROTHRESH-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
; ZEROTHRESH-NEXT: [[RA:%.*]] = insertelement <4 x float> undef, float [[TMP3]], i32 0
; ZEROTHRESH-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP2]], i32 1
@@ -64,18 +64,18 @@ declare void @llvm.assume(i1) nounwind
; This entire tree is ephemeral, don't vectorize any of it.
define <4 x float> @simple_select_eph(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
; CHECK-LABEL: @simple_select_eph(
-; CHECK-NEXT: [[C0:%.*]] = extractelement <4 x i32> %c, i32 0
-; CHECK-NEXT: [[C1:%.*]] = extractelement <4 x i32> %c, i32 1
-; CHECK-NEXT: [[C2:%.*]] = extractelement <4 x i32> %c, i32 2
-; CHECK-NEXT: [[C3:%.*]] = extractelement <4 x i32> %c, i32 3
-; CHECK-NEXT: [[A0:%.*]] = extractelement <4 x float> %a, i32 0
-; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x float> %a, i32 1
-; CHECK-NEXT: [[A2:%.*]] = extractelement <4 x float> %a, i32 2
-; CHECK-NEXT: [[A3:%.*]] = extractelement <4 x float> %a, i32 3
-; CHECK-NEXT: [[B0:%.*]] = extractelement <4 x float> %b, i32 0
-; CHECK-NEXT: [[B1:%.*]] = extractelement <4 x float> %b, i32 1
-; CHECK-NEXT: [[B2:%.*]] = extractelement <4 x float> %b, i32 2
-; CHECK-NEXT: [[B3:%.*]] = extractelement <4 x float> %b, i32 3
+; CHECK-NEXT: [[C0:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 0
+; CHECK-NEXT: [[C1:%.*]] = extractelement <4 x i32> [[C]], i32 1
+; CHECK-NEXT: [[C2:%.*]] = extractelement <4 x i32> [[C]], i32 2
+; CHECK-NEXT: [[C3:%.*]] = extractelement <4 x i32> [[C]], i32 3
+; CHECK-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
+; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
+; CHECK-NEXT: [[A2:%.*]] = extractelement <4 x float> [[A]], i32 2
+; CHECK-NEXT: [[A3:%.*]] = extractelement <4 x float> [[A]], i32 3
+; CHECK-NEXT: [[B0:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
+; CHECK-NEXT: [[B1:%.*]] = extractelement <4 x float> [[B]], i32 1
+; CHECK-NEXT: [[B2:%.*]] = extractelement <4 x float> [[B]], i32 2
+; CHECK-NEXT: [[B3:%.*]] = extractelement <4 x float> [[B]], i32 3
; CHECK-NEXT: [[CMP0:%.*]] = icmp ne i32 [[C0]], 0
; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[C1]], 0
; CHECK-NEXT: [[CMP2:%.*]] = icmp ne i32 [[C2]], 0
@@ -100,18 +100,18 @@ define <4 x float> @simple_select_eph(<4
; CHECK-NEXT: ret <4 x float> undef
;
; ZEROTHRESH-LABEL: @simple_select_eph(
-; ZEROTHRESH-NEXT: [[C0:%.*]] = extractelement <4 x i32> %c, i32 0
-; ZEROTHRESH-NEXT: [[C1:%.*]] = extractelement <4 x i32> %c, i32 1
-; ZEROTHRESH-NEXT: [[C2:%.*]] = extractelement <4 x i32> %c, i32 2
-; ZEROTHRESH-NEXT: [[C3:%.*]] = extractelement <4 x i32> %c, i32 3
-; ZEROTHRESH-NEXT: [[A0:%.*]] = extractelement <4 x float> %a, i32 0
-; ZEROTHRESH-NEXT: [[A1:%.*]] = extractelement <4 x float> %a, i32 1
-; ZEROTHRESH-NEXT: [[A2:%.*]] = extractelement <4 x float> %a, i32 2
-; ZEROTHRESH-NEXT: [[A3:%.*]] = extractelement <4 x float> %a, i32 3
-; ZEROTHRESH-NEXT: [[B0:%.*]] = extractelement <4 x float> %b, i32 0
-; ZEROTHRESH-NEXT: [[B1:%.*]] = extractelement <4 x float> %b, i32 1
-; ZEROTHRESH-NEXT: [[B2:%.*]] = extractelement <4 x float> %b, i32 2
-; ZEROTHRESH-NEXT: [[B3:%.*]] = extractelement <4 x float> %b, i32 3
+; ZEROTHRESH-NEXT: [[C0:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 0
+; ZEROTHRESH-NEXT: [[C1:%.*]] = extractelement <4 x i32> [[C]], i32 1
+; ZEROTHRESH-NEXT: [[C2:%.*]] = extractelement <4 x i32> [[C]], i32 2
+; ZEROTHRESH-NEXT: [[C3:%.*]] = extractelement <4 x i32> [[C]], i32 3
+; ZEROTHRESH-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
+; ZEROTHRESH-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
+; ZEROTHRESH-NEXT: [[A2:%.*]] = extractelement <4 x float> [[A]], i32 2
+; ZEROTHRESH-NEXT: [[A3:%.*]] = extractelement <4 x float> [[A]], i32 3
+; ZEROTHRESH-NEXT: [[B0:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
+; ZEROTHRESH-NEXT: [[B1:%.*]] = extractelement <4 x float> [[B]], i32 1
+; ZEROTHRESH-NEXT: [[B2:%.*]] = extractelement <4 x float> [[B]], i32 2
+; ZEROTHRESH-NEXT: [[B3:%.*]] = extractelement <4 x float> [[B]], i32 3
; ZEROTHRESH-NEXT: [[CMP0:%.*]] = icmp ne i32 [[C0]], 0
; ZEROTHRESH-NEXT: [[CMP1:%.*]] = icmp ne i32 [[C1]], 0
; ZEROTHRESH-NEXT: [[CMP2:%.*]] = icmp ne i32 [[C2]], 0
@@ -175,8 +175,8 @@ define <4 x float> @simple_select_eph(<4
; doesn't matter
define <4 x float> @simple_select_insert_out_of_order(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
; CHECK-LABEL: @simple_select_insert_out_of_order(
-; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> %c, zeroinitializer
-; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> %a, <4 x float> %b
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> [[C:%.*]], zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
; CHECK-NEXT: [[RA:%.*]] = insertelement <4 x float> undef, float [[TMP3]], i32 2
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP2]], i32 1
@@ -188,8 +188,8 @@ define <4 x float> @simple_select_insert
; CHECK-NEXT: ret <4 x float> [[RD]]
;
; ZEROTHRESH-LABEL: @simple_select_insert_out_of_order(
-; ZEROTHRESH-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> %c, zeroinitializer
-; ZEROTHRESH-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> %a, <4 x float> %b
+; ZEROTHRESH-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> [[C:%.*]], zeroinitializer
+; ZEROTHRESH-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]
; ZEROTHRESH-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
; ZEROTHRESH-NEXT: [[RA:%.*]] = insertelement <4 x float> undef, float [[TMP3]], i32 2
; ZEROTHRESH-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP2]], i32 1
@@ -233,8 +233,8 @@ declare void @f32_user(float) #0
; Multiple users of the final constructed vector
define <4 x float> @simple_select_users(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
; CHECK-LABEL: @simple_select_users(
-; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> %c, zeroinitializer
-; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> %a, <4 x float> %b
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> [[C:%.*]], zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
; CHECK-NEXT: [[RA:%.*]] = insertelement <4 x float> undef, float [[TMP3]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP2]], i32 1
@@ -247,8 +247,8 @@ define <4 x float> @simple_select_users(
; CHECK-NEXT: ret <4 x float> [[RD]]
;
; ZEROTHRESH-LABEL: @simple_select_users(
-; ZEROTHRESH-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> %c, zeroinitializer
-; ZEROTHRESH-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> %a, <4 x float> %b
+; ZEROTHRESH-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> [[C:%.*]], zeroinitializer
+; ZEROTHRESH-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]
; ZEROTHRESH-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
; ZEROTHRESH-NEXT: [[RA:%.*]] = insertelement <4 x float> undef, float [[TMP3]], i32 0
; ZEROTHRESH-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP2]], i32 1
@@ -291,18 +291,18 @@ define <4 x float> @simple_select_users(
; Unused insertelement
define <4 x float> @simple_select_no_users(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
; CHECK-LABEL: @simple_select_no_users(
-; CHECK-NEXT: [[C0:%.*]] = extractelement <4 x i32> %c, i32 0
-; CHECK-NEXT: [[C1:%.*]] = extractelement <4 x i32> %c, i32 1
-; CHECK-NEXT: [[C2:%.*]] = extractelement <4 x i32> %c, i32 2
-; CHECK-NEXT: [[C3:%.*]] = extractelement <4 x i32> %c, i32 3
-; CHECK-NEXT: [[A0:%.*]] = extractelement <4 x float> %a, i32 0
-; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x float> %a, i32 1
-; CHECK-NEXT: [[A2:%.*]] = extractelement <4 x float> %a, i32 2
-; CHECK-NEXT: [[A3:%.*]] = extractelement <4 x float> %a, i32 3
-; CHECK-NEXT: [[B0:%.*]] = extractelement <4 x float> %b, i32 0
-; CHECK-NEXT: [[B1:%.*]] = extractelement <4 x float> %b, i32 1
-; CHECK-NEXT: [[B2:%.*]] = extractelement <4 x float> %b, i32 2
-; CHECK-NEXT: [[B3:%.*]] = extractelement <4 x float> %b, i32 3
+; CHECK-NEXT: [[C0:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 0
+; CHECK-NEXT: [[C1:%.*]] = extractelement <4 x i32> [[C]], i32 1
+; CHECK-NEXT: [[C2:%.*]] = extractelement <4 x i32> [[C]], i32 2
+; CHECK-NEXT: [[C3:%.*]] = extractelement <4 x i32> [[C]], i32 3
+; CHECK-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
+; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
+; CHECK-NEXT: [[A2:%.*]] = extractelement <4 x float> [[A]], i32 2
+; CHECK-NEXT: [[A3:%.*]] = extractelement <4 x float> [[A]], i32 3
+; CHECK-NEXT: [[B0:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
+; CHECK-NEXT: [[B1:%.*]] = extractelement <4 x float> [[B]], i32 1
+; CHECK-NEXT: [[B2:%.*]] = extractelement <4 x float> [[B]], i32 2
+; CHECK-NEXT: [[B3:%.*]] = extractelement <4 x float> [[B]], i32 3
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> undef, i32 [[C0]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[C1]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
@@ -330,18 +330,18 @@ define <4 x float> @simple_select_no_use
; CHECK-NEXT: ret <4 x float> [[RD]]
;
; ZEROTHRESH-LABEL: @simple_select_no_users(
-; ZEROTHRESH-NEXT: [[C0:%.*]] = extractelement <4 x i32> %c, i32 0
-; ZEROTHRESH-NEXT: [[C1:%.*]] = extractelement <4 x i32> %c, i32 1
-; ZEROTHRESH-NEXT: [[C2:%.*]] = extractelement <4 x i32> %c, i32 2
-; ZEROTHRESH-NEXT: [[C3:%.*]] = extractelement <4 x i32> %c, i32 3
-; ZEROTHRESH-NEXT: [[A0:%.*]] = extractelement <4 x float> %a, i32 0
-; ZEROTHRESH-NEXT: [[A1:%.*]] = extractelement <4 x float> %a, i32 1
-; ZEROTHRESH-NEXT: [[A2:%.*]] = extractelement <4 x float> %a, i32 2
-; ZEROTHRESH-NEXT: [[A3:%.*]] = extractelement <4 x float> %a, i32 3
-; ZEROTHRESH-NEXT: [[B0:%.*]] = extractelement <4 x float> %b, i32 0
-; ZEROTHRESH-NEXT: [[B1:%.*]] = extractelement <4 x float> %b, i32 1
-; ZEROTHRESH-NEXT: [[B2:%.*]] = extractelement <4 x float> %b, i32 2
-; ZEROTHRESH-NEXT: [[B3:%.*]] = extractelement <4 x float> %b, i32 3
+; ZEROTHRESH-NEXT: [[C0:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 0
+; ZEROTHRESH-NEXT: [[C1:%.*]] = extractelement <4 x i32> [[C]], i32 1
+; ZEROTHRESH-NEXT: [[C2:%.*]] = extractelement <4 x i32> [[C]], i32 2
+; ZEROTHRESH-NEXT: [[C3:%.*]] = extractelement <4 x i32> [[C]], i32 3
+; ZEROTHRESH-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
+; ZEROTHRESH-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
+; ZEROTHRESH-NEXT: [[A2:%.*]] = extractelement <4 x float> [[A]], i32 2
+; ZEROTHRESH-NEXT: [[A3:%.*]] = extractelement <4 x float> [[A]], i32 3
+; ZEROTHRESH-NEXT: [[B0:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
+; ZEROTHRESH-NEXT: [[B1:%.*]] = extractelement <4 x float> [[B]], i32 1
+; ZEROTHRESH-NEXT: [[B2:%.*]] = extractelement <4 x float> [[B]], i32 2
+; ZEROTHRESH-NEXT: [[B3:%.*]] = extractelement <4 x float> [[B]], i32 3
; ZEROTHRESH-NEXT: [[CMP0:%.*]] = icmp ne i32 [[C0]], 0
; ZEROTHRESH-NEXT: [[CMP1:%.*]] = icmp ne i32 [[C1]], 0
; ZEROTHRESH-NEXT: [[CMP2:%.*]] = icmp ne i32 [[C2]], 0
@@ -387,25 +387,25 @@ define <4 x float> @simple_select_no_use
; to do this backwards this backwards
define <4 x i32> @reconstruct(<4 x i32> %c) #0 {
; CHECK-LABEL: @reconstruct(
-; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> %c, i32 0
-; CHECK-NEXT: [[RA:%.*]] = insertelement <4 x i32> undef, i32 [[TMP1]], i32 0
-; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> %c, i32 1
-; CHECK-NEXT: [[RB:%.*]] = insertelement <4 x i32> [[RA]], i32 [[TMP2]], i32 1
-; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> %c, i32 2
-; CHECK-NEXT: [[RC:%.*]] = insertelement <4 x i32> [[RB]], i32 [[TMP3]], i32 2
-; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> %c, i32 3
-; CHECK-NEXT: [[RD:%.*]] = insertelement <4 x i32> [[RC]], i32 [[TMP4]], i32 3
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 3
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[C]], i32 2
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[C]], i32 1
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[C]], i32 0
+; CHECK-NEXT: [[RA:%.*]] = insertelement <4 x i32> undef, i32 [[TMP4]], i32 0
+; CHECK-NEXT: [[RB:%.*]] = insertelement <4 x i32> [[RA]], i32 [[TMP3]], i32 1
+; CHECK-NEXT: [[RC:%.*]] = insertelement <4 x i32> [[RB]], i32 [[TMP2]], i32 2
+; CHECK-NEXT: [[RD:%.*]] = insertelement <4 x i32> [[RC]], i32 [[TMP1]], i32 3
; CHECK-NEXT: ret <4 x i32> [[RD]]
;
; ZEROTHRESH-LABEL: @reconstruct(
-; ZEROTHRESH-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> %c, i32 0
-; ZEROTHRESH-NEXT: [[RA:%.*]] = insertelement <4 x i32> undef, i32 [[TMP1]], i32 0
-; ZEROTHRESH-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> %c, i32 1
-; ZEROTHRESH-NEXT: [[RB:%.*]] = insertelement <4 x i32> [[RA]], i32 [[TMP2]], i32 1
-; ZEROTHRESH-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> %c, i32 2
-; ZEROTHRESH-NEXT: [[RC:%.*]] = insertelement <4 x i32> [[RB]], i32 [[TMP3]], i32 2
-; ZEROTHRESH-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> %c, i32 3
-; ZEROTHRESH-NEXT: [[RD:%.*]] = insertelement <4 x i32> [[RC]], i32 [[TMP4]], i32 3
+; ZEROTHRESH-NEXT: [[C0:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 0
+; ZEROTHRESH-NEXT: [[C1:%.*]] = extractelement <4 x i32> [[C]], i32 1
+; ZEROTHRESH-NEXT: [[C2:%.*]] = extractelement <4 x i32> [[C]], i32 2
+; ZEROTHRESH-NEXT: [[C3:%.*]] = extractelement <4 x i32> [[C]], i32 3
+; ZEROTHRESH-NEXT: [[RA:%.*]] = insertelement <4 x i32> undef, i32 [[C0]], i32 0
+; ZEROTHRESH-NEXT: [[RB:%.*]] = insertelement <4 x i32> [[RA]], i32 [[C1]], i32 1
+; ZEROTHRESH-NEXT: [[RC:%.*]] = insertelement <4 x i32> [[RB]], i32 [[C2]], i32 2
+; ZEROTHRESH-NEXT: [[RD:%.*]] = insertelement <4 x i32> [[RC]], i32 [[C3]], i32 3
; ZEROTHRESH-NEXT: ret <4 x i32> [[RD]]
;
%c0 = extractelement <4 x i32> %c, i32 0
@@ -421,8 +421,8 @@ define <4 x i32> @reconstruct(<4 x i32>
define <2 x float> @simple_select_v2(<2 x float> %a, <2 x float> %b, <2 x i32> %c) #0 {
; CHECK-LABEL: @simple_select_v2(
-; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <2 x i32> %c, zeroinitializer
-; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x float> %a, <2 x float> %b
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <2 x i32> [[C:%.*]], zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x float> [[A:%.*]], <2 x float> [[B:%.*]]
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
; CHECK-NEXT: [[RA:%.*]] = insertelement <2 x float> undef, float [[TMP3]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
@@ -430,12 +430,12 @@ define <2 x float> @simple_select_v2(<2
; CHECK-NEXT: ret <2 x float> [[RB]]
;
; ZEROTHRESH-LABEL: @simple_select_v2(
-; ZEROTHRESH-NEXT: [[C0:%.*]] = extractelement <2 x i32> %c, i32 0
-; ZEROTHRESH-NEXT: [[C1:%.*]] = extractelement <2 x i32> %c, i32 1
-; ZEROTHRESH-NEXT: [[A0:%.*]] = extractelement <2 x float> %a, i32 0
-; ZEROTHRESH-NEXT: [[A1:%.*]] = extractelement <2 x float> %a, i32 1
-; ZEROTHRESH-NEXT: [[B0:%.*]] = extractelement <2 x float> %b, i32 0
-; ZEROTHRESH-NEXT: [[B1:%.*]] = extractelement <2 x float> %b, i32 1
+; ZEROTHRESH-NEXT: [[C0:%.*]] = extractelement <2 x i32> [[C:%.*]], i32 0
+; ZEROTHRESH-NEXT: [[C1:%.*]] = extractelement <2 x i32> [[C]], i32 1
+; ZEROTHRESH-NEXT: [[A0:%.*]] = extractelement <2 x float> [[A:%.*]], i32 0
+; ZEROTHRESH-NEXT: [[A1:%.*]] = extractelement <2 x float> [[A]], i32 1
+; ZEROTHRESH-NEXT: [[B0:%.*]] = extractelement <2 x float> [[B:%.*]], i32 0
+; ZEROTHRESH-NEXT: [[B1:%.*]] = extractelement <2 x float> [[B]], i32 1
; ZEROTHRESH-NEXT: [[CMP0:%.*]] = icmp ne i32 [[C0]], 0
; ZEROTHRESH-NEXT: [[CMP1:%.*]] = icmp ne i32 [[C1]], 0
; ZEROTHRESH-NEXT: [[S0:%.*]] = select i1 [[CMP0]], float [[A0]], float [[B0]]
@@ -464,12 +464,12 @@ define <2 x float> @simple_select_v2(<2
; (low cost threshold needed to force this to happen)
define <4 x float> @simple_select_partial_vector(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
; CHECK-LABEL: @simple_select_partial_vector(
-; CHECK-NEXT: [[C0:%.*]] = extractelement <4 x i32> %c, i32 0
-; CHECK-NEXT: [[C1:%.*]] = extractelement <4 x i32> %c, i32 1
-; CHECK-NEXT: [[A0:%.*]] = extractelement <4 x float> %a, i32 0
-; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x float> %a, i32 1
-; CHECK-NEXT: [[B0:%.*]] = extractelement <4 x float> %b, i32 0
-; CHECK-NEXT: [[B1:%.*]] = extractelement <4 x float> %b, i32 1
+; CHECK-NEXT: [[C0:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 0
+; CHECK-NEXT: [[C1:%.*]] = extractelement <4 x i32> [[C]], i32 1
+; CHECK-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
+; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
+; CHECK-NEXT: [[B0:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
+; CHECK-NEXT: [[B1:%.*]] = extractelement <4 x float> [[B]], i32 1
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> undef, i32 [[C0]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[C1]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
@@ -485,12 +485,12 @@ define <4 x float> @simple_select_partia
; CHECK-NEXT: ret <4 x float> [[RB]]
;
; ZEROTHRESH-LABEL: @simple_select_partial_vector(
-; ZEROTHRESH-NEXT: [[C0:%.*]] = extractelement <4 x i32> %c, i32 0
-; ZEROTHRESH-NEXT: [[C1:%.*]] = extractelement <4 x i32> %c, i32 1
-; ZEROTHRESH-NEXT: [[A0:%.*]] = extractelement <4 x float> %a, i32 0
-; ZEROTHRESH-NEXT: [[A1:%.*]] = extractelement <4 x float> %a, i32 1
-; ZEROTHRESH-NEXT: [[B0:%.*]] = extractelement <4 x float> %b, i32 0
-; ZEROTHRESH-NEXT: [[B1:%.*]] = extractelement <4 x float> %b, i32 1
+; ZEROTHRESH-NEXT: [[C0:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 0
+; ZEROTHRESH-NEXT: [[C1:%.*]] = extractelement <4 x i32> [[C]], i32 1
+; ZEROTHRESH-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
+; ZEROTHRESH-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
+; ZEROTHRESH-NEXT: [[B0:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
+; ZEROTHRESH-NEXT: [[B1:%.*]] = extractelement <4 x float> [[B]], i32 1
; ZEROTHRESH-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> undef, i32 [[C0]], i32 0
; ZEROTHRESH-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[C1]], i32 1
; ZEROTHRESH-NEXT: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
@@ -530,7 +530,7 @@ define <4 x float> @simple_select_partia
; must be rescheduled. The case here is from compiling Julia.
define <4 x float> @reschedule_extract(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @reschedule_extract(
-; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> %a, %b
+; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
; CHECK-NEXT: [[V0:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 1
@@ -542,7 +542,7 @@ define <4 x float> @reschedule_extract(<
; CHECK-NEXT: ret <4 x float> [[V3]]
;
; ZEROTHRESH-LABEL: @reschedule_extract(
-; ZEROTHRESH-NEXT: [[TMP1:%.*]] = fadd <4 x float> %a, %b
+; ZEROTHRESH-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A:%.*]], [[B:%.*]]
; ZEROTHRESH-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
; ZEROTHRESH-NEXT: [[V0:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0
; ZEROTHRESH-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 1
@@ -576,7 +576,7 @@ define <4 x float> @reschedule_extract(<
; instructions that are erased.
define <4 x float> @take_credit(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @take_credit(
-; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> %a, %b
+; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
; CHECK-NEXT: [[V0:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 1
@@ -588,7 +588,7 @@ define <4 x float> @take_credit(<4 x flo
; CHECK-NEXT: ret <4 x float> [[V3]]
;
; ZEROTHRESH-LABEL: @take_credit(
-; ZEROTHRESH-NEXT: [[TMP1:%.*]] = fadd <4 x float> %a, %b
+; ZEROTHRESH-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A:%.*]], [[B:%.*]]
; ZEROTHRESH-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
; ZEROTHRESH-NEXT: [[V0:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0
; ZEROTHRESH-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 1
@@ -622,10 +622,10 @@ define <4 x float> @take_credit(<4 x flo
define <4 x double> @multi_tree(double %w, double %x, double %y, double %z) {
; CHECK-LABEL: @multi_tree(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x double> undef, double %w, i32 0
-; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x double> [[TMP0]], double %x, i32 1
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x double> [[TMP1]], double %y, i32 2
-; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double %z, i32 3
+; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x double> undef, double [[W:%.*]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x double> [[TMP0]], double [[X:%.*]], i32 1
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x double> [[TMP1]], double [[Y:%.*]], i32 2
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double [[Z:%.*]], i32 3
; CHECK-NEXT: [[TMP4:%.*]] = fadd <4 x double> [[TMP3]], <double 0.000000e+00, double 1.000000e+00, double 2.000000e+00, double 3.000000e+00>
; CHECK-NEXT: [[TMP5:%.*]] = fmul <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x double> [[TMP5]], i32 0
@@ -640,10 +640,10 @@ define <4 x double> @multi_tree(double %
;
; ZEROTHRESH-LABEL: @multi_tree(
; ZEROTHRESH-NEXT: entry:
-; ZEROTHRESH-NEXT: [[TMP0:%.*]] = insertelement <4 x double> undef, double %w, i32 0
-; ZEROTHRESH-NEXT: [[TMP1:%.*]] = insertelement <4 x double> [[TMP0]], double %x, i32 1
-; ZEROTHRESH-NEXT: [[TMP2:%.*]] = insertelement <4 x double> [[TMP1]], double %y, i32 2
-; ZEROTHRESH-NEXT: [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double %z, i32 3
+; ZEROTHRESH-NEXT: [[TMP0:%.*]] = insertelement <4 x double> undef, double [[W:%.*]], i32 0
+; ZEROTHRESH-NEXT: [[TMP1:%.*]] = insertelement <4 x double> [[TMP0]], double [[X:%.*]], i32 1
+; ZEROTHRESH-NEXT: [[TMP2:%.*]] = insertelement <4 x double> [[TMP1]], double [[Y:%.*]], i32 2
+; ZEROTHRESH-NEXT: [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double [[Z:%.*]], i32 3
; ZEROTHRESH-NEXT: [[TMP4:%.*]] = fadd <4 x double> [[TMP3]], <double 0.000000e+00, double 1.000000e+00, double 2.000000e+00, double 3.000000e+00>
; ZEROTHRESH-NEXT: [[TMP5:%.*]] = fmul <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, [[TMP4]]
; ZEROTHRESH-NEXT: [[TMP6:%.*]] = extractelement <4 x double> [[TMP5]], i32 0
@@ -675,7 +675,7 @@ entry:
define <8 x float> @_vadd256(<8 x float> %a, <8 x float> %b) local_unnamed_addr #0 {
; CHECK-LABEL: @_vadd256(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = fadd <8 x float> %a, %b
+; CHECK-NEXT: [[TMP0:%.*]] = fadd <8 x float> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <8 x float> [[TMP0]], i32 0
; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x float> undef, float [[TMP1]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[TMP0]], i32 1
@@ -696,7 +696,7 @@ define <8 x float> @_vadd256(<8 x float>
;
; ZEROTHRESH-LABEL: @_vadd256(
; ZEROTHRESH-NEXT: entry:
-; ZEROTHRESH-NEXT: [[TMP0:%.*]] = fadd <8 x float> %a, %b
+; ZEROTHRESH-NEXT: [[TMP0:%.*]] = fadd <8 x float> [[A:%.*]], [[B:%.*]]
; ZEROTHRESH-NEXT: [[TMP1:%.*]] = extractelement <8 x float> [[TMP0]], i32 0
; ZEROTHRESH-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x float> undef, float [[TMP1]], i32 0
; ZEROTHRESH-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[TMP0]], i32 1
Modified: llvm/branches/release_60/test/Transforms/SLPVectorizer/X86/insertvalue.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_60/test/Transforms/SLPVectorizer/X86/insertvalue.ll?rev=322675&r1=322674&r2=322675&view=diff
==============================================================================
--- llvm/branches/release_60/test/Transforms/SLPVectorizer/X86/insertvalue.ll (original)
+++ llvm/branches/release_60/test/Transforms/SLPVectorizer/X86/insertvalue.ll Wed Jan 17 08:04:05 2018
@@ -1,11 +1,30 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -basicaa -slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx | FileCheck %s
-; CHECK-LABEL: julia_2xdouble
-; CHECK: load <2 x double>
-; CHECK: load <2 x double>
-; CHECK: fmul <2 x double>
-; CHECK: fadd <2 x double>
define void @julia_2xdouble([2 x double]* sret, [2 x double]*, [2 x double]*, [2 x double]*) {
+; CHECK-LABEL: @julia_2xdouble(
+; CHECK-NEXT: top:
+; CHECK-NEXT: [[PX0:%.*]] = getelementptr inbounds [2 x double], [2 x double]* [[TMP2:%.*]], i64 0, i64 0
+; CHECK-NEXT: [[PY0:%.*]] = getelementptr inbounds [2 x double], [2 x double]* [[TMP3:%.*]], i64 0, i64 0
+; CHECK-NEXT: [[PX1:%.*]] = getelementptr inbounds [2 x double], [2 x double]* [[TMP2]], i64 0, i64 1
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast double* [[PX0]] to <2 x double>*
+; CHECK-NEXT: [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[TMP4]], align 4
+; CHECK-NEXT: [[PY1:%.*]] = getelementptr inbounds [2 x double], [2 x double]* [[TMP3]], i64 0, i64 1
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast double* [[PY0]] to <2 x double>*
+; CHECK-NEXT: [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[TMP6]], align 4
+; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x double> [[TMP5]], [[TMP7]]
+; CHECK-NEXT: [[PZ0:%.*]] = getelementptr inbounds [2 x double], [2 x double]* [[TMP1:%.*]], i64 0, i64 0
+; CHECK-NEXT: [[PZ1:%.*]] = getelementptr inbounds [2 x double], [2 x double]* [[TMP1]], i64 0, i64 1
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast double* [[PZ0]] to <2 x double>*
+; CHECK-NEXT: [[TMP10:%.*]] = load <2 x double>, <2 x double>* [[TMP9]], align 4
+; CHECK-NEXT: [[TMP11:%.*]] = fadd <2 x double> [[TMP8]], [[TMP10]]
+; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x double> [[TMP11]], i32 0
+; CHECK-NEXT: [[I0:%.*]] = insertvalue [2 x double] undef, double [[TMP12]], 0
+; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x double> [[TMP11]], i32 1
+; CHECK-NEXT: [[I1:%.*]] = insertvalue [2 x double] [[I0]], double [[TMP13]], 1
+; CHECK-NEXT: store [2 x double] [[I1]], [2 x double]* [[TMP0:%.*]], align 4
+; CHECK-NEXT: ret void
+;
top:
%px0 = getelementptr inbounds [2 x double], [2 x double]* %2, i64 0, i64 0
%x0 = load double, double* %px0, align 4
@@ -29,12 +48,40 @@ top:
ret void
}
-; CHECK-LABEL: julia_4xfloat
-; CHECK: load <4 x float>
-; CHECK: load <4 x float>
-; CHECK: fmul <4 x float>
-; CHECK: fadd <4 x float>
define void @julia_4xfloat([4 x float]* sret, [4 x float]*, [4 x float]*, [4 x float]*) {
+; CHECK-LABEL: @julia_4xfloat(
+; CHECK-NEXT: top:
+; CHECK-NEXT: [[PX0:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP2:%.*]], i64 0, i64 0
+; CHECK-NEXT: [[PY0:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP3:%.*]], i64 0, i64 0
+; CHECK-NEXT: [[PX1:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP2]], i64 0, i64 1
+; CHECK-NEXT: [[PY1:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP3]], i64 0, i64 1
+; CHECK-NEXT: [[PX2:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP2]], i64 0, i64 2
+; CHECK-NEXT: [[PY2:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP3]], i64 0, i64 2
+; CHECK-NEXT: [[PX3:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP2]], i64 0, i64 3
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast float* [[PX0]] to <4 x float>*
+; CHECK-NEXT: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[TMP4]], align 4
+; CHECK-NEXT: [[PY3:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP3]], i64 0, i64 3
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast float* [[PY0]] to <4 x float>*
+; CHECK-NEXT: [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[TMP6]], align 4
+; CHECK-NEXT: [[TMP8:%.*]] = fmul <4 x float> [[TMP5]], [[TMP7]]
+; CHECK-NEXT: [[PZ0:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP1:%.*]], i64 0, i64 0
+; CHECK-NEXT: [[PZ1:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP1]], i64 0, i64 1
+; CHECK-NEXT: [[PZ2:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP1]], i64 0, i64 2
+; CHECK-NEXT: [[PZ3:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP1]], i64 0, i64 3
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast float* [[PZ0]] to <4 x float>*
+; CHECK-NEXT: [[TMP10:%.*]] = load <4 x float>, <4 x float>* [[TMP9]], align 4
+; CHECK-NEXT: [[TMP11:%.*]] = fadd <4 x float> [[TMP8]], [[TMP10]]
+; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x float> [[TMP11]], i32 0
+; CHECK-NEXT: [[I0:%.*]] = insertvalue [4 x float] undef, float [[TMP12]], 0
+; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x float> [[TMP11]], i32 1
+; CHECK-NEXT: [[I1:%.*]] = insertvalue [4 x float] [[I0]], float [[TMP13]], 1
+; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x float> [[TMP11]], i32 2
+; CHECK-NEXT: [[I2:%.*]] = insertvalue [4 x float] [[I1]], float [[TMP14]], 2
+; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x float> [[TMP11]], i32 3
+; CHECK-NEXT: [[I3:%.*]] = insertvalue [4 x float] [[I2]], float [[TMP15]], 3
+; CHECK-NEXT: store [4 x float] [[I3]], [4 x float]* [[TMP0:%.*]], align 4
+; CHECK-NEXT: ret void
+;
top:
%px0 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 0
%x0 = load float, float* %px0, align 4
@@ -76,9 +123,27 @@ top:
ret void
}
-; CHECK-LABEL: julia_load_array_of_float
-; CHECK: fsub <4 x float>
define void @julia_load_array_of_float([4 x float]* %a, [4 x float]* %b, [4 x float]* %c) {
+; CHECK-LABEL: @julia_load_array_of_float(
+; CHECK-NEXT: top:
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast [4 x float]* [[A:%.*]] to <4 x float>*
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
+; CHECK-NEXT: [[A_ARR:%.*]] = load [4 x float], [4 x float]* [[A]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast [4 x float]* [[B:%.*]] to <4 x float>*
+; CHECK-NEXT: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[TMP2]], align 4
+; CHECK-NEXT: [[B_ARR:%.*]] = load [4 x float], [4 x float]* [[B]], align 4
+; CHECK-NEXT: [[TMP4:%.*]] = fsub <4 x float> [[TMP1]], [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP4]], i32 0
+; CHECK-NEXT: [[C_ARR0:%.*]] = insertvalue [4 x float] undef, float [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP4]], i32 1
+; CHECK-NEXT: [[C_ARR1:%.*]] = insertvalue [4 x float] [[C_ARR0]], float [[TMP6]], 1
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP4]], i32 2
+; CHECK-NEXT: [[C_ARR2:%.*]] = insertvalue [4 x float] [[C_ARR1]], float [[TMP7]], 2
+; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[TMP4]], i32 3
+; CHECK-NEXT: [[C_ARR3:%.*]] = insertvalue [4 x float] [[C_ARR2]], float [[TMP8]], 3
+; CHECK-NEXT: store [4 x float] [[C_ARR3]], [4 x float]* [[C:%.*]], align 4
+; CHECK-NEXT: ret void
+;
top:
%a_arr = load [4 x float], [4 x float]* %a, align 4
%a0 = extractvalue [4 x float] %a_arr, 0
@@ -102,11 +167,27 @@ top:
ret void
}
-; CHECK-LABEL: julia_load_array_of_i32
-; CHECK: load <4 x i32>
-; CHECK: load <4 x i32>
-; CHECK: sub <4 x i32>
define void @julia_load_array_of_i32([4 x i32]* %a, [4 x i32]* %b, [4 x i32]* %c) {
+; CHECK-LABEL: @julia_load_array_of_i32(
+; CHECK-NEXT: top:
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast [4 x i32]* [[A:%.*]] to <4 x i32>*
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
+; CHECK-NEXT: [[A_ARR:%.*]] = load [4 x i32], [4 x i32]* [[A]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast [4 x i32]* [[B:%.*]] to <4 x i32>*
+; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4
+; CHECK-NEXT: [[B_ARR:%.*]] = load [4 x i32], [4 x i32]* [[B]], align 4
+; CHECK-NEXT: [[TMP4:%.*]] = sub <4 x i32> [[TMP1]], [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP4]], i32 0
+; CHECK-NEXT: [[C_ARR0:%.*]] = insertvalue [4 x i32] undef, i32 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP4]], i32 1
+; CHECK-NEXT: [[C_ARR1:%.*]] = insertvalue [4 x i32] [[C_ARR0]], i32 [[TMP6]], 1
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP4]], i32 2
+; CHECK-NEXT: [[C_ARR2:%.*]] = insertvalue [4 x i32] [[C_ARR1]], i32 [[TMP7]], 2
+; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[TMP4]], i32 3
+; CHECK-NEXT: [[C_ARR3:%.*]] = insertvalue [4 x i32] [[C_ARR2]], i32 [[TMP8]], 3
+; CHECK-NEXT: store [4 x i32] [[C_ARR3]], [4 x i32]* [[C:%.*]], align 4
+; CHECK-NEXT: ret void
+;
top:
%a_arr = load [4 x i32], [4 x i32]* %a, align 4
%a0 = extractvalue [4 x i32] %a_arr, 0
@@ -132,9 +213,30 @@ top:
; Almost identical to previous test, but for type that should NOT be vectorized.
;
-; CHECK-LABEL: julia_load_array_of_i16
-; CHECK-NOT: i2>
define void @julia_load_array_of_i16([4 x i16]* %a, [4 x i16]* %b, [4 x i16]* %c) {
+; CHECK-LABEL: @julia_load_array_of_i16(
+; CHECK-NEXT: top:
+; CHECK-NEXT: [[A_ARR:%.*]] = load [4 x i16], [4 x i16]* [[A:%.*]], align 4
+; CHECK-NEXT: [[A0:%.*]] = extractvalue [4 x i16] [[A_ARR]], 0
+; CHECK-NEXT: [[A2:%.*]] = extractvalue [4 x i16] [[A_ARR]], 2
+; CHECK-NEXT: [[A1:%.*]] = extractvalue [4 x i16] [[A_ARR]], 1
+; CHECK-NEXT: [[B_ARR:%.*]] = load [4 x i16], [4 x i16]* [[B:%.*]], align 4
+; CHECK-NEXT: [[B0:%.*]] = extractvalue [4 x i16] [[B_ARR]], 0
+; CHECK-NEXT: [[B2:%.*]] = extractvalue [4 x i16] [[B_ARR]], 2
+; CHECK-NEXT: [[B1:%.*]] = extractvalue [4 x i16] [[B_ARR]], 1
+; CHECK-NEXT: [[A3:%.*]] = extractvalue [4 x i16] [[A_ARR]], 3
+; CHECK-NEXT: [[C1:%.*]] = sub i16 [[A1]], [[B1]]
+; CHECK-NEXT: [[B3:%.*]] = extractvalue [4 x i16] [[B_ARR]], 3
+; CHECK-NEXT: [[C0:%.*]] = sub i16 [[A0]], [[B0]]
+; CHECK-NEXT: [[C2:%.*]] = sub i16 [[A2]], [[B2]]
+; CHECK-NEXT: [[C_ARR0:%.*]] = insertvalue [4 x i16] undef, i16 [[C0]], 0
+; CHECK-NEXT: [[C_ARR1:%.*]] = insertvalue [4 x i16] [[C_ARR0]], i16 [[C1]], 1
+; CHECK-NEXT: [[C3:%.*]] = sub i16 [[A3]], [[B3]]
+; CHECK-NEXT: [[C_ARR2:%.*]] = insertvalue [4 x i16] [[C_ARR1]], i16 [[C2]], 2
+; CHECK-NEXT: [[C_ARR3:%.*]] = insertvalue [4 x i16] [[C_ARR2]], i16 [[C3]], 3
+; CHECK-NEXT: store [4 x i16] [[C_ARR3]], [4 x i16]* [[C:%.*]], align 4
+; CHECK-NEXT: ret void
+;
top:
%a_arr = load [4 x i16], [4 x i16]* %a, align 4
%a0 = extractvalue [4 x i16] %a_arr, 0
@@ -160,11 +262,27 @@ top:
%pseudovec = type { float, float, float, float }
-; CHECK-LABEL: julia_load_struct_of_float
-; CHECK: load <4 x float>
-; CHECK: load <4 x float>
-; CHECK: fsub <4 x float>
define void @julia_load_struct_of_float(%pseudovec* %a, %pseudovec* %b, %pseudovec* %c) {
+; CHECK-LABEL: @julia_load_struct_of_float(
+; CHECK-NEXT: top:
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast %pseudovec* [[A:%.*]] to <4 x float>*
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
+; CHECK-NEXT: [[A_STRUCT:%.*]] = load [[PSEUDOVEC:%.*]], %pseudovec* [[A]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast %pseudovec* [[B:%.*]] to <4 x float>*
+; CHECK-NEXT: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[TMP2]], align 4
+; CHECK-NEXT: [[B_STRUCT:%.*]] = load [[PSEUDOVEC]], %pseudovec* [[B]], align 4
+; CHECK-NEXT: [[TMP4:%.*]] = fsub <4 x float> [[TMP1]], [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP4]], i32 0
+; CHECK-NEXT: [[C_STRUCT0:%.*]] = insertvalue [[PSEUDOVEC]] undef, float [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP4]], i32 1
+; CHECK-NEXT: [[C_STRUCT1:%.*]] = insertvalue [[PSEUDOVEC]] %c_struct0, float [[TMP6]], 1
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP4]], i32 2
+; CHECK-NEXT: [[C_STRUCT2:%.*]] = insertvalue [[PSEUDOVEC]] %c_struct1, float [[TMP7]], 2
+; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[TMP4]], i32 3
+; CHECK-NEXT: [[C_STRUCT3:%.*]] = insertvalue [[PSEUDOVEC]] %c_struct2, float [[TMP8]], 3
+; CHECK-NEXT: store [[PSEUDOVEC]] %c_struct3, %pseudovec* [[C:%.*]], align 4
+; CHECK-NEXT: ret void
+;
top:
%a_struct = load %pseudovec, %pseudovec* %a, align 4
%a0 = extractvalue %pseudovec %a_struct, 0
Modified: llvm/branches/release_60/test/Transforms/SLPVectorizer/X86/value-bug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_60/test/Transforms/SLPVectorizer/X86/value-bug.ll?rev=322675&r1=322674&r2=322675&view=diff
==============================================================================
--- llvm/branches/release_60/test/Transforms/SLPVectorizer/X86/value-bug.ll (original)
+++ llvm/branches/release_60/test/Transforms/SLPVectorizer/X86/value-bug.ll Wed Jan 17 08:04:05 2018
@@ -1,15 +1,46 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -slp-vectorizer < %s -S -mtriple="x86_64-grtev3-linux-gnu" -mcpu=corei7-avx | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-grtev3-linux-gnu"
; We used to crash on this example because we were building a constant
; expression during vectorization and the vectorizer expects instructions
; as elements of the vectorized tree.
-; CHECK-LABEL: @test
; PR19621
define void @test() {
+; CHECK-LABEL: @test(
+; CHECK-NEXT: bb279:
+; CHECK-NEXT: br label [[BB283:%.*]]
+; CHECK: bb283:
+; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x float> [ undef, [[BB279:%.*]] ], [ [[TMP11:%.*]], [[EXIT:%.*]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x float> [ undef, [[BB279]] ], [ [[TMP13:%.*]], [[EXIT]] ]
+; CHECK-NEXT: br label [[BB284:%.*]]
+; CHECK: bb284:
+; CHECK-NEXT: [[TMP2:%.*]] = fpext <2 x float> [[TMP0]] to <2 x double>
+; CHECK-NEXT: [[TMP3:%.*]] = fsub <2 x double> [[TMP2]], undef
+; CHECK-NEXT: [[TMP4:%.*]] = fsub <2 x double> [[TMP3]], undef
+; CHECK-NEXT: br label [[BB21_I:%.*]]
+; CHECK: bb21.i:
+; CHECK-NEXT: br i1 undef, label [[BB22_I:%.*]], label [[EXIT]]
+; CHECK: bb22.i:
+; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> undef, [[TMP4]]
+; CHECK-NEXT: br label [[BB32_I:%.*]]
+; CHECK: bb32.i:
+; CHECK-NEXT: [[TMP6:%.*]] = phi <2 x double> [ [[TMP5]], [[BB22_I]] ], [ zeroinitializer, [[BB32_I]] ]
+; CHECK-NEXT: br i1 undef, label [[BB32_I]], label [[BB21_I]]
+; CHECK: exit:
+; CHECK-NEXT: [[TMP7:%.*]] = fpext <2 x float> [[TMP1]] to <2 x double>
+; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x double> <double undef, double 0.000000e+00>, [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = fadd <2 x double> undef, [[TMP8]]
+; CHECK-NEXT: [[TMP10:%.*]] = fadd <2 x double> undef, [[TMP9]]
+; CHECK-NEXT: [[TMP11]] = fptrunc <2 x double> [[TMP10]] to <2 x float>
+; CHECK-NEXT: [[TMP317:%.*]] = fptrunc double undef to float
+; CHECK-NEXT: [[TMP319:%.*]] = fptrunc double undef to float
+; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x float> undef, float [[TMP317]], i32 0
+; CHECK-NEXT: [[TMP13]] = insertelement <2 x float> [[TMP12]], float [[TMP319]], i32 1
+; CHECK-NEXT: br label [[BB283]]
+;
bb279:
br label %bb283
@@ -62,6 +93,12 @@ exit:
; vectorizer starts at the type (%t2, %t3) and will constant fold the tree.
; The code that handles insertelement instructions must handle this.
define <4 x double> @constant_folding() {
+; CHECK-LABEL: @constant_folding(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[I1:%.*]] = insertelement <4 x double> undef, double 1.000000e+00, i32 1
+; CHECK-NEXT: [[I2:%.*]] = insertelement <4 x double> [[I1]], double 2.000000e+00, i32 0
+; CHECK-NEXT: ret <4 x double> [[I2]]
+;
entry:
%t0 = fadd double 1.000000e+00 , 0.000000e+00
%t1 = fadd double 1.000000e+00 , 1.000000e+00
@@ -71,10 +108,3 @@ entry:
%i2 = insertelement <4 x double> %i1, double %t3, i32 0
ret <4 x double> %i2
}
-
-; CHECK-LABEL: @constant_folding
-; CHECK: %[[V0:.+]] = extractelement <2 x double> <double 1.000000e+00, double 2.000000e+00>, i32 0
-; CHECK: %[[V1:.+]] = insertelement <4 x double> undef, double %[[V0]], i32 1
-; CHECK: %[[V2:.+]] = extractelement <2 x double> <double 1.000000e+00, double 2.000000e+00>, i32 1
-; CHECK: %[[V3:.+]] = insertelement <4 x double> %[[V1]], double %[[V2]], i32 0
-; CHECK: ret <4 x double> %[[V3]]
More information about the llvm-branch-commits
mailing list