[llvm] a7478fa - [SLP] Fix order of `insertelement`/`insertvalue` seed operands
Anton Afanasyev via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 6 12:10:38 PDT 2020
Author: Anton Afanasyev
Date: 2020-08-06T22:09:24+03:00
New Revision: a7478fab6ce82532c1545614362a3688e8a4ed36
URL: https://github.com/llvm/llvm-project/commit/a7478fab6ce82532c1545614362a3688e8a4ed36
DIFF: https://github.com/llvm/llvm-project/commit/a7478fab6ce82532c1545614362a3688e8a4ed36.diff
LOG: [SLP] Fix order of `insertelement`/`insertvalue` seed operands
Summary:
This patch takes the index operands of `insertelement`/`insertvalue`
into account when generating the seed elements for `findBuildAggregate()`.
Previously, this function kept the seeds in the original order of the
`insert` instructions.
This patch also optimizes `findBuildAggregate()` by avoiding redundant
temporary vector allocations and repeated reversing of the result vectors.
Fixes llvm.org/pr44067
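For illustration, a minimal hypothetical IR sketch (function and value names
are made up, not taken from the tests in this patch) of a build vector whose
inserts appear out of index order; the seeds are now placed into the result
list by each insert's index operand rather than by the order of the insert
chain, so no extra reverse shuffle is needed for such patterns:
  define <2 x float> @build_out_of_order(float %s0, float %s1) {
    ; element 1 is inserted first, element 0 second
    %ins1 = insertelement <2 x float> undef, float %s1, i32 1
    %ins0 = insertelement <2 x float> %ins1, float %s0, i32 0
    ; seeds are gathered as [%s0, %s1] (by index), matching the final vector layout
    ret <2 x float> %ins0
  }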
Subscribers: hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D83779
Added:
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
llvm/test/Transforms/PhaseOrdering/X86/horiz-math.ll
llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll
llvm/test/Transforms/SLPVectorizer/X86/pr42022.ll
llvm/test/Transforms/SLPVectorizer/X86/pr44067.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index b5b3fa59b8e9..8410f7201c48 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -7084,6 +7084,94 @@ class HorizontalReduction {
} // end anonymous namespace
+static Optional<unsigned> getAggregateSize(Instruction *InsertInst) {
+ if (auto *IE = dyn_cast<InsertElementInst>(InsertInst))
+ return cast<FixedVectorType>(IE->getType())->getNumElements();
+
+ unsigned AggregateSize = 1;
+ auto *IV = cast<InsertValueInst>(InsertInst);
+ Type *CurrentType = IV->getType();
+ do {
+ if (auto *ST = dyn_cast<StructType>(CurrentType)) {
+ for (auto *Elt : ST->elements())
+ if (Elt != ST->getElementType(0)) // check homogeneity
+ return None;
+ AggregateSize *= ST->getNumElements();
+ CurrentType = ST->getElementType(0);
+ } else if (auto *AT = dyn_cast<ArrayType>(CurrentType)) {
+ AggregateSize *= AT->getNumElements();
+ CurrentType = AT->getElementType();
+ } else if (auto *VT = dyn_cast<FixedVectorType>(CurrentType)) {
+ AggregateSize *= VT->getNumElements();
+ return AggregateSize;
+ } else if (CurrentType->isSingleValueType()) {
+ return AggregateSize;
+ } else {
+ return None;
+ }
+ } while (true);
+}
+
+static Optional<unsigned> getOperandIndex(Instruction *InsertInst,
+ unsigned OperandOffset) {
+ unsigned OperandIndex = OperandOffset;
+ if (auto *IE = dyn_cast<InsertElementInst>(InsertInst)) {
+ if (auto *CI = dyn_cast<ConstantInt>(IE->getOperand(2))) {
+ auto *VT = cast<FixedVectorType>(IE->getType());
+ OperandIndex *= VT->getNumElements();
+ OperandIndex += CI->getZExtValue();
+ return OperandIndex;
+ }
+ return None;
+ }
+
+ auto *IV = cast<InsertValueInst>(InsertInst);
+ Type *CurrentType = IV->getType();
+ for (unsigned int Index : IV->indices()) {
+ if (auto *ST = dyn_cast<StructType>(CurrentType)) {
+ OperandIndex *= ST->getNumElements();
+ CurrentType = ST->getElementType(Index);
+ } else if (auto *AT = dyn_cast<ArrayType>(CurrentType)) {
+ OperandIndex *= AT->getNumElements();
+ CurrentType = AT->getElementType();
+ } else {
+ return None;
+ }
+ OperandIndex += Index;
+ }
+ return OperandIndex;
+}
+
+static bool findBuildAggregate_rec(Instruction *LastInsertInst,
+ TargetTransformInfo *TTI,
+ SmallVectorImpl<Value *> &BuildVectorOpds,
+ SmallVectorImpl<Value *> &InsertElts,
+ unsigned OperandOffset) {
+ do {
+ Value *InsertedOperand = LastInsertInst->getOperand(1);
+ Optional<unsigned> OperandIndex =
+ getOperandIndex(LastInsertInst, OperandOffset);
+ if (!OperandIndex)
+ return false;
+ if (isa<InsertElementInst>(InsertedOperand) ||
+ isa<InsertValueInst>(InsertedOperand)) {
+ if (!findBuildAggregate_rec(cast<Instruction>(InsertedOperand), TTI,
+ BuildVectorOpds, InsertElts, *OperandIndex))
+ return false;
+ } else {
+ BuildVectorOpds[*OperandIndex] = InsertedOperand;
+ InsertElts[*OperandIndex] = LastInsertInst;
+ }
+ if (isa<UndefValue>(LastInsertInst->getOperand(0)))
+ return true;
+ LastInsertInst = dyn_cast<Instruction>(LastInsertInst->getOperand(0));
+ } while (LastInsertInst != nullptr &&
+ (isa<InsertValueInst>(LastInsertInst) ||
+ isa<InsertElementInst>(LastInsertInst)) &&
+ LastInsertInst->hasOneUse());
+ return false;
+}
+
/// Recognize construction of vectors like
/// %ra = insertelement <4 x float> undef, float %s0, i32 0
/// %rb = insertelement <4 x float> %ra, float %s1, i32 1
@@ -7091,54 +7179,41 @@ class HorizontalReduction {
/// %rd = insertelement <4 x float> %rc, float %s3, i32 3
/// starting from the last insertelement or insertvalue instruction.
///
-/// Also recognize aggregates like {<2 x float>, <2 x float>},
+/// Also recognize homogeneous aggregates like {<2 x float>, <2 x float>},
/// {{float, float}, {float, float}}, [2 x {float, float}] and so on.
/// See llvm/test/Transforms/SLPVectorizer/X86/pr42022.ll for examples.
///
/// Assume LastInsertInst is of InsertElementInst or InsertValueInst type.
///
/// \return true if it matches.
-static bool findBuildAggregate(Value *LastInsertInst, TargetTransformInfo *TTI,
+static bool findBuildAggregate(Instruction *LastInsertInst,
+ TargetTransformInfo *TTI,
SmallVectorImpl<Value *> &BuildVectorOpds,
SmallVectorImpl<Value *> &InsertElts) {
+
assert((isa<InsertElementInst>(LastInsertInst) ||
isa<InsertValueInst>(LastInsertInst)) &&
"Expected insertelement or insertvalue instruction!");
- do {
- Value *InsertedOperand;
- auto *IE = dyn_cast<InsertElementInst>(LastInsertInst);
- if (IE) {
- InsertedOperand = IE->getOperand(1);
- LastInsertInst = IE->getOperand(0);
- } else {
- auto *IV = cast<InsertValueInst>(LastInsertInst);
- InsertedOperand = IV->getInsertedValueOperand();
- LastInsertInst = IV->getAggregateOperand();
- }
- if (isa<InsertElementInst>(InsertedOperand) ||
- isa<InsertValueInst>(InsertedOperand)) {
- SmallVector<Value *, 8> TmpBuildVectorOpds;
- SmallVector<Value *, 8> TmpInsertElts;
- if (!findBuildAggregate(InsertedOperand, TTI, TmpBuildVectorOpds,
- TmpInsertElts))
- return false;
- BuildVectorOpds.append(TmpBuildVectorOpds.rbegin(),
- TmpBuildVectorOpds.rend());
- InsertElts.append(TmpInsertElts.rbegin(), TmpInsertElts.rend());
- } else {
- BuildVectorOpds.push_back(InsertedOperand);
- InsertElts.push_back(IE);
- }
- if (isa<UndefValue>(LastInsertInst))
- break;
- if ((!isa<InsertValueInst>(LastInsertInst) &&
- !isa<InsertElementInst>(LastInsertInst)) ||
- !LastInsertInst->hasOneUse())
- return false;
- } while (true);
- std::reverse(BuildVectorOpds.begin(), BuildVectorOpds.end());
- std::reverse(InsertElts.begin(), InsertElts.end());
- return true;
+
+ assert((BuildVectorOpds.empty() && InsertElts.empty()) &&
+ "Expected empty result vectors!");
+
+ Optional<unsigned> AggregateSize = getAggregateSize(LastInsertInst);
+ if (!AggregateSize)
+ return false;
+ BuildVectorOpds.resize(*AggregateSize);
+ InsertElts.resize(*AggregateSize);
+
+ if (findBuildAggregate_rec(LastInsertInst, TTI, BuildVectorOpds, InsertElts,
+ 0)) {
+ llvm::erase_if(BuildVectorOpds,
+ [](const Value *V) { return V == nullptr; });
+ llvm::erase_if(InsertElts, [](const Value *V) { return V == nullptr; });
+ if (BuildVectorOpds.size() >= 2)
+ return true;
+ }
+
+ return false;
}
static bool PhiTypeSorterFunc(Value *V, Value *V2) {
@@ -7308,8 +7383,7 @@ bool SLPVectorizerPass::vectorizeInsertValueInst(InsertValueInst *IVI,
SmallVector<Value *, 16> BuildVectorOpds;
SmallVector<Value *, 16> BuildVectorInsts;
- if (!findBuildAggregate(IVI, TTI, BuildVectorOpds, BuildVectorInsts) ||
- BuildVectorOpds.size() < 2)
+ if (!findBuildAggregate(IVI, TTI, BuildVectorOpds, BuildVectorInsts))
return false;
LLVM_DEBUG(dbgs() << "SLP: array mappable to vector: " << *IVI << "\n");
@@ -7324,7 +7398,6 @@ bool SLPVectorizerPass::vectorizeInsertElementInst(InsertElementInst *IEI,
SmallVector<Value *, 16> BuildVectorInsts;
SmallVector<Value *, 16> BuildVectorOpds;
if (!findBuildAggregate(IEI, TTI, BuildVectorOpds, BuildVectorInsts) ||
- BuildVectorOpds.size() < 2 ||
(llvm::all_of(BuildVectorOpds,
[](Value *V) { return isa<ExtractElementInst>(V); }) &&
isShuffle(BuildVectorOpds)))
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/horiz-math.ll b/llvm/test/Transforms/PhaseOrdering/X86/horiz-math.ll
index 749ed967b842..37f6968c876b 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/horiz-math.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/horiz-math.ll
@@ -37,11 +37,10 @@ define <4 x float> @hadd_reverse_v4f32(<4 x float> %a, <4 x float> %b) #0 {
define <4 x float> @reverse_hadd_v4f32(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: @reverse_hadd_v4f32(
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> [[A:%.*]], <4 x i32> <i32 2, i32 0, i32 6, i32 4>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[B]], <4 x float> [[A]], <4 x i32> <i32 3, i32 1, i32 7, i32 5>
; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]]
-; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: ret <4 x float> [[TMP4]]
+; CHECK-NEXT: ret <4 x float> [[TMP3]]
;
%vecext = extractelement <4 x float> %a, i32 0
%vecext1 = extractelement <4 x float> %a, i32 1
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll
index 632bec58d5c0..016d40c79952 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll
@@ -126,13 +126,16 @@ define <4 x float> @simple_select_eph(<4 x float> %a, <4 x float> %b, <4 x i32>
; doesn't matter
define <4 x float> @simple_select_insert_out_of_order(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
; ANY-LABEL: @simple_select_insert_out_of_order(
-; ANY-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> [[C:%.*]], zeroinitializer
-; ANY-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]
-; ANY-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
+; ANY-NEXT: [[REORDER_SHUFFLE:%.*]] = shufflevector <4 x i32> [[C:%.*]], <4 x i32> undef, <4 x i32> <i32 2, i32 1, i32 0, i32 3>
+; ANY-NEXT: [[REORDER_SHUFFLE1:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> undef, <4 x i32> <i32 2, i32 1, i32 0, i32 3>
+; ANY-NEXT: [[REORDER_SHUFFLE2:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> undef, <4 x i32> <i32 2, i32 1, i32 0, i32 3>
+; ANY-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> [[REORDER_SHUFFLE]], zeroinitializer
+; ANY-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[REORDER_SHUFFLE1]], <4 x float> [[REORDER_SHUFFLE2]]
+; ANY-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 2
; ANY-NEXT: [[RA:%.*]] = insertelement <4 x float> undef, float [[TMP3]], i32 2
; ANY-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP2]], i32 1
; ANY-NEXT: [[RB:%.*]] = insertelement <4 x float> [[RA]], float [[TMP4]], i32 1
-; ANY-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP2]], i32 2
+; ANY-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
; ANY-NEXT: [[RC:%.*]] = insertelement <4 x float> [[RB]], float [[TMP5]], i32 0
; ANY-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP2]], i32 3
; ANY-NEXT: [[RD:%.*]] = insertelement <4 x float> [[RC]], float [[TMP6]], i32 3
@@ -447,19 +450,19 @@ define <4 x float> @take_credit(<4 x float> %a, <4 x float> %b) {
; Make sure we handle multiple trees that feed one build vector correctly.
define <4 x double> @multi_tree(double %w, double %x, double %y, double %z) {
; ANY-LABEL: @multi_tree(
-; ANY-NEXT: [[TMP1:%.*]] = insertelement <4 x double> undef, double [[W:%.*]], i32 0
-; ANY-NEXT: [[TMP2:%.*]] = insertelement <4 x double> [[TMP1]], double [[X:%.*]], i32 1
-; ANY-NEXT: [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double [[Y:%.*]], i32 2
-; ANY-NEXT: [[TMP4:%.*]] = insertelement <4 x double> [[TMP3]], double [[Z:%.*]], i32 3
-; ANY-NEXT: [[TMP5:%.*]] = fadd <4 x double> [[TMP4]], <double 0.000000e+00, double 1.000000e+00, double 2.000000e+00, double 3.000000e+00>
+; ANY-NEXT: [[TMP1:%.*]] = insertelement <4 x double> undef, double [[Z:%.*]], i32 0
+; ANY-NEXT: [[TMP2:%.*]] = insertelement <4 x double> [[TMP1]], double [[Y:%.*]], i32 1
+; ANY-NEXT: [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double [[X:%.*]], i32 2
+; ANY-NEXT: [[TMP4:%.*]] = insertelement <4 x double> [[TMP3]], double [[W:%.*]], i32 3
+; ANY-NEXT: [[TMP5:%.*]] = fadd <4 x double> [[TMP4]], <double 3.000000e+00, double 2.000000e+00, double 1.000000e+00, double 0.000000e+00>
; ANY-NEXT: [[TMP6:%.*]] = fmul <4 x double> [[TMP5]], <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
-; ANY-NEXT: [[TMP7:%.*]] = extractelement <4 x double> [[TMP6]], i32 0
+; ANY-NEXT: [[TMP7:%.*]] = extractelement <4 x double> [[TMP6]], i32 3
; ANY-NEXT: [[I1:%.*]] = insertelement <4 x double> undef, double [[TMP7]], i32 3
-; ANY-NEXT: [[TMP8:%.*]] = extractelement <4 x double> [[TMP6]], i32 1
+; ANY-NEXT: [[TMP8:%.*]] = extractelement <4 x double> [[TMP6]], i32 2
; ANY-NEXT: [[I2:%.*]] = insertelement <4 x double> [[I1]], double [[TMP8]], i32 2
-; ANY-NEXT: [[TMP9:%.*]] = extractelement <4 x double> [[TMP6]], i32 2
+; ANY-NEXT: [[TMP9:%.*]] = extractelement <4 x double> [[TMP6]], i32 1
; ANY-NEXT: [[I3:%.*]] = insertelement <4 x double> [[I2]], double [[TMP9]], i32 1
-; ANY-NEXT: [[TMP10:%.*]] = extractelement <4 x double> [[TMP6]], i32 3
+; ANY-NEXT: [[TMP10:%.*]] = extractelement <4 x double> [[TMP6]], i32 0
; ANY-NEXT: [[I4:%.*]] = insertelement <4 x double> [[I3]], double [[TMP10]], i32 0
; ANY-NEXT: ret <4 x double> [[I4]]
;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr42022.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr42022.ll
index 2a41de10f09c..18ce596fc1c5 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/pr42022.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/pr42022.ll
@@ -147,13 +147,13 @@ define {%StructTy, %StructTy} @StructOfStruct(float *%Ptr) {
define {%StructTy, float, float} @NonHomogeneousStruct(float *%Ptr) {
; CHECK-LABEL: @NonHomogeneousStruct(
; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds float, float* [[PTR:%.*]], i64 0
-; CHECK-NEXT: [[L0:%.*]] = load float, float* [[GEP0]]
+; CHECK-NEXT: [[L0:%.*]] = load float, float* [[GEP0]], align 4
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds float, float* [[PTR]], i64 1
-; CHECK-NEXT: [[L1:%.*]] = load float, float* [[GEP1]]
+; CHECK-NEXT: [[L1:%.*]] = load float, float* [[GEP1]], align 4
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds float, float* [[PTR]], i64 2
-; CHECK-NEXT: [[L2:%.*]] = load float, float* [[GEP2]]
+; CHECK-NEXT: [[L2:%.*]] = load float, float* [[GEP2]], align 4
; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds float, float* [[PTR]], i64 3
-; CHECK-NEXT: [[L3:%.*]] = load float, float* [[GEP3]]
+; CHECK-NEXT: [[L3:%.*]] = load float, float* [[GEP3]], align 4
; CHECK-NEXT: [[FADD0:%.*]] = fadd fast float [[L0]], 1.100000e+01
; CHECK-NEXT: [[FADD1:%.*]] = fadd fast float [[L1]], 1.200000e+01
; CHECK-NEXT: [[FADD2:%.*]] = fadd fast float [[L2]], 1.300000e+01
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr44067.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr44067.ll
index 55bee662a613..ab7cad6ad8b4 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/pr44067.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/pr44067.ll
@@ -8,11 +8,10 @@ define <2 x float> @foo({{float, float}}* %A) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = bitcast { { float, float } }* [[A:%.*]] to <2 x float>*
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, <2 x float>* [[TMP0]], align 8
-; CHECK-NEXT: [[REORDER_SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> undef, <2 x i32> <i32 1, i32 0>
-; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x float> [[REORDER_SHUFFLE]], <float 2.000000e+00, float 2.000000e+00>
-; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x float> [[TMP1]], <float 2.000000e+00, float 2.000000e+00>
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
; CHECK-NEXT: [[INS1:%.*]] = insertelement <2 x float> undef, float [[TMP3]], i32 1
-; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
; CHECK-NEXT: [[INS0:%.*]] = insertelement <2 x float> [[INS1]], float [[TMP4]], i32 0
; CHECK-NEXT: ret <2 x float> [[INS0]]
;
@@ -44,23 +43,22 @@ define {%Struct2Ty, %Struct2Ty} @StructOfStructOfStruct(i16 *%Ptr) {
; CHECK-NEXT: [[GEP7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i64 7
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[GEP0]] to <8 x i16>*
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 2
-; CHECK-NEXT: [[REORDER_SHUFFLE:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 7, i32 6, i32 1, i32 0, i32 2, i32 3>
-; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i16> [[REORDER_SHUFFLE]], <i16 5, i16 6, i16 8, i16 7, i16 2, i16 1, i16 3, i16 4>
-; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x i16> [[TMP3]], i32 4
+; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i16> [[TMP2]], <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x i16> [[TMP3]], i32 1
; CHECK-NEXT: [[STRUCTIN0:%.*]] = insertvalue [[STRUCT1TY:%.*]] undef, i16 [[TMP4]], 1
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i16> [[TMP3]], i32 5
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i16> [[TMP3]], i32 0
; CHECK-NEXT: [[STRUCTIN1:%.*]] = insertvalue [[STRUCT1TY]] %StructIn0, i16 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i16> [[TMP3]], i32 6
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i16> [[TMP3]], i32 2
; CHECK-NEXT: [[STRUCTIN2:%.*]] = insertvalue [[STRUCT1TY]] undef, i16 [[TMP6]], 0
-; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x i16> [[TMP3]], i32 7
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x i16> [[TMP3]], i32 3
; CHECK-NEXT: [[STRUCTIN3:%.*]] = insertvalue [[STRUCT1TY]] %StructIn2, i16 [[TMP7]], 1
-; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i16> [[TMP3]], i32 0
+; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i16> [[TMP3]], i32 4
; CHECK-NEXT: [[STRUCTIN4:%.*]] = insertvalue [[STRUCT1TY]] undef, i16 [[TMP8]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i16> [[TMP3]], i32 1
+; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i16> [[TMP3]], i32 5
; CHECK-NEXT: [[STRUCTIN5:%.*]] = insertvalue [[STRUCT1TY]] %StructIn4, i16 [[TMP9]], 1
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <8 x i16> [[TMP3]], i32 2
+; CHECK-NEXT: [[TMP10:%.*]] = extractelement <8 x i16> [[TMP3]], i32 7
; CHECK-NEXT: [[STRUCTIN6:%.*]] = insertvalue [[STRUCT1TY]] undef, i16 [[TMP10]], 1
-; CHECK-NEXT: [[TMP11:%.*]] = extractelement <8 x i16> [[TMP3]], i32 3
+; CHECK-NEXT: [[TMP11:%.*]] = extractelement <8 x i16> [[TMP3]], i32 6
; CHECK-NEXT: [[STRUCTIN7:%.*]] = insertvalue [[STRUCT1TY]] %StructIn6, i16 [[TMP11]], 0
; CHECK-NEXT: [[STRUCT2IN0:%.*]] = insertvalue [[STRUCT2TY:%.*]] undef, [[STRUCT1TY]] %StructIn1, 0
; CHECK-NEXT: [[STRUCT2IN1:%.*]] = insertvalue [[STRUCT2TY]] %Struct2In0, [[STRUCT1TY]] %StructIn3, 1