[llvm] r277214 - [X86][AVX] Fix VBROADCASTF128 selection bug (PR28770)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 29 14:05:10 PDT 2016
Author: rksimon
Date: Fri Jul 29 16:05:10 2016
New Revision: 277214
URL: http://llvm.org/viewvc/llvm-project?rev=277214&view=rev
Log:
[X86][AVX] Fix VBROADCASTF128 selection bug (PR28770)
The support for lowering to VBROADCASTF128 etc. added in D22460 did not correctly ensure that the only users of the 128-bit vector load were the insertions of the vector into the lower/upper subvectors.
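
As a minimal sketch of the kind of pattern that was affected (the function name is illustrative only; it mirrors the new *_reuse tests added below), the 128-bit load here has a second user, the store, so it must not simply be folded away into a memory-operand VBROADCASTF128:

define <4 x double> @broadcast_2f64_reuse_sketch(<2 x double>* %p0, <2 x double>* %p1) {
  ; Load 128 bits and splat them into both halves of a 256-bit vector.
  %ld = load <2 x double>, <2 x double>* %p0
  %bc = shufflevector <2 x double> %ld, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  ; The load has a second user - this store - so the loaded value is still
  ; needed outside of the lower/upper subvector insertions.
  store <2 x double> %ld, <2 x double>* %p1
  ret <4 x double> %bc
}

With this patch such cases keep the explicit 128-bit load and broadcast it with VINSERTF128/VINSERTI128 instead, as checked by the updated tests.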
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/avx-vbroadcastf128.ll
llvm/trunk/test/CodeGen/X86/avx2-vbroadcasti128.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=277214&r1=277213&r2=277214&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Fri Jul 29 16:05:10 2016
@@ -4750,6 +4750,13 @@ static SDValue peekThroughBitcasts(SDVal
return V;
}
+static SDValue peekThroughOneUseBitcasts(SDValue V) {
+ while (V.getNode() && V.getOpcode() == ISD::BITCAST &&
+ V.getOperand(0).hasOneUse())
+ V = V.getOperand(0);
+ return V;
+}
+
static bool getTargetShuffleMaskIndices(SDValue MaskNode,
unsigned MaskEltSizeInBits,
SmallVectorImpl<uint64_t> &RawMask) {
@@ -12785,6 +12792,14 @@ static SDValue LowerEXTRACT_SUBVECTOR(SD
return SDValue();
}
+static bool areOnlyUsersOf(SDNode *N, ArrayRef<SDValue> ValidUsers) {
+ for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I)
+ if (llvm::all_of(ValidUsers,
+ [&I](SDValue V) { return V.getNode() != *I; }))
+ return false;
+ return true;
+}
+
// Lower a node with an INSERT_SUBVECTOR opcode. This may result in a
// simple superregister reference or explicit instructions to insert
// the upper bits of a vector.
@@ -12818,23 +12833,26 @@ static SDValue LowerINSERT_SUBVECTOR(SDV
OpVT.is256BitVector() && SubVecVT.is128BitVector()) {
auto *Idx2 = dyn_cast<ConstantSDNode>(Vec.getOperand(2));
if (Idx2 && Idx2->getZExtValue() == 0) {
+ SDValue SubVec2 = Vec.getOperand(1);
// If needed, look through bitcasts to get to the load.
- SDValue SubVec2 = peekThroughBitcasts(Vec.getOperand(1));
- if (auto *FirstLd = dyn_cast<LoadSDNode>(SubVec2)) {
+ if (auto *FirstLd = dyn_cast<LoadSDNode>(peekThroughBitcasts(SubVec2))) {
bool Fast;
unsigned Alignment = FirstLd->getAlignment();
unsigned AS = FirstLd->getAddressSpace();
const X86TargetLowering *TLI = Subtarget.getTargetLowering();
if (TLI->allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
OpVT, AS, Alignment, &Fast) && Fast) {
- SDValue Ops[] = { SubVec2, SubVec };
+ SDValue Ops[] = {SubVec2, SubVec};
if (SDValue Ld = EltsFromConsecutiveLoads(OpVT, Ops, dl, DAG, false))
return Ld;
}
-
- // If lower/upper loads are the same then lower to a VBROADCASTF128.
- if (SubVec2 == peekThroughBitcasts(SubVec))
- return DAG.getNode(X86ISD::SUBV_BROADCAST, dl, OpVT, SubVec);
+ }
+ // If lower/upper loads are the same and the only users of the load, then
+ // lower to a VBROADCASTF128/VBROADCASTI128/etc.
+ if (SubVec2 == SubVec &&
+ isa<LoadSDNode>(peekThroughOneUseBitcasts(SubVec2)) &&
+ areOnlyUsersOf(SubVec2.getNode(), {Op, Vec})) {
+ return DAG.getNode(X86ISD::SUBV_BROADCAST, dl, OpVT, SubVec);
}
}
}
Modified: llvm/trunk/test/CodeGen/X86/avx-vbroadcastf128.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-vbroadcastf128.ll?rev=277214&r1=277213&r2=277214&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-vbroadcastf128.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-vbroadcastf128.ll Fri Jul 29 16:05:10 2016
@@ -97,3 +97,135 @@ define <32 x i8> @test_broadcast_16i8_32
%2 = shufflevector <16 x i8> %1, <16 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
ret <32 x i8> %2
}
+
+define <4 x double> @test_broadcast_2f64_4f64_reuse(<2 x double>* %p0, <2 x double>* %p1) {
+; X32-LABEL: test_broadcast_2f64_4f64_reuse:
+; X32: ## BB#0:
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: vmovaps (%ecx), %xmm1
+; X32-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm0
+; X32-NEXT: vmovaps %xmm1, (%eax)
+; X32-NEXT: retl
+;
+; X64-LABEL: test_broadcast_2f64_4f64_reuse:
+; X64: ## BB#0:
+; X64-NEXT: vmovaps (%rdi), %xmm1
+; X64-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm0
+; X64-NEXT: vmovaps %xmm1, (%rsi)
+; X64-NEXT: retq
+ %1 = load <2 x double>, <2 x double>* %p0
+ %2 = shufflevector <2 x double> %1, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+ store <2 x double> %1, <2 x double>* %p1
+ ret <4 x double> %2
+}
+
+define <4 x i64> @test_broadcast_2i64_4i64_reuse(<2 x i64>* %p0, <2 x i64>* %p1) {
+; X32-LABEL: test_broadcast_2i64_4i64_reuse:
+; X32: ## BB#0:
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: vmovaps (%ecx), %xmm1
+; X32-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm0
+; X32-NEXT: vmovaps %xmm1, (%eax)
+; X32-NEXT: retl
+;
+; X64-LABEL: test_broadcast_2i64_4i64_reuse:
+; X64: ## BB#0:
+; X64-NEXT: vmovaps (%rdi), %xmm1
+; X64-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm0
+; X64-NEXT: vmovaps %xmm1, (%rsi)
+; X64-NEXT: retq
+ %1 = load <2 x i64>, <2 x i64>* %p0
+ %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+ store <2 x i64> %1, <2 x i64>* %p1
+ ret <4 x i64> %2
+}
+
+define <8 x float> @test_broadcast_4f32_8f32_reuse(<4 x float>* %p0, <4 x float>* %p1) {
+; X32-LABEL: test_broadcast_4f32_8f32_reuse:
+; X32: ## BB#0:
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: vmovaps (%ecx), %xmm1
+; X32-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm0
+; X32-NEXT: vmovaps %xmm1, (%eax)
+; X32-NEXT: retl
+;
+; X64-LABEL: test_broadcast_4f32_8f32_reuse:
+; X64: ## BB#0:
+; X64-NEXT: vmovaps (%rdi), %xmm1
+; X64-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm0
+; X64-NEXT: vmovaps %xmm1, (%rsi)
+; X64-NEXT: retq
+ %1 = load <4 x float>, <4 x float>* %p0
+ %2 = shufflevector <4 x float> %1, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+ store <4 x float> %1, <4 x float>* %p1
+ ret <8 x float> %2
+}
+
+define <8 x i32> @test_broadcast_4i32_8i32_reuse(<4 x i32>* %p0, <4 x i32>* %p1) {
+; X32-LABEL: test_broadcast_4i32_8i32_reuse:
+; X32: ## BB#0:
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: vmovaps (%ecx), %xmm1
+; X32-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm0
+; X32-NEXT: vmovaps %xmm1, (%eax)
+; X32-NEXT: retl
+;
+; X64-LABEL: test_broadcast_4i32_8i32_reuse:
+; X64: ## BB#0:
+; X64-NEXT: vmovaps (%rdi), %xmm1
+; X64-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm0
+; X64-NEXT: vmovaps %xmm1, (%rsi)
+; X64-NEXT: retq
+ %1 = load <4 x i32>, <4 x i32>* %p0
+ %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+ store <4 x i32> %1, <4 x i32>* %p1
+ ret <8 x i32> %2
+}
+
+define <16 x i16> @test_broadcast_8i16_16i16_reuse(<8 x i16> *%p0, <8 x i16> *%p1) nounwind {
+; X32-LABEL: test_broadcast_8i16_16i16_reuse:
+; X32: ## BB#0:
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: vmovaps (%ecx), %xmm1
+; X32-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm0
+; X32-NEXT: vmovaps %xmm1, (%eax)
+; X32-NEXT: retl
+;
+; X64-LABEL: test_broadcast_8i16_16i16_reuse:
+; X64: ## BB#0:
+; X64-NEXT: vmovaps (%rdi), %xmm1
+; X64-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm0
+; X64-NEXT: vmovaps %xmm1, (%rsi)
+; X64-NEXT: retq
+ %1 = load <8 x i16>, <8 x i16> *%p0
+ %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ store <8 x i16> %1, <8 x i16>* %p1
+ ret <16 x i16> %2
+}
+
+define <32 x i8> @test_broadcast_16i8_32i8_reuse(<16 x i8> *%p0, <16 x i8> *%p1) nounwind {
+; X32-LABEL: test_broadcast_16i8_32i8_reuse:
+; X32: ## BB#0:
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: vmovaps (%ecx), %xmm1
+; X32-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm0
+; X32-NEXT: vmovaps %xmm1, (%eax)
+; X32-NEXT: retl
+;
+; X64-LABEL: test_broadcast_16i8_32i8_reuse:
+; X64: ## BB#0:
+; X64-NEXT: vmovaps (%rdi), %xmm1
+; X64-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm0
+; X64-NEXT: vmovaps %xmm1, (%rsi)
+; X64-NEXT: retq
+ %1 = load <16 x i8>, <16 x i8> *%p0
+ %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ store <16 x i8> %1, <16 x i8>* %p1
+ ret <32 x i8> %2
+}
Modified: llvm/trunk/test/CodeGen/X86/avx2-vbroadcasti128.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx2-vbroadcasti128.ll?rev=277214&r1=277213&r2=277214&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx2-vbroadcasti128.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx2-vbroadcasti128.ll Fri Jul 29 16:05:10 2016
@@ -115,3 +115,153 @@ define <32 x i8> @test_broadcast_16i8_32
%3 = add <32 x i8> %2, <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31, i8 32>
ret <32 x i8> %3
}
+
+define <4 x double> @test_broadcast_2f64_4f64_reuse(<2 x double>* %p0, <2 x double>* %p1) {
+; X32-LABEL: test_broadcast_2f64_4f64_reuse:
+; X32: ## BB#0:
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: vmovapd (%ecx), %xmm1
+; X32-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm0
+; X32-NEXT: vaddpd LCPI6_0, %ymm0, %ymm0
+; X32-NEXT: vmovapd %xmm1, (%eax)
+; X32-NEXT: retl
+;
+; X64-LABEL: test_broadcast_2f64_4f64_reuse:
+; X64: ## BB#0:
+; X64-NEXT: vmovapd (%rdi), %xmm1
+; X64-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm0
+; X64-NEXT: vaddpd {{.*}}(%rip), %ymm0, %ymm0
+; X64-NEXT: vmovapd %xmm1, (%rsi)
+; X64-NEXT: retq
+ %1 = load <2 x double>, <2 x double>* %p0
+ %2 = shufflevector <2 x double> %1, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+ %3 = fadd <4 x double> %2, <double 1.0, double 2.0, double 3.0, double 4.0>
+ store <2 x double> %1, <2 x double>* %p1
+ ret <4 x double> %3
+}
+
+define <4 x i64> @test_broadcast_2i64_4i64_reuse(<2 x i64>* %p0, <2 x i64>* %p1) {
+; X32-LABEL: test_broadcast_2i64_4i64_reuse:
+; X32: ## BB#0:
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: vmovdqa (%ecx), %xmm1
+; X32-NEXT: vinserti128 $1, %xmm1, %ymm1, %ymm0
+; X32-NEXT: vpaddq LCPI7_0, %ymm0, %ymm0
+; X32-NEXT: vmovdqa %xmm1, (%eax)
+; X32-NEXT: retl
+;
+; X64-LABEL: test_broadcast_2i64_4i64_reuse:
+; X64: ## BB#0:
+; X64-NEXT: vmovdqa (%rdi), %xmm1
+; X64-NEXT: vinserti128 $1, %xmm1, %ymm1, %ymm0
+; X64-NEXT: vpaddq {{.*}}(%rip), %ymm0, %ymm0
+; X64-NEXT: vmovdqa %xmm1, (%rsi)
+; X64-NEXT: retq
+ %1 = load <2 x i64>, <2 x i64>* %p0
+ %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+ %3 = add <4 x i64> %2, <i64 1, i64 2, i64 3, i64 4>
+ store <2 x i64> %1, <2 x i64>* %p1
+ ret <4 x i64> %3
+}
+
+define <8 x float> @test_broadcast_4f32_8f32_reuse(<4 x float>* %p0, <4 x float>* %p1) {
+; X32-LABEL: test_broadcast_4f32_8f32_reuse:
+; X32: ## BB#0:
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: vmovaps (%ecx), %xmm1
+; X32-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm0
+; X32-NEXT: vaddps LCPI8_0, %ymm0, %ymm0
+; X32-NEXT: vmovaps %xmm1, (%eax)
+; X32-NEXT: retl
+;
+; X64-LABEL: test_broadcast_4f32_8f32_reuse:
+; X64: ## BB#0:
+; X64-NEXT: vmovaps (%rdi), %xmm1
+; X64-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm0
+; X64-NEXT: vaddps {{.*}}(%rip), %ymm0, %ymm0
+; X64-NEXT: vmovaps %xmm1, (%rsi)
+; X64-NEXT: retq
+ %1 = load <4 x float>, <4 x float>* %p0
+ %2 = shufflevector <4 x float> %1, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+ %3 = fadd <8 x float> %2, <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>
+ store <4 x float> %1, <4 x float>* %p1
+ ret <8 x float> %3
+}
+
+define <8 x i32> @test_broadcast_4i32_8i32_reuse(<4 x i32>* %p0, <4 x i32>* %p1) {
+; X32-LABEL: test_broadcast_4i32_8i32_reuse:
+; X32: ## BB#0:
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: vmovdqa (%ecx), %xmm1
+; X32-NEXT: vinserti128 $1, %xmm1, %ymm1, %ymm0
+; X32-NEXT: vpaddd LCPI9_0, %ymm0, %ymm0
+; X32-NEXT: vmovdqa %xmm1, (%eax)
+; X32-NEXT: retl
+;
+; X64-LABEL: test_broadcast_4i32_8i32_reuse:
+; X64: ## BB#0:
+; X64-NEXT: vmovdqa (%rdi), %xmm1
+; X64-NEXT: vinserti128 $1, %xmm1, %ymm1, %ymm0
+; X64-NEXT: vpaddd {{.*}}(%rip), %ymm0, %ymm0
+; X64-NEXT: vmovdqa %xmm1, (%rsi)
+; X64-NEXT: retq
+ %1 = load <4 x i32>, <4 x i32>* %p0
+ %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+ %3 = add <8 x i32> %2, <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
+ store <4 x i32> %1, <4 x i32>* %p1
+ ret <8 x i32> %3
+}
+
+define <16 x i16> @test_broadcast_8i16_16i16_reuse(<8 x i16> *%p0, <8 x i16> *%p1) nounwind {
+; X32-LABEL: test_broadcast_8i16_16i16_reuse:
+; X32: ## BB#0:
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: vmovdqa (%ecx), %xmm1
+; X32-NEXT: vinserti128 $1, %xmm1, %ymm1, %ymm0
+; X32-NEXT: vpaddw LCPI10_0, %ymm0, %ymm0
+; X32-NEXT: vmovdqa %xmm1, (%eax)
+; X32-NEXT: retl
+;
+; X64-LABEL: test_broadcast_8i16_16i16_reuse:
+; X64: ## BB#0:
+; X64-NEXT: vmovdqa (%rdi), %xmm1
+; X64-NEXT: vinserti128 $1, %xmm1, %ymm1, %ymm0
+; X64-NEXT: vpaddw {{.*}}(%rip), %ymm0, %ymm0
+; X64-NEXT: vmovdqa %xmm1, (%rsi)
+; X64-NEXT: retq
+ %1 = load <8 x i16>, <8 x i16> *%p0
+ %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %3 = add <16 x i16> %2, <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16>
+ store <8 x i16> %1, <8 x i16>* %p1
+ ret <16 x i16> %3
+}
+
+define <32 x i8> @test_broadcast_16i8_32i8_reuse(<16 x i8> *%p0, <16 x i8> *%p1) nounwind {
+; X32-LABEL: test_broadcast_16i8_32i8_reuse:
+; X32: ## BB#0:
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: vmovdqa (%ecx), %xmm1
+; X32-NEXT: vinserti128 $1, %xmm1, %ymm1, %ymm0
+; X32-NEXT: vpaddb LCPI11_0, %ymm0, %ymm0
+; X32-NEXT: vmovdqa %xmm1, (%eax)
+; X32-NEXT: retl
+;
+; X64-LABEL: test_broadcast_16i8_32i8_reuse:
+; X64: ## BB#0:
+; X64-NEXT: vmovdqa (%rdi), %xmm1
+; X64-NEXT: vinserti128 $1, %xmm1, %ymm1, %ymm0
+; X64-NEXT: vpaddb {{.*}}(%rip), %ymm0, %ymm0
+; X64-NEXT: vmovdqa %xmm1, (%rsi)
+; X64-NEXT: retq
+ %1 = load <16 x i8>, <16 x i8> *%p0
+ %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %3 = add <32 x i8> %2, <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31, i8 32>
+ store <16 x i8> %1, <16 x i8>* %p1
+ ret <32 x i8> %3
+}