[llvm] r259991 - [X86][SSE] Don't replace an existing 32-bit load with its duplicate
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sat Feb 6 07:37:10 PST 2016
Author: rksimon
Date: Sat Feb 6 09:37:09 2016
New Revision: 259991
URL: http://llvm.org/viewvc/llvm-project?rev=259991&view=rev
Log:
[X86][SSE] Don't replace an existing 32-bit load with its duplicate
If we are already loading a single 32-bit float/integer, then just reuse that existing load instead of creating a duplicate of it.
Fix for a regression in D16729.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/merge-consecutive-loads-128.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=259991&r1=259990&r2=259991&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat Feb 6 09:37:09 2016
@@ -5677,7 +5677,8 @@ static SDValue EltsFromConsecutiveLoads(
MVT VecSVT = VT.isFloatingPoint() ? MVT::f32 : MVT::i32;
MVT VecVT = MVT::getVectorVT(VecSVT, VT.getSizeInBits() / 32);
if (TLI.isTypeLegal(VecVT)) {
- SDValue V = CreateLoad(VecSVT, LDBase);
+ SDValue V = LastLoadedElt != 0 ? CreateLoad(VecSVT, LDBase)
+ : DAG.getBitcast(VecSVT, EltBase);
V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, V);
V = DAG.getNode(X86ISD::VZEXT_MOVL, DL, VecVT, V);
return DAG.getBitcast(VT, V);
Modified: llvm/trunk/test/CodeGen/X86/merge-consecutive-loads-128.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/merge-consecutive-loads-128.ll?rev=259991&r1=259990&r2=259991&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/merge-consecutive-loads-128.ll (original)
+++ llvm/trunk/test/CodeGen/X86/merge-consecutive-loads-128.ll Sat Feb 6 09:37:09 2016
@@ -602,6 +602,48 @@ define <16 x i8> @merge_16i8_i8_0123uu67
%res7 = insertelement <16 x i8> %res6, i8 %val7, i32 7
%resD = insertelement <16 x i8> %res7, i8 0, i32 13
%resE = insertelement <16 x i8> %resD, i8 0, i32 14
- %resF = insertelement <16 x i8> %resE, i8 0, i32 15
- ret <16 x i8> %resF
-}
+ %resF = insertelement <16 x i8> %resE, i8 0, i32 15
+ ret <16 x i8> %resF
+}
+
+define void @merge_4i32_i32_combine(<4 x i32>* %dst, i32* %src) {
+; SSE-LABEL: merge_4i32_i32_combine:
+; SSE: # BB#0:
+; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE-NEXT: movaps %xmm0, (%rdi)
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: merge_4i32_i32_combine:
+; AVX1: # BB#0:
+; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX1-NEXT: vmovaps %xmm0, (%rdi)
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: merge_4i32_i32_combine:
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX2-NEXT: vmovaps %xmm0, (%rdi)
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: merge_4i32_i32_combine:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX512F-NEXT: vmovdqa %xmm0, (%rdi)
+; AVX512F-NEXT: retq
+;
+; X32-SSE-LABEL: merge_4i32_i32_combine:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-SSE-NEXT: movaps %xmm0, (%eax)
+; X32-SSE-NEXT: retl
+ %1 = getelementptr i32, i32* %src, i32 0
+ %2 = load i32, i32* %1
+ %3 = insertelement <4 x i32> undef, i32 %2, i32 0
+ %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> zeroinitializer
+ %5 = lshr <4 x i32> %4, <i32 0, i32 undef, i32 undef, i32 undef>
+ %6 = and <4 x i32> %5, <i32 -1, i32 0, i32 0, i32 0>
+ store <4 x i32> %6, <4 x i32>* %dst
+ ret void
+}
More information about the llvm-commits mailing list