[llvm] r264922 - [X86][AVX] Ensure EltsFromConsecutiveLoads tests the entire vector for consecutive loads/zeros
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 30 13:52:24 PDT 2016
Author: rksimon
Date: Wed Mar 30 15:52:24 2016
New Revision: 264922
URL: http://llvm.org/viewvc/llvm-project?rev=264922&view=rev
Log:
[X86][AVX] Ensure EltsFromConsecutiveLoads tests the entire vector for consecutive loads/zeros
Fix for an issue introduced in D17297, where we were breaking early from the loop detecting consecutive loads, which could leave us thinking a consecutive load with zeros was possible.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/merge-consecutive-loads-128.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=264922&r1=264921&r2=264922&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed Mar 30 15:52:24 2016
@@ -5772,7 +5772,6 @@ static SDValue EltsFromConsecutiveLoads(
}
} else if (ZeroMask[i]) {
IsConsecutiveLoad = false;
- break;
}
}
Modified: llvm/trunk/test/CodeGen/X86/merge-consecutive-loads-128.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/merge-consecutive-loads-128.ll?rev=264922&r1=264921&r2=264922&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/merge-consecutive-loads-128.ll (original)
+++ llvm/trunk/test/CodeGen/X86/merge-consecutive-loads-128.ll Wed Mar 30 15:52:24 2016
@@ -742,3 +742,42 @@ define <4 x float> @merge_4f32_f32_2345_
%res3 = insertelement <4 x float> %res2, float %val3, i32 3
ret <4 x float> %res3
}
+
+;
+; Non-consecutive test.
+;
+
+define <4 x float> @merge_4f32_f32_X0YY(float* %ptr0, float* %ptr1) nounwind uwtable noinline ssp {
+; SSE-LABEL: merge_4f32_f32_X0YY:
+; SSE: # BB#0:
+; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0,0,1,1]
+; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: merge_4f32_f32_X0YY:
+; AVX: # BB#0:
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; AVX-NEXT: vunpcklps {{.*#+}} xmm1 = xmm1[0,0,1,1]
+; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT: retq
+;
+; X32-SSE-LABEL: merge_4f32_f32_X0YY:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X32-SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0,0,1,1]
+; X32-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-SSE-NEXT: retl
+ %val0 = load float, float* %ptr0, align 4
+ %val1 = load float, float* %ptr1, align 4
+ %res0 = insertelement <4 x float> undef, float %val0, i32 0
+ %res1 = insertelement <4 x float> %res0, float 0.000000e+00, i32 1
+ %res2 = insertelement <4 x float> %res1, float %val1, i32 2
+ %res3 = insertelement <4 x float> %res2, float %val1, i32 3
+ ret <4 x float> %res3
+}
More information about the llvm-commits
mailing list