[llvm-commits] [llvm] r145448 - in /llvm/trunk: lib/Target/X86/X86InstrSSE.td test/CodeGen/X86/vec_shuffle-38.ll
Evan Cheng
evan.cheng at apple.com
Tue Nov 29 14:48:34 PST 2011
Author: evancheng
Date: Tue Nov 29 16:48:34 2011
New Revision: 145448
URL: http://llvm.org/viewvc/llvm-project?rev=145448&view=rev
Log:
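Add another missing pattern: select (V)MOVHPSrm for X86Movlhps when the second operand is an i64 load. llvm-gcc likes f64 but clang likes i64, so poor code (a separate movlhps instead of a movhps from memory) was being generated for some SSE builtins.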
Modified:
llvm/trunk/lib/Target/X86/X86InstrSSE.td
llvm/trunk/test/CodeGen/X86/vec_shuffle-38.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=145448&r1=145447&r2=145448&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Tue Nov 29 16:48:34 2011
@@ -1151,6 +1151,9 @@
(bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
(VMOVHPSrm VR128:$src1, addr:$src2)>;
def : Pat<(X86Movlhps VR128:$src1,
+ (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))),
+ (VMOVHPSrm VR128:$src1, addr:$src2)>;
+ def : Pat<(X86Movlhps VR128:$src1,
(bc_v4i32 (v2i64 (X86vzload addr:$src2)))),
(VMOVHPSrm VR128:$src1, addr:$src2)>;
@@ -1184,6 +1187,9 @@
(bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
(MOVHPSrm VR128:$src1, addr:$src2)>;
def : Pat<(X86Movlhps VR128:$src1,
+ (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))),
+ (MOVHPSrm VR128:$src1, addr:$src2)>;
+ def : Pat<(X86Movlhps VR128:$src1,
(bc_v4f32 (v2i64 (X86vzload addr:$src2)))),
(MOVHPSrm VR128:$src1, addr:$src2)>;
Modified: llvm/trunk/test/CodeGen/X86/vec_shuffle-38.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_shuffle-38.ll?rev=145448&r1=145447&r2=145448&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_shuffle-38.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_shuffle-38.ll Tue Nov 29 16:48:34 2011
@@ -46,7 +46,7 @@
; rdar://10119696
; CHECK: f
-define <4 x float> @f(<4 x float> %x, double* nocapture %y) nounwind uwtable readonly ssp {
+define <4 x float> @f(<4 x float> %x, double* nocapture %y) nounwind readonly ssp {
entry:
; CHECK: movlps (%{{rdi|rdx}}), %xmm0
%u110.i = load double* %y, align 1
@@ -56,3 +56,22 @@
ret <4 x float> %shuffle.i
}
+define <4 x float> @loadhpi2(%struct.Float2* nocapture %vHiCoefPtr_0, %struct.Float2* nocapture %vLoCoefPtr_0, i32 %s) nounwind readonly ssp {
+entry:
+; CHECK: loadhpi2
+; CHECK: movhps (
+; CHECK-NOT: movlhps
+ %0 = bitcast %struct.Float2* %vHiCoefPtr_0 to <1 x i64>*
+ %idx.ext = sext i32 %s to i64
+ %add.ptr = getelementptr inbounds <1 x i64>* %0, i64 %idx.ext
+ %add.ptr.val = load <1 x i64>* %add.ptr, align 1
+ %1 = bitcast <1 x i64> %add.ptr.val to <2 x float>
+ %shuffle.i = shufflevector <2 x float> %1, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+ %2 = bitcast %struct.Float2* %vLoCoefPtr_0 to <1 x i64>*
+ %add.ptr2 = getelementptr inbounds <1 x i64>* %2, i64 %idx.ext
+ %add.ptr2.val = load <1 x i64>* %add.ptr2, align 1
+ %3 = bitcast <1 x i64> %add.ptr2.val to <2 x float>
+ %shuffle.i4 = shufflevector <2 x float> %3, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+ %shuffle1.i5 = shufflevector <4 x float> %shuffle.i, <4 x float> %shuffle.i4, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+ ret <4 x float> %shuffle1.i5
+}
More information about the llvm-commits mailing list