[llvm] r233724 - [X86, AVX] fix zero-extending integer operand load patterns to use integer instructions
Sanjay Patel
spatel at rotateright.com
Tue Mar 31 11:43:43 PDT 2015
Author: spatel
Date: Tue Mar 31 13:43:43 2015
New Revision: 233724
URL: http://llvm.org/viewvc/llvm-project?rev=233724&view=rev
Log:
[X86, AVX] fix zero-extending integer operand load patterns to use integer instructions
This is a follow-on to r233704 and another partial fix for PR22685:
https://llvm.org/bugs/show_bug.cgi?id=22685
Modified:
llvm/trunk/lib/Target/X86/X86InstrSSE.td
llvm/trunk/test/CodeGen/X86/2012-1-10-buildvector.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v8.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=233724&r1=233723&r2=233724&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Tue Mar 31 13:43:43 2015
@@ -643,9 +643,6 @@ let Predicates = [UseAVX] in {
// Represent the same patterns above but in the form they appear for
// 256-bit types
- def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
- (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
- (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_xmm)>;
def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
(v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
(SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_xmm)>;
@@ -653,9 +650,6 @@ let Predicates = [UseAVX] in {
(v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
(SUBREG_TO_REG (i32 0), (VMOVSDrm addr:$src), sub_xmm)>;
}
- def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
- (v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))),
- (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_xmm)>;
// Extract and store.
def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
@@ -793,7 +787,7 @@ let Predicates = [UseSSE2] in {
(MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
// FIXME: Instead of a X86Movlps there should be a X86Movsd here, the problem
- // is during lowering, where it's not possible to recognize the fold cause
+ // is during lowering, where it's not possible to recognize the fold because
// it has two uses through a bitcast. One use disappears at isel time and the
// fold opportunity reappears.
def : Pat<(v2f64 (X86Movlpd VR128:$src1, VR128:$src2)),
@@ -4907,7 +4901,8 @@ let Predicates = [UseAVX] in {
def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
(VMOVDI2PDIrr GR32:$src)>;
- // AVX 128-bit movd/movq instruction write zeros in the high 128-bit part.
+ // AVX 128-bit movd/movq instructions write zeros in the high 128-bit part.
+ // These instructions also write zeros in the high part of a 256-bit register.
let AddedComplexity = 20 in {
def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
(VMOVDI2PDIrm addr:$src)>;
@@ -4915,6 +4910,9 @@ let Predicates = [UseAVX] in {
(VMOVDI2PDIrm addr:$src)>;
def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
(VMOVDI2PDIrm addr:$src)>;
+ def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
+ (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
+ (SUBREG_TO_REG (i32 0), (VMOVDI2PDIrm addr:$src), sub_xmm)>;
}
// Use regular 128-bit instructions to match 256-bit scalar_to_vec+zext.
def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
@@ -5033,6 +5031,9 @@ let Predicates = [UseAVX], AddedComplexi
(VMOVZQI2PQIrm addr:$src)>;
def : Pat<(v2i64 (X86vzload addr:$src)),
(VMOVZQI2PQIrm addr:$src)>;
+ def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
+ (v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))),
+ (SUBREG_TO_REG (i64 0), (VMOVZQI2PQIrm addr:$src), sub_xmm)>;
}
let Predicates = [UseSSE2], AddedComplexity = 20 in {
Modified: llvm/trunk/test/CodeGen/X86/2012-1-10-buildvector.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2012-1-10-buildvector.ll?rev=233724&r1=233723&r2=233724&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/2012-1-10-buildvector.ll (original)
+++ llvm/trunk/test/CodeGen/X86/2012-1-10-buildvector.ll Tue Mar 31 13:43:43 2015
@@ -17,7 +17,7 @@ entry:
; CHECK-LABEL: bad_insert:
define void @bad_insert(i32 %t) {
entry:
-; CHECK: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: vmovaps %ymm0
; CHECK: ret
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll?rev=233724&r1=233723&r2=233724&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll Tue Mar 31 13:43:43 2015
@@ -827,12 +827,12 @@ define <4 x i64> @insert_reg_and_zero_v4
define <4 x i64> @insert_mem_and_zero_v4i64(i64* %ptr) {
; AVX1-LABEL: insert_mem_and_zero_v4i64:
; AVX1: # BB#0:
-; AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX1-NEXT: retq
;
; AVX2-LABEL: insert_mem_and_zero_v4i64:
; AVX2: # BB#0:
-; AVX2-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX2-NEXT: retq
%a = load i64, i64* %ptr
%v = insertelement <4 x i64> undef, i64 %a, i64 0
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v8.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v8.ll?rev=233724&r1=233723&r2=233724&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v8.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v8.ll Tue Mar 31 13:43:43 2015
@@ -2090,3 +2090,20 @@ entry:
%res = shufflevector <4 x float> %tmp76, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
ret <8 x float> %res
}
+
+define <8 x i32> @insert_mem_and_zero_v8i32(i32* %ptr) {
+; AVX1-LABEL: insert_mem_and_zero_v8i32:
+; AVX1: # BB#0:
+; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: insert_mem_and_zero_v8i32:
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX2-NEXT: retq
+ %a = load i32, i32* %ptr
+ %v = insertelement <8 x i32> undef, i32 %a, i32 0
+ %shuffle = shufflevector <8 x i32> %v, <8 x i32> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ ret <8 x i32> %shuffle
+}
+
More information about the llvm-commits
mailing list