[llvm] r283704 - [AVX-512] Port 128 and 256-bit memory->register sign/zero extend patterns from SSE file. Also add a minimal set for 512-bit.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sun Oct 9 16:08:39 PDT 2016
Author: ctopper
Date: Sun Oct 9 18:08:39 2016
New Revision: 283704
URL: http://llvm.org/viewvc/llvm-project?rev=283704&view=rev
Log:
[AVX-512] Port 128 and 256-bit memory->register sign/zero extend patterns from SSE file. Also add a minimal set for 512-bit.
Modified:
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/test/CodeGen/X86/pmul.ll
llvm/trunk/test/CodeGen/X86/vector-half-conversions.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=283704&r1=283703&r2=283704&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Sun Oct 9 18:08:39 2016
@@ -7227,6 +7227,148 @@ let Predicates = [HasAVX512] in {
defm : avx512_ext_lowering<"DQZ", v8i64_info, v8i32x_info, extloadvi32>;
}
+multiclass AVX512_pmovx_patterns<string OpcPrefix, string ExtTy,
+ SDNode ExtOp, PatFrag ExtLoad16> {
+ // 128-bit patterns
+ let Predicates = [HasVLX, HasBWI] in {
+ def : Pat<(v8i16 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
+ (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
+ def : Pat<(v8i16 (ExtOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
+ (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
+ def : Pat<(v8i16 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
+ def : Pat<(v8i16 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
+ def : Pat<(v8i16 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
+ }
+ let Predicates = [HasVLX] in {
+ def : Pat<(v4i32 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
+ (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
+ def : Pat<(v4i32 (ExtOp (v16i8 (vzmovl_v4i32 addr:$src)))),
+ (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
+ def : Pat<(v4i32 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
+ def : Pat<(v4i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
+
+ def : Pat<(v2i64 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (ExtLoad16 addr:$src)))))),
+ (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
+ def : Pat<(v2i64 (ExtOp (v16i8 (vzmovl_v4i32 addr:$src)))),
+ (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
+ def : Pat<(v2i64 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
+ def : Pat<(v2i64 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
+
+ def : Pat<(v4i32 (ExtOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
+ (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
+ def : Pat<(v4i32 (ExtOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
+ (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
+ def : Pat<(v4i32 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
+ def : Pat<(v4i32 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
+ def : Pat<(v4i32 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
+
+ def : Pat<(v2i64 (ExtOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
+ (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
+ def : Pat<(v2i64 (ExtOp (v8i16 (vzmovl_v4i32 addr:$src)))),
+ (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
+ def : Pat<(v2i64 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
+ def : Pat<(v2i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
+
+ def : Pat<(v2i64 (ExtOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
+ (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
+ def : Pat<(v2i64 (ExtOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
+ (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
+ def : Pat<(v2i64 (ExtOp (v4i32 (vzmovl_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
+ def : Pat<(v2i64 (ExtOp (v4i32 (vzload_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
+ def : Pat<(v2i64 (ExtOp (bc_v4i32 (loadv2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
+ }
+ // 256-bit patterns
+ let Predicates = [HasVLX, HasBWI] in {
+ def : Pat<(v16i16 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
+ def : Pat<(v16i16 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
+ def : Pat<(v16i16 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
+ }
+ let Predicates = [HasVLX] in {
+ def : Pat<(v8i32 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
+ (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
+ def : Pat<(v8i32 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
+ def : Pat<(v8i32 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
+ def : Pat<(v8i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
+
+ def : Pat<(v4i64 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
+ (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
+ def : Pat<(v4i64 (ExtOp (v16i8 (vzmovl_v4i32 addr:$src)))),
+ (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
+ def : Pat<(v4i64 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
+ def : Pat<(v4i64 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
+
+ def : Pat<(v8i32 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
+ def : Pat<(v8i32 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
+ def : Pat<(v8i32 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
+
+ def : Pat<(v4i64 (ExtOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
+ (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
+ def : Pat<(v4i64 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
+ def : Pat<(v4i64 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
+ def : Pat<(v4i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
+
+ def : Pat<(v4i64 (ExtOp (bc_v4i32 (loadv2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
+ def : Pat<(v4i64 (ExtOp (v4i32 (vzmovl_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
+ def : Pat<(v4i64 (ExtOp (v4i32 (vzload_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
+ }
+ // 512-bit patterns
+ let Predicates = [HasBWI] in {
+ def : Pat<(v32i16 (ExtOp (bc_v32i8 (loadv4i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#BWZrm) addr:$src)>;
+ }
+ let Predicates = [HasAVX512] in {
+ def : Pat<(v16i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#BDZrm) addr:$src)>;
+
+ def : Pat<(v8i64 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
+ (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
+
+ def : Pat<(v16i32 (ExtOp (bc_v16i16 (loadv4i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#WDZrm) addr:$src)>;
+
+ def : Pat<(v8i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#WQZrm) addr:$src)>;
+
+ def : Pat<(v8i64 (ExtOp (bc_v8i32 (loadv4i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#DQZrm) addr:$src)>;
+ }
+}
+
+defm : AVX512_pmovx_patterns<"VPMOVSX", "s", X86vsext, extloadi32i16>;
+defm : AVX512_pmovx_patterns<"VPMOVZX", "z", X86vzext, loadi16_anyext>;
+
//===----------------------------------------------------------------------===//
// GATHER - SCATTER Operations
Modified: llvm/trunk/test/CodeGen/X86/pmul.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pmul.ll?rev=283704&r1=283703&r2=283704&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pmul.ll (original)
+++ llvm/trunk/test/CodeGen/X86/pmul.ll Sun Oct 9 18:08:39 2016
@@ -476,9 +476,8 @@ define <32 x i8> @mul_v32i8c(<32 x i8> %
;
; AVX512BW-LABEL: mul_v32i8c:
; AVX512BW: # BB#0: # %entry
-; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm1 = [117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117]
-; AVX512BW-NEXT: vpmovsxbw %ymm1, %zmm1
; AVX512BW-NEXT: vpmovsxbw %ymm0, %zmm0
+; AVX512BW-NEXT: vpmovsxbw {{.*}}(%rip), %zmm1
; AVX512BW-NEXT: vpmullw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: retq
@@ -932,16 +931,15 @@ define <64 x i8> @mul_v64i8c(<64 x i8> %
;
; AVX512BW-LABEL: mul_v64i8c:
; AVX512BW: # BB#0: # %entry
-; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm1 = [117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117]
-; AVX512BW-NEXT: vpmovsxbw %ymm1, %zmm1
-; AVX512BW-NEXT: vpmovsxbw %ymm0, %zmm2
-; AVX512BW-NEXT: vpmullw %zmm1, %zmm2, %zmm2
-; AVX512BW-NEXT: vpmovwb %zmm2, %ymm2
+; AVX512BW-NEXT: vpmovsxbw %ymm0, %zmm1
+; AVX512BW-NEXT: vpmovsxbw {{.*}}(%rip), %zmm2
+; AVX512BW-NEXT: vpmullw %zmm2, %zmm1, %zmm1
+; AVX512BW-NEXT: vpmovwb %zmm1, %ymm1
; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; AVX512BW-NEXT: vpmovsxbw %ymm0, %zmm0
-; AVX512BW-NEXT: vpmullw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpmullw %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
-; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0
+; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512BW-NEXT: retq
entry:
%A = mul <64 x i8> %i, < i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117 >
Modified: llvm/trunk/test/CodeGen/X86/vector-half-conversions.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-half-conversions.ll?rev=283704&r1=283703&r2=283704&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-half-conversions.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-half-conversions.ll Sun Oct 9 18:08:39 2016
@@ -1057,8 +1057,7 @@ define <4 x float> @load_cvt_8i16_to_4f3
;
; AVX512VL-LABEL: load_cvt_8i16_to_4f32:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqa64 (%rdi), %xmm0
-; AVX512VL-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; AVX512VL-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; AVX512VL-NEXT: vpmovdw %xmm0, -{{[0-9]+}}(%rsp)
; AVX512VL-NEXT: movq -{{[0-9]+}}(%rsp), %rax
; AVX512VL-NEXT: movq %rax, %rcx
@@ -2449,8 +2448,7 @@ define <4 x double> @load_cvt_8i16_to_4f
;
; AVX512VL-LABEL: load_cvt_8i16_to_4f64:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqa64 (%rdi), %xmm0
-; AVX512VL-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; AVX512VL-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; AVX512VL-NEXT: vpmovdw %xmm0, -{{[0-9]+}}(%rsp)
; AVX512VL-NEXT: movq -{{[0-9]+}}(%rsp), %rax
; AVX512VL-NEXT: movq %rax, %rcx
More information about the llvm-commits
mailing list