[llvm] r283720 - [AVX-512] Add missing pattern for sext or zext from bytes to quad words with a 128-bit load as input.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sun Oct 9 23:25:48 PDT 2016
Author: ctopper
Date: Mon Oct 10 01:25:48 2016
New Revision: 283720
URL: http://llvm.org/viewvc/llvm-project?rev=283720&view=rev
Log:
[AVX-512] Add missing pattern for sext or zext from bytes to quad words with a 128-bit load as input.
Modified:
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/test/CodeGen/X86/avx512-pmovxrm.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=283720&r1=283719&r2=283720&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Mon Oct 10 01:25:48 2016
@@ -7354,6 +7354,8 @@ multiclass AVX512_pmovx_patterns<string
def : Pat<(v8i64 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
+ def : Pat<(v8i64 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
def : Pat<(v16i32 (ExtOp (bc_v16i16 (loadv4i64 addr:$src)))),
(!cast<I>(OpcPrefix#WDZrm) addr:$src)>;
Modified: llvm/trunk/test/CodeGen/X86/avx512-pmovxrm.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-pmovxrm.ll?rev=283720&r1=283719&r2=283720&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-pmovxrm.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-pmovxrm.ll Mon Oct 10 01:25:48 2016
@@ -38,16 +38,14 @@ define <8 x i64> @test_llvm_x86_avx512_p
; X32-LABEL: test_llvm_x86_avx512_pmovsxbq:
; X32: ## BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: vmovdqu (%eax), %xmm0
-; X32-NEXT: vpmovzxbq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
+; X32-NEXT: vpmovzxbq {{.*#+}} zmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero
; X32-NEXT: vpsllq $56, %zmm0, %zmm0
; X32-NEXT: vpsraq $56, %zmm0, %zmm0
; X32-NEXT: retl
;
; X64-LABEL: test_llvm_x86_avx512_pmovsxbq:
; X64: ## BB#0:
-; X64-NEXT: vmovdqu (%rdi), %xmm0
-; X64-NEXT: vpmovzxbq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
+; X64-NEXT: vpmovzxbq {{.*#+}} zmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero
; X64-NEXT: vpsllq $56, %zmm0, %zmm0
; X64-NEXT: vpsraq $56, %zmm0, %zmm0
; X64-NEXT: retq
@@ -141,8 +139,7 @@ define <8 x i64> @test_llvm_x86_avx512_p
; X32-LABEL: test_llvm_x86_avx512_pmovzxbq:
; X32: ## BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: vmovdqu (%eax), %xmm0
-; X32-NEXT: vpmovzxbq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
+; X32-NEXT: vpmovzxbq {{.*#+}} zmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero
; X32-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
; X32-NEXT: vpand %ymm2, %ymm1, %ymm1
@@ -152,8 +149,7 @@ define <8 x i64> @test_llvm_x86_avx512_p
;
; X64-LABEL: test_llvm_x86_avx512_pmovzxbq:
; X64: ## BB#0:
-; X64-NEXT: vmovdqu (%rdi), %xmm0
-; X64-NEXT: vpmovzxbq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
+; X64-NEXT: vpmovzxbq {{.*#+}} zmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero
; X64-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; X64-NEXT: vmovdqa {{.*#+}} ymm2 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
; X64-NEXT: vpand %ymm2, %ymm1, %ymm1
More information about the llvm-commits mailing list