[llvm] r272626 - [AVX512] Use MOVZX32 instead of MOVZX16 for loading single v8/v4/v2/v1 masks when KMOVB is not available. This has better behavior with respect to partial register stalls, since a 32-bit load does not need to preserve the upper 16 bits of the GPR.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 13 20:13:00 PDT 2016
Author: ctopper
Date: Mon Jun 13 22:13:00 2016
New Revision: 272626
URL: http://llvm.org/viewvc/llvm-project?rev=272626&view=rev
Log:
[AVX512] Use MOVZX32 instead of MOVZX16 for loading single v8/v4/v2/v1 masks when KMOVB is not available. This has better behavior with respect to partial register stalls, since a 32-bit load does not need to preserve the upper 16 bits of the GPR.
Modified:
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll
llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
llvm/trunk/test/CodeGen/X86/avx512-select.ll
llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=272626&r1=272625&r2=272626&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Mon Jun 13 22:13:00 2016
@@ -2050,18 +2050,18 @@ let Predicates = [HasAVX512, NoDQI] in {
sub_8bit))>;
def : Pat<(v8i1 (load addr:$src)),
- (COPY_TO_REGCLASS (MOVZX16rm8 addr:$src), VK8)>;
+ (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
def : Pat<(v2i1 (load addr:$src)),
- (COPY_TO_REGCLASS (MOVZX16rm8 addr:$src), VK2)>;
+ (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK2)>;
def : Pat<(v4i1 (load addr:$src)),
- (COPY_TO_REGCLASS (MOVZX16rm8 addr:$src), VK4)>;
+ (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK4)>;
}
let Predicates = [HasAVX512] in {
def : Pat<(store (i16 (bitconvert (v16i1 VK16:$src))), addr:$dst),
(KMOVWmk addr:$dst, VK16:$src)>;
def : Pat<(i1 (load addr:$src)),
- (COPY_TO_REGCLASS (AND16ri (MOVZX16rm8 addr:$src), (i16 1)), VK1)>;
+ (COPY_TO_REGCLASS (AND32ri (MOVZX32rm8 addr:$src), (i32 1)), VK1)>;
def : Pat<(v16i1 (bitconvert (i16 (load addr:$src)))),
(KMOVWkm addr:$src)>;
}
Modified: llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll?rev=272626&r1=272625&r2=272626&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll Mon Jun 13 22:13:00 2016
@@ -200,7 +200,7 @@ define i16 @test15(i1 *%addr) {
}
;CHECK-LABEL: test16
-;CHECK: movzbw (%rdi), %ax
+;CHECK: movzbl (%rdi), %eax
;CHECK: kmovw
;CHECK: kshiftlw $10
;CHECK: korw
@@ -214,8 +214,8 @@ define i16 @test16(i1 *%addr, i16 %a) {
}
;CHECK-LABEL: test17
-;KNL: movzbw (%rdi), %ax
-;KNL: andw $1, %ax
+;KNL: movzbl (%rdi), %eax
+;KNL: andl $1, %eax
;KNL: kshiftlw $4
;KNL: korw
;SKX: kshiftlb $4
Modified: llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll?rev=272626&r1=272625&r2=272626&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll Mon Jun 13 22:13:00 2016
@@ -88,7 +88,7 @@ define void @mask16_mem(i16* %ptr) {
define void @mask8_mem(i8* %ptr) {
; KNL-LABEL: mask8_mem:
; KNL: ## BB#0:
-; KNL-NEXT: movzbw (%rdi), %ax
+; KNL-NEXT: movzbl (%rdi), %eax
; KNL-NEXT: kmovw %eax, %k0
; KNL-NEXT: knotw %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
@@ -1341,7 +1341,7 @@ End:
define <8 x i64> @load_8i1(<8 x i1>* %a) {
; KNL-LABEL: load_8i1:
; KNL: ## BB#0:
-; KNL-NEXT: movzbw (%rdi), %ax
+; KNL-NEXT: movzbl (%rdi), %eax
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
; KNL-NEXT: retq
@@ -1376,7 +1376,7 @@ define <16 x i32> @load_16i1(<16 x i1>*
define <2 x i16> @load_2i1(<2 x i1>* %a) {
; KNL-LABEL: load_2i1:
; KNL: ## BB#0:
-; KNL-NEXT: movzbw (%rdi), %ax
+; KNL-NEXT: movzbl (%rdi), %eax
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
; KNL-NEXT: retq
@@ -1394,7 +1394,7 @@ define <2 x i16> @load_2i1(<2 x i1>* %a)
define <4 x i16> @load_4i1(<4 x i1>* %a) {
; KNL-LABEL: load_4i1:
; KNL: ## BB#0:
-; KNL-NEXT: movzbw (%rdi), %ax
+; KNL-NEXT: movzbl (%rdi), %eax
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
; KNL-NEXT: vpmovqd %zmm0, %ymm0
Modified: llvm/trunk/test/CodeGen/X86/avx512-select.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-select.ll?rev=272626&r1=272625&r2=272626&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-select.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-select.ll Mon Jun 13 22:13:00 2016
@@ -84,9 +84,9 @@ define i8 @select05(i8 %a.0, i8 %m) {
define i8 @select05_mem(<8 x i1>* %a.0, <8 x i1>* %m) {
; CHECK-LABEL: select05_mem:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbw (%rsi), %ax
+; CHECK-NEXT: movzbl (%rsi), %eax
; CHECK-NEXT: kmovw %eax, %k0
-; CHECK-NEXT: movzbw (%rdi), %ax
+; CHECK-NEXT: movzbl (%rdi), %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: korw %k1, %k0, %k0
; CHECK-NEXT: kmovw %k0, %eax
@@ -114,9 +114,9 @@ define i8 @select06(i8 %a.0, i8 %m) {
define i8 @select06_mem(<8 x i1>* %a.0, <8 x i1>* %m) {
; CHECK-LABEL: select06_mem:
; CHECK: ## BB#0:
-; CHECK-NEXT: movzbw (%rsi), %ax
+; CHECK-NEXT: movzbl (%rsi), %eax
; CHECK-NEXT: kmovw %eax, %k0
-; CHECK-NEXT: movzbw (%rdi), %ax
+; CHECK-NEXT: movzbl (%rdi), %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: kandw %k1, %k0, %k0
; CHECK-NEXT: kmovw %k0, %eax
Modified: llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll?rev=272626&r1=272625&r2=272626&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll (original)
+++ llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll Mon Jun 13 22:13:00 2016
@@ -291,7 +291,7 @@ define <8 x i32> @test7(i32* %base, <8 x
; KNL_32-LABEL: test7:
; KNL_32: # BB#0:
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; KNL_32-NEXT: movzbw {{[0-9]+}}(%esp), %cx
+; KNL_32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; KNL_32-NEXT: kmovw %ecx, %k1
; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm0
; KNL_32-NEXT: kmovw %k1, %k2
More information about the llvm-commits mailing list