[llvm] 6cc820a - [X86][FP16] Relax the pattern condition for VZEXT_MOVL to match more cases
Phoebe Wang via llvm-commits
llvm-commits at lists.llvm.org
Sat Nov 20 18:08:35 PST 2021
Author: Phoebe Wang
Date: 2021-11-21T09:14:11+08:00
New Revision: 6cc820a3e284cfd5f4d7f1c329e3c8eafb5fb5c3
URL: https://github.com/llvm/llvm-project/commit/6cc820a3e284cfd5f4d7f1c329e3c8eafb5fb5c3
DIFF: https://github.com/llvm/llvm-project/commit/6cc820a3e284cfd5f4d7f1c329e3c8eafb5fb5c3.diff
LOG: [X86][FP16] Relax the pattern condition for VZEXT_MOVL to match more cases
Fixes pr52560
Reviewed By: RKSimon
Differential Revision: https://reviews.llvm.org/D114313
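
For context, here is a minimal sketch of the kind of input the relaxed pattern is meant to cover (an assumed reduction in the spirit of pr52560, not the exact reproducer; the function name and the llc invocation mentioned afterwards are illustrative). When the i16 feeding the scalar-to-vector insert comes from something other than an i32 truncation, e.g. a sign-extended i8, the old condition (i16 (trunc GR32:$src)) did not apply:

define <8 x i16> @vzmovl_i16_sketch(i8 %x) nounwind {
  ; The i16 lives in a GR16; there is no i32 truncation for the old pattern to match.
  %ext = sext i8 %x to i16
  ; Inserting into element 0 of an otherwise-zero vector typically lowers to
  ; X86vzmovl(scalar_to_vector(i16)).
  %vec = insertelement <8 x i16> zeroinitializer, i16 %ext, i32 0
  ret <8 x i16> %vec
}

Compiled with something like llc -mtriple=x86_64-- -mattr=+avx512fp16, the relaxed pattern should let this select VMOVW2SHrr by widening the GR16 through an INSERT_SUBREG into IMPLICIT_DEF (the exact DAG shape still depends on legalization and combines), as exercised by the new tests below.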
Added:
Modified:
llvm/lib/Target/X86/X86InstrAVX512.td
llvm/test/CodeGen/X86/avx512fp16-mov.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 8aee96e1c504..1db83033ba35 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -12937,8 +12937,8 @@ def : Pat<(v16i32 (X86vzmovl
(iPTR 0)))),
(SUBREG_TO_REG (i32 0), (VMOVW2SHrr GR32:$src), sub_xmm)>;
-def : Pat<(v8i16 (X86vzmovl (v8i16 (scalar_to_vector (i16 (trunc GR32:$src)))))),
- (VMOVW2SHrr GR32:$src)>;
+def : Pat<(v8i16 (X86vzmovl (scalar_to_vector (i16 GR16:$src)))),
+ (VMOVW2SHrr (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit))>;
// AVX 128-bit movw instruction write zeros in the high 128-bit part.
def : Pat<(v8i16 (X86vzload16 addr:$src)),
diff --git a/llvm/test/CodeGen/X86/avx512fp16-mov.ll b/llvm/test/CodeGen/X86/avx512fp16-mov.ll
index 7f05b0220981..150b763a17cd 100644
--- a/llvm/test/CodeGen/X86/avx512fp16-mov.ll
+++ b/llvm/test/CodeGen/X86/avx512fp16-mov.ll
@@ -1955,3 +1955,73 @@ define <8 x half> @test21(half %a, half %b, half %c) nounwind {
%3 = insertelement <8 x half> %2, half %c, i32 2
ret <8 x half> %3
}
+
+define <16 x i16> @test22(i16* %mem) nounwind {
+; X64-LABEL: test22:
+; X64: # %bb.0:
+; X64-NEXT: movzwl 0, %eax
+; X64-NEXT: andw (%rdi), %ax
+; X64-NEXT: vmovw %eax, %xmm0
+; X64-NEXT: retq
+;
+; X86-LABEL: test22:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movzwl 0, %ecx
+; X86-NEXT: andw (%eax), %cx
+; X86-NEXT: vmovw %ecx, %xmm0
+; X86-NEXT: retl
+ %1 = load i16, i16* null, align 2
+ %2 = load i16, i16* %mem, align 2
+ %3 = and i16 %1, %2
+ %4 = insertelement <16 x i16> <i16 undef, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, i16 %3, i32 0
+ ret <16 x i16> %4
+}
+
+define void @pr52560(i8 %0, <2 x i16> %1, i8* %c) nounwind {
+; X64-LABEL: pr52560:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movsbl %dil, %eax
+; X64-NEXT: vmovw %eax, %xmm1
+; X64-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; X64-NEXT: vpcmpgtw %xmm2, %xmm1, %k1
+; X64-NEXT: vmovdqu16 %xmm0, %xmm0 {%k1} {z}
+; X64-NEXT: vmovw %xmm0, %eax
+; X64-NEXT: testw %ax, %ax
+; X64-NEXT: je .LBB121_2
+; X64-NEXT: # %bb.1: # %for.body.preheader
+; X64-NEXT: movb $0, (%rsi)
+; X64-NEXT: .LBB121_2: # %for.end
+; X64-NEXT: retq
+;
+; X86-LABEL: pr52560:
+; X86: # %bb.0: # %entry
+; X86-NEXT: movsbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: vmovw %eax, %xmm1
+; X86-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; X86-NEXT: vpcmpgtw %xmm2, %xmm1, %k1
+; X86-NEXT: vmovdqu16 %xmm0, %xmm0 {%k1} {z}
+; X86-NEXT: vmovw %xmm0, %eax
+; X86-NEXT: testw %ax, %ax
+; X86-NEXT: je .LBB121_2
+; X86-NEXT: # %bb.1: # %for.body.preheader
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movb $0, (%eax)
+; X86-NEXT: .LBB121_2: # %for.end
+; X86-NEXT: retl
+entry:
+ %conv = sext i8 %0 to i16
+ %2 = insertelement <2 x i16> <i16 poison, i16 0>, i16 %conv, i32 0
+ %3 = icmp sgt <2 x i16> %2, zeroinitializer
+ %4 = select <2 x i1> %3, <2 x i16> %1, <2 x i16> <i16 0, i16 poison>
+ %5 = extractelement <2 x i16> %4, i32 0
+ %tobool.not14 = icmp eq i16 %5, 0
+ br i1 %tobool.not14, label %for.end, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ store i8 0, i8* %c, align 1
+ br label %for.end
+
+for.end: ; preds = %for.body.preheader, %entry
+ ret void
+}