[PATCH] D114313: [X86][FP16] Relax the pattern condition for VZEXT_MOVL to match more cases

Phoebe Wang via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Fri Nov 19 22:22:27 PST 2021


pengfei created this revision.
pengfei added reviewers: LuoYuanke, RKSimon, craig.topper, spatel.
Herald added a subscriber: hiraditya.
pengfei requested review of this revision.
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.

Fixes pr52560


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D114313

Files:
  llvm/lib/Target/X86/X86InstrAVX512.td
  llvm/test/CodeGen/X86/avx512fp16-mov.ll


Index: llvm/test/CodeGen/X86/avx512fp16-mov.ll
===================================================================
--- llvm/test/CodeGen/X86/avx512fp16-mov.ll
+++ llvm/test/CodeGen/X86/avx512fp16-mov.ll
@@ -1956,6 +1956,76 @@
   ret <8 x half> %3
 }
 
+define <16 x i16> @test22(i16* %mem) nounwind {
+; X64-LABEL: test22:
+; X64:       # %bb.0:
+; X64-NEXT:    movzwl 0, %eax
+; X64-NEXT:    andw (%rdi), %ax
+; X64-NEXT:    vmovw %eax, %xmm0
+; X64-NEXT:    retq
+;
+; X86-LABEL: test22:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movzwl 0, %ecx
+; X86-NEXT:    andw (%eax), %cx
+; X86-NEXT:    vmovw %ecx, %xmm0
+; X86-NEXT:    retl
+  %1 = load i16, i16* null, align 2
+  %2 = load i16, i16* %mem, align 2
+  %3 = and i16 %1, %2
+  %4 = insertelement <16 x i16> <i16 undef, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, i16 %3, i32 0
+  ret <16 x i16> %4
+}
+
+define void @pr52560(i8 %0, <2 x i16> %1, i8* %c) nounwind {
+; X64-LABEL: pr52560:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    movsbl %dil, %eax
+; X64-NEXT:    vmovw %eax, %xmm1
+; X64-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; X64-NEXT:    vpcmpgtw %xmm2, %xmm1, %k1
+; X64-NEXT:    vmovdqu16 %xmm0, %xmm0 {%k1} {z}
+; X64-NEXT:    vmovw %xmm0, %eax
+; X64-NEXT:    testw %ax, %ax
+; X64-NEXT:    je .LBB121_2
+; X64-NEXT:  # %bb.1: # %for.body.preheader
+; X64-NEXT:    movb $0, (%rsi)
+; X64-NEXT:  .LBB121_2: # %for.end
+; X64-NEXT:    retq
+;
+; X86-LABEL: pr52560:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    movsbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vmovw %eax, %xmm1
+; X86-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; X86-NEXT:    vpcmpgtw %xmm2, %xmm1, %k1
+; X86-NEXT:    vmovdqu16 %xmm0, %xmm0 {%k1} {z}
+; X86-NEXT:    vmovw %xmm0, %eax
+; X86-NEXT:    testw %ax, %ax
+; X86-NEXT:    je .LBB121_2
+; X86-NEXT:  # %bb.1: # %for.body.preheader
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb $0, (%eax)
+; X86-NEXT:  .LBB121_2: # %for.end
+; X86-NEXT:    retl
+entry:
+  %conv = sext i8 %0 to i16
+  %2 = insertelement <2 x i16> <i16 poison, i16 0>, i16 %conv, i32 0
+  %3 = icmp sgt <2 x i16> %2, zeroinitializer
+  %4 = select <2 x i1> %3, <2 x i16> %1, <2 x i16> <i16 0, i16 poison>
+  %5 = extractelement <2 x i16> %4, i32 0
+  %tobool.not14 = icmp eq i16 %5, 0
+  br i1 %tobool.not14, label %for.end, label %for.body.preheader
+
+for.body.preheader:                               ; preds = %entry
+  store i8 0, i8* %c, align 1
+  br label %for.end
+
+for.end:                                          ; preds = %for.body.preheader, %entry
+  ret void
+}
+
 define <16 x i32> @pr52561(<16 x i32> %a, <16 x i32> %b) "min-legal-vector-width"="256" "prefer-vector-width"="256" nounwind {
 ; X64-LABEL: pr52561:
 ; X64:       # %bb.0:
Index: llvm/lib/Target/X86/X86InstrAVX512.td
===================================================================
--- llvm/lib/Target/X86/X86InstrAVX512.td
+++ llvm/lib/Target/X86/X86InstrAVX512.td
@@ -12937,8 +12937,8 @@
                                      (iPTR 0)))),
           (SUBREG_TO_REG (i32 0), (VMOVW2SHrr GR32:$src), sub_xmm)>;
 
-def : Pat<(v8i16 (X86vzmovl (v8i16 (scalar_to_vector (i16 (trunc GR32:$src)))))),
-          (VMOVW2SHrr GR32:$src)>;
+def : Pat<(v8i16 (X86vzmovl (scalar_to_vector (i16 GR16:$src)))),
+          (VMOVW2SHrr (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit))>;
 
 // AVX 128-bit movw instruction write zeros in the high 128-bit part.
 def : Pat<(v8i16 (X86vzload16 addr:$src)),


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D114313.388687.patch
Type: text/x-patch
Size: 3557 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20211120/66805b22/attachment.bin>


More information about the llvm-commits mailing list