[llvm] [X86][BF16] Customize VSELECT for BF16 under AVX-NECONVERT (PR #113322)

Phoebe Wang via llvm-commits llvm-commits at lists.llvm.org
Tue Oct 22 07:27:47 PDT 2024


phoebewang (https://github.com/phoebewang) created pull request https://github.com/llvm/llvm-project/pull/113322

Fixes: https://godbolt.org/z/9abGnE8zs

From fd56af97255288c19d55ded3700d2dc13dd3f7bb Mon Sep 17 00:00:00 2001
From: "Wang, Phoebe" <phoebe.wang at intel.com>
Date: Tue, 22 Oct 2024 22:24:42 +0800
Subject: [PATCH] [X86][BF16] Customize VSELECT for BF16 under AVX-NECONVERT

Fixes: https://godbolt.org/z/9abGnE8zs
---
 llvm/lib/Target/X86/X86ISelLowering.cpp       |   1 +
 .../CodeGen/X86/avxneconvert-intrinsics.ll    | 114 ++++++++++++++++++
 2 files changed, 115 insertions(+)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index bcb84add65d83e..c453d7ae1d3889 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2393,6 +2393,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::bf16, Custom);
     for (auto VT : {MVT::v8bf16, MVT::v16bf16}) {
       setF16Action(VT, Expand);
+      setOperationAction(ISD::VSELECT, VT, Custom);
       setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
       setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
       setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
diff --git a/llvm/test/CodeGen/X86/avxneconvert-intrinsics.ll b/llvm/test/CodeGen/X86/avxneconvert-intrinsics.ll
index b311c8831457b8..ef87ac31fcf48c 100644
--- a/llvm/test/CodeGen/X86/avxneconvert-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avxneconvert-intrinsics.ll
@@ -215,3 +215,117 @@ define <8 x bfloat> @test_int_x86_vcvtneps2bf16256(<8 x float> %A) {
 }
 declare <8 x bfloat> @llvm.x86.vcvtneps2bf16256(<8 x float> %A)
 
+define <8 x bfloat> @select(i8 %x, <8 x bfloat> %y) nounwind {
+; X64-LABEL: select:
+; X64:       # %bb.0:
+; X64-NEXT:    vmovaps %xmm0, %xmm1 # encoding: [0xc5,0xf8,0x28,0xc8]
+; X64-NEXT:    movb %dil, %al # encoding: [0x40,0x88,0xf8]
+; X64-NEXT:    movb %al, -{{[0-9]+}}(%rsp) # encoding: [0x88,0x44,0x24,0xff]
+; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0xff]
+; X64-NEXT:    movl %eax, %ecx # encoding: [0x89,0xc1]
+; X64-NEXT:    andl $1, %ecx # encoding: [0x83,0xe1,0x01]
+; X64-NEXT:    negl %ecx # encoding: [0xf7,0xd9]
+; X64-NEXT:    vmovd %ecx, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc1]
+; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    movb %al, %cl # encoding: [0x88,0xc1]
+; X64-NEXT:    shrb %cl # encoding: [0xd0,0xe9]
+; X64-NEXT:    movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9]
+; X64-NEXT:    andl $1, %ecx # encoding: [0x83,0xe1,0x01]
+; X64-NEXT:    negl %ecx # encoding: [0xf7,0xd9]
+; X64-NEXT:    vpinsrw $1, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x01]
+; X64-NEXT:    movb %al, %cl # encoding: [0x88,0xc1]
+; X64-NEXT:    shrb $2, %cl # encoding: [0xc0,0xe9,0x02]
+; X64-NEXT:    movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9]
+; X64-NEXT:    andl $1, %ecx # encoding: [0x83,0xe1,0x01]
+; X64-NEXT:    negl %ecx # encoding: [0xf7,0xd9]
+; X64-NEXT:    vpinsrw $2, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x02]
+; X64-NEXT:    movb %al, %cl # encoding: [0x88,0xc1]
+; X64-NEXT:    shrb $3, %cl # encoding: [0xc0,0xe9,0x03]
+; X64-NEXT:    movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9]
+; X64-NEXT:    andl $1, %ecx # encoding: [0x83,0xe1,0x01]
+; X64-NEXT:    negl %ecx # encoding: [0xf7,0xd9]
+; X64-NEXT:    vpinsrw $3, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x03]
+; X64-NEXT:    movb %al, %cl # encoding: [0x88,0xc1]
+; X64-NEXT:    shrb $4, %cl # encoding: [0xc0,0xe9,0x04]
+; X64-NEXT:    movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9]
+; X64-NEXT:    andl $1, %ecx # encoding: [0x83,0xe1,0x01]
+; X64-NEXT:    negl %ecx # encoding: [0xf7,0xd9]
+; X64-NEXT:    vpinsrw $4, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x04]
+; X64-NEXT:    movb %al, %cl # encoding: [0x88,0xc1]
+; X64-NEXT:    shrb $5, %cl # encoding: [0xc0,0xe9,0x05]
+; X64-NEXT:    movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9]
+; X64-NEXT:    andl $1, %ecx # encoding: [0x83,0xe1,0x01]
+; X64-NEXT:    negl %ecx # encoding: [0xf7,0xd9]
+; X64-NEXT:    vpinsrw $5, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x05]
+; X64-NEXT:    movb %al, %cl # encoding: [0x88,0xc1]
+; X64-NEXT:    shrb $6, %cl # encoding: [0xc0,0xe9,0x06]
+; X64-NEXT:    movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9]
+; X64-NEXT:    andl $1, %ecx # encoding: [0x83,0xe1,0x01]
+; X64-NEXT:    negl %ecx # encoding: [0xf7,0xd9]
+; X64-NEXT:    vpinsrw $6, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x06]
+; X64-NEXT:    shrb $7, %al # encoding: [0xc0,0xe8,0x07]
+; X64-NEXT:    movzbl %al, %eax # encoding: [0x0f,0xb6,0xc0]
+; X64-NEXT:    negl %eax # encoding: [0xf7,0xd8]
+; X64-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
+; X64-NEXT:    vpandn %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xdf,0xc1]
+; X64-NEXT:    retq # encoding: [0xc3]
+;
+; X86-LABEL: select:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %eax # encoding: [0x50]
+; X86-NEXT:    vmovaps %xmm0, %xmm1 # encoding: [0xc5,0xf8,0x28,0xc8]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x08]
+; X86-NEXT:    movb %al, {{[0-9]+}}(%esp) # encoding: [0x88,0x44,0x24,0x03]
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x03]
+; X86-NEXT:    movl %eax, %ecx # encoding: [0x89,0xc1]
+; X86-NEXT:    andl $1, %ecx # encoding: [0x83,0xe1,0x01]
+; X86-NEXT:    negl %ecx # encoding: [0xf7,0xd9]
+; X86-NEXT:    vmovd %ecx, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc1]
+; X86-NEXT:    # kill: def $al killed $al killed $eax
+; X86-NEXT:    movb %al, %cl # encoding: [0x88,0xc1]
+; X86-NEXT:    shrb %cl # encoding: [0xd0,0xe9]
+; X86-NEXT:    movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9]
+; X86-NEXT:    andl $1, %ecx # encoding: [0x83,0xe1,0x01]
+; X86-NEXT:    negl %ecx # encoding: [0xf7,0xd9]
+; X86-NEXT:    vpinsrw $1, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x01]
+; X86-NEXT:    movb %al, %cl # encoding: [0x88,0xc1]
+; X86-NEXT:    shrb $2, %cl # encoding: [0xc0,0xe9,0x02]
+; X86-NEXT:    movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9]
+; X86-NEXT:    andl $1, %ecx # encoding: [0x83,0xe1,0x01]
+; X86-NEXT:    negl %ecx # encoding: [0xf7,0xd9]
+; X86-NEXT:    vpinsrw $2, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x02]
+; X86-NEXT:    movb %al, %cl # encoding: [0x88,0xc1]
+; X86-NEXT:    shrb $3, %cl # encoding: [0xc0,0xe9,0x03]
+; X86-NEXT:    movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9]
+; X86-NEXT:    andl $1, %ecx # encoding: [0x83,0xe1,0x01]
+; X86-NEXT:    negl %ecx # encoding: [0xf7,0xd9]
+; X86-NEXT:    vpinsrw $3, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x03]
+; X86-NEXT:    movb %al, %cl # encoding: [0x88,0xc1]
+; X86-NEXT:    shrb $4, %cl # encoding: [0xc0,0xe9,0x04]
+; X86-NEXT:    movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9]
+; X86-NEXT:    andl $1, %ecx # encoding: [0x83,0xe1,0x01]
+; X86-NEXT:    negl %ecx # encoding: [0xf7,0xd9]
+; X86-NEXT:    vpinsrw $4, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x04]
+; X86-NEXT:    movb %al, %cl # encoding: [0x88,0xc1]
+; X86-NEXT:    shrb $5, %cl # encoding: [0xc0,0xe9,0x05]
+; X86-NEXT:    movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9]
+; X86-NEXT:    andl $1, %ecx # encoding: [0x83,0xe1,0x01]
+; X86-NEXT:    negl %ecx # encoding: [0xf7,0xd9]
+; X86-NEXT:    vpinsrw $5, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x05]
+; X86-NEXT:    movb %al, %cl # encoding: [0x88,0xc1]
+; X86-NEXT:    shrb $6, %cl # encoding: [0xc0,0xe9,0x06]
+; X86-NEXT:    movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9]
+; X86-NEXT:    andl $1, %ecx # encoding: [0x83,0xe1,0x01]
+; X86-NEXT:    negl %ecx # encoding: [0xf7,0xd9]
+; X86-NEXT:    vpinsrw $6, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x06]
+; X86-NEXT:    shrb $7, %al # encoding: [0xc0,0xe8,0x07]
+; X86-NEXT:    movzbl %al, %eax # encoding: [0x0f,0xb6,0xc0]
+; X86-NEXT:    negl %eax # encoding: [0xf7,0xd8]
+; X86-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
+; X86-NEXT:    vpandn %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xdf,0xc1]
+; X86-NEXT:    popl %eax # encoding: [0x58]
+; X86-NEXT:    retl # encoding: [0xc3]
+  %1 = bitcast i8 %x to <8 x i1>
+  %2 = select <8 x i1> %1, <8 x bfloat> zeroinitializer, <8 x bfloat> %y
+  ret <8 x bfloat> %2
+}



More information about the llvm-commits mailing list