[llvm] fd85761 - [X86][BF16] Customize VSELECT for BF16 under AVX-NECONVERT (#113322)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 28 00:15:53 PDT 2024
Author: Phoebe Wang
Date: 2024-10-28T15:15:49+08:00
New Revision: fd85761208b4fe0569e3fde0d11297f7dcd0e48e
URL: https://github.com/llvm/llvm-project/commit/fd85761208b4fe0569e3fde0d11297f7dcd0e48e
DIFF: https://github.com/llvm/llvm-project/commit/fd85761208b4fe0569e3fde0d11297f7dcd0e48e.diff
LOG: [X86][BF16] Customize VSELECT for BF16 under AVX-NECONVERT (#113322)
Fixes: https://godbolt.org/z/9abGnE8zs
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/avxneconvert-intrinsics.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 9d447959faf55a..1c790f3813b7a4 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2393,6 +2393,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::bf16, Custom);
for (auto VT : {MVT::v8bf16, MVT::v16bf16}) {
setF16Action(VT, Expand);
+ if (!Subtarget.hasBF16())
+ setOperationAction(ISD::VSELECT, VT, Custom);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
diff --git a/llvm/test/CodeGen/X86/avxneconvert-intrinsics.ll b/llvm/test/CodeGen/X86/avxneconvert-intrinsics.ll
index b311c8831457b8..ef87ac31fcf48c 100644
--- a/llvm/test/CodeGen/X86/avxneconvert-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avxneconvert-intrinsics.ll
@@ -215,3 +215,117 @@ define <8 x bfloat> @test_int_x86_vcvtneps2bf16256(<8 x float> %A) {
}
declare <8 x bfloat> @llvm.x86.vcvtneps2bf16256(<8 x float> %A)
+define <8 x bfloat> @select(i8 %x, <8 x bfloat> %y) nounwind {
+; X64-LABEL: select:
+; X64: # %bb.0:
+; X64-NEXT: vmovaps %xmm0, %xmm1 # encoding: [0xc5,0xf8,0x28,0xc8]
+; X64-NEXT: movb %dil, %al # encoding: [0x40,0x88,0xf8]
+; X64-NEXT: movb %al, -{{[0-9]+}}(%rsp) # encoding: [0x88,0x44,0x24,0xff]
+; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0xff]
+; X64-NEXT: movl %eax, %ecx # encoding: [0x89,0xc1]
+; X64-NEXT: andl $1, %ecx # encoding: [0x83,0xe1,0x01]
+; X64-NEXT: negl %ecx # encoding: [0xf7,0xd9]
+; X64-NEXT: vmovd %ecx, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc1]
+; X64-NEXT: # kill: def $al killed $al killed $eax
+; X64-NEXT: movb %al, %cl # encoding: [0x88,0xc1]
+; X64-NEXT: shrb %cl # encoding: [0xd0,0xe9]
+; X64-NEXT: movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9]
+; X64-NEXT: andl $1, %ecx # encoding: [0x83,0xe1,0x01]
+; X64-NEXT: negl %ecx # encoding: [0xf7,0xd9]
+; X64-NEXT: vpinsrw $1, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x01]
+; X64-NEXT: movb %al, %cl # encoding: [0x88,0xc1]
+; X64-NEXT: shrb $2, %cl # encoding: [0xc0,0xe9,0x02]
+; X64-NEXT: movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9]
+; X64-NEXT: andl $1, %ecx # encoding: [0x83,0xe1,0x01]
+; X64-NEXT: negl %ecx # encoding: [0xf7,0xd9]
+; X64-NEXT: vpinsrw $2, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x02]
+; X64-NEXT: movb %al, %cl # encoding: [0x88,0xc1]
+; X64-NEXT: shrb $3, %cl # encoding: [0xc0,0xe9,0x03]
+; X64-NEXT: movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9]
+; X64-NEXT: andl $1, %ecx # encoding: [0x83,0xe1,0x01]
+; X64-NEXT: negl %ecx # encoding: [0xf7,0xd9]
+; X64-NEXT: vpinsrw $3, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x03]
+; X64-NEXT: movb %al, %cl # encoding: [0x88,0xc1]
+; X64-NEXT: shrb $4, %cl # encoding: [0xc0,0xe9,0x04]
+; X64-NEXT: movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9]
+; X64-NEXT: andl $1, %ecx # encoding: [0x83,0xe1,0x01]
+; X64-NEXT: negl %ecx # encoding: [0xf7,0xd9]
+; X64-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x04]
+; X64-NEXT: movb %al, %cl # encoding: [0x88,0xc1]
+; X64-NEXT: shrb $5, %cl # encoding: [0xc0,0xe9,0x05]
+; X64-NEXT: movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9]
+; X64-NEXT: andl $1, %ecx # encoding: [0x83,0xe1,0x01]
+; X64-NEXT: negl %ecx # encoding: [0xf7,0xd9]
+; X64-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x05]
+; X64-NEXT: movb %al, %cl # encoding: [0x88,0xc1]
+; X64-NEXT: shrb $6, %cl # encoding: [0xc0,0xe9,0x06]
+; X64-NEXT: movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9]
+; X64-NEXT: andl $1, %ecx # encoding: [0x83,0xe1,0x01]
+; X64-NEXT: negl %ecx # encoding: [0xf7,0xd9]
+; X64-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x06]
+; X64-NEXT: shrb $7, %al # encoding: [0xc0,0xe8,0x07]
+; X64-NEXT: movzbl %al, %eax # encoding: [0x0f,0xb6,0xc0]
+; X64-NEXT: negl %eax # encoding: [0xf7,0xd8]
+; X64-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
+; X64-NEXT: vpandn %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xdf,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: select:
+; X86: # %bb.0:
+; X86-NEXT: pushl %eax # encoding: [0x50]
+; X86-NEXT: vmovaps %xmm0, %xmm1 # encoding: [0xc5,0xf8,0x28,0xc8]
+; X86-NEXT: movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x08]
+; X86-NEXT: movb %al, {{[0-9]+}}(%esp) # encoding: [0x88,0x44,0x24,0x03]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x03]
+; X86-NEXT: movl %eax, %ecx # encoding: [0x89,0xc1]
+; X86-NEXT: andl $1, %ecx # encoding: [0x83,0xe1,0x01]
+; X86-NEXT: negl %ecx # encoding: [0xf7,0xd9]
+; X86-NEXT: vmovd %ecx, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc1]
+; X86-NEXT: # kill: def $al killed $al killed $eax
+; X86-NEXT: movb %al, %cl # encoding: [0x88,0xc1]
+; X86-NEXT: shrb %cl # encoding: [0xd0,0xe9]
+; X86-NEXT: movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9]
+; X86-NEXT: andl $1, %ecx # encoding: [0x83,0xe1,0x01]
+; X86-NEXT: negl %ecx # encoding: [0xf7,0xd9]
+; X86-NEXT: vpinsrw $1, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x01]
+; X86-NEXT: movb %al, %cl # encoding: [0x88,0xc1]
+; X86-NEXT: shrb $2, %cl # encoding: [0xc0,0xe9,0x02]
+; X86-NEXT: movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9]
+; X86-NEXT: andl $1, %ecx # encoding: [0x83,0xe1,0x01]
+; X86-NEXT: negl %ecx # encoding: [0xf7,0xd9]
+; X86-NEXT: vpinsrw $2, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x02]
+; X86-NEXT: movb %al, %cl # encoding: [0x88,0xc1]
+; X86-NEXT: shrb $3, %cl # encoding: [0xc0,0xe9,0x03]
+; X86-NEXT: movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9]
+; X86-NEXT: andl $1, %ecx # encoding: [0x83,0xe1,0x01]
+; X86-NEXT: negl %ecx # encoding: [0xf7,0xd9]
+; X86-NEXT: vpinsrw $3, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x03]
+; X86-NEXT: movb %al, %cl # encoding: [0x88,0xc1]
+; X86-NEXT: shrb $4, %cl # encoding: [0xc0,0xe9,0x04]
+; X86-NEXT: movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9]
+; X86-NEXT: andl $1, %ecx # encoding: [0x83,0xe1,0x01]
+; X86-NEXT: negl %ecx # encoding: [0xf7,0xd9]
+; X86-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x04]
+; X86-NEXT: movb %al, %cl # encoding: [0x88,0xc1]
+; X86-NEXT: shrb $5, %cl # encoding: [0xc0,0xe9,0x05]
+; X86-NEXT: movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9]
+; X86-NEXT: andl $1, %ecx # encoding: [0x83,0xe1,0x01]
+; X86-NEXT: negl %ecx # encoding: [0xf7,0xd9]
+; X86-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x05]
+; X86-NEXT: movb %al, %cl # encoding: [0x88,0xc1]
+; X86-NEXT: shrb $6, %cl # encoding: [0xc0,0xe9,0x06]
+; X86-NEXT: movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9]
+; X86-NEXT: andl $1, %ecx # encoding: [0x83,0xe1,0x01]
+; X86-NEXT: negl %ecx # encoding: [0xf7,0xd9]
+; X86-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x06]
+; X86-NEXT: shrb $7, %al # encoding: [0xc0,0xe8,0x07]
+; X86-NEXT: movzbl %al, %eax # encoding: [0x0f,0xb6,0xc0]
+; X86-NEXT: negl %eax # encoding: [0xf7,0xd8]
+; X86-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
+; X86-NEXT: vpandn %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xdf,0xc1]
+; X86-NEXT: popl %eax # encoding: [0x58]
+; X86-NEXT: retl # encoding: [0xc3]
+ %1 = bitcast i8 %x to <8 x i1>
+ %2 = select <8 x i1> %1, <8 x bfloat> zeroinitializer, <8 x bfloat> %y
+ ret <8 x bfloat> %2
+}
More information about the llvm-commits mailing list