[PATCH] D155961: [X86][BF16] Customize INSERT_VECTOR_ELT for bf16 when feature BF16 is on
Phoebe Wang via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 21 08:12:18 PDT 2023
pengfei created this revision.
pengfei added reviewers: RKSimon, bkramer, craig.topper, skan.
Herald added a subscriber: hiraditya.
Herald added a project: All.
pengfei requested review of this revision.
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.
Fixes the root cause of #63017.
The reason is similar to the one for BUILD_VECTOR: the vector type is legal,
but the scalar type is still soft-promoted. So we need to custom-lower these
scalar-to-vector nodes.
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D155961
Files:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/bfloat.ll
Index: llvm/test/CodeGen/X86/bfloat.ll
===================================================================
--- llvm/test/CodeGen/X86/bfloat.ll
+++ llvm/test/CodeGen/X86/bfloat.ll
@@ -1158,4 +1158,29 @@
ret <32 x bfloat> %1
}
+define <32 x bfloat> @pr62997_3(<32 x bfloat> %0, bfloat %1) {
+; SSE2-LABEL: pr62997_3:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movq %xmm0, %rax
+; SSE2-NEXT: movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000
+; SSE2-NEXT: andq %rax, %rcx
+; SSE2-NEXT: movzwl %ax, %eax
+; SSE2-NEXT: movd %xmm4, %edx
+; SSE2-NEXT: shll $16, %edx
+; SSE2-NEXT: orl %eax, %edx
+; SSE2-NEXT: orq %rcx, %rdx
+; SSE2-NEXT: movq %rdx, %xmm4
+; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm4[0],xmm0[1]
+; SSE2-NEXT: retq
+;
+; BF16-LABEL: pr62997_3:
+; BF16: # %bb.0:
+; BF16-NEXT: vmovd %xmm1, %eax
+; BF16-NEXT: vpinsrw $1, %eax, %xmm0, %xmm1
+; BF16-NEXT: vinserti32x4 $0, %xmm1, %zmm0, %zmm0
+; BF16-NEXT: retq
+ %3 = insertelement <32 x bfloat> %0, bfloat %1, i64 1
+ ret <32 x bfloat> %3
+}
+
declare <32 x bfloat> @llvm.masked.load.v32bf16.p0(ptr, i32, <32 x i1>, <32 x bfloat>)
Index: llvm/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2273,9 +2273,10 @@
addRegisterClass(MVT::v8bf16, &X86::VR128XRegClass);
addRegisterClass(MVT::v16bf16, &X86::VR256XRegClass);
// We set the type action of bf16 to TypeSoftPromoteHalf, but we don't
- // provide the method to promote BUILD_VECTOR. Set the operation action
- // Custom to do the customization later.
+ // provide the method to promote BUILD_VECTOR and INSERT_VECTOR_ELT.
+ // Set the operation action Custom to do the customization later.
setOperationAction(ISD::BUILD_VECTOR, MVT::bf16, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::bf16, Custom);
for (auto VT : {MVT::v8bf16, MVT::v16bf16}) {
setF16Action(VT, Expand);
setOperationAction(ISD::FADD, VT, Expand);
@@ -20699,6 +20700,14 @@
SDValue N2 = Op.getOperand(2);
auto *N2C = dyn_cast<ConstantSDNode>(N2);
+ if (EltVT == MVT::bf16) {
+ MVT IVT = VT.changeVectorElementTypeToInteger();
+ SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, IVT,
+ DAG.getBitcast(IVT, N0),
+ DAG.getBitcast(MVT::i16, N1), N2);
+ return DAG.getBitcast(VT, Res);
+ }
+
if (!N2C) {
// Variable insertion indices, usually we're better off spilling to stack,
// but AVX512 can use a variable compare+select by comparing against all
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D155961.542925.patch
Type: text/x-patch
Size: 2688 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20230721/32411fa3/attachment.bin>
More information about the llvm-commits mailing list