[llvm] X86: Fix asserting on bfloat argument/return without sse2 (PR #93146)

via llvm-commits llvm-commits at lists.llvm.org
Thu May 23 00:49:07 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-x86

Author: Matt Arsenault (arsenm)

<details>
<summary>Changes</summary>

Without SSE2 (when f16 is not a legal type), bfloat arguments and return values now get the default promote-to-float behavior, like half does.

Fixes #<!-- -->92899

---

Patch is 58.72 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/93146.diff


2 Files Affected:

- (modified) llvm/lib/Target/X86/X86ISelLoweringCall.cpp (+11-7) 
- (added) llvm/test/CodeGen/X86/bfloat-calling-conv-no-sse2.ll (+1441) 


``````````diff
diff --git a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
index b107d56f8cf9b..4fddb60db5f10 100644
--- a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
+++ b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
@@ -123,12 +123,14 @@ MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
       !Subtarget.hasX87())
     return MVT::i32;
 
-  if (VT.isVector() && VT.getVectorElementType() == MVT::bf16)
-    return getRegisterTypeForCallingConv(Context, CC,
-                                         VT.changeVectorElementType(MVT::f16));
+  if (isTypeLegal(MVT::f16)) {
+    if (VT.isVector() && VT.getVectorElementType() == MVT::bf16)
+      return getRegisterTypeForCallingConv(
+          Context, CC, VT.changeVectorElementType(MVT::f16));
 
-  if (VT == MVT::bf16)
-    return MVT::f16;
+    if (VT == MVT::bf16)
+      return MVT::f16;
+  }
 
   return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
 }
@@ -161,7 +163,8 @@ unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
       return 3;
   }
 
-  if (VT.isVector() && VT.getVectorElementType() == MVT::bf16)
+  if (VT.isVector() && VT.getVectorElementType() == MVT::bf16 &&
+      isTypeLegal(MVT::f16))
     return getNumRegistersForCallingConv(Context, CC,
                                          VT.changeVectorElementType(MVT::f16));
 
@@ -193,7 +196,8 @@ unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv(
   }
 
   // Split vNbf16 vectors according to vNf16.
-  if (VT.isVector() && VT.getVectorElementType() == MVT::bf16)
+  if (VT.isVector() && VT.getVectorElementType() == MVT::bf16 &&
+      isTypeLegal(MVT::f16))
     VT = VT.changeVectorElementType(MVT::f16);
 
   return TargetLowering::getVectorTypeBreakdownForCallingConv(Context, CC, VT, IntermediateVT,
diff --git a/llvm/test/CodeGen/X86/bfloat-calling-conv-no-sse2.ll b/llvm/test/CodeGen/X86/bfloat-calling-conv-no-sse2.ll
new file mode 100644
index 0000000000000..05f036201f4b1
--- /dev/null
+++ b/llvm/test/CodeGen/X86/bfloat-calling-conv-no-sse2.ll
@@ -0,0 +1,1441 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=i386-linux-gnu < %s | FileCheck -check-prefixes=CHECK,NOSSE %s
+; RUN: llc -mtriple=i386-linux-gnu -mattr=+sse < %s | FileCheck -check-prefixes=CHECK,SSE %s
+
+; Make sure bfloat arguments and return values do not assert without SSE2. Issue 92899
+
+define bfloat @return_arg_bf16(bfloat %x) {
+; CHECK-LABEL: return_arg_bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    flds {{[0-9]+}}(%esp)
+; CHECK-NEXT:    retl
+  ret bfloat %x
+}
+
+define <2 x bfloat> @return_arg_v2bf16(<2 x bfloat> %x) {
+; CHECK-LABEL: return_arg_v2bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    flds {{[0-9]+}}(%esp)
+; CHECK-NEXT:    flds {{[0-9]+}}(%esp)
+; CHECK-NEXT:    retl
+  ret <2 x bfloat> %x
+}
+
+define <3 x bfloat> @return_arg_v3bf16(<3 x bfloat> %x) {
+; NOSSE-LABEL: return_arg_v3bf16:
+; NOSSE:       # %bb.0:
+; NOSSE-NEXT:    pushl %edi
+; NOSSE-NEXT:    .cfi_def_cfa_offset 8
+; NOSSE-NEXT:    pushl %esi
+; NOSSE-NEXT:    .cfi_def_cfa_offset 12
+; NOSSE-NEXT:    pushl %eax
+; NOSSE-NEXT:    .cfi_def_cfa_offset 16
+; NOSSE-NEXT:    .cfi_offset %esi, -12
+; NOSSE-NEXT:    .cfi_offset %edi, -8
+; NOSSE-NEXT:    flds {{[0-9]+}}(%esp)
+; NOSSE-NEXT:    fstps (%esp)
+; NOSSE-NEXT:    calll __truncsfbf2
+; NOSSE-NEXT:    movl %eax, %esi
+; NOSSE-NEXT:    flds {{[0-9]+}}(%esp)
+; NOSSE-NEXT:    fstps (%esp)
+; NOSSE-NEXT:    calll __truncsfbf2
+; NOSSE-NEXT:    # kill: def $ax killed $ax def $eax
+; NOSSE-NEXT:    flds {{[0-9]+}}(%esp)
+; NOSSE-NEXT:    fstps (%esp)
+; NOSSE-NEXT:    shll $16, %eax
+; NOSSE-NEXT:    movzwl %si, %edi
+; NOSSE-NEXT:    orl %eax, %edi
+; NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; NOSSE-NEXT:    calll __truncsfbf2
+; NOSSE-NEXT:    movw %ax, 4(%esi)
+; NOSSE-NEXT:    movl %edi, (%esi)
+; NOSSE-NEXT:    movl %esi, %eax
+; NOSSE-NEXT:    addl $4, %esp
+; NOSSE-NEXT:    .cfi_def_cfa_offset 12
+; NOSSE-NEXT:    popl %esi
+; NOSSE-NEXT:    .cfi_def_cfa_offset 8
+; NOSSE-NEXT:    popl %edi
+; NOSSE-NEXT:    .cfi_def_cfa_offset 4
+; NOSSE-NEXT:    retl $4
+;
+; SSE-LABEL: return_arg_v3bf16:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pushl %edi
+; SSE-NEXT:    .cfi_def_cfa_offset 8
+; SSE-NEXT:    pushl %esi
+; SSE-NEXT:    .cfi_def_cfa_offset 12
+; SSE-NEXT:    pushl %eax
+; SSE-NEXT:    .cfi_def_cfa_offset 16
+; SSE-NEXT:    .cfi_offset %esi, -12
+; SSE-NEXT:    .cfi_offset %edi, -8
+; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE-NEXT:    movss %xmm0, (%esp)
+; SSE-NEXT:    calll __truncsfbf2
+; SSE-NEXT:    movl %eax, %esi
+; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE-NEXT:    movss %xmm0, (%esp)
+; SSE-NEXT:    calll __truncsfbf2
+; SSE-NEXT:    # kill: def $ax killed $ax def $eax
+; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE-NEXT:    movss %xmm0, (%esp)
+; SSE-NEXT:    shll $16, %eax
+; SSE-NEXT:    movzwl %si, %edi
+; SSE-NEXT:    orl %eax, %edi
+; SSE-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; SSE-NEXT:    calll __truncsfbf2
+; SSE-NEXT:    movw %ax, 4(%esi)
+; SSE-NEXT:    movl %edi, (%esi)
+; SSE-NEXT:    movl %esi, %eax
+; SSE-NEXT:    addl $4, %esp
+; SSE-NEXT:    .cfi_def_cfa_offset 12
+; SSE-NEXT:    popl %esi
+; SSE-NEXT:    .cfi_def_cfa_offset 8
+; SSE-NEXT:    popl %edi
+; SSE-NEXT:    .cfi_def_cfa_offset 4
+; SSE-NEXT:    retl $4
+  ret <3 x bfloat> %x
+}
+
+define <4 x bfloat> @return_arg_v4bf16(<4 x bfloat> %x) {
+; NOSSE-LABEL: return_arg_v4bf16:
+; NOSSE:       # %bb.0:
+; NOSSE-NEXT:    pushl %ebp
+; NOSSE-NEXT:    .cfi_def_cfa_offset 8
+; NOSSE-NEXT:    pushl %ebx
+; NOSSE-NEXT:    .cfi_def_cfa_offset 12
+; NOSSE-NEXT:    pushl %edi
+; NOSSE-NEXT:    .cfi_def_cfa_offset 16
+; NOSSE-NEXT:    pushl %esi
+; NOSSE-NEXT:    .cfi_def_cfa_offset 20
+; NOSSE-NEXT:    subl $12, %esp
+; NOSSE-NEXT:    .cfi_def_cfa_offset 32
+; NOSSE-NEXT:    .cfi_offset %esi, -20
+; NOSSE-NEXT:    .cfi_offset %edi, -16
+; NOSSE-NEXT:    .cfi_offset %ebx, -12
+; NOSSE-NEXT:    .cfi_offset %ebp, -8
+; NOSSE-NEXT:    flds {{[0-9]+}}(%esp)
+; NOSSE-NEXT:    fstps (%esp)
+; NOSSE-NEXT:    calll __truncsfbf2
+; NOSSE-NEXT:    movl %eax, %esi
+; NOSSE-NEXT:    flds {{[0-9]+}}(%esp)
+; NOSSE-NEXT:    fstps (%esp)
+; NOSSE-NEXT:    calll __truncsfbf2
+; NOSSE-NEXT:    movl %eax, %edi
+; NOSSE-NEXT:    flds {{[0-9]+}}(%esp)
+; NOSSE-NEXT:    fstps (%esp)
+; NOSSE-NEXT:    calll __truncsfbf2
+; NOSSE-NEXT:    movl %eax, %ebx
+; NOSSE-NEXT:    flds {{[0-9]+}}(%esp)
+; NOSSE-NEXT:    fstps (%esp)
+; NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; NOSSE-NEXT:    calll __truncsfbf2
+; NOSSE-NEXT:    movw %ax, 6(%ebp)
+; NOSSE-NEXT:    movw %bx, 4(%ebp)
+; NOSSE-NEXT:    movw %di, 2(%ebp)
+; NOSSE-NEXT:    movw %si, (%ebp)
+; NOSSE-NEXT:    movl %ebp, %eax
+; NOSSE-NEXT:    addl $12, %esp
+; NOSSE-NEXT:    .cfi_def_cfa_offset 20
+; NOSSE-NEXT:    popl %esi
+; NOSSE-NEXT:    .cfi_def_cfa_offset 16
+; NOSSE-NEXT:    popl %edi
+; NOSSE-NEXT:    .cfi_def_cfa_offset 12
+; NOSSE-NEXT:    popl %ebx
+; NOSSE-NEXT:    .cfi_def_cfa_offset 8
+; NOSSE-NEXT:    popl %ebp
+; NOSSE-NEXT:    .cfi_def_cfa_offset 4
+; NOSSE-NEXT:    retl $4
+;
+; SSE-LABEL: return_arg_v4bf16:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pushl %ebp
+; SSE-NEXT:    .cfi_def_cfa_offset 8
+; SSE-NEXT:    pushl %ebx
+; SSE-NEXT:    .cfi_def_cfa_offset 12
+; SSE-NEXT:    pushl %edi
+; SSE-NEXT:    .cfi_def_cfa_offset 16
+; SSE-NEXT:    pushl %esi
+; SSE-NEXT:    .cfi_def_cfa_offset 20
+; SSE-NEXT:    subl $12, %esp
+; SSE-NEXT:    .cfi_def_cfa_offset 32
+; SSE-NEXT:    .cfi_offset %esi, -20
+; SSE-NEXT:    .cfi_offset %edi, -16
+; SSE-NEXT:    .cfi_offset %ebx, -12
+; SSE-NEXT:    .cfi_offset %ebp, -8
+; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE-NEXT:    movss %xmm0, (%esp)
+; SSE-NEXT:    calll __truncsfbf2
+; SSE-NEXT:    movl %eax, %esi
+; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE-NEXT:    movss %xmm0, (%esp)
+; SSE-NEXT:    calll __truncsfbf2
+; SSE-NEXT:    movl %eax, %edi
+; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE-NEXT:    movss %xmm0, (%esp)
+; SSE-NEXT:    calll __truncsfbf2
+; SSE-NEXT:    movl %eax, %ebx
+; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE-NEXT:    movss %xmm0, (%esp)
+; SSE-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; SSE-NEXT:    calll __truncsfbf2
+; SSE-NEXT:    movw %ax, 6(%ebp)
+; SSE-NEXT:    movw %bx, 4(%ebp)
+; SSE-NEXT:    movw %di, 2(%ebp)
+; SSE-NEXT:    movw %si, (%ebp)
+; SSE-NEXT:    movl %ebp, %eax
+; SSE-NEXT:    addl $12, %esp
+; SSE-NEXT:    .cfi_def_cfa_offset 20
+; SSE-NEXT:    popl %esi
+; SSE-NEXT:    .cfi_def_cfa_offset 16
+; SSE-NEXT:    popl %edi
+; SSE-NEXT:    .cfi_def_cfa_offset 12
+; SSE-NEXT:    popl %ebx
+; SSE-NEXT:    .cfi_def_cfa_offset 8
+; SSE-NEXT:    popl %ebp
+; SSE-NEXT:    .cfi_def_cfa_offset 4
+; SSE-NEXT:    retl $4
+  ret <4 x bfloat> %x
+}
+
+define <8 x bfloat> @return_arg_v8bf16(<8 x bfloat> %x) {
+; NOSSE-LABEL: return_arg_v8bf16:
+; NOSSE:       # %bb.0:
+; NOSSE-NEXT:    pushl %ebp
+; NOSSE-NEXT:    .cfi_def_cfa_offset 8
+; NOSSE-NEXT:    pushl %ebx
+; NOSSE-NEXT:    .cfi_def_cfa_offset 12
+; NOSSE-NEXT:    pushl %edi
+; NOSSE-NEXT:    .cfi_def_cfa_offset 16
+; NOSSE-NEXT:    pushl %esi
+; NOSSE-NEXT:    .cfi_def_cfa_offset 20
+; NOSSE-NEXT:    subl $12, %esp
+; NOSSE-NEXT:    .cfi_def_cfa_offset 32
+; NOSSE-NEXT:    .cfi_offset %esi, -20
+; NOSSE-NEXT:    .cfi_offset %edi, -16
+; NOSSE-NEXT:    .cfi_offset %ebx, -12
+; NOSSE-NEXT:    .cfi_offset %ebp, -8
+; NOSSE-NEXT:    flds {{[0-9]+}}(%esp)
+; NOSSE-NEXT:    fstps (%esp)
+; NOSSE-NEXT:    calll __truncsfbf2
+; NOSSE-NEXT:    movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill
+; NOSSE-NEXT:    flds {{[0-9]+}}(%esp)
+; NOSSE-NEXT:    fstps (%esp)
+; NOSSE-NEXT:    calll __truncsfbf2
+; NOSSE-NEXT:    movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill
+; NOSSE-NEXT:    flds {{[0-9]+}}(%esp)
+; NOSSE-NEXT:    fstps (%esp)
+; NOSSE-NEXT:    calll __truncsfbf2
+; NOSSE-NEXT:    movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill
+; NOSSE-NEXT:    flds {{[0-9]+}}(%esp)
+; NOSSE-NEXT:    fstps (%esp)
+; NOSSE-NEXT:    calll __truncsfbf2
+; NOSSE-NEXT:    movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill
+; NOSSE-NEXT:    flds {{[0-9]+}}(%esp)
+; NOSSE-NEXT:    fstps (%esp)
+; NOSSE-NEXT:    calll __truncsfbf2
+; NOSSE-NEXT:    movl %eax, %esi
+; NOSSE-NEXT:    flds {{[0-9]+}}(%esp)
+; NOSSE-NEXT:    fstps (%esp)
+; NOSSE-NEXT:    calll __truncsfbf2
+; NOSSE-NEXT:    movl %eax, %edi
+; NOSSE-NEXT:    flds {{[0-9]+}}(%esp)
+; NOSSE-NEXT:    fstps (%esp)
+; NOSSE-NEXT:    calll __truncsfbf2
+; NOSSE-NEXT:    movl %eax, %ebx
+; NOSSE-NEXT:    flds {{[0-9]+}}(%esp)
+; NOSSE-NEXT:    fstps (%esp)
+; NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; NOSSE-NEXT:    calll __truncsfbf2
+; NOSSE-NEXT:    movw %ax, 14(%ebp)
+; NOSSE-NEXT:    movw %bx, 12(%ebp)
+; NOSSE-NEXT:    movw %di, 10(%ebp)
+; NOSSE-NEXT:    movw %si, 8(%ebp)
+; NOSSE-NEXT:    movzwl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 2-byte Folded Reload
+; NOSSE-NEXT:    movw %ax, 6(%ebp)
+; NOSSE-NEXT:    movzwl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 2-byte Folded Reload
+; NOSSE-NEXT:    movw %ax, 4(%ebp)
+; NOSSE-NEXT:    movzwl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 2-byte Folded Reload
+; NOSSE-NEXT:    movw %ax, 2(%ebp)
+; NOSSE-NEXT:    movzwl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 2-byte Folded Reload
+; NOSSE-NEXT:    movw %ax, (%ebp)
+; NOSSE-NEXT:    movl %ebp, %eax
+; NOSSE-NEXT:    addl $12, %esp
+; NOSSE-NEXT:    .cfi_def_cfa_offset 20
+; NOSSE-NEXT:    popl %esi
+; NOSSE-NEXT:    .cfi_def_cfa_offset 16
+; NOSSE-NEXT:    popl %edi
+; NOSSE-NEXT:    .cfi_def_cfa_offset 12
+; NOSSE-NEXT:    popl %ebx
+; NOSSE-NEXT:    .cfi_def_cfa_offset 8
+; NOSSE-NEXT:    popl %ebp
+; NOSSE-NEXT:    .cfi_def_cfa_offset 4
+; NOSSE-NEXT:    retl $4
+;
+; SSE-LABEL: return_arg_v8bf16:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pushl %ebp
+; SSE-NEXT:    .cfi_def_cfa_offset 8
+; SSE-NEXT:    pushl %ebx
+; SSE-NEXT:    .cfi_def_cfa_offset 12
+; SSE-NEXT:    pushl %edi
+; SSE-NEXT:    .cfi_def_cfa_offset 16
+; SSE-NEXT:    pushl %esi
+; SSE-NEXT:    .cfi_def_cfa_offset 20
+; SSE-NEXT:    subl $12, %esp
+; SSE-NEXT:    .cfi_def_cfa_offset 32
+; SSE-NEXT:    .cfi_offset %esi, -20
+; SSE-NEXT:    .cfi_offset %edi, -16
+; SSE-NEXT:    .cfi_offset %ebx, -12
+; SSE-NEXT:    .cfi_offset %ebp, -8
+; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE-NEXT:    movss %xmm0, (%esp)
+; SSE-NEXT:    calll __truncsfbf2
+; SSE-NEXT:    movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill
+; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE-NEXT:    movss %xmm0, (%esp)
+; SSE-NEXT:    calll __truncsfbf2
+; SSE-NEXT:    movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill
+; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE-NEXT:    movss %xmm0, (%esp)
+; SSE-NEXT:    calll __truncsfbf2
+; SSE-NEXT:    movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill
+; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE-NEXT:    movss %xmm0, (%esp)
+; SSE-NEXT:    calll __truncsfbf2
+; SSE-NEXT:    movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill
+; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE-NEXT:    movss %xmm0, (%esp)
+; SSE-NEXT:    calll __truncsfbf2
+; SSE-NEXT:    movl %eax, %esi
+; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE-NEXT:    movss %xmm0, (%esp)
+; SSE-NEXT:    calll __truncsfbf2
+; SSE-NEXT:    movl %eax, %edi
+; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE-NEXT:    movss %xmm0, (%esp)
+; SSE-NEXT:    calll __truncsfbf2
+; SSE-NEXT:    movl %eax, %ebx
+; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE-NEXT:    movss %xmm0, (%esp)
+; SSE-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; SSE-NEXT:    calll __truncsfbf2
+; SSE-NEXT:    movw %ax, 14(%ebp)
+; SSE-NEXT:    movw %bx, 12(%ebp)
+; SSE-NEXT:    movw %di, 10(%ebp)
+; SSE-NEXT:    movw %si, 8(%ebp)
+; SSE-NEXT:    movzwl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 2-byte Folded Reload
+; SSE-NEXT:    movw %ax, 6(%ebp)
+; SSE-NEXT:    movzwl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 2-byte Folded Reload
+; SSE-NEXT:    movw %ax, 4(%ebp)
+; SSE-NEXT:    movzwl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 2-byte Folded Reload
+; SSE-NEXT:    movw %ax, 2(%ebp)
+; SSE-NEXT:    movzwl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 2-byte Folded Reload
+; SSE-NEXT:    movw %ax, (%ebp)
+; SSE-NEXT:    movl %ebp, %eax
+; SSE-NEXT:    addl $12, %esp
+; SSE-NEXT:    .cfi_def_cfa_offset 20
+; SSE-NEXT:    popl %esi
+; SSE-NEXT:    .cfi_def_cfa_offset 16
+; SSE-NEXT:    popl %edi
+; SSE-NEXT:    .cfi_def_cfa_offset 12
+; SSE-NEXT:    popl %ebx
+; SSE-NEXT:    .cfi_def_cfa_offset 8
+; SSE-NEXT:    popl %ebp
+; SSE-NEXT:    .cfi_def_cfa_offset 4
+; SSE-NEXT:    retl $4
+  ret <8 x bfloat> %x
+}
+
+define <16 x bfloat> @return_arg_v16bf16(<16 x bfloat> %x) {
+; NOSSE-LABEL: return_arg_v16bf16:
+; NOSSE:       # %bb.0:
+; NOSSE-NEXT:    pushl %ebp
+; NOSSE-NEXT:    .cfi_def_cfa_offset 8
+; NOSSE-NEXT:    pushl %ebx
+; NOSSE-NEXT:    .cfi_def_cfa_offset 12
+; NOSSE-NEXT:    pushl %edi
+; NOSSE-NEXT:    .cfi_def_cfa_offset 16
+; NOSSE-NEXT:    pushl %esi
+; NOSSE-NEXT:    .cfi_def_cfa_offset 20
+; NOSSE-NEXT:    subl $28, %esp
+; NOSSE-NEXT:    .cfi_def_cfa_offset 48
+; NOSSE-NEXT:    .cfi_offset %esi, -20
+; NOSSE-NEXT:    .cfi_offset %edi, -16
+; NOSSE-NEXT:    .cfi_offset %ebx, -12
+; NOSSE-NEXT:    .cfi_offset %ebp, -8
+; NOSSE-NEXT:    flds {{[0-9]+}}(%esp)
+; NOSSE-NEXT:    fstps (%esp)
+; NOSSE-NEXT:    calll __truncsfbf2
+; NOSSE-NEXT:    movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill
+; NOSSE-NEXT:    flds {{[0-9]+}}(%esp)
+; NOSSE-NEXT:    fstps (%esp)
+; NOSSE-NEXT:    calll __truncsfbf2
+; NOSSE-NEXT:    movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill
+; NOSSE-NEXT:    flds {{[0-9]+}}(%esp)
+; NOSSE-NEXT:    fstps (%esp)
+; NOSSE-NEXT:    calll __truncsfbf2
+; NOSSE-NEXT:    movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill
+; NOSSE-NEXT:    flds {{[0-9]+}}(%esp)
+; NOSSE-NEXT:    fstps (%esp)
+; NOSSE-NEXT:    calll __truncsfbf2
+; NOSSE-NEXT:    movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill
+; NOSSE-NEXT:    flds {{[0-9]+}}(%esp)
+; NOSSE-NEXT:    fstps (%esp)
+; NOSSE-NEXT:    calll __truncsfbf2
+; NOSSE-NEXT:    movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill
+; NOSSE-NEXT:    flds {{[0-9]+}}(%esp)
+; NOSSE-NEXT:    fstps (%esp)
+; NOSSE-NEXT:    calll __truncsfbf2
+; NOSSE-NEXT:    movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill
+; NOSSE-NEXT:    flds {{[0-9]+}}(%esp)
+; NOSSE-NEXT:    fstps (%esp)
+; NOSSE-NEXT:    calll __truncsfbf2
+; NOSSE-NEXT:    movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill
+; NOSSE-NEXT:    flds {{[0-9]+}}(%esp)
+; NOSSE-NEXT:    fstps (%esp)
+; NOSSE-NEXT:    calll __truncsfbf2
+; NOSSE-NEXT:    movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill
+; NOSSE-NEXT:    flds {{[0-9]+}}(%esp)
+; NOSSE-NEXT:    fstps (%esp)
+; NOSSE-NEXT:    calll __truncsfbf2
+; NOSSE-NEXT:    movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill
+; NOSSE-NEXT:    flds {{[0-9]+}}(%esp)
+; NOSSE-NEXT:    fstps (%esp)
+; NOSSE-NEXT:    calll __truncsfbf2
+; NOSSE-NEXT:    movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill
+; NOSSE-NEXT:    flds {{[0-9]+}}(%esp)
+; NOSSE-NEXT:    fstps (%esp)
+; NOSSE-NEXT:    calll __truncsfbf2
+; NOSSE-NEXT:    movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill
+; NOSSE-NEXT:    flds {{[0-9]+}}(%esp)
+; NOSSE-NEXT:    fstps (%esp)
+; NOSSE-NEXT:    calll __truncsfbf2
+; NOSSE-NEXT:    movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill
+; NOSSE-NEXT:    flds {{[0-9]+}}(%esp)
+; NOSSE-NEXT:    fstps (%esp)
+; NOSSE-NEXT:    calll __truncsfbf2
+; NOSSE-NEXT:    movl %eax, %esi
+; NOSSE-NEXT:    flds {{[0-9]+}}(%esp)
+; NOSSE-NEXT:    fstps (%esp)
+; NOSSE-NEXT:    calll __truncsfbf2
+; NOSSE-NEXT:    movl %eax, %ebx
+; NOSSE-NEXT:    flds {{[0-9]+}}(%esp)
+; NOSSE-NEXT:    fstps (%esp)
+; NOSSE-NEXT:    calll __truncsfbf2
+; NOSSE-NEXT:    movl %eax, %ebp
+; NOSSE-NEXT:    flds {{[0-9]+}}(%esp)
+; NOSSE-NEXT:    fstps (%esp)
+; NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; NOSSE-NEXT:    calll __truncsfbf2
+; NOSSE-NEXT:    movw %ax, 30(%edi)
+; NOSSE-NEXT:    movw %bp, 28(%edi)
+; NOSSE-NEXT:    movw %bx, 26(%edi)
+; NOSSE-NEXT:    movw %si, 24(%edi)
+; NOSSE-NEXT:    movzwl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 2-byte Folded Reload
+; NOSSE-NEXT:    movw %ax, 22(%edi)
+; NOSSE-NEXT:    movzwl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 2-byte Folded Reload
+; NOSSE-NEXT:    movw %ax, 20(%edi)
+; NOSSE-NEXT:    movzwl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 2-byte Folded Reload
+; NOSSE-NEXT:    movw %ax, 18(%edi)
+; NOSSE-NEXT:    movzwl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 2-byte Folded Reload
+; NOSSE-NEXT:    movw %ax, 16(%edi)
+; NOSSE-NEXT:    movzwl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 2-byte Folded Reload
+; NOSSE-NEXT:    movw %ax, 14(%edi)
+; NOSSE-NEXT:    movzwl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 2-byte Folded Reload
+; NOSSE-NEXT:    movw %ax, 12(%edi)
+; NOSSE-NEXT:    movzwl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 2-byte Folded Reload
+; NOSSE-NEXT:    movw %ax, 10(%edi)
+; NOSSE-NEXT:    movzwl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 2-byte Folded Reload
+; NOSSE-NEXT:    movw %ax, 8(%edi)
+; NOSSE-NEXT:    movzwl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 2-byte Folded Reload
+; NOSSE-NEXT:    movw %ax, 6(%edi)
+; NOSSE-NEXT:    movzwl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 2-byte Folded Reload
+; NOSSE-NEXT:    movw %ax, 4(%edi)
+; NOSSE-NEXT:    movzwl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 2-byte Folded Reload
+; NOSSE-NEXT:    movw %ax, 2(%edi)
+; NOSSE-NEXT:    movzwl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 2-byte Folded Reload
+; NOSSE-NEXT:    movw %ax, (%edi)
+; NOSSE-NEXT:    movl %edi, %eax
+; NOSSE-NEXT:    addl $28, %esp
+; NOSSE-NEXT:    .cfi_def_cfa_offset 20
+; NOSSE-NEXT:    popl %esi
+; NOSSE-NEXT:    .cfi_def_cfa_offset 16
+; NOSSE-NEXT:    popl %edi
+; NOSSE-NEXT:    .cfi_def_cfa_offset 12
+; NOSSE-NEXT:    popl %ebx
+; NOSSE-NEXT:    .cfi_def_cfa_offset 8
+; NOSSE-NEXT:    popl %ebp
+; NOSSE-NEXT:    .cfi_def_cfa_offset 4
+; NOSSE-NEXT:    retl $4
+;
+; SSE-LABEL: return_arg_v16bf16:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pushl %ebp
+; SSE-NEXT:    .cfi_def_cfa_offset 8
+; SSE-NEXT:    pushl %ebx
+; SSE-NEXT:    .cfi_def_cfa_offset 12
+; SSE-NEXT:    pushl %edi
+; SSE-NEXT:    .cfi_def_cfa_offset 16
+; SSE-NEXT:    p...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/93146


More information about the llvm-commits mailing list