[compiler-rt] [llvm] [SelectionDAG] Add `STRICT_BF16_TO_FP` and `STRICT_FP_TO_BF16` (PR #80056)

Sun Mar 3 18:57:15 PST 2024

================
@@ -0,0 +1,180 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-linux-gnu | FileCheck %s --check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-linux-gnu | FileCheck %s --check-prefix=X64
+
+@a = global bfloat 0xR0000, align 2
+@b = global bfloat 0xR0000, align 2
+@c = global bfloat 0xR0000, align 2
+
+define float @bfloat_to_float() strictfp {
+; X32-LABEL: bfloat_to_float:
+; X32:       # %bb.0:
+; X32-NEXT:    subl $12, %esp
+; X32-NEXT:    .cfi_def_cfa_offset 16
+; X32-NEXT:    movzwl a, %eax
+; X32-NEXT:    movl %eax, (%esp)
+; X32-NEXT:    calll __extendbfsf2
+; X32-NEXT:    addl $12, %esp
+; X32-NEXT:    .cfi_def_cfa_offset 4
+; X32-NEXT:    retl
+;
+; X64-LABEL: bfloat_to_float:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    .cfi_def_cfa_offset 16
+; X64-NEXT:    movq a@GOTPCREL(%rip), %rax
+; X64-NEXT:    movzwl (%rax), %edi
+; X64-NEXT:    callq __extendbfsf2@PLT
+; X64-NEXT:    popq %rax
+; X64-NEXT:    .cfi_def_cfa_offset 8
+; X64-NEXT:    retq
+  %1 = load bfloat, ptr @a, align 2
+  %2 = tail call float @llvm.experimental.constrained.fpext.f32.bfloat(bfloat %1, metadata !"fpexcept.strict") #0
+  ret float %2
+}
+
+define double @bfloat_to_double() strictfp {
+; X32-LABEL: bfloat_to_double:
+; X32:       # %bb.0:
+; X32-NEXT:    subl $12, %esp
+; X32-NEXT:    .cfi_def_cfa_offset 16
+; X32-NEXT:    movzwl a, %eax
+; X32-NEXT:    movl %eax, (%esp)
+; X32-NEXT:    calll __extendbfsf2
+; X32-NEXT:    addl $12, %esp
----------------
phoebewang wrote:

Oh, I see: it is using x87 registers. IIRC, the value is kept at higher precision in the register, so we save an extra conversion.

https://github.com/llvm/llvm-project/pull/80056