[llvm] ca50cb1 - [SelectionDAG] Constant fold FP_TO_BF16 and BF16_TO_FP.

Wed Jun 15 09:53:14 PDT 2022

Author: Benjamin Kramer
Date: 2022-06-15T18:51:32+02:00
New Revision: ca50cb120ba5853e9a56de2151ba47c201ad6be6

URL: https://github.com/llvm/llvm-project/commit/ca50cb120ba5853e9a56de2151ba47c201ad6be6
DIFF: https://github.com/llvm/llvm-project/commit/ca50cb120ba5853e9a56de2151ba47c201ad6be6.diff

LOG: [SelectionDAG] Constant fold FP_TO_BF16 and BF16_TO_FP.

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
    llvm/test/CodeGen/X86/bfloat.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 97777d008421..bf222a77cbcc 100644

--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -4979,9 +4979,11 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
     case ISD::CTTZ_ZERO_UNDEF:
       return getConstant(Val.countTrailingZeros(), DL, VT, C->isTargetOpcode(),
                          C->isOpaque());
-    case ISD::FP16_TO_FP: {
+    case ISD::FP16_TO_FP:
+    case ISD::BF16_TO_FP: {
       bool Ignored;
-      APFloat FPV(APFloat::IEEEhalf(),
+      APFloat FPV(Opcode == ISD::FP16_TO_FP ? APFloat::IEEEhalf()
+                                            : APFloat::BFloat(),
                   (Val.getBitWidth() == 16) ? Val : Val.trunc(16));
 
       // This can return overflow, underflow, or inexact; we don't care.
@@ -5055,11 +5057,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
       if (VT == MVT::i64 && C->getValueType(0) == MVT::f64)
         return getConstant(V.bitcastToAPInt().getZExtValue(), DL, VT);
       break;
-    case ISD::FP_TO_FP16: {
+    case ISD::FP_TO_FP16:
+    case ISD::FP_TO_BF16: {
       bool Ignored;
       // This can return overflow, underflow, or inexact; we don't care.
       // FIXME need to be more flexible about rounding mode.
-      (void)V.convert(APFloat::IEEEhalf(),
+      (void)V.convert(Opcode == ISD::FP_TO_FP16 ? APFloat::IEEEhalf()
+                                                : APFloat::BFloat(),
                       APFloat::rmNearestTiesToEven, &Ignored);
       return getConstant(V.bitcastToAPInt().getZExtValue(), DL, VT);
     }

diff  --git a/llvm/test/CodeGen/X86/bfloat.ll b/llvm/test/CodeGen/X86/bfloat.ll
index ab8c59a81c65..404ad095ca75 100644
--- a/llvm/test/CodeGen/X86/bfloat.ll
+++ b/llvm/test/CodeGen/X86/bfloat.ll
@@ -69,3 +69,34 @@ define void @add_double(ptr %pa, ptr %pb, ptr %pc) {
   store double %dadd, ptr %pc
   ret void
 }
+
+define void @add_constant(ptr %pa, ptr %pc) {
+; CHECK-LABEL: add_constant:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pushq %rbx
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset %rbx, -16
+; CHECK-NEXT:    movq %rsi, %rbx
+; CHECK-NEXT:    movzwl (%rdi), %eax
+; CHECK-NEXT:    shll $16, %eax
+; CHECK-NEXT:    movd %eax, %xmm0
+; CHECK-NEXT:    addss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT:    callq __truncsfbf2 at PLT
+; CHECK-NEXT:    movw %ax, (%rbx)
+; CHECK-NEXT:    popq %rbx
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+  %a = load bfloat, ptr %pa
+  %add = fadd bfloat %a, 1.0
+  store bfloat %add, ptr %pc
+  ret void
+}
+
+define void @store_constant(ptr %pc) {
+; CHECK-LABEL: store_constant:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movw $16256, (%rdi) # imm = 0x3F80
+; CHECK-NEXT:    retq
+  store bfloat 1.0, ptr %pc
+  ret void
+}