[Mlir-commits] [mlir] [flang] [llvm] [flang][Draft/RFC] Set unsafe_fp_math attribute for -ffast-math (PR #79301)

Wed Jan 24 07:08:06 PST 2024

https://github.com/asb created https://github.com/llvm/llvm-project/pull/79301

This is a draft/RFC, posted while the impact (if any) is more fully investigated.

CC @preames 

>From 505283264f90bb05b33954dc664b19b3016d1725 Mon Sep 17 00:00:00 2001
From: Alex Bradbury <asb at igalia.com>
Date: Wed, 24 Jan 2024 13:19:29 +0000
Subject: [PATCH 1/2] [MLIR][LLVM] Add unsafe-fp-math attribute support

---
 mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td  | 3 ++-
 mlir/lib/Target/LLVMIR/ModuleTranslation.cpp | 4 ++++
 mlir/test/Dialect/LLVMIR/func.mlir           | 6 ++++++
 3 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
index 01d476f530b1c57..aad633a3bf8a589 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
@@ -1428,7 +1428,8 @@ def LLVM_LLVMFuncOp : LLVM_Op<"func", [
     OptionalAttr<LLVM_VScaleRangeAttr>:$vscale_range,
     OptionalAttr<FramePointerKindAttr>:$frame_pointer,
     OptionalAttr<StrAttr>:$target_cpu,
-    OptionalAttr<LLVM_TargetFeaturesAttr>:$target_features
+    OptionalAttr<LLVM_TargetFeaturesAttr>:$target_features,
+    OptionalAttr<BoolAttr>:$unsafe_fp_math
   );
 
   let regions = (region AnyRegion:$body);
diff --git a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
index 69a1cbe5969e859..bcb97f8e422f824 100644
--- a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
@@ -37,6 +37,7 @@
 
 #include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/TypeSwitch.h"
 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
 #include "llvm/IR/BasicBlock.h"
@@ -1214,6 +1215,9 @@ LogicalResult ModuleTranslation::convertOneFunction(LLVMFuncOp func) {
         getLLVMContext(), attr->getMinRange().getInt(),
         attr->getMaxRange().getInt()));
 
+  if (auto unsafeFpMath = func.getUnsafeFpMath())
+    llvmFunc->addFnAttr("unsafe-fp-math", llvm::toStringRef(*unsafeFpMath));
+
   // Add function attribute frame-pointer, if found.
   if (FramePointerKindAttr attr = func.getFramePointerAttr())
     llvmFunc->addFnAttr("frame-pointer",
diff --git a/mlir/test/Dialect/LLVMIR/func.mlir b/mlir/test/Dialect/LLVMIR/func.mlir
index 9dc1bc57034e02f..5177b166f088980 100644
--- a/mlir/test/Dialect/LLVMIR/func.mlir
+++ b/mlir/test/Dialect/LLVMIR/func.mlir
@@ -257,6 +257,12 @@ module {
   llvm.func @frame_pointer_roundtrip() attributes {frame_pointer = #llvm.framePointerKind<"non-leaf">} {
     llvm.return
   }
+
+  llvm.func @unsafe_fp_math_roundtrip() attributes {unsafe_fp_math = true} {
+    // CHECK: @unsafe_fp_math_roundtrip
+    // CHECK-SAME: attributes {unsafe_fp_math = true}
+    llvm.return
+  }
 }
 
 // -----

>From 5548f7db920fe05d4f809c07b6fe3f8bc2c80b10 Mon Sep 17 00:00:00 2001
From: Alex Bradbury <asb at igalia.com>
Date: Wed, 24 Jan 2024 15:04:17 +0000
Subject: [PATCH 2/2] [flang] Set the unsafe-fp-math=true if -ffast-math is set

Although fast-math flags are set on individual IR operations, the
"unsafe-fp-math" function attribute isn't set. Clang sets both, and
matching that seems sensible.

I am currently investigating whether the lack of the function-level
attribute is causing missed optimisations. If it is, that would
indicate an LLVM bug/omission, but matching clang's IR codegen approach
still doesn't seem a bad strategy.
---
 .../flang/Optimizer/Transforms/Passes.h       |   2 +-
 .../flang/Optimizer/Transforms/Passes.td      |   3 +
 flang/include/flang/Tools/CLOptions.inc       |   4 +-
 flang/include/flang/Tools/CrossToolHelpers.h  |  12 +-
 flang/lib/Frontend/FrontendActions.cpp        |   3 +-
 .../lib/Optimizer/Transforms/FunctionAttr.cpp |   8 +-
 llvm/test/CodeGen/RISCV/bfloat-round-conv.ll  | 283 ++++++++++++++++++
 7 files changed, 309 insertions(+), 6 deletions(-)
 create mode 100644 llvm/test/CodeGen/RISCV/bfloat-round-conv.ll

diff --git a/flang/include/flang/Optimizer/Transforms/Passes.h b/flang/include/flang/Optimizer/Transforms/Passes.h
index 6970da8698ae84d..4e4670bcb47ced6 100644
--- a/flang/include/flang/Optimizer/Transforms/Passes.h
+++ b/flang/include/flang/Optimizer/Transforms/Passes.h
@@ -91,7 +91,7 @@ struct FunctionAttrTypes {
 
 std::unique_ptr<mlir::Pass> createFunctionAttrPass();
 std::unique_ptr<mlir::Pass>
-createFunctionAttrPass(FunctionAttrTypes &functionAttr);
+createFunctionAttrPass(FunctionAttrTypes &functionAttr, bool unsafeFPMath);
 
 // declarative passes
 #define GEN_PASS_REGISTRATION
diff --git a/flang/include/flang/Optimizer/Transforms/Passes.td b/flang/include/flang/Optimizer/Transforms/Passes.td
index e3c45d41f04cc71..53fb8aa6ac78fe8 100644
--- a/flang/include/flang/Optimizer/Transforms/Passes.td
+++ b/flang/include/flang/Optimizer/Transforms/Passes.td
@@ -366,6 +366,9 @@ def FunctionAttr : Pass<"function-attr", "mlir::func::FuncOp"> {
            "mlir::LLVM::framePointerKind::FramePointerKind", 
            /*default=*/"mlir::LLVM::framePointerKind::FramePointerKind{}",
            "frame pointer">,
+    Option<"unsafeFPMath", "unsafe-fp-math",
+           "bool", /*default=*/"false",
+           "Set the unsafe-fp-math attribute on functions in the module.">,
   ];
   let constructor = "::fir::createFunctionAttrPass()";
 }
diff --git a/flang/include/flang/Tools/CLOptions.inc b/flang/include/flang/Tools/CLOptions.inc
index 96d3869cd093912..69be13cb1d014e8 100644
--- a/flang/include/flang/Tools/CLOptions.inc
+++ b/flang/include/flang/Tools/CLOptions.inc
@@ -315,7 +315,7 @@ inline void createDefaultFIRCodeGenPassPipeline(
   // Add function attributes
   fir::FunctionAttrTypes functionAttrs;
 
-  if (config.FramePointerKind != llvm::FramePointerKind::None) {
+  if (config.FramePointerKind != llvm::FramePointerKind::None || config.UnsafeFPMath) {
     if (config.FramePointerKind == llvm::FramePointerKind::NonLeaf)
       functionAttrs.framePointerKind =
           mlir::LLVM::framePointerKind::FramePointerKind::NonLeaf;
@@ -323,7 +323,7 @@ inline void createDefaultFIRCodeGenPassPipeline(
       functionAttrs.framePointerKind =
           mlir::LLVM::framePointerKind::FramePointerKind::All;
 
-    pm.addPass(fir::createFunctionAttrPass(functionAttrs));
+    pm.addPass(fir::createFunctionAttrPass(functionAttrs, config.UnsafeFPMath));
   }
 
   fir::addFIRToLLVMPass(pm, config);
diff --git a/flang/include/flang/Tools/CrossToolHelpers.h b/flang/include/flang/Tools/CrossToolHelpers.h
index b61224ff4f1b3cd..4b3423a737ed353 100644
--- a/flang/include/flang/Tools/CrossToolHelpers.h
+++ b/flang/include/flang/Tools/CrossToolHelpers.h
@@ -13,6 +13,7 @@
 #ifndef FORTRAN_TOOLS_CROSS_TOOL_HELPERS_H
 #define FORTRAN_TOOLS_CROSS_TOOL_HELPERS_H
 
+#include "flang/Common/MathOptionsBase.h"
 #include "flang/Frontend/CodeGenOptions.h"
 #include "flang/Frontend/LangOptions.h"
 #include <cstdint>
@@ -28,7 +29,8 @@ struct MLIRToLLVMPassPipelineConfig {
     OptLevel = level;
   }
   explicit MLIRToLLVMPassPipelineConfig(llvm::OptimizationLevel level,
-      const Fortran::frontend::CodeGenOptions &opts) {
+      const Fortran::frontend::CodeGenOptions &opts,
+      const Fortran::common::MathOptionsBase &mathOpts) {
     OptLevel = level;
     StackArrays = opts.StackArrays;
     Underscoring = opts.Underscoring;
@@ -36,6 +38,13 @@ struct MLIRToLLVMPassPipelineConfig {
     DebugInfo = opts.getDebugInfo();
     AliasAnalysis = opts.AliasAnalysis;
     FramePointerKind = opts.getFramePointer();
+    // TODO: This matches the set of options enabled for Ofast, but this is
+    // probably overkill. Sadly the precise semantics of unsafe-fp-math=true
+    // don't seem to be clearly documented.
+    UnsafeFPMath = mathOpts.getNoHonorNaNs() && mathOpts.getNoHonorInfs() &&
+        mathOpts.getNoSignedZeros() && mathOpts.getReciprocalMath() &&
+        mathOpts.getFPContractEnabled() && mathOpts.getApproxFunc() &&
+        mathOpts.getAssociativeMath();
   }
 
   llvm::OptimizationLevel OptLevel; ///< optimisation level
@@ -49,6 +58,7 @@ struct MLIRToLLVMPassPipelineConfig {
       llvm::FramePointerKind::None; ///< Add frame pointer to functions.
   unsigned VScaleMin = 0; ///< SVE vector range minimum.
   unsigned VScaleMax = 0; ///< SVE vector range maximum.
+  bool UnsafeFPMath = false; ///< Set unsafe-fp-math attribute for functions.
 };
 
 struct OffloadModuleOpts {
diff --git a/flang/lib/Frontend/FrontendActions.cpp b/flang/lib/Frontend/FrontendActions.cpp
index 65c4df7388f97b2..17d5670b4134d86 100644
--- a/flang/lib/Frontend/FrontendActions.cpp
+++ b/flang/lib/Frontend/FrontendActions.cpp
@@ -787,6 +787,7 @@ void CodeGenAction::generateLLVMIR() {
 
   CompilerInstance &ci = this->getInstance();
   auto opts = ci.getInvocation().getCodeGenOpts();
+  auto mathOpts = ci.getInvocation().getLoweringOpts().getMathOptions();
   llvm::OptimizationLevel level = mapToLevel(opts);
 
   fir::support::loadDialects(*mlirCtx);
@@ -799,7 +800,7 @@ void CodeGenAction::generateLLVMIR() {
   pm.addPass(std::make_unique<Fortran::lower::VerifierPass>());
   pm.enableVerifier(/*verifyPasses=*/true);
 
-  MLIRToLLVMPassPipelineConfig config(level, opts);
+  MLIRToLLVMPassPipelineConfig config(level, opts, mathOpts);
 
   if (auto vsr = getVScaleRange(ci)) {
     config.VScaleMin = vsr->first;
diff --git a/flang/lib/Optimizer/Transforms/FunctionAttr.cpp b/flang/lib/Optimizer/Transforms/FunctionAttr.cpp
index 55b908ba5d86139..bf7a83d6e47dcd7 100644
--- a/flang/lib/Optimizer/Transforms/FunctionAttr.cpp
+++ b/flang/lib/Optimizer/Transforms/FunctionAttr.cpp
@@ -27,6 +27,7 @@ class FunctionAttrPass : public fir::impl::FunctionAttrBase<FunctionAttrPass> {
 public:
   FunctionAttrPass(const fir::FunctionAttrOptions &options) {
     framePointerKind = options.framePointerKind;
+    unsafeFPMath = options.unsafeFPMath;
   }
   FunctionAttrPass() {}
   void runOnOperation() override;
@@ -45,14 +46,19 @@ void FunctionAttrPass::runOnOperation() {
     func->setAttr("frame_pointer", mlir::LLVM::FramePointerKindAttr::get(
                                        context, framePointerKind));
 
+  if (unsafeFPMath)
+    func->setAttr("unsafe_fp_math", mlir::BoolAttr::get(context, true));
+
   LLVM_DEBUG(llvm::dbgs() << "=== End " DEBUG_TYPE " ===\n");
 }
 
 std::unique_ptr<mlir::Pass>
-fir::createFunctionAttrPass(fir::FunctionAttrTypes &functionAttr) {
+fir::createFunctionAttrPass(fir::FunctionAttrTypes &functionAttr,
+                            bool unsafeFPMath) {
   FunctionAttrOptions opts;
   // Frame pointer
   opts.framePointerKind = functionAttr.framePointerKind;
+  opts.unsafeFPMath = unsafeFPMath;
 
   return std::make_unique<FunctionAttrPass>(opts);
 }
diff --git a/llvm/test/CodeGen/RISCV/bfloat-round-conv.ll b/llvm/test/CodeGen/RISCV/bfloat-round-conv.ll
new file mode 100644
index 000000000000000..e24d379cd6fc628
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/bfloat-round-conv.ll
@@ -0,0 +1,283 @@
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-zfbfmin -verify-machineinstrs \
+; RUN:   -target-abi ilp32f < %s | FileCheck -check-prefixes=CHECK32ZFBFMIN,RV32IZFBFMIN %s
+; RUN: llc -mtriple=riscv32 -mattr=+d,+experimental-zfbfmin -verify-machineinstrs \
+; RUN:   -target-abi ilp32d < %s | FileCheck -check-prefixes=CHECK32ZFBFMIN,R32IDZFBFMIN %s
+; RUN: llc -mtriple=riscv32 -mattr=+d -verify-machineinstrs \
+; RUN:   -target-abi ilp32d < %s | FileCheck -check-prefixes=RV32ID %s
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-zfbfmin -verify-machineinstrs \
+; RUN:   -target-abi lp64f < %s | FileCheck -check-prefixes=CHECK64ZFBFMIN,RV64IZFBFMIN %s
+; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-zfbfmin -verify-machineinstrs \
+; RUN:   -target-abi lp64d < %s | FileCheck -check-prefixes=CHECK64ZFBFMIN,RV64IDZFBFMIN %s
+; RUN: llc -mtriple=riscv64 -mattr=+d -verify-machineinstrs \
+; RUN:   -target-abi lp64d < %s | FileCheck -check-prefixes=RV64ID %s
+
+define signext i8 @test_floor_si8(bfloat %x) {
+  %a = call bfloat @llvm.floor.bf16(bfloat %x)
+  %b = fptosi bfloat %a to i8
+  ret i8 %b
+}
+
+; define signext i16 @test_floor_si16(bfloat %x) {
+;   %a = call bfloat @llvm.floor.bf16(bfloat %x)
+;   %b = fptosi bfloat %a to i16
+;   ret i16 %b
+; }
+
+; define signext i32 @test_floor_si32(bfloat %x) {
+;   %a = call bfloat @llvm.floor.bf16(bfloat %x)
+;   %b = fptosi bfloat %a to i32
+;   ret i32 %b
+; }
+
+; define i64 @test_floor_si64(bfloat %x) {
+;   %a = call bfloat @llvm.floor.bf16(bfloat %x)
+;   %b = fptosi bfloat %a to i64
+;   ret i64 %b
+; }
+
+; define zeroext i8 @test_floor_ui8(bfloat %x) {
+;   %a = call bfloat @llvm.floor.bf16(bfloat %x)
+;   %b = fptoui bfloat %a to i8
+;   ret i8 %b
+; }
+
+; define zeroext i16 @test_floor_ui16(bfloat %x) {
+;   %a = call bfloat @llvm.floor.bf16(bfloat %x)
+;   %b = fptoui bfloat %a to i16
+;   ret i16 %b
+; }
+
+; define signext i32 @test_floor_ui32(bfloat %x) {
+;   %a = call bfloat @llvm.floor.bf16(bfloat %x)
+;   %b = fptoui bfloat %a to i32
+;   ret i32 %b
+; }
+
+; define i64 @test_floor_ui64(bfloat %x) {
+;   %a = call bfloat @llvm.floor.bf16(bfloat %x)
+;   %b = fptoui bfloat %a to i64
+;   ret i64 %b
+; }
+
+; define signext i8 @test_ceil_si8(bfloat %x) {
+;   %a = call bfloat @llvm.ceil.bf16(bfloat %x)
+;   %b = fptosi bfloat %a to i8
+;   ret i8 %b
+; }
+
+; define signext i16 @test_ceil_si16(bfloat %x) {
+;   %a = call bfloat @llvm.ceil.bf16(bfloat %x)
+;   %b = fptosi bfloat %a to i16
+;   ret i16 %b
+; }
+
+; define signext i32 @test_ceil_si32(bfloat %x) {
+;   %a = call bfloat @llvm.ceil.bf16(bfloat %x)
+;   %b = fptosi bfloat %a to i32
+;   ret i32 %b
+; }
+
+; define i64 @test_ceil_si64(bfloat %x) {
+;   %a = call bfloat @llvm.ceil.bf16(bfloat %x)
+;   %b = fptosi bfloat %a to i64
+;   ret i64 %b
+; }
+
+; define zeroext i8 @test_ceil_ui8(bfloat %x) {
+;   %a = call bfloat @llvm.ceil.bf16(bfloat %x)
+;   %b = fptoui bfloat %a to i8
+;   ret i8 %b
+; }
+
+; define zeroext i16 @test_ceil_ui16(bfloat %x) {
+;   %a = call bfloat @llvm.ceil.bf16(bfloat %x)
+;   %b = fptoui bfloat %a to i16
+;   ret i16 %b
+; }
+
+; define signext i32 @test_ceil_ui32(bfloat %x) {
+;   %a = call bfloat @llvm.ceil.bf16(bfloat %x)
+;   %b = fptoui bfloat %a to i32
+;   ret i32 %b
+; }
+
+; define i64 @test_ceil_ui64(bfloat %x) {
+;   %a = call bfloat @llvm.ceil.bf16(bfloat %x)
+;   %b = fptoui bfloat %a to i64
+;   ret i64 %b
+; }
+
+; define signext i8 @test_trunc_si8(bfloat %x) {
+;   %a = call bfloat @llvm.trunc.bf16(bfloat %x)
+;   %b = fptosi bfloat %a to i8
+;   ret i8 %b
+; }
+
+; define signext i16 @test_trunc_si16(bfloat %x) {
+;   %a = call bfloat @llvm.trunc.bf16(bfloat %x)
+;   %b = fptosi bfloat %a to i16
+;   ret i16 %b
+; }
+
+; define signext i32 @test_trunc_si32(bfloat %x) {
+;   %a = call bfloat @llvm.trunc.bf16(bfloat %x)
+;   %b = fptosi bfloat %a to i32
+;   ret i32 %b
+; }
+
+; define i64 @test_trunc_si64(bfloat %x) {
+;   %a = call bfloat @llvm.trunc.bf16(bfloat %x)
+;   %b = fptosi bfloat %a to i64
+;   ret i64 %b
+; }
+
+; define zeroext i8 @test_trunc_ui8(bfloat %x) {
+;   %a = call bfloat @llvm.trunc.bf16(bfloat %x)
+;   %b = fptoui bfloat %a to i8
+;   ret i8 %b
+; }
+
+; define zeroext i16 @test_trunc_ui16(bfloat %x) {
+;   %a = call bfloat @llvm.trunc.bf16(bfloat %x)
+;   %b = fptoui bfloat %a to i16
+;   ret i16 %b
+; }
+
+; define signext i32 @test_trunc_ui32(bfloat %x) {
+;   %a = call bfloat @llvm.trunc.bf16(bfloat %x)
+;   %b = fptoui bfloat %a to i32
+;   ret i32 %b
+; }
+
+; define i64 @test_trunc_ui64(bfloat %x) {
+;   %a = call bfloat @llvm.trunc.bf16(bfloat %x)
+;   %b = fptoui bfloat %a to i64
+;   ret i64 %b
+; }
+
+; define signext i8 @test_round_si8(bfloat %x) {
+;   %a = call bfloat @llvm.round.bf16(bfloat %x)
+;   %b = fptosi bfloat %a to i8
+;   ret i8 %b
+; }
+
+; define signext i16 @test_round_si16(bfloat %x) {
+;   %a = call bfloat @llvm.round.bf16(bfloat %x)
+;   %b = fptosi bfloat %a to i16
+;   ret i16 %b
+; }
+
+; define signext i32 @test_round_si32(bfloat %x) {
+;   %a = call bfloat @llvm.round.bf16(bfloat %x)
+;   %b = fptosi bfloat %a to i32
+;   ret i32 %b
+; }
+
+; define i64 @test_round_si64(bfloat %x) {
+;   %a = call bfloat @llvm.round.bf16(bfloat %x)
+;   %b = fptosi bfloat %a to i64
+;   ret i64 %b
+; }
+
+; define zeroext i8 @test_round_ui8(bfloat %x) {
+;   %a = call bfloat @llvm.round.bf16(bfloat %x)
+;   %b = fptoui bfloat %a to i8
+;   ret i8 %b
+; }
+
+; define zeroext i16 @test_round_ui16(bfloat %x) {
+;   %a = call bfloat @llvm.round.bf16(bfloat %x)
+;   %b = fptoui bfloat %a to i16
+;   ret i16 %b
+; }
+
+; define signext i32 @test_round_ui32(bfloat %x) {
+;   %a = call bfloat @llvm.round.bf16(bfloat %x)
+;   %b = fptoui bfloat %a to i32
+;   ret i32 %b
+; }
+
+; define i64 @test_round_ui64(bfloat %x) {
+;   %a = call bfloat @llvm.round.bf16(bfloat %x)
+;   %b = fptoui bfloat %a to i64
+;   ret i64 %b
+; }
+
+; define signext i8 @test_roundeven_si8(bfloat %x) {
+;   %a = call bfloat @llvm.roundeven.bf16(bfloat %x)
+;   %b = fptosi bfloat %a to i8
+;   ret i8 %b
+; }
+
+; define signext i16 @test_roundeven_si16(bfloat %x) {
+;   %a = call bfloat @llvm.roundeven.bf16(bfloat %x)
+;   %b = fptosi bfloat %a to i16
+;   ret i16 %b
+; }
+
+; define signext i32 @test_roundeven_si32(bfloat %x) {
+;   %a = call bfloat @llvm.roundeven.bf16(bfloat %x)
+;   %b = fptosi bfloat %a to i32
+;   ret i32 %b
+; }
+
+; define i64 @test_roundeven_si64(bfloat %x) {
+;   %a = call bfloat @llvm.roundeven.bf16(bfloat %x)
+;   %b = fptosi bfloat %a to i64
+;   ret i64 %b
+; }
+
+; define zeroext i8 @test_roundeven_ui8(bfloat %x) {
+;   %a = call bfloat @llvm.roundeven.bf16(bfloat %x)
+;   %b = fptoui bfloat %a to i8
+;   ret i8 %b
+; }
+
+; define zeroext i16 @test_roundeven_ui16(bfloat %x) {
+;   %a = call bfloat @llvm.roundeven.bf16(bfloat %x)
+;   %b = fptoui bfloat %a to i16
+;   ret i16 %b
+; }
+
+; define signext i32 @test_roundeven_ui32(bfloat %x) {
+;   %a = call bfloat @llvm.roundeven.bf16(bfloat %x)
+;   %b = fptoui bfloat %a to i32
+;   ret i32 %b
+; }
+
+; define i64 @test_roundeven_ui64(bfloat %x) {
+;   %a = call bfloat @llvm.roundeven.bf16(bfloat %x)
+;   %b = fptoui bfloat %a to i64
+;   ret i64 %b
+; }
+
+; define bfloat @test_floor_bfloat(bfloat %x) {
+;   %a = call bfloat @llvm.floor.bf16(bfloat %x)
+;   ret bfloat %a
+; }
+
+; define bfloat @test_ceil_bfloat(bfloat %x) {
+;   %a = call bfloat @llvm.ceil.bf16(bfloat %x)
+;   ret bfloat %a
+; }
+
+; define bfloat @test_trunc_bfloat(bfloat %x) {
+;   %a = call bfloat @llvm.trunc.bf16(bfloat %x)
+;   ret bfloat %a
+; }
+
+; define bfloat @test_round_bfloat(bfloat %x) {
+;   %a = call bfloat @llvm.round.bf16(bfloat %x)
+;   ret bfloat %a
+; }
+
+; define bfloat @test_roundeven_bfloat(bfloat %x) {
+;   %a = call bfloat @llvm.roundeven.bf16(bfloat %x)
+;   ret bfloat %a
+; }
+
+declare bfloat @llvm.floor.bf16(bfloat)
+; declare bfloat @llvm.ceil.bf16(bfloat)
+; declare bfloat @llvm.trunc.bf16(bfloat)
+; declare bfloat @llvm.round.bf16(bfloat)
+; declare bfloat @llvm.roundeven.bf16(bfloat)