[Mlir-commits] [mlir] [mlir] Add fast and ftz flags in gpu-lower-to-nvvm-pipeline (PR #84199)

Wed Mar 6 09:10:12 PST 2024

https://github.com/grypp created https://github.com/llvm/llvm-project/pull/84199

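This PR adds two boolean options, `ftz` (flush denormals to zero) and `fast` (fast math mode), to the `gpu-lower-to-nvvm-pipeline` options. Both default to `false` and are forwarded to the NVVM target options (`ftzFlag` and `fastFlag`) that are passed to `createGpuNVVMAttachTarget`.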

>From 5c94e0f4082736f127b286edf0e8f355550b0846 Mon Sep 17 00:00:00 2001
From: grypp <guray.ozen at gmail.com>
Date: Wed, 6 Mar 2024 17:05:49 +0000
Subject: [PATCH] [mlir] Add fast and ftz flags in gpu-lower-to-nvvm-pipeline

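This PR adds two boolean options, `ftz` (flush denormals to zero) and
`fast` (fast math mode), to the pipeline options. Both default to false
and are forwarded to the NVVM target options (`ftzFlag` and `fastFlag`)
passed to `createGpuNVVMAttachTarget`.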
---
 mlir/include/mlir/Dialect/GPU/Pipelines/Passes.h     | 10 ++++++++++
 mlir/lib/Dialect/GPU/Pipelines/GPUToNVVMPipeline.cpp |  2 ++
 2 files changed, 12 insertions(+)

diff --git a/mlir/include/mlir/Dialect/GPU/Pipelines/Passes.h b/mlir/include/mlir/Dialect/GPU/Pipelines/Passes.h
index caa0901bb49434..f98dbc75a4f94e 100644
--- a/mlir/include/mlir/Dialect/GPU/Pipelines/Passes.h
+++ b/mlir/include/mlir/Dialect/GPU/Pipelines/Passes.h
@@ -53,6 +53,16 @@ struct GPUToNVVMPipelineOptions
           "Whether to use the bareptr calling convention on the host (warning "
           "this should be false until the GPU layering is fixed)"),
       llvm::cl::init(false)};
+  PassOptions::Option<bool> ftz{
+      *this, "ftz",
+      llvm::cl::desc(
+          "Enable flush to zero for denormals"),
+      llvm::cl::init(false)};
+  PassOptions::Option<bool> fast{
+      *this, "fast",
+      llvm::cl::desc(
+          "Enable fast math mode."),
+      llvm::cl::init(false)};
 };
 
 //===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/GPU/Pipelines/GPUToNVVMPipeline.cpp b/mlir/lib/Dialect/GPU/Pipelines/GPUToNVVMPipeline.cpp
index db1974ddb3773b..8d836c07b1f16f 100644
--- a/mlir/lib/Dialect/GPU/Pipelines/GPUToNVVMPipeline.cpp
+++ b/mlir/lib/Dialect/GPU/Pipelines/GPUToNVVMPipeline.cpp
@@ -60,6 +60,8 @@ void buildCommonPassPipeline(
   nvvmTargetOptions.chip = options.cubinChip;
   nvvmTargetOptions.features = options.cubinFeatures;
   nvvmTargetOptions.optLevel = options.optLevel;
+  nvvmTargetOptions.ftzFlag = options.ftz;
+  nvvmTargetOptions.fastFlag = options.fast;
   pm.addPass(createGpuNVVMAttachTarget(nvvmTargetOptions));
   pm.addPass(createLowerAffinePass());
   pm.addPass(createArithToLLVMConversionPass());