[flang-commits] [flang] [flang][cuda] Lower launch_bounds values (PR #81537)

Valentin Clement バレンタイン クレメン via flang-commits flang-commits at lists.llvm.org
Mon Feb 12 20:58:03 PST 2024


https://github.com/clementval updated https://github.com/llvm/llvm-project/pull/81537

>From 7f7050ec86acd331c796fb57f298682885bd5196 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Mon, 12 Feb 2024 13:10:19 -0800
Subject: [PATCH 1/2] [flang][cuda] Lower launch_bounds values

---
 .../flang/Optimizer/Dialect/FIRAttr.td        | 12 +++++
 .../flang/Optimizer/Dialect/FIROpsSupport.h   |  5 +++
 flang/lib/Lower/CallInterface.cpp             | 45 ++++++++++++++++---
 flang/lib/Optimizer/Dialect/FIRAttr.cpp       |  3 +-
 flang/test/Lower/CUDA/cuda-proc-attribute.cuf |  6 +++
 5 files changed, 64 insertions(+), 7 deletions(-)

diff --git a/flang/include/flang/Optimizer/Dialect/FIRAttr.td b/flang/include/flang/Optimizer/Dialect/FIRAttr.td
index 00e293e2f04278..d55e93ba666e71 100644
--- a/flang/include/flang/Optimizer/Dialect/FIRAttr.td
+++ b/flang/include/flang/Optimizer/Dialect/FIRAttr.td
@@ -113,4 +113,16 @@ def fir_CUDAProcAttributeAttr :
   let assemblyFormat = [{ ```<` $value `>` }];
 }
 
+def fir_CUDALaunchBoundsAttr : fir_Attr<"CUDALaunchBounds"> {
+  let mnemonic = "launch_bounds";
+
+  let parameters = (ins
+    "mlir::IntegerAttr":$maxTBP,
+    "mlir::IntegerAttr":$minBPM,
+    OptionalParameter<"mlir::IntegerAttr">:$upperBoundClusterSize
+  );
+
+  let assemblyFormat = "`<` struct(params) `>`";
+}
+
 #endif // FIR_DIALECT_FIR_ATTRS
diff --git a/flang/include/flang/Optimizer/Dialect/FIROpsSupport.h b/flang/include/flang/Optimizer/Dialect/FIROpsSupport.h
index 6ac6a3116d40b0..29fa57cd7a0d8a 100644
--- a/flang/include/flang/Optimizer/Dialect/FIROpsSupport.h
+++ b/flang/include/flang/Optimizer/Dialect/FIROpsSupport.h
@@ -75,6 +75,11 @@ static constexpr llvm::StringRef getTargetAttrName() { return "fir.target"; }
 /// Attribute to mark Fortran entities with the CUDA attribute.
 static constexpr llvm::StringRef getCUDAAttrName() { return "fir.cuda_attr"; }
 
+/// Attribute to carry CUDA launch_bounds values.
+static constexpr llvm::StringRef getCUDALaunchBoundsAttrName() {
+  return "fir.cuda_launch_bounds";
+}
+
 /// Attribute to mark that a function argument is a character dummy procedure.
 /// Character dummy procedure have special ABI constraints.
 static constexpr llvm::StringRef getCharacterProcedureDummyAttrName() {
diff --git a/flang/lib/Lower/CallInterface.cpp b/flang/lib/Lower/CallInterface.cpp
index 41597c1b15386e..968fba7a66044a 100644
--- a/flang/lib/Lower/CallInterface.cpp
+++ b/flang/lib/Lower/CallInterface.cpp
@@ -524,6 +524,43 @@ static void addSymbolAttribute(mlir::func::FuncOp func,
                 mlir::StringAttr::get(&mlirContext, name));
 }
 
+static void
+setCUDAAttributes(mlir::func::FuncOp func,
+                  const Fortran::semantics::Symbol *sym,
+                  std::optional<Fortran::evaluate::characteristics::Procedure>
+                      characteristic) {
+  if (characteristic && characteristic->cudaSubprogramAttrs) {
+    func.getOperation()->setAttr(
+        fir::getCUDAAttrName(),
+        fir::getCUDAProcAttribute(func.getContext(),
+                                  *characteristic->cudaSubprogramAttrs));
+  }
+
+  if (sym) {
+    if (auto details =
+            sym->GetUltimate()
+                .detailsIf<Fortran::semantics::SubprogramDetails>()) {
+      if (!details->cudaLaunchBounds().empty()) {
+        assert(details->cudaLaunchBounds().size() >= 2 &&
+               "expect at least 2 values");
+        mlir::Type i64Ty = mlir::IntegerType::get(func.getContext(), 64);
+        auto maxTBPAttr =
+            mlir::IntegerAttr::get(i64Ty, details->cudaLaunchBounds()[0]);
+        auto minBPMAttr =
+            mlir::IntegerAttr::get(i64Ty, details->cudaLaunchBounds()[1]);
+        mlir::IntegerAttr ubAttr;
+        if (details->cudaLaunchBounds().size() > 2)
+          ubAttr =
+              mlir::IntegerAttr::get(i64Ty, details->cudaLaunchBounds()[2]);
+        func.getOperation()->setAttr(
+            fir::getCUDALaunchBoundsAttrName(),
+            fir::CUDALaunchBoundsAttr::get(func.getContext(), maxTBPAttr,
+                                           minBPMAttr, ubAttr));
+      }
+    }
+  }
+}
+
 /// Declare drives the different actions to be performed while analyzing the
 /// signature and building/finding the mlir::func::FuncOp.
 template <typename T>
@@ -559,12 +596,8 @@ void Fortran::lower::CallInterface<T>::declare() {
         if (!placeHolder.value().attributes.empty())
           func.setArgAttrs(placeHolder.index(), placeHolder.value().attributes);
       side().setFuncAttrs(func);
-    }
-    if (characteristic && characteristic->cudaSubprogramAttrs) {
-      func.getOperation()->setAttr(
-          fir::getCUDAAttrName(),
-          fir::getCUDAProcAttribute(func.getContext(),
-                                    *characteristic->cudaSubprogramAttrs));
+
+      setCUDAAttributes(func, side().getProcedureSymbol(), characteristic);
     }
   }
 }
diff --git a/flang/lib/Optimizer/Dialect/FIRAttr.cpp b/flang/lib/Optimizer/Dialect/FIRAttr.cpp
index 8df7a6c5cfc5d5..8d780e03dcbe73 100644
--- a/flang/lib/Optimizer/Dialect/FIRAttr.cpp
+++ b/flang/lib/Optimizer/Dialect/FIRAttr.cpp
@@ -298,5 +298,6 @@ void fir::printFirAttribute(FIROpsDialect *dialect, mlir::Attribute attr,
 void FIROpsDialect::registerAttributes() {
   addAttributes<ClosedIntervalAttr, ExactTypeAttr, FortranVariableFlagsAttr,
                 LowerBoundAttr, PointIntervalAttr, RealAttr, SubclassAttr,
-                UpperBoundAttr, CUDADataAttributeAttr, CUDAProcAttributeAttr>();
+                UpperBoundAttr, CUDADataAttributeAttr, CUDAProcAttributeAttr,
+                CUDALaunchBoundsAttr>();
 }
diff --git a/flang/test/Lower/CUDA/cuda-proc-attribute.cuf b/flang/test/Lower/CUDA/cuda-proc-attribute.cuf
index 050731086d8525..ade38d4b7f0917 100644
--- a/flang/test/Lower/CUDA/cuda-proc-attribute.cuf
+++ b/flang/test/Lower/CUDA/cuda-proc-attribute.cuf
@@ -32,3 +32,9 @@ attributes(host) attributes(device) integer function fct_host_device; end
 
 attributes(device) attributes(host) integer function fct_device_host; end
 ! CHECK: func.func @_QPfct_device_host() -> i32 attributes {fir.cuda_attr = #fir.cuda_proc<host_device>}
+
+attributes(global) launch_bounds(1, 2) subroutine sub_lbounds1(); end
+! CHECK: func.func @_QPsub_lbounds1() attributes {fir.cuda_attr = #fir.cuda_proc<global>, fir.cuda_launch_bounds = #fir.launch_bounds<maxTBP = 1 : i64, minBPM = 2 : i64>}
+
+attributes(global) launch_bounds(1, 2, 3) subroutine sub_lbounds2(); end
+! CHECK: func.func @_QPsub_lbounds2() attributes {fir.cuda_attr = #fir.cuda_proc<global>, fir.cuda_launch_bounds = #fir.launch_bounds<maxTBP = 1 : i64, minBPM = 2 : i64, upperBoundClusterSize = 3 : i64>}

>From dd9a29a870f50b38d1261c520a2376b748cd13f2 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Mon, 12 Feb 2024 20:57:51 -0800
Subject: [PATCH 2/2] Fix typo with maxTPB

---
 flang/include/flang/Optimizer/Dialect/FIRAttr.td | 2 +-
 flang/lib/Lower/CallInterface.cpp                | 4 ++--
 flang/test/Lower/CUDA/cuda-proc-attribute.cuf    | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/flang/include/flang/Optimizer/Dialect/FIRAttr.td b/flang/include/flang/Optimizer/Dialect/FIRAttr.td
index d55e93ba666e71..3602c67de1412a 100644
--- a/flang/include/flang/Optimizer/Dialect/FIRAttr.td
+++ b/flang/include/flang/Optimizer/Dialect/FIRAttr.td
@@ -117,7 +117,7 @@ def fir_CUDALaunchBoundsAttr : fir_Attr<"CUDALaunchBounds"> {
   let mnemonic = "launch_bounds";
 
   let parameters = (ins
-    "mlir::IntegerAttr":$maxTBP,
+    "mlir::IntegerAttr":$maxTPB,
     "mlir::IntegerAttr":$minBPM,
     OptionalParameter<"mlir::IntegerAttr">:$upperBoundClusterSize
   );
diff --git a/flang/lib/Lower/CallInterface.cpp b/flang/lib/Lower/CallInterface.cpp
index 968fba7a66044a..f990e0b7ce4dcf 100644
--- a/flang/lib/Lower/CallInterface.cpp
+++ b/flang/lib/Lower/CallInterface.cpp
@@ -544,7 +544,7 @@ setCUDAAttributes(mlir::func::FuncOp func,
         assert(details->cudaLaunchBounds().size() >= 2 &&
                "expect at least 2 values");
         mlir::Type i64Ty = mlir::IntegerType::get(func.getContext(), 64);
-        auto maxTBPAttr =
+        auto maxTPBAttr =
             mlir::IntegerAttr::get(i64Ty, details->cudaLaunchBounds()[0]);
         auto minBPMAttr =
             mlir::IntegerAttr::get(i64Ty, details->cudaLaunchBounds()[1]);
@@ -554,7 +554,7 @@ setCUDAAttributes(mlir::func::FuncOp func,
               mlir::IntegerAttr::get(i64Ty, details->cudaLaunchBounds()[2]);
         func.getOperation()->setAttr(
             fir::getCUDALaunchBoundsAttrName(),
-            fir::CUDALaunchBoundsAttr::get(func.getContext(), maxTBPAttr,
+            fir::CUDALaunchBoundsAttr::get(func.getContext(), maxTPBAttr,
                                            minBPMAttr, ubAttr));
       }
     }
diff --git a/flang/test/Lower/CUDA/cuda-proc-attribute.cuf b/flang/test/Lower/CUDA/cuda-proc-attribute.cuf
index ade38d4b7f0917..9eb2b85aaf0b83 100644
--- a/flang/test/Lower/CUDA/cuda-proc-attribute.cuf
+++ b/flang/test/Lower/CUDA/cuda-proc-attribute.cuf
@@ -34,7 +34,7 @@ attributes(device) attributes(host) integer function fct_device_host; end
 ! CHECK: func.func @_QPfct_device_host() -> i32 attributes {fir.cuda_attr = #fir.cuda_proc<host_device>}
 
 attributes(global) launch_bounds(1, 2) subroutine sub_lbounds1(); end
-! CHECK: func.func @_QPsub_lbounds1() attributes {fir.cuda_attr = #fir.cuda_proc<global>, fir.cuda_launch_bounds = #fir.launch_bounds<maxTBP = 1 : i64, minBPM = 2 : i64>}
+! CHECK: func.func @_QPsub_lbounds1() attributes {fir.cuda_attr = #fir.cuda_proc<global>, fir.cuda_launch_bounds = #fir.launch_bounds<maxTPB = 1 : i64, minBPM = 2 : i64>}
 
 attributes(global) launch_bounds(1, 2, 3) subroutine sub_lbounds2(); end
-! CHECK: func.func @_QPsub_lbounds2() attributes {fir.cuda_attr = #fir.cuda_proc<global>, fir.cuda_launch_bounds = #fir.launch_bounds<maxTBP = 1 : i64, minBPM = 2 : i64, upperBoundClusterSize = 3 : i64>}
+! CHECK: func.func @_QPsub_lbounds2() attributes {fir.cuda_attr = #fir.cuda_proc<global>, fir.cuda_launch_bounds = #fir.launch_bounds<maxTPB = 1 : i64, minBPM = 2 : i64, upperBoundClusterSize = 3 : i64>}



More information about the flang-commits mailing list