[flang-commits] [flang] [flang][openacc] add acc.routine op for external names added in bind clauses. (PR #205591)

Andre Kuhlenschmidt via flang-commits flang-commits at lists.llvm.org
Wed Jun 24 09:21:28 PDT 2026


https://github.com/akuhlens created https://github.com/llvm/llvm-project/pull/205591

This adds acc.routine ops for the func.func ops that declare the external functions named in device-specific bind clauses. This is needed so that the ACCRoutineToGPUFunc pass moves those function declarations into the correct region.

>From 40d03509e7fe50b240c3b711519f6e4d7d799779 Mon Sep 17 00:00:00 2001
From: Andre Kuhlenschmidt <akuhlenschmi at nvidia.com>
Date: Tue, 23 Jun 2026 09:39:35 -0700
Subject: [PATCH 1/4] [flang][acc] Add routine metadata for bind targets

Materialize acc.routine metadata for acc routine bind targets that do not already carry routine info. This lets downstream OpenACC routine lowering recognize bound calls while preserving explicitly declared target routine clauses.
---
 flang/lib/Lower/OpenACC.cpp | 45 ++++++++++++++++++++++++++++++++-----
 1 file changed, 39 insertions(+), 6 deletions(-)

diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp
index 5fbc678e6d0ae..5ab3718d6b724 100644
--- a/flang/lib/Lower/OpenACC.cpp
+++ b/flang/lib/Lower/OpenACC.cpp
@@ -4687,7 +4687,15 @@ void Fortran::lower::materializeOpenACCRoutineBindTargets(
           ? converter.getMLIRSymbolTable()
           : nullptr;
 
-  for (mlir::acc::RoutineOp routineOp : module.getOps<mlir::acc::RoutineOp>()) {
+  auto appendAttrs = [](mlir::ArrayAttr attrs,
+                        llvm::SmallVector<mlir::Attribute> &out) {
+    if (attrs)
+      out.append(attrs.begin(), attrs.end());
+  };
+
+  llvm::SmallVector<mlir::acc::RoutineOp> routineOps(
+      module.getOps<mlir::acc::RoutineOp>());
+  for (mlir::acc::RoutineOp routineOp : routineOps) {
     // bind renames the same callable, so clone the decorated routine's type.
     mlir::func::FuncOp decorated = fir::FirOpBuilder::getNamedFunction(
         module, symbolTable, routineOp.getFuncName());
@@ -4696,20 +4704,45 @@ void Fortran::lower::materializeOpenACCRoutineBindTargets(
                   : mlir::FunctionType::get(builder.getContext(), {}, {});
 
     auto declare = [&](llvm::StringRef name) {
-      if (!fir::FirOpBuilder::getNamedFunction(module, symbolTable, name))
-        fir::FirOpBuilder::createFunction(builder.getUnknownLoc(), module, name,
-                                          type, symbolTable);
+      if (mlir::func::FuncOp func =
+              fir::FirOpBuilder::getNamedFunction(module, symbolTable, name))
+        return func;
+      return fir::FirOpBuilder::createFunction(builder.getUnknownLoc(), module,
+                                               name, type, symbolTable);
+    };
+
+    auto createRoutineForBindTarget = [&](mlir::func::FuncOp target) {
+      if (target->hasAttr(mlir::acc::getRoutineInfoAttrName()))
+        return;
+
+      llvm::SmallVector<mlir::Attribute> emptyBindIdNames, emptyBindStrNames,
+          emptyBindIdNameDeviceTypes, emptyBindStrNameDeviceTypes,
+          gangDeviceTypes, gangDimValues, gangDimDeviceTypes, seqDeviceTypes,
+          workerDeviceTypes, vectorDeviceTypes;
+      appendAttrs(routineOp.getGangAttr(), gangDeviceTypes);
+      appendAttrs(routineOp.getGangDimAttr(), gangDimValues);
+      appendAttrs(routineOp.getGangDimDeviceTypeAttr(), gangDimDeviceTypes);
+      appendAttrs(routineOp.getSeqAttr(), seqDeviceTypes);
+      appendAttrs(routineOp.getWorkerAttr(), workerDeviceTypes);
+      appendAttrs(routineOp.getVectorAttr(), vectorDeviceTypes);
+
+      createOpenACCRoutineConstruct(
+          converter, routineOp.getLoc(), module, target, target.getName().str(),
+          routineOp.getNohost(), emptyBindIdNames, emptyBindStrNames,
+          emptyBindIdNameDeviceTypes, emptyBindStrNameDeviceTypes,
+          gangDeviceTypes, gangDimValues, gangDimDeviceTypes, seqDeviceTypes,
+          workerDeviceTypes, vectorDeviceTypes);
     };
 
     // bind(identifier) is mangled; bind("string") is a verbatim asm name.
     if (mlir::ArrayAttr binds = routineOp.getBindIdNameAttr())
       for (mlir::Attribute bind : binds)
         if (auto symRef = mlir::dyn_cast<mlir::SymbolRefAttr>(bind))
-          declare(symRef.getLeafReference());
+          createRoutineForBindTarget(declare(symRef.getLeafReference()));
     if (mlir::ArrayAttr binds = routineOp.getBindStrNameAttr())
       for (mlir::Attribute bind : binds)
         if (auto strAttr = mlir::dyn_cast<mlir::StringAttr>(bind))
-          declare(strAttr.getValue());
+          createRoutineForBindTarget(declare(strAttr.getValue()));
   }
 }
 

>From 736c9f1e06d0bfa0f3795fac3387b978ec779d54 Mon Sep 17 00:00:00 2001
From: Andre Kuhlenschmidt <akuhlenschmi at nvidia.com>
Date: Tue, 23 Jun 2026 12:00:59 -0700
Subject: [PATCH 2/4] [OpenACC] Filter bind target routine clauses by device
 type

When materializing undeclared acc routine bind targets, copy only the routine clauses that apply to each target's bind device_type. This prevents a target for one device type from inheriting parallelism metadata meant for another bind target.

Add a lowering regression test with separate nvidia and multicore bind targets.
---
 flang/lib/Lower/OpenACC.cpp                   | 150 ++++++++++++++----
 .../acc-routine-bind-devtype-filter.f90       |  21 +++
 2 files changed, 142 insertions(+), 29 deletions(-)
 create mode 100644 flang/test/Lower/OpenACC/acc-routine-bind-devtype-filter.f90

diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp
index 5ab3718d6b724..5809fd5f87743 100644
--- a/flang/lib/Lower/OpenACC.cpp
+++ b/flang/lib/Lower/OpenACC.cpp
@@ -4687,12 +4687,58 @@ void Fortran::lower::materializeOpenACCRoutineBindTargets(
           ? converter.getMLIRSymbolTable()
           : nullptr;
 
-  auto appendAttrs = [](mlir::ArrayAttr attrs,
-                        llvm::SmallVector<mlir::Attribute> &out) {
-    if (attrs)
-      out.append(attrs.begin(), attrs.end());
+  mlir::Attribute defaultDeviceTypeAttr = mlir::acc::DeviceTypeAttr::get(
+      builder.getContext(), mlir::acc::DeviceType::None);
+
+  auto getDeviceType = [](mlir::Attribute attr) -> mlir::acc::DeviceType {
+    return mlir::cast<mlir::acc::DeviceTypeAttr>(attr).getValue();
+  };
+
+  auto hasDeviceType = [&](llvm::ArrayRef<mlir::Attribute> deviceTypes,
+                           mlir::Attribute deviceType) {
+    mlir::acc::DeviceType value = getDeviceType(deviceType);
+    return llvm::any_of(deviceTypes, [&](mlir::Attribute attr) {
+      return getDeviceType(attr) == value;
+    });
   };
 
+  auto deviceTypeAppliesToBindTarget =
+      [&](mlir::Attribute deviceType,
+          llvm::ArrayRef<mlir::Attribute> targetDeviceTypes) {
+        // Clauses without an explicit device_type are the default routine
+        // clauses used as a fallback by later routine lowering.
+        return getDeviceType(deviceType) == mlir::acc::DeviceType::None ||
+               hasDeviceType(targetDeviceTypes, deviceType);
+      };
+
+  auto appendMatchingDeviceTypes =
+      [&](mlir::ArrayAttr attrs, llvm::ArrayRef<mlir::Attribute> deviceTypes,
+          llvm::SmallVector<mlir::Attribute> &out) {
+        if (!attrs)
+          return;
+        for (mlir::Attribute attr : attrs)
+          if (deviceTypeAppliesToBindTarget(attr, deviceTypes))
+            out.push_back(attr);
+      };
+
+  auto appendMatchingAttrs =
+      [&](mlir::ArrayAttr attrs, mlir::ArrayAttr attrDeviceTypes,
+          llvm::ArrayRef<mlir::Attribute> deviceTypes,
+          llvm::SmallVector<mlir::Attribute> &outAttrs,
+          llvm::SmallVector<mlir::Attribute> &outDeviceTypes) {
+        if (!attrs || !attrDeviceTypes)
+          return;
+        assert(attrs.size() == attrDeviceTypes.size() &&
+               "expect same number of attributes");
+        for (auto it : llvm::enumerate(attrDeviceTypes)) {
+          mlir::Attribute deviceType = it.value();
+          if (!deviceTypeAppliesToBindTarget(deviceType, deviceTypes))
+            continue;
+          outAttrs.push_back(attrs[it.index()]);
+          outDeviceTypes.push_back(deviceType);
+        }
+      };
+
   llvm::SmallVector<mlir::acc::RoutineOp> routineOps(
       module.getOps<mlir::acc::RoutineOp>());
   for (mlir::acc::RoutineOp routineOp : routineOps) {
@@ -4711,38 +4757,84 @@ void Fortran::lower::materializeOpenACCRoutineBindTargets(
                                                name, type, symbolTable);
     };
 
-    auto createRoutineForBindTarget = [&](mlir::func::FuncOp target) {
-      if (target->hasAttr(mlir::acc::getRoutineInfoAttrName()))
+    auto createRoutineForBindTarget =
+        [&](mlir::func::FuncOp target,
+            llvm::ArrayRef<mlir::Attribute> bindTargetDeviceTypes) {
+          if (target->hasAttr(mlir::acc::getRoutineInfoAttrName()))
+            return;
+
+          llvm::SmallVector<mlir::Attribute> emptyBindIdNames,
+              emptyBindStrNames, emptyBindIdNameDeviceTypes,
+              emptyBindStrNameDeviceTypes, gangDeviceTypes, gangDimValues,
+              gangDimDeviceTypes, seqDeviceTypes, workerDeviceTypes,
+              vectorDeviceTypes;
+          appendMatchingDeviceTypes(routineOp.getGangAttr(),
+                                    bindTargetDeviceTypes, gangDeviceTypes);
+          appendMatchingAttrs(
+              routineOp.getGangDimAttr(), routineOp.getGangDimDeviceTypeAttr(),
+              bindTargetDeviceTypes, gangDimValues, gangDimDeviceTypes);
+          appendMatchingDeviceTypes(routineOp.getSeqAttr(),
+                                    bindTargetDeviceTypes, seqDeviceTypes);
+          appendMatchingDeviceTypes(routineOp.getWorkerAttr(),
+                                    bindTargetDeviceTypes, workerDeviceTypes);
+          appendMatchingDeviceTypes(routineOp.getVectorAttr(),
+                                    bindTargetDeviceTypes, vectorDeviceTypes);
+
+          createOpenACCRoutineConstruct(
+              converter, routineOp.getLoc(), module, target,
+              target.getName().str(), routineOp.getNohost(), emptyBindIdNames,
+              emptyBindStrNames, emptyBindIdNameDeviceTypes,
+              emptyBindStrNameDeviceTypes, gangDeviceTypes, gangDimValues,
+              gangDimDeviceTypes, seqDeviceTypes, workerDeviceTypes,
+              vectorDeviceTypes);
+        };
+
+    struct BindTarget {
+      mlir::func::FuncOp target;
+      llvm::SmallVector<mlir::Attribute> deviceTypes;
+    };
+    llvm::SmallVector<BindTarget> bindTargets;
+
+    auto addBindTarget = [&](mlir::func::FuncOp target,
+                             mlir::Attribute deviceType) {
+      for (BindTarget &bindTarget : bindTargets) {
+        if (bindTarget.target.getOperation() != target.getOperation())
+          continue;
+        if (!hasDeviceType(bindTarget.deviceTypes, deviceType))
+          bindTarget.deviceTypes.push_back(deviceType);
         return;
+      }
+      bindTargets.push_back({target, {deviceType}});
+    };
 
-      llvm::SmallVector<mlir::Attribute> emptyBindIdNames, emptyBindStrNames,
-          emptyBindIdNameDeviceTypes, emptyBindStrNameDeviceTypes,
-          gangDeviceTypes, gangDimValues, gangDimDeviceTypes, seqDeviceTypes,
-          workerDeviceTypes, vectorDeviceTypes;
-      appendAttrs(routineOp.getGangAttr(), gangDeviceTypes);
-      appendAttrs(routineOp.getGangDimAttr(), gangDimValues);
-      appendAttrs(routineOp.getGangDimDeviceTypeAttr(), gangDimDeviceTypes);
-      appendAttrs(routineOp.getSeqAttr(), seqDeviceTypes);
-      appendAttrs(routineOp.getWorkerAttr(), workerDeviceTypes);
-      appendAttrs(routineOp.getVectorAttr(), vectorDeviceTypes);
-
-      createOpenACCRoutineConstruct(
-          converter, routineOp.getLoc(), module, target, target.getName().str(),
-          routineOp.getNohost(), emptyBindIdNames, emptyBindStrNames,
-          emptyBindIdNameDeviceTypes, emptyBindStrNameDeviceTypes,
-          gangDeviceTypes, gangDimValues, gangDimDeviceTypes, seqDeviceTypes,
-          workerDeviceTypes, vectorDeviceTypes);
+    auto getBindDeviceType = [&](mlir::ArrayAttr deviceTypes,
+                                 unsigned index) -> mlir::Attribute {
+      if (deviceTypes) {
+        assert(index < deviceTypes.size() &&
+               "expect bind name and device_type arrays to match");
+        return deviceTypes[index];
+      }
+      return defaultDeviceTypeAttr;
     };
 
     // bind(identifier) is mangled; bind("string") is a verbatim asm name.
     if (mlir::ArrayAttr binds = routineOp.getBindIdNameAttr())
-      for (mlir::Attribute bind : binds)
-        if (auto symRef = mlir::dyn_cast<mlir::SymbolRefAttr>(bind))
-          createRoutineForBindTarget(declare(symRef.getLeafReference()));
+      for (auto bind : llvm::enumerate(binds))
+        if (auto symRef = mlir::dyn_cast<mlir::SymbolRefAttr>(bind.value()))
+          addBindTarget(
+              declare(symRef.getLeafReference()),
+              getBindDeviceType(routineOp.getBindIdNameDeviceTypeAttr(),
+                                bind.index()));
     if (mlir::ArrayAttr binds = routineOp.getBindStrNameAttr())
-      for (mlir::Attribute bind : binds)
-        if (auto strAttr = mlir::dyn_cast<mlir::StringAttr>(bind))
-          createRoutineForBindTarget(declare(strAttr.getValue()));
+      for (auto bind : llvm::enumerate(binds))
+        if (auto strAttr = mlir::dyn_cast<mlir::StringAttr>(bind.value()))
+          addBindTarget(
+              declare(strAttr.getValue()),
+              getBindDeviceType(routineOp.getBindStrNameDeviceTypeAttr(),
+                                bind.index()));
+
+    for (BindTarget &bindTarget : bindTargets)
+      createRoutineForBindTarget(bindTarget.target, bindTarget.deviceTypes);
   }
 }
 
diff --git a/flang/test/Lower/OpenACC/acc-routine-bind-devtype-filter.f90 b/flang/test/Lower/OpenACC/acc-routine-bind-devtype-filter.f90
new file mode 100644
index 0000000000000..ae97d34973a5b
--- /dev/null
+++ b/flang/test/Lower/OpenACC/acc-routine-bind-devtype-filter.f90
@@ -0,0 +1,21 @@
+! Device_type-specific acc routine bind targets inherit only the clauses for
+! the bind target's own device type.
+
+! RUN: bbc -fopenacc -emit-hlfir %s -o - | FileCheck %s
+
+subroutine s_bind_devtype_filter(n, x)
+  integer :: n, i
+  real :: x(n)
+  !$acc routine(foo) device_type(nvidia) vector bind(foo_n) device_type(multicore) worker bind(foo_m)
+  external :: foo
+  !$acc parallel loop
+  do i = 1, n
+    call foo(x(i))
+  end do
+end subroutine
+
+! CHECK-DAG: acc.routine @{{.*}} func(@_QPfoo) bind(@_QPfoo_n [#acc.device_type<nvidia>], @_QPfoo_m [#acc.device_type<multicore>]) worker ([#acc.device_type<multicore>]) vector ([#acc.device_type<nvidia>])
+! CHECK-DAG: acc.routine @{{.*}} func(@_QPfoo_n) vector ([#acc.device_type<nvidia>]){{$}}
+! CHECK-DAG: acc.routine @{{.*}} func(@_QPfoo_m) worker ([#acc.device_type<multicore>]){{$}}
+! CHECK-DAG: func.func private @_QPfoo_n
+! CHECK-DAG: func.func private @_QPfoo_m

>From b4da9fe73f8b281ff4d84948ee9d0dbeb12807cc Mon Sep 17 00:00:00 2001
From: Andre Kuhlenschmidt <akuhlenschmi at nvidia.com>
Date: Tue, 23 Jun 2026 13:12:16 -0700
Subject: [PATCH 3/4] making test more strict

---
 .../Lower/OpenACC/acc-routine-bind-devtype-filter.f90     | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/flang/test/Lower/OpenACC/acc-routine-bind-devtype-filter.f90 b/flang/test/Lower/OpenACC/acc-routine-bind-devtype-filter.f90
index ae97d34973a5b..0ba9ee5392249 100644
--- a/flang/test/Lower/OpenACC/acc-routine-bind-devtype-filter.f90
+++ b/flang/test/Lower/OpenACC/acc-routine-bind-devtype-filter.f90
@@ -15,7 +15,7 @@ subroutine s_bind_devtype_filter(n, x)
 end subroutine
 
 ! CHECK-DAG: acc.routine @{{.*}} func(@_QPfoo) bind(@_QPfoo_n [#acc.device_type<nvidia>], @_QPfoo_m [#acc.device_type<multicore>]) worker ([#acc.device_type<multicore>]) vector ([#acc.device_type<nvidia>])
-! CHECK-DAG: acc.routine @{{.*}} func(@_QPfoo_n) vector ([#acc.device_type<nvidia>]){{$}}
-! CHECK-DAG: acc.routine @{{.*}} func(@_QPfoo_m) worker ([#acc.device_type<multicore>]){{$}}
-! CHECK-DAG: func.func private @_QPfoo_n
-! CHECK-DAG: func.func private @_QPfoo_m
+! CHECK-DAG: acc.routine @[[FOO_N_ROUTINE:.*]] func(@_QPfoo_n) vector ([#acc.device_type<nvidia>]){{$}}
+! CHECK-DAG: acc.routine @[[FOO_M_ROUTINE:.*]] func(@_QPfoo_m) worker ([#acc.device_type<multicore>]){{$}}
+! CHECK-DAG: func.func private @_QPfoo_n({{.*}}) attributes {acc.routine_info = #acc.routine_info<[@[[FOO_N_ROUTINE]]]>}
+! CHECK-DAG: func.func private @_QPfoo_m({{.*}}) attributes {acc.routine_info = #acc.routine_info<[@[[FOO_M_ROUTINE]]]>}

>From abc016e0000435307f0cc242854ea57289390feb Mon Sep 17 00:00:00 2001
From: Andre Kuhlenschmidt <akuhlenschmi at nvidia.com>
Date: Tue, 23 Jun 2026 15:39:40 -0700
Subject: [PATCH 4/4] add test case

---
 .../acc-routine-bind-devtype-filter.f90       | 32 +++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/flang/test/Lower/OpenACC/acc-routine-bind-devtype-filter.f90 b/flang/test/Lower/OpenACC/acc-routine-bind-devtype-filter.f90
index 0ba9ee5392249..725dec744b01b 100644
--- a/flang/test/Lower/OpenACC/acc-routine-bind-devtype-filter.f90
+++ b/flang/test/Lower/OpenACC/acc-routine-bind-devtype-filter.f90
@@ -19,3 +19,35 @@ subroutine s_bind_devtype_filter(n, x)
 ! CHECK-DAG: acc.routine @[[FOO_M_ROUTINE:.*]] func(@_QPfoo_m) worker ([#acc.device_type<multicore>]){{$}}
 ! CHECK-DAG: func.func private @_QPfoo_n({{.*}}) attributes {acc.routine_info = #acc.routine_info<[@[[FOO_N_ROUTINE]]]>}
 ! CHECK-DAG: func.func private @_QPfoo_m({{.*}}) attributes {acc.routine_info = #acc.routine_info<[@[[FOO_M_ROUTINE]]]>}
+
+subroutine s_bind_devtype_merged_target(n, x)
+  integer :: n, i
+  real :: x(n)
+  !$acc routine(foo_merge) device_type(nvidia) vector bind(foo_dev) device_type(multicore) worker bind(foo_dev)
+  external :: foo_merge
+  !$acc parallel loop
+  do i = 1, n
+    call foo_merge(x(i))
+  end do
+end subroutine
+
+! CHECK-DAG: acc.routine @{{.*}} func(@_QPfoo_merge) bind(@_QPfoo_dev [#acc.device_type<nvidia>], @_QPfoo_dev [#acc.device_type<multicore>]) worker ([#acc.device_type<multicore>]) vector ([#acc.device_type<nvidia>])
+! CHECK-DAG: acc.routine @[[FOO_DEV_ROUTINE:.*]] func(@_QPfoo_dev) worker ([#acc.device_type<multicore>]) vector ([#acc.device_type<nvidia>]){{$}}
+! CHECK-DAG: func.func private @_QPfoo_dev({{.*}}) attributes {acc.routine_info = #acc.routine_info<[@[[FOO_DEV_ROUTINE]]]>}
+
+subroutine s_bind_before_modality(n, x)
+  integer :: n, i
+  real :: x(n)
+  !$acc routine(bar) device_type(nvidia) bind(bar_n) vector device_type(multicore) bind(bar_m) seq
+  external :: bar
+  !$acc parallel loop
+  do i = 1, n
+    call bar(x(i))
+  end do
+end subroutine
+
+! CHECK-DAG: acc.routine @{{.*}} func(@_QPbar) bind(@_QPbar_n [#acc.device_type<nvidia>], @_QPbar_m [#acc.device_type<multicore>]) vector ([#acc.device_type<nvidia>]) seq ([#acc.device_type<multicore>])
+! CHECK-DAG: acc.routine @[[BAR_N_ROUTINE:.*]] func(@_QPbar_n) vector ([#acc.device_type<nvidia>]){{$}}
+! CHECK-DAG: acc.routine @[[BAR_M_ROUTINE:.*]] func(@_QPbar_m) seq ([#acc.device_type<multicore>]){{$}}
+! CHECK-DAG: func.func private @_QPbar_n({{.*}}) attributes {acc.routine_info = #acc.routine_info<[@[[BAR_N_ROUTINE]]]>}
+! CHECK-DAG: func.func private @_QPbar_m({{.*}}) attributes {acc.routine_info = #acc.routine_info<[@[[BAR_M_ROUTINE]]]>}



More information about the flang-commits mailing list