[flang-commits] [flang] [flang][cuda] Prefer valid intrinsics over host-only generics in device code (PR #205376)

Zhen Wang via flang-commits flang-commits at lists.llvm.org
Tue Jun 23 09:30:13 PDT 2026


https://github.com/wangzpgi created https://github.com/llvm/llvm-project/pull/205376

In CUDA device code, a host-only generic can shadow an intrinsic with the same name and cause a valid call such as maxloc(a, 1) to be rejected as non-device-callable. When a generic function call in a device context resolves to a specific procedure that is not device-callable, retry the resolution as an intrinsic call; if no valid intrinsic matches, the normal diagnostics are preserved.

>From 9ffc51c9ea55b686d5c38d6039465fe687e0c2e0 Mon Sep 17 00:00:00 2001
From: Zhen Wang <zhenw at nvidia.com>
Date: Mon, 22 Jun 2026 11:19:36 -0700
Subject: [PATCH 1/2] Fix CUDA device calls to prefer valid intrinsics over
 host-only generic wrappers

---
 flang/lib/Semantics/expression.cpp | 37 +++++++++++++
 flang/test/Semantics/cuf29.cuf     | 83 ++++++++++++++++++++++++++++++
 2 files changed, 120 insertions(+)
 create mode 100644 flang/test/Semantics/cuf29.cuf

diff --git a/flang/lib/Semantics/expression.cpp b/flang/lib/Semantics/expression.cpp
index 6c0a21cc769c1..a0215ebf36195 100644
--- a/flang/lib/Semantics/expression.cpp
+++ b/flang/lib/Semantics/expression.cpp
@@ -3067,6 +3067,31 @@ const Symbol *ExpressionAnalyzer::ResolveForward(const Symbol &symbol) {
 
 // Resolve a call to a generic procedure with given actual arguments.
 // adjustActuals is called on procedure bindings to handle pass arg.
+static bool IsCUDADeviceCallable(const Symbol &symbol) {
+  const auto *subprogram{
+      symbol.GetUltimate().detailsIf<semantics::SubprogramDetails>()};
+  if (!subprogram) {
+    return false;
+  }
+  auto attrs{subprogram->cudaSubprogramAttrs()};
+  return attrs &&
+      (*attrs == common::CUDASubprogramAttrs::Device ||
+          *attrs == common::CUDASubprogramAttrs::HostDevice);
+}
+
+static bool IsCudaDeviceIntrinsicShadowedByHostProcedure(
+    const parser::CharBlock &callSite, semantics::SemanticsContext &context,
+    const Symbol *resolution) {
+  if (!resolution || !IsProcedure(*resolution) ||
+      resolution->attrs().test(semantics::Attr::INTRINSIC) ||
+      !semantics::FindCUDADeviceContext(&context.FindScope(callSite))) {
+    return false;
+  }
+  // Keep use-associated names visible in device code, but do not let a
+  // host-only procedure hide a valid intrinsic with the same generic name.
+  return !IsCUDADeviceCallable(*resolution);
+}
+
 auto ExpressionAnalyzer::ResolveGeneric(const Symbol &symbol,
     const ActualArguments &actuals, const AdjustActuals &adjustActuals,
     bool isSubroutine, SymbolVector &&tried, bool mightBeStructureConstructor)
@@ -3320,6 +3345,18 @@ auto ExpressionAnalyzer::GetCalleeAndArguments(const parser::Name &name,
     resolution = result.specific;
     dueToAmbiguity = result.failedDueToAmbiguity;
     tried = std::move(result.tried);
+    if (IsCudaDeviceIntrinsicShadowedByHostProcedure(
+            name.source, context_, resolution)) {
+      ActualArguments localArguments{arguments};
+      if (std::optional<SpecificCall> specificCall{context_.intrinsics().Probe(
+              CallCharacteristics{symbol->name().ToString(), isSubroutine},
+              localArguments, GetFoldingContext())}) {
+        CheckBadExplicitType(*specificCall, *symbol);
+        return CalleeAndArguments{
+            ProcedureDesignator{std::move(specificCall->specificIntrinsic)},
+            std::move(specificCall->arguments)};
+      }
+    }
     if (resolution) {
       if (context_.GetPPCBuiltinsScope() &&
           resolution->name().ToString().rfind("__ppc_", 0) == 0) {
diff --git a/flang/test/Semantics/cuf29.cuf b/flang/test/Semantics/cuf29.cuf
new file mode 100644
index 0000000000000..cf8280c0db35e
--- /dev/null
+++ b/flang/test/Semantics/cuf29.cuf
@@ -0,0 +1,83 @@
+! RUN: %python %S/test_errors.py %s %flang_fc1
+
+! Host-side generic wrappers can share names with intrinsics. In device code,
+! intrinsic resolution must still win when the host wrapper is not
+! device-callable.
+module host_reduction_wrappers
+  interface sum
+    module procedure fake_sum
+  end interface
+  interface maxval
+    module procedure fake_maxval
+  end interface
+  interface minval
+    module procedure fake_minval
+  end interface
+  interface maxloc
+    module procedure fake_maxloc
+  end interface
+  interface minloc
+    module procedure fake_minloc
+  end interface
+  interface host_wrapper
+    module procedure fake_host_wrapper
+  end interface
+contains
+  function fake_sum(array) result(res)
+    real(8) :: array(:)
+    real(8) :: res
+  end function
+  function fake_maxval(array) result(res)
+    real(8) :: array(:)
+    real(8) :: res
+  end function
+  function fake_minval(array) result(res)
+    real(8) :: array(:)
+    real(8) :: res
+  end function
+  function fake_maxloc(array, dim) result(res)
+    real(8) :: array(:)
+    integer :: dim
+    integer :: res
+  end function
+  function fake_minloc(array, dim) result(res)
+    real(8) :: array(:)
+    integer :: dim
+    integer :: res
+  end function
+  function fake_host_wrapper(array) result(res)
+    real(8) :: array(:)
+    real(8) :: res
+  end function
+end module
+
+module test
+  use host_reduction_wrappers
+contains
+  attributes(global) subroutine reduction_intrinsics(a, locs, vals)
+    real(8), intent(in) :: a(3)
+    integer, intent(out) :: locs(2)
+    real(8), intent(out) :: vals(3)
+    real(8) :: local(3)
+
+    local = a
+    locs(1) = maxloc(local, 1)
+    locs(2) = minloc(local, 1)
+    vals(1) = sum(local)
+    vals(2) = maxval(local)
+    vals(3) = minval(local)
+    !ERROR: 'fake_host_wrapper' may not be called in device code
+    vals(1) = host_wrapper(local)
+  end subroutine
+end module
+
+module renamed_test
+  use host_reduction_wrappers, only: not_maxloc => maxloc
+contains
+  attributes(global) subroutine renamed_wrapper(a, loc)
+    real(8), intent(in) :: a(3)
+    integer, intent(out) :: loc
+    !ERROR: 'fake_maxloc' may not be called in device code
+    loc = not_maxloc(a, 1)
+  end subroutine
+end module

>From f167d9acb9486565b8d62951be25387848525fb5 Mon Sep 17 00:00:00 2001
From: Zhen Wang <zhenw at nvidia.com>
Date: Tue, 23 Jun 2026 09:23:54 -0700
Subject: [PATCH 2/2] change test

---
 flang/lib/Semantics/expression.cpp |  8 ++++----
 flang/test/Semantics/cuf29.cuf     | 13 +------------
 2 files changed, 5 insertions(+), 16 deletions(-)

diff --git a/flang/lib/Semantics/expression.cpp b/flang/lib/Semantics/expression.cpp
index a0215ebf36195..04f2f63e78dfd 100644
--- a/flang/lib/Semantics/expression.cpp
+++ b/flang/lib/Semantics/expression.cpp
@@ -3081,8 +3081,8 @@ static bool IsCUDADeviceCallable(const Symbol &symbol) {
 
 static bool IsCudaDeviceIntrinsicShadowedByHostProcedure(
     const parser::CharBlock &callSite, semantics::SemanticsContext &context,
-    const Symbol *resolution) {
-  if (!resolution || !IsProcedure(*resolution) ||
+    const Symbol *resolution, bool isSubroutine) {
+  if (isSubroutine || !resolution || !IsProcedure(*resolution) ||
       resolution->attrs().test(semantics::Attr::INTRINSIC) ||
       !semantics::FindCUDADeviceContext(&context.FindScope(callSite))) {
     return false;
@@ -3346,10 +3346,10 @@ auto ExpressionAnalyzer::GetCalleeAndArguments(const parser::Name &name,
     dueToAmbiguity = result.failedDueToAmbiguity;
     tried = std::move(result.tried);
     if (IsCudaDeviceIntrinsicShadowedByHostProcedure(
-            name.source, context_, resolution)) {
+            name.source, context_, resolution, isSubroutine)) {
       ActualArguments localArguments{arguments};
       if (std::optional<SpecificCall> specificCall{context_.intrinsics().Probe(
-              CallCharacteristics{symbol->name().ToString(), isSubroutine},
+              CallCharacteristics{name.source.ToString(), isSubroutine},
               localArguments, GetFoldingContext())}) {
         CheckBadExplicitType(*specificCall, *symbol);
         return CalleeAndArguments{
diff --git a/flang/test/Semantics/cuf29.cuf b/flang/test/Semantics/cuf29.cuf
index cf8280c0db35e..1a89ebada6e90 100644
--- a/flang/test/Semantics/cuf29.cuf
+++ b/flang/test/Semantics/cuf29.cuf
@@ -66,18 +66,7 @@ contains
     vals(1) = sum(local)
     vals(2) = maxval(local)
     vals(3) = minval(local)
-    !ERROR: 'fake_host_wrapper' may not be called in device code
+    !ERROR: No specific function of generic 'host_wrapper' matches the actual arguments
     vals(1) = host_wrapper(local)
   end subroutine
 end module
-
-module renamed_test
-  use host_reduction_wrappers, only: not_maxloc => maxloc
-contains
-  attributes(global) subroutine renamed_wrapper(a, loc)
-    real(8), intent(in) :: a(3)
-    integer, intent(out) :: loc
-    !ERROR: 'fake_maxloc' may not be called in device code
-    loc = not_maxloc(a, 1)
-  end subroutine
-end module



More information about the flang-commits mailing list