[flang-commits] [flang] [flang][cuda] Prefer valid intrinsics over host-only generics in device code (PR #205376)
Zhen Wang via flang-commits
flang-commits at lists.llvm.org
Tue Jun 23 09:30:13 PDT 2026
https://github.com/wangzpgi created https://github.com/llvm/llvm-project/pull/205376
In CUDA device code, a host-only generic interface can shadow an intrinsic of the same name, causing a valid call such as maxloc(a, 1) to be rejected as not device-callable. This patch retries intrinsic resolution when a generic function call in a device context resolves to a host-only specific procedure, and preserves the normal diagnostics when no valid intrinsic matches.
>From 9ffc51c9ea55b686d5c38d6039465fe687e0c2e0 Mon Sep 17 00:00:00 2001
From: Zhen Wang <zhenw at nvidia.com>
Date: Mon, 22 Jun 2026 11:19:36 -0700
Subject: [PATCH 1/2] Fix CUDA device calls to prefer valid intrinsics over
host-only generic wrappers
---
flang/lib/Semantics/expression.cpp | 37 +++++++++++++
flang/test/Semantics/cuf29.cuf | 83 ++++++++++++++++++++++++++++++
2 files changed, 120 insertions(+)
create mode 100644 flang/test/Semantics/cuf29.cuf
diff --git a/flang/lib/Semantics/expression.cpp b/flang/lib/Semantics/expression.cpp
index 6c0a21cc769c1..a0215ebf36195 100644
--- a/flang/lib/Semantics/expression.cpp
+++ b/flang/lib/Semantics/expression.cpp
@@ -3067,6 +3067,31 @@ const Symbol *ExpressionAnalyzer::ResolveForward(const Symbol &symbol) {
// Resolve a call to a generic procedure with given actual arguments.
// adjustActuals is called on procedure bindings to handle pass arg.
+static bool IsCUDADeviceCallable(const Symbol &symbol) {
+ const auto *subprogram{
+ symbol.GetUltimate().detailsIf<semantics::SubprogramDetails>()};
+ if (!subprogram) {
+ return false;
+ }
+ auto attrs{subprogram->cudaSubprogramAttrs()};
+ return attrs &&
+ (*attrs == common::CUDASubprogramAttrs::Device ||
+ *attrs == common::CUDASubprogramAttrs::HostDevice);
+}
+
+static bool IsCudaDeviceIntrinsicShadowedByHostProcedure(
+ const parser::CharBlock &callSite, semantics::SemanticsContext &context,
+ const Symbol *resolution) {
+ if (!resolution || !IsProcedure(*resolution) ||
+ resolution->attrs().test(semantics::Attr::INTRINSIC) ||
+ !semantics::FindCUDADeviceContext(&context.FindScope(callSite))) {
+ return false;
+ }
+ // Keep use-associated names visible in device code, but do not let a
+ // host-only procedure hide a valid intrinsic with the same generic name.
+ return !IsCUDADeviceCallable(*resolution);
+}
+
auto ExpressionAnalyzer::ResolveGeneric(const Symbol &symbol,
const ActualArguments &actuals, const AdjustActuals &adjustActuals,
bool isSubroutine, SymbolVector &&tried, bool mightBeStructureConstructor)
@@ -3320,6 +3345,18 @@ auto ExpressionAnalyzer::GetCalleeAndArguments(const parser::Name &name,
resolution = result.specific;
dueToAmbiguity = result.failedDueToAmbiguity;
tried = std::move(result.tried);
+ if (IsCudaDeviceIntrinsicShadowedByHostProcedure(
+ name.source, context_, resolution)) {
+ ActualArguments localArguments{arguments};
+ if (std::optional<SpecificCall> specificCall{context_.intrinsics().Probe(
+ CallCharacteristics{symbol->name().ToString(), isSubroutine},
+ localArguments, GetFoldingContext())}) {
+ CheckBadExplicitType(*specificCall, *symbol);
+ return CalleeAndArguments{
+ ProcedureDesignator{std::move(specificCall->specificIntrinsic)},
+ std::move(specificCall->arguments)};
+ }
+ }
if (resolution) {
if (context_.GetPPCBuiltinsScope() &&
resolution->name().ToString().rfind("__ppc_", 0) == 0) {
diff --git a/flang/test/Semantics/cuf29.cuf b/flang/test/Semantics/cuf29.cuf
new file mode 100644
index 0000000000000..cf8280c0db35e
--- /dev/null
+++ b/flang/test/Semantics/cuf29.cuf
@@ -0,0 +1,83 @@
+! RUN: %python %S/test_errors.py %s %flang_fc1
+
+! Host-side generic wrappers can share names with intrinsics. In device code,
+! intrinsic resolution must still win when the host wrapper is not
+! device-callable.
+module host_reduction_wrappers
+ interface sum
+ module procedure fake_sum
+ end interface
+ interface maxval
+ module procedure fake_maxval
+ end interface
+ interface minval
+ module procedure fake_minval
+ end interface
+ interface maxloc
+ module procedure fake_maxloc
+ end interface
+ interface minloc
+ module procedure fake_minloc
+ end interface
+ interface host_wrapper
+ module procedure fake_host_wrapper
+ end interface
+contains
+ function fake_sum(array) result(res)
+ real(8) :: array(:)
+ real(8) :: res
+ end function
+ function fake_maxval(array) result(res)
+ real(8) :: array(:)
+ real(8) :: res
+ end function
+ function fake_minval(array) result(res)
+ real(8) :: array(:)
+ real(8) :: res
+ end function
+ function fake_maxloc(array, dim) result(res)
+ real(8) :: array(:)
+ integer :: dim
+ integer :: res
+ end function
+ function fake_minloc(array, dim) result(res)
+ real(8) :: array(:)
+ integer :: dim
+ integer :: res
+ end function
+ function fake_host_wrapper(array) result(res)
+ real(8) :: array(:)
+ real(8) :: res
+ end function
+end module
+
+module test
+ use host_reduction_wrappers
+contains
+ attributes(global) subroutine reduction_intrinsics(a, locs, vals)
+ real(8), intent(in) :: a(3)
+ integer, intent(out) :: locs(2)
+ real(8), intent(out) :: vals(3)
+ real(8) :: local(3)
+
+ local = a
+ locs(1) = maxloc(local, 1)
+ locs(2) = minloc(local, 1)
+ vals(1) = sum(local)
+ vals(2) = maxval(local)
+ vals(3) = minval(local)
+ !ERROR: 'fake_host_wrapper' may not be called in device code
+ vals(1) = host_wrapper(local)
+ end subroutine
+end module
+
+module renamed_test
+ use host_reduction_wrappers, only: not_maxloc => maxloc
+contains
+ attributes(global) subroutine renamed_wrapper(a, loc)
+ real(8), intent(in) :: a(3)
+ integer, intent(out) :: loc
+ !ERROR: 'fake_maxloc' may not be called in device code
+ loc = not_maxloc(a, 1)
+ end subroutine
+end module
>From f167d9acb9486565b8d62951be25387848525fb5 Mon Sep 17 00:00:00 2001
From: Zhen Wang <zhenw at nvidia.com>
Date: Tue, 23 Jun 2026 09:23:54 -0700
Subject: [PATCH 2/2] change test
---
flang/lib/Semantics/expression.cpp | 8 ++++----
flang/test/Semantics/cuf29.cuf | 13 +------------
2 files changed, 5 insertions(+), 16 deletions(-)
diff --git a/flang/lib/Semantics/expression.cpp b/flang/lib/Semantics/expression.cpp
index a0215ebf36195..04f2f63e78dfd 100644
--- a/flang/lib/Semantics/expression.cpp
+++ b/flang/lib/Semantics/expression.cpp
@@ -3081,8 +3081,8 @@ static bool IsCUDADeviceCallable(const Symbol &symbol) {
static bool IsCudaDeviceIntrinsicShadowedByHostProcedure(
const parser::CharBlock &callSite, semantics::SemanticsContext &context,
- const Symbol *resolution) {
- if (!resolution || !IsProcedure(*resolution) ||
+ const Symbol *resolution, bool isSubroutine) {
+ if (isSubroutine || !resolution || !IsProcedure(*resolution) ||
resolution->attrs().test(semantics::Attr::INTRINSIC) ||
!semantics::FindCUDADeviceContext(&context.FindScope(callSite))) {
return false;
@@ -3346,10 +3346,10 @@ auto ExpressionAnalyzer::GetCalleeAndArguments(const parser::Name &name,
dueToAmbiguity = result.failedDueToAmbiguity;
tried = std::move(result.tried);
if (IsCudaDeviceIntrinsicShadowedByHostProcedure(
- name.source, context_, resolution)) {
+ name.source, context_, resolution, isSubroutine)) {
ActualArguments localArguments{arguments};
if (std::optional<SpecificCall> specificCall{context_.intrinsics().Probe(
- CallCharacteristics{symbol->name().ToString(), isSubroutine},
+ CallCharacteristics{name.source.ToString(), isSubroutine},
localArguments, GetFoldingContext())}) {
CheckBadExplicitType(*specificCall, *symbol);
return CalleeAndArguments{
diff --git a/flang/test/Semantics/cuf29.cuf b/flang/test/Semantics/cuf29.cuf
index cf8280c0db35e..1a89ebada6e90 100644
--- a/flang/test/Semantics/cuf29.cuf
+++ b/flang/test/Semantics/cuf29.cuf
@@ -66,18 +66,7 @@ contains
vals(1) = sum(local)
vals(2) = maxval(local)
vals(3) = minval(local)
- !ERROR: 'fake_host_wrapper' may not be called in device code
+ !ERROR: No specific function of generic 'host_wrapper' matches the actual arguments
vals(1) = host_wrapper(local)
end subroutine
end module
-
-module renamed_test
- use host_reduction_wrappers, only: not_maxloc => maxloc
-contains
- attributes(global) subroutine renamed_wrapper(a, loc)
- real(8), intent(in) :: a(3)
- integer, intent(out) :: loc
- !ERROR: 'fake_maxloc' may not be called in device code
- loc = not_maxloc(a, 1)
- end subroutine
-end module
More information about the flang-commits
mailing list