[flang-commits] [flang] [Flang][OpenMP] Fix USM `close` semantics and `use_device_ptr` (PR #163258)
Akash Banerjee via flang-commits
flang-commits at lists.llvm.org
Tue Oct 14 08:09:28 PDT 2025
https://github.com/TIFitis updated https://github.com/llvm/llvm-project/pull/163258
>From 9dae79ddd306585275c5061f2a4af5a485008d77 Mon Sep 17 00:00:00 2001
From: Akash Banerjee <Akash.Banerjee at amd.com>
Date: Mon, 13 Oct 2025 20:43:06 +0100
Subject: [PATCH 1/3] [Flang][OpenMP] Fix USM `close` semantics and
`use_device_ptr`
- Add CLOSE map flag when USM is required.
- use_device_ptr: prevent implicitly expanding member operands.
- Fixes test offload/test/offloading/fortran/usm_map_close.f90.
---
.../Optimizer/OpenMP/MapInfoFinalization.cpp | 149 +++++++++++++++---
1 file changed, 124 insertions(+), 25 deletions(-)
diff --git a/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp b/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp
index 260e5256f1520..8fe4f1bab9ddf 100644
--- a/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp
+++ b/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp
@@ -40,6 +40,7 @@
#include "mlir/IR/SymbolTable.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Support/LLVM.h"
+#include "llvm/ADT/BitmaskEnum.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/Frontend/OpenMP/OMPConstants.h"
@@ -398,24 +399,18 @@ class MapInfoFinalizationPass
baseAddrIndex);
}
- /// Adjusts the descriptor's map type. The main alteration that is done
- /// currently is transforming the map type to `OMP_MAP_TO` where possible.
- /// This is because we will always need to map the descriptor to device
- /// (or at the very least it seems to be the case currently with the
- /// current lowered kernel IR), as without the appropriate descriptor
- /// information on the device there is a risk of the kernel IR
- /// requesting for various data that will not have been copied to
- /// perform things like indexing. This can cause segfaults and
- /// memory access errors. However, we do not need this data mapped
- /// back to the host from the device, as per the OpenMP spec we cannot alter
- /// the data via resizing or deletion on the device. Discarding any
- /// descriptor alterations via no map back is reasonable (and required
- /// for certain segments of descriptor data like the type descriptor that are
- /// global constants). This alteration is only inapplicable to `target exit`
- /// and `target update` currently, and that's due to `target exit` not
- /// allowing `to` mappings, and `target update` not allowing both `to` and
- /// `from` simultaneously. We currently try to maintain the `implicit` flag
- /// where necessary, although it does not seem strictly required.
+ /// Adjust the descriptor's map type such that we ensure the descriptor
+ /// itself is present on device when needed, without changing the user's
+ /// requested data mapping semantics for the underlying data.
+ ///
+ /// We conservatively transform descriptor mappings to `OMP_MAP_TO` (and
+ /// preserve `IMPLICIT`/`ALWAYS` when present) for structured regions. The
+ /// descriptor should live on device for indexing, bounds, etc., but we do
+ /// not require, nor want, additional mapping semantics like `CLOSE` for the
+ /// descriptor entry itself. `CLOSE` (and other user-provided flags) should
+ /// apply to the base data entry that actually carries the pointee, which is
+ /// generated separately as a member map. For `target exit`/`target update`
+ /// we keep the original map type unchanged.
unsigned long getDescriptorMapType(unsigned long mapTypeFlag,
mlir::Operation *target) {
using mapFlags = llvm::omp::OpenMPOffloadMappingFlags;
@@ -425,8 +420,24 @@ class MapInfoFinalizationPass
mapFlags flags = mapFlags::OMP_MAP_TO |
(mapFlags(mapTypeFlag) &
- (mapFlags::OMP_MAP_IMPLICIT | mapFlags::OMP_MAP_CLOSE |
- mapFlags::OMP_MAP_ALWAYS));
+ (mapFlags::OMP_MAP_IMPLICIT | mapFlags::OMP_MAP_ALWAYS));
+
+ // For unified_shared_memory, we additionally add `CLOSE` on the descriptor
+ // to ensure device-local placement where required by tests relying on USM +
+ // close semantics.
+ if (target) {
+ if (auto mod = target->getParentOfType<mlir::ModuleOp>()) {
+ if (mlir::Attribute reqAttr = mod->getAttr("omp.requires")) {
+ if (auto req =
+ mlir::dyn_cast<mlir::omp::ClauseRequiresAttr>(reqAttr)) {
+ if (mlir::omp::bitEnumContainsAll(
+ req.getValue(),
+ mlir::omp::ClauseRequires::unified_shared_memory))
+ flags |= mapFlags::OMP_MAP_CLOSE;
+ }
+ }
+ }
+ }
return llvm::to_underlying(flags);
}
@@ -518,6 +529,75 @@ class MapInfoFinalizationPass
return newMapInfoOp;
}
+ // Expand mappings of type(C_PTR) to map their `__address` field explicitly
+ // as a single pointer-sized member (USM-gated at callsite). This helps in
+ // USM scenarios to ensure the pointer-sized mapping is used.
+ mlir::omp::MapInfoOp genCptrMemberMap(mlir::omp::MapInfoOp op,
+ fir::FirOpBuilder &builder) {
+ if (!op.getMembers().empty())
+ return op;
+
+ mlir::Type varTy = fir::unwrapRefType(op.getVarPtr().getType());
+ if (!mlir::isa<fir::RecordType>(varTy))
+ return op;
+ auto recTy = mlir::cast<fir::RecordType>(varTy);
+ // If not a builtin C_PTR record, skip.
+ if (!recTy.getName().ends_with("__builtin_c_ptr"))
+ return op;
+
+ // Find the index of the c_ptr address component named "__address".
+ int32_t fieldIdx = recTy.getFieldIndex("__address");
+ if (fieldIdx < 0)
+ return op;
+
+ mlir::Location loc = op.getVarPtr().getLoc();
+ mlir::Type memTy = recTy.getType(fieldIdx);
+ fir::IntOrValue idxConst =
+ mlir::IntegerAttr::get(builder.getI32Type(), fieldIdx);
+ mlir::Value coord = fir::CoordinateOp::create(
+ builder, loc, builder.getRefType(memTy), op.getVarPtr(),
+ llvm::SmallVector<fir::IntOrValue, 1>{idxConst});
+
+ // Child for the `__address` member.
+ llvm::SmallVector<llvm::SmallVector<int64_t>> memberIdx = {{0}};
+ mlir::ArrayAttr newMembersAttr = builder.create2DI64ArrayAttr(memberIdx);
+ // Force CLOSE in USM paths so the pointer gets device-local placement
+ // when required by tests relying on USM + close semantics.
+ uint64_t mapTypeVal =
+ op.getMapType() |
+ llvm::to_underlying(
+ llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_CLOSE);
+ mlir::IntegerAttr mapTypeAttr = builder.getIntegerAttr(
+ builder.getIntegerType(64, /*isSigned=*/false), mapTypeVal);
+
+ mlir::omp::MapInfoOp memberMap = mlir::omp::MapInfoOp::create(
+ builder, loc, coord.getType(), coord,
+ mlir::TypeAttr::get(fir::unwrapRefType(coord.getType())), mapTypeAttr,
+ builder.getAttr<mlir::omp::VariableCaptureKindAttr>(
+ mlir::omp::VariableCaptureKind::ByRef),
+ /*varPtrPtr=*/mlir::Value{},
+ /*members=*/llvm::SmallVector<mlir::Value>{},
+ /*member_index=*/mlir::ArrayAttr{},
+ /*bounds=*/op.getBounds(),
+ /*mapperId=*/mlir::FlatSymbolRefAttr(),
+ /*name=*/op.getNameAttr(),
+ /*partial_map=*/builder.getBoolAttr(false));
+
+ // Rebuild the parent as a container with the `__address` member.
+ mlir::omp::MapInfoOp newParent = mlir::omp::MapInfoOp::create(
+ builder, op.getLoc(), op.getResult().getType(), op.getVarPtr(),
+ op.getVarTypeAttr(), mapTypeAttr, op.getMapCaptureTypeAttr(),
+ /*varPtrPtr=*/mlir::Value{},
+ /*members=*/llvm::SmallVector<mlir::Value>{memberMap},
+ /*member_index=*/newMembersAttr,
+ /*bounds=*/llvm::SmallVector<mlir::Value>{},
+ /*mapperId=*/mlir::FlatSymbolRefAttr(), op.getNameAttr(),
+ /*partial_map=*/builder.getBoolAttr(false));
+ op.replaceAllUsesWith(newParent.getResult());
+ op->erase();
+ return newParent;
+ }
+
mlir::omp::MapInfoOp genDescriptorMemberMaps(mlir::omp::MapInfoOp op,
fir::FirOpBuilder &builder,
mlir::Operation *target) {
@@ -727,11 +807,6 @@ class MapInfoFinalizationPass
argIface.getUseDeviceAddrBlockArgsStart() +
argIface.numUseDeviceAddrBlockArgs());
- mlir::MutableOperandRange useDevPtrMutableOpRange =
- targetDataOp.getUseDevicePtrVarsMutable();
- addOperands(useDevPtrMutableOpRange, target,
- argIface.getUseDevicePtrBlockArgsStart() +
- argIface.numUseDevicePtrBlockArgs());
} else if (auto targetOp = llvm::dyn_cast<mlir::omp::TargetOp>(target)) {
mlir::MutableOperandRange hasDevAddrMutableOpRange =
targetOp.getHasDeviceAddrVarsMutable();
@@ -1169,6 +1244,30 @@ class MapInfoFinalizationPass
genBoxcharMemberMap(op, builder);
});
+ // Expand type(C_PTR) only when unified_shared_memory is required,
+ // to ensure device-visible pointer size/behavior in USM scenarios
+ // without changing default expectations elsewhere.
+ func->walk([&](mlir::omp::MapInfoOp op) {
+ // Check module requires USM; otherwise, leave mappings untouched.
+ auto mod = func->getParentOfType<mlir::ModuleOp>();
+ bool hasUSM = false;
+ if (mod) {
+ if (mlir::Attribute reqAttr = mod->getAttr("omp.requires")) {
+ if (auto req =
+ mlir::dyn_cast<mlir::omp::ClauseRequiresAttr>(reqAttr)) {
+ hasUSM = mlir::omp::bitEnumContainsAll(
+ req.getValue(),
+ mlir::omp::ClauseRequires::unified_shared_memory);
+ }
+ }
+ }
+ if (!hasUSM)
+ return;
+
+ builder.setInsertionPoint(op);
+ genCptrMemberMap(op, builder);
+ });
+
func->walk([&](mlir::omp::MapInfoOp op) {
// TODO: Currently only supports a single user for the MapInfoOp. This
// is fine for the moment, as the Fortran frontend will generate a
>From 04e64fcb4f70172012b2a19a5481ef2880a2de8e Mon Sep 17 00:00:00 2001
From: Akash Banerjee <Akash.Banerjee at amd.com>
Date: Mon, 13 Oct 2025 20:58:32 +0100
Subject: [PATCH 2/3] Remove whitespace.
---
.../Optimizer/OpenMP/MapInfoFinalization.cpp | 31 +++++++++++--------
1 file changed, 18 insertions(+), 13 deletions(-)
diff --git a/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp b/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp
index 8fe4f1bab9ddf..2549745aa9b86 100644
--- a/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp
+++ b/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp
@@ -399,18 +399,24 @@ class MapInfoFinalizationPass
baseAddrIndex);
}
- /// Adjust the descriptor's map type such that we ensure the descriptor
- /// itself is present on device when needed, without changing the user's
- /// requested data mapping semantics for the underlying data.
- ///
- /// We conservatively transform descriptor mappings to `OMP_MAP_TO` (and
- /// preserve `IMPLICIT`/`ALWAYS` when present) for structured regions. The
- /// descriptor should live on device for indexing, bounds, etc., but we do
- /// not require, nor want, additional mapping semantics like `CLOSE` for the
- /// descriptor entry itself. `CLOSE` (and other user-provided flags) should
- /// apply to the base data entry that actually carries the pointee, which is
- /// generated separately as a member map. For `target exit`/`target update`
- /// we keep the original map type unchanged.
+ /// Adjusts the descriptor's map type. The main alteration that is done
+ /// currently is transforming the map type to `OMP_MAP_TO` where possible.
+ /// This is because we will always need to map the descriptor to device
+ /// (or at the very least it seems to be the case currently with the
+ /// current lowered kernel IR), as without the appropriate descriptor
+ /// information on the device there is a risk of the kernel IR
+ /// requesting for various data that will not have been copied to
+ /// perform things like indexing. This can cause segfaults and
+ /// memory access errors. However, we do not need this data mapped
+ /// back to the host from the device, as per the OpenMP spec we cannot alter
+ /// the data via resizing or deletion on the device. Discarding any
+ /// descriptor alterations via no map back is reasonable (and required
+ /// for certain segments of descriptor data like the type descriptor that are
+ /// global constants). This alteration is only inapplicable to `target exit`
+ /// and `target update` currently, and that's due to `target exit` not
+ /// allowing `to` mappings, and `target update` not allowing both `to` and
+ /// `from` simultaneously. We currently try to maintain the `implicit` flag
+ /// where necessary, although it does not seem strictly required.
unsigned long getDescriptorMapType(unsigned long mapTypeFlag,
mlir::Operation *target) {
using mapFlags = llvm::omp::OpenMPOffloadMappingFlags;
@@ -421,7 +427,6 @@ class MapInfoFinalizationPass
mapFlags flags = mapFlags::OMP_MAP_TO |
(mapFlags(mapTypeFlag) &
(mapFlags::OMP_MAP_IMPLICIT | mapFlags::OMP_MAP_ALWAYS));
-
// For unified_shared_memory, we additionally add `CLOSE` on the descriptor
// to ensure device-local placement where required by tests relying on USM +
// close semantics.
>From 6f95445c7ae16c0b2cb4873e68e00cd22ef165f4 Mon Sep 17 00:00:00 2001
From: Akash Banerjee <Akash.Banerjee at amd.com>
Date: Tue, 14 Oct 2025 16:01:16 +0100
Subject: [PATCH 3/3] Kept blockArgs for consistency. Added lowering test.
---
.../Optimizer/OpenMP/MapInfoFinalization.cpp | 5 +++++
.../cptr-usm-close-and-use-device-ptr.f90 | 21 +++++++++++++++++++
2 files changed, 26 insertions(+)
create mode 100644 flang/test/Lower/OpenMP/cptr-usm-close-and-use-device-ptr.f90
diff --git a/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp b/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp
index 2549745aa9b86..2e3af3bc6c2d2 100644
--- a/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp
+++ b/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp
@@ -812,6 +812,11 @@ class MapInfoFinalizationPass
argIface.getUseDeviceAddrBlockArgsStart() +
argIface.numUseDeviceAddrBlockArgs());
+ mlir::MutableOperandRange useDevPtrMutableOpRange =
+ targetDataOp.getUseDevicePtrVarsMutable();
+ addOperands(useDevPtrMutableOpRange, target,
+ argIface.getUseDevicePtrBlockArgsStart() +
+ argIface.numUseDevicePtrBlockArgs());
} else if (auto targetOp = llvm::dyn_cast<mlir::omp::TargetOp>(target)) {
mlir::MutableOperandRange hasDevAddrMutableOpRange =
targetOp.getHasDeviceAddrVarsMutable();
diff --git a/flang/test/Lower/OpenMP/cptr-usm-close-and-use-device-ptr.f90 b/flang/test/Lower/OpenMP/cptr-usm-close-and-use-device-ptr.f90
new file mode 100644
index 0000000000000..7fc30b431ad49
--- /dev/null
+++ b/flang/test/Lower/OpenMP/cptr-usm-close-and-use-device-ptr.f90
@@ -0,0 +1,21 @@
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=50 %s -o - | FileCheck %s
+!
+! Checks:
+! - C_PTR mappings expand to `__address` member with CLOSE under USM paths.
+! - use_device_ptr does not implicitly expand member operands in the clause.
+
+subroutine only_cptr_use_device_ptr
+ use iso_c_binding
+ type(c_ptr) :: cptr
+ integer :: i
+
+ !$omp target data use_device_ptr(cptr) map(tofrom: i)
+ !$omp end target data
+end subroutine
+
+! CHECK-LABEL: func.func @_QPonly_cptr_use_device_ptr()
+! CHECK: %[[I_MAP:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref<i32>, i32) map_clauses(tofrom) capture(ByRef) -> !fir.ref<i32> {name = "i"}
+! CHECK: %[[CP_MAP:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref<!fir.type<{{.*}}__builtin_c_ptr{{.*}}>>, !fir.type<{{.*}}__builtin_c_ptr{{.*}}>) map_clauses(return_param) capture(ByRef) -> !fir.ref<!fir.type<{{.*}}__builtin_c_ptr{{.*}}>>
+! CHECK: omp.target_data map_entries(%[[I_MAP]] : !fir.ref<i32>) use_device_ptr(%[[CP_MAP]] -> %{{.*}} : !fir.ref<!fir.type<{{.*}}__builtin_c_ptr{{.*}}>>) {
+! CHECK: omp.terminator
+! CHECK: }
More information about the flang-commits
mailing list