[flang-commits] [flang] [Flang][OpenMP] Fix USM `close` semantics and `use_device_ptr` (PR #163258)

Akash Banerjee via flang-commits flang-commits at lists.llvm.org
Tue Oct 14 08:09:28 PDT 2025


https://github.com/TIFitis updated https://github.com/llvm/llvm-project/pull/163258

>From 9dae79ddd306585275c5061f2a4af5a485008d77 Mon Sep 17 00:00:00 2001
From: Akash Banerjee <Akash.Banerjee at amd.com>
Date: Mon, 13 Oct 2025 20:43:06 +0100
Subject: [PATCH 1/3] [Flang][OpenMP] Fix USM `close` semantics and
 `use_device_ptr`

	- Add CLOSE map flag when USM is required.
	- use_device_ptr: prevent implicitly expanding member operands.
	- Fixes test offload/test/offloading/fortran/usm_map_close.f90.
---
 .../Optimizer/OpenMP/MapInfoFinalization.cpp  | 149 +++++++++++++++---
 1 file changed, 124 insertions(+), 25 deletions(-)

diff --git a/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp b/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp
index 260e5256f1520..8fe4f1bab9ddf 100644
--- a/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp
+++ b/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp
@@ -40,6 +40,7 @@
 #include "mlir/IR/SymbolTable.h"
 #include "mlir/Pass/Pass.h"
 #include "mlir/Support/LLVM.h"
+#include "llvm/ADT/BitmaskEnum.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/StringSet.h"
 #include "llvm/Frontend/OpenMP/OMPConstants.h"
@@ -398,24 +399,18 @@ class MapInfoFinalizationPass
                          baseAddrIndex);
   }
 
-  /// Adjusts the descriptor's map type. The main alteration that is done
-  /// currently is transforming the map type to `OMP_MAP_TO` where possible.
-  /// This is because we will always need to map the descriptor to device
-  /// (or at the very least it seems to be the case currently with the
-  /// current lowered kernel IR), as without the appropriate descriptor
-  /// information on the device there is a risk of the kernel IR
-  /// requesting for various data that will not have been copied to
-  /// perform things like indexing. This can cause segfaults and
-  /// memory access errors. However, we do not need this data mapped
-  /// back to the host from the device, as per the OpenMP spec we cannot alter
-  /// the data via resizing or deletion on the device. Discarding any
-  /// descriptor alterations via no map back is reasonable (and required
-  /// for certain segments of descriptor data like the type descriptor that are
-  /// global constants). This alteration is only inapplicable to `target exit`
-  /// and `target update` currently, and that's due to `target exit` not
-  /// allowing `to` mappings, and `target update` not allowing both `to` and
-  /// `from` simultaneously. We currently try to maintain the `implicit` flag
-  /// where necessary, although it does not seem strictly required.
+  /// Adjust the descriptor's map type such that we ensure the descriptor
+  /// itself is present on device when needed, without changing the user's
+  /// requested data mapping semantics for the underlying data.
+  ///
+  /// We conservatively transform descriptor mappings to `OMP_MAP_TO` (and
+  /// preserve `IMPLICIT`/`ALWAYS` when present) for structured regions. The
+  /// descriptor should live on device for indexing, bounds, etc., but we do
+  /// not require, nor want, additional mapping semantics like `CLOSE` for the
+  /// descriptor entry itself. `CLOSE` (and other user-provided flags) should
+  /// apply to the base data entry that actually carries the pointee, which is
+  /// generated separately as a member map. For `target exit`/`target update`
+  /// we keep the original map type unchanged.
   unsigned long getDescriptorMapType(unsigned long mapTypeFlag,
                                      mlir::Operation *target) {
     using mapFlags = llvm::omp::OpenMPOffloadMappingFlags;
@@ -425,8 +420,24 @@ class MapInfoFinalizationPass
 
     mapFlags flags = mapFlags::OMP_MAP_TO |
                      (mapFlags(mapTypeFlag) &
-                      (mapFlags::OMP_MAP_IMPLICIT | mapFlags::OMP_MAP_CLOSE |
-                       mapFlags::OMP_MAP_ALWAYS));
+                      (mapFlags::OMP_MAP_IMPLICIT | mapFlags::OMP_MAP_ALWAYS));
+    
+    // For unified_shared_memory, we additionally add `CLOSE` on the descriptor
+    // to ensure device-local placement where required by tests relying on USM +
+    // close semantics.
+    if (target) {
+      if (auto mod = target->getParentOfType<mlir::ModuleOp>()) {
+        if (mlir::Attribute reqAttr = mod->getAttr("omp.requires")) {
+          if (auto req =
+                  mlir::dyn_cast<mlir::omp::ClauseRequiresAttr>(reqAttr)) {
+            if (mlir::omp::bitEnumContainsAll(
+                    req.getValue(),
+                    mlir::omp::ClauseRequires::unified_shared_memory))
+              flags |= mapFlags::OMP_MAP_CLOSE;
+          }
+        }
+      }
+    }
     return llvm::to_underlying(flags);
   }
 
@@ -518,6 +529,75 @@ class MapInfoFinalizationPass
     return newMapInfoOp;
   }
 
+  // Expand mappings of type(C_PTR) to map their `__address` field explicitly
+  // as a single pointer-sized member (USM-gated at callsite). This helps in
+  // USM scenarios to ensure the pointer-sized mapping is used.
+  mlir::omp::MapInfoOp genCptrMemberMap(mlir::omp::MapInfoOp op,
+                                        fir::FirOpBuilder &builder) {
+    if (!op.getMembers().empty())
+      return op;
+
+    mlir::Type varTy = fir::unwrapRefType(op.getVarPtr().getType());
+    if (!mlir::isa<fir::RecordType>(varTy))
+      return op;
+    auto recTy = mlir::cast<fir::RecordType>(varTy);
+    // If not a builtin C_PTR record, skip.
+    if (!recTy.getName().ends_with("__builtin_c_ptr"))
+      return op;
+
+    // Find the index of the c_ptr address component named "__address".
+    int32_t fieldIdx = recTy.getFieldIndex("__address");
+    if (fieldIdx < 0)
+      return op;
+
+    mlir::Location loc = op.getVarPtr().getLoc();
+    mlir::Type memTy = recTy.getType(fieldIdx);
+    fir::IntOrValue idxConst =
+        mlir::IntegerAttr::get(builder.getI32Type(), fieldIdx);
+    mlir::Value coord = fir::CoordinateOp::create(
+        builder, loc, builder.getRefType(memTy), op.getVarPtr(),
+        llvm::SmallVector<fir::IntOrValue, 1>{idxConst});
+
+    // Child for the `__address` member.
+    llvm::SmallVector<llvm::SmallVector<int64_t>> memberIdx = {{0}};
+    mlir::ArrayAttr newMembersAttr = builder.create2DI64ArrayAttr(memberIdx);
+    // Force CLOSE in USM paths so the pointer gets device-local placement
+    // when required by tests relying on USM + close semantics.
+    uint64_t mapTypeVal =
+        op.getMapType() |
+        llvm::to_underlying(
+            llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_CLOSE);
+    mlir::IntegerAttr mapTypeAttr = builder.getIntegerAttr(
+        builder.getIntegerType(64, /*isSigned=*/false), mapTypeVal);
+
+    mlir::omp::MapInfoOp memberMap = mlir::omp::MapInfoOp::create(
+        builder, loc, coord.getType(), coord,
+        mlir::TypeAttr::get(fir::unwrapRefType(coord.getType())), mapTypeAttr,
+        builder.getAttr<mlir::omp::VariableCaptureKindAttr>(
+            mlir::omp::VariableCaptureKind::ByRef),
+        /*varPtrPtr=*/mlir::Value{},
+        /*members=*/llvm::SmallVector<mlir::Value>{},
+        /*member_index=*/mlir::ArrayAttr{},
+        /*bounds=*/op.getBounds(),
+        /*mapperId=*/mlir::FlatSymbolRefAttr(),
+        /*name=*/op.getNameAttr(),
+        /*partial_map=*/builder.getBoolAttr(false));
+
+    // Rebuild the parent as a container with the `__address` member.
+    mlir::omp::MapInfoOp newParent = mlir::omp::MapInfoOp::create(
+        builder, op.getLoc(), op.getResult().getType(), op.getVarPtr(),
+        op.getVarTypeAttr(), mapTypeAttr, op.getMapCaptureTypeAttr(),
+        /*varPtrPtr=*/mlir::Value{},
+        /*members=*/llvm::SmallVector<mlir::Value>{memberMap},
+        /*member_index=*/newMembersAttr,
+        /*bounds=*/llvm::SmallVector<mlir::Value>{},
+        /*mapperId=*/mlir::FlatSymbolRefAttr(), op.getNameAttr(),
+        /*partial_map=*/builder.getBoolAttr(false));
+    op.replaceAllUsesWith(newParent.getResult());
+    op->erase();
+    return newParent;
+  }
+
   mlir::omp::MapInfoOp genDescriptorMemberMaps(mlir::omp::MapInfoOp op,
                                                fir::FirOpBuilder &builder,
                                                mlir::Operation *target) {
@@ -727,11 +807,6 @@ class MapInfoFinalizationPass
                   argIface.getUseDeviceAddrBlockArgsStart() +
                       argIface.numUseDeviceAddrBlockArgs());
 
-      mlir::MutableOperandRange useDevPtrMutableOpRange =
-          targetDataOp.getUseDevicePtrVarsMutable();
-      addOperands(useDevPtrMutableOpRange, target,
-                  argIface.getUseDevicePtrBlockArgsStart() +
-                      argIface.numUseDevicePtrBlockArgs());
     } else if (auto targetOp = llvm::dyn_cast<mlir::omp::TargetOp>(target)) {
       mlir::MutableOperandRange hasDevAddrMutableOpRange =
           targetOp.getHasDeviceAddrVarsMutable();
@@ -1169,6 +1244,30 @@ class MapInfoFinalizationPass
         genBoxcharMemberMap(op, builder);
       });
 
+      // Expand type(C_PTR) only when unified_shared_memory is required,
+      // to ensure device-visible pointer size/behavior in USM scenarios
+      // without changing default expectations elsewhere.
+      func->walk([&](mlir::omp::MapInfoOp op) {
+        // Check module requires USM; otherwise, leave mappings untouched.
+        auto mod = func->getParentOfType<mlir::ModuleOp>();
+        bool hasUSM = false;
+        if (mod) {
+          if (mlir::Attribute reqAttr = mod->getAttr("omp.requires")) {
+            if (auto req =
+                    mlir::dyn_cast<mlir::omp::ClauseRequiresAttr>(reqAttr)) {
+              hasUSM = mlir::omp::bitEnumContainsAll(
+                  req.getValue(),
+                  mlir::omp::ClauseRequires::unified_shared_memory);
+            }
+          }
+        }
+        if (!hasUSM)
+          return;
+
+        builder.setInsertionPoint(op);
+        genCptrMemberMap(op, builder);
+      });
+
       func->walk([&](mlir::omp::MapInfoOp op) {
         // TODO: Currently only supports a single user for the MapInfoOp. This
         // is fine for the moment, as the Fortran frontend will generate a

>From 04e64fcb4f70172012b2a19a5481ef2880a2de8e Mon Sep 17 00:00:00 2001
From: Akash Banerjee <Akash.Banerjee at amd.com>
Date: Mon, 13 Oct 2025 20:58:32 +0100
Subject: [PATCH 2/3] Remove whitespace.

---
 .../Optimizer/OpenMP/MapInfoFinalization.cpp  | 31 +++++++++++--------
 1 file changed, 18 insertions(+), 13 deletions(-)

diff --git a/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp b/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp
index 8fe4f1bab9ddf..2549745aa9b86 100644
--- a/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp
+++ b/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp
@@ -399,18 +399,24 @@ class MapInfoFinalizationPass
                          baseAddrIndex);
   }
 
-  /// Adjust the descriptor's map type such that we ensure the descriptor
-  /// itself is present on device when needed, without changing the user's
-  /// requested data mapping semantics for the underlying data.
-  ///
-  /// We conservatively transform descriptor mappings to `OMP_MAP_TO` (and
-  /// preserve `IMPLICIT`/`ALWAYS` when present) for structured regions. The
-  /// descriptor should live on device for indexing, bounds, etc., but we do
-  /// not require, nor want, additional mapping semantics like `CLOSE` for the
-  /// descriptor entry itself. `CLOSE` (and other user-provided flags) should
-  /// apply to the base data entry that actually carries the pointee, which is
-  /// generated separately as a member map. For `target exit`/`target update`
-  /// we keep the original map type unchanged.
+  /// Adjusts the descriptor's map type. The main alteration that is done
+  /// currently is transforming the map type to `OMP_MAP_TO` where possible.
+  /// This is because we will always need to map the descriptor to device
+  /// (or at the very least it seems to be the case currently with the
+  /// current lowered kernel IR), as without the appropriate descriptor
+  /// information on the device there is a risk of the kernel IR
+  /// requesting for various data that will not have been copied to
+  /// perform things like indexing. This can cause segfaults and
+  /// memory access errors. However, we do not need this data mapped
+  /// back to the host from the device, as per the OpenMP spec we cannot alter
+  /// the data via resizing or deletion on the device. Discarding any
+  /// descriptor alterations via no map back is reasonable (and required
+  /// for certain segments of descriptor data like the type descriptor that are
+  /// global constants). This alteration is only inapplicable to `target exit`
+  /// and `target update` currently, and that's due to `target exit` not
+  /// allowing `to` mappings, and `target update` not allowing both `to` and
+  /// `from` simultaneously. We currently try to maintain the `implicit` flag
+  /// where necessary, although it does not seem strictly required.
   unsigned long getDescriptorMapType(unsigned long mapTypeFlag,
                                      mlir::Operation *target) {
     using mapFlags = llvm::omp::OpenMPOffloadMappingFlags;
@@ -421,7 +427,6 @@ class MapInfoFinalizationPass
     mapFlags flags = mapFlags::OMP_MAP_TO |
                      (mapFlags(mapTypeFlag) &
                       (mapFlags::OMP_MAP_IMPLICIT | mapFlags::OMP_MAP_ALWAYS));
-    
     // For unified_shared_memory, we additionally add `CLOSE` on the descriptor
     // to ensure device-local placement where required by tests relying on USM +
     // close semantics.

>From 6f95445c7ae16c0b2cb4873e68e00cd22ef165f4 Mon Sep 17 00:00:00 2001
From: Akash Banerjee <Akash.Banerjee at amd.com>
Date: Tue, 14 Oct 2025 16:01:16 +0100
Subject: [PATCH 3/3] Kept blockArgs for consistency. Added lowering test.

---
 .../Optimizer/OpenMP/MapInfoFinalization.cpp  |  5 +++++
 .../cptr-usm-close-and-use-device-ptr.f90     | 21 +++++++++++++++++++
 2 files changed, 26 insertions(+)
 create mode 100644 flang/test/Lower/OpenMP/cptr-usm-close-and-use-device-ptr.f90

diff --git a/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp b/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp
index 2549745aa9b86..2e3af3bc6c2d2 100644
--- a/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp
+++ b/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp
@@ -812,6 +812,11 @@ class MapInfoFinalizationPass
                   argIface.getUseDeviceAddrBlockArgsStart() +
                       argIface.numUseDeviceAddrBlockArgs());
 
+      mlir::MutableOperandRange useDevPtrMutableOpRange =
+          targetDataOp.getUseDevicePtrVarsMutable();
+      addOperands(useDevPtrMutableOpRange, target,
+                  argIface.getUseDevicePtrBlockArgsStart() +
+                      argIface.numUseDevicePtrBlockArgs());
     } else if (auto targetOp = llvm::dyn_cast<mlir::omp::TargetOp>(target)) {
       mlir::MutableOperandRange hasDevAddrMutableOpRange =
           targetOp.getHasDeviceAddrVarsMutable();
diff --git a/flang/test/Lower/OpenMP/cptr-usm-close-and-use-device-ptr.f90 b/flang/test/Lower/OpenMP/cptr-usm-close-and-use-device-ptr.f90
new file mode 100644
index 0000000000000..7fc30b431ad49
--- /dev/null
+++ b/flang/test/Lower/OpenMP/cptr-usm-close-and-use-device-ptr.f90
@@ -0,0 +1,21 @@
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=50 %s -o - | FileCheck %s
+!
+! Checks:
+!  - C_PTR mappings expand to `__address` member with CLOSE under USM paths.
+!  - use_device_ptr does not implicitly expand member operands in the clause.
+
+subroutine only_cptr_use_device_ptr
+  use iso_c_binding
+  type(c_ptr) :: cptr
+  integer :: i
+
+  !$omp target data use_device_ptr(cptr) map(tofrom: i)
+  !$omp end target data
+end subroutine
+
+! CHECK-LABEL: func.func @_QPonly_cptr_use_device_ptr()
+! CHECK: %[[I_MAP:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref<i32>, i32) map_clauses(tofrom) capture(ByRef) -> !fir.ref<i32> {name = "i"}
+! CHECK: %[[CP_MAP:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref<!fir.type<{{.*}}__builtin_c_ptr{{.*}}>>, !fir.type<{{.*}}__builtin_c_ptr{{.*}}>) map_clauses(return_param) capture(ByRef) -> !fir.ref<!fir.type<{{.*}}__builtin_c_ptr{{.*}}>>
+! CHECK: omp.target_data map_entries(%[[I_MAP]] : !fir.ref<i32>) use_device_ptr(%[[CP_MAP]] -> %{{.*}} : !fir.ref<!fir.type<{{.*}}__builtin_c_ptr{{.*}}>>) {
+! CHECK:   omp.terminator
+! CHECK: }



More information about the flang-commits mailing list