[llvm-branch-commits] [flang] [llvm] [mlir] [MLIR][OpenMP] Post-translate declare-target USM indirection in OpenMPIRBuilder (PR #194291)

Kareem Ergawy via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Sun Apr 26 23:46:59 PDT 2026


https://github.com/ergawy updated https://github.com/llvm/llvm-project/pull/194291

>From 504930b655af9a49c0596b2aa68f1b763a599bab Mon Sep 17 00:00:00 2001
From: Vitaly Buka <vitalybuka at google.com>
Date: Sun, 26 Apr 2026 22:17:27 -0700
Subject: [PATCH 1/5] [X86] Remove update_mir_test_checks.py NOTE (#194278)

The test checks printer output, not MIR.
The NOTE line was probably copy-pasted in #193107 from another test.
---
 llvm/test/CodeGen/X86/machine-block-hash.mir | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/test/CodeGen/X86/machine-block-hash.mir b/llvm/test/CodeGen/X86/machine-block-hash.mir
index 313be7cc5ad23..9728ee7c662f5 100644
--- a/llvm/test/CodeGen/X86/machine-block-hash.mir
+++ b/llvm/test/CodeGen/X86/machine-block-hash.mir
@@ -1,4 +1,3 @@
-# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
 # RUN: llc -mtriple=x86_64-pc-linux -passes="print<machine-block-hash>" -filetype=null %s 2>&1 | FileCheck %s --check-prefix=HASH
 
 # FIXME: `stable_hash_combine` is not stable across different endianness.

>From 1f9c611b0997205568f226e0f0ecd22cb96e4f77 Mon Sep 17 00:00:00 2001
From: Madhur Amilkanthwar <madhura at nvidia.com>
Date: Mon, 27 Apr 2026 11:10:50 +0530
Subject: [PATCH 2/5] [LoopFusion][NFC] UTC gen some tests (#193755)

Some variables need to be renamed because UTC normalizes IR value names.
Also, remove the dead variables `%M` and `%N` from
`double_loop_nest_inner_guard.ll`.
---
 .../double_loop_nest_inner_guard.ll           |  87 +++++++------
 .../triple_loop_nest_inner_guard.ll           | 119 ++++++++++--------
 2 files changed, 118 insertions(+), 88 deletions(-)

diff --git a/llvm/test/Transforms/LoopFusion/double_loop_nest_inner_guard.ll b/llvm/test/Transforms/LoopFusion/double_loop_nest_inner_guard.ll
index f6eab83b5d154..a9f066b4657ad 100644
--- a/llvm/test/Transforms/LoopFusion/double_loop_nest_inner_guard.ll
+++ b/llvm/test/Transforms/LoopFusion/double_loop_nest_inner_guard.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
 ; RUN: opt -S -passes=loop-fusion < %s 2>&1 | FileCheck %s
 
 ; Verify that LoopFusion can fuse two double-loop nests with guarded inner
@@ -7,42 +8,54 @@
 @b = common global [10 x [10 x i32]] zeroinitializer
 @c = common global [10 x [10 x i32]] zeroinitializer
 
-; CHECK-LABEL: @double_loop_nest_inner_guard
-; CHECK: br i1 %{{.*}}, label %[[OUTER_PH:outer1.ph]], label %[[FUNC_EXIT:func_exit]]
-
-; CHECK: [[OUTER_PH]]:
-; CHECK: br label %[[OUTER_BODY_INNER_GUARD:outer1.body.inner.guard]]
-
-; CHECK: [[OUTER_BODY_INNER_GUARD]]:
-; CHECK: br i1 %{{.*}}, label %[[INNER_PH:inner1.ph]], label %[[OUTER_LATCH:outer2.latch]]
-
-; CHECK: [[INNER_PH]]:
-; CHECK-NEXT: br label %[[INNER_BODY:inner1.body]]
-
-; CHECK: [[INNER_BODY]]:
-; First loop body.
-; CHECK: load
-; CHECK: add
-; CHECK: store
-; Second loop body.
-; CHECK: load
-; CHECK: mul
-; CHECK: store
-; CHECK: br i1 %{{.*}}, label %[[INNER_EXIT:inner2.exit]], label %[[INNER_BODY:inner1.body]]
-
-; CHECK: [[INNER_EXIT]]:
-; CHECK-NEXT: br label %[[OUTER_LATCH:outer2.latch]]
-
-; CHECK: [[OUTER_LATCH]]:
-; CHECK: br i1 %{{.*}}, label %[[OUTER_EXIT:outer2.exit]], label %[[OUTER_BODY_INNER_GUARD]]
-
-; CHECK: [[OUTER_EXIT]]:
-; CHECK-NEXT: br label %[[FUNC_EXIT:func_exit]]
-
-; CHECK: [[FUNC_EXIT]]:
-; CHECK-NEXT: ret
-
-define i32 @double_loop_nest_inner_guard(i32 %m, i32 %n, i32 %M, i32 %N) {
+define i32 @double_loop_nest_inner_guard(i32 %m, i32 %n) {
+; CHECK-LABEL: define i32 @double_loop_nest_inner_guard(
+; CHECK-SAME: i32 [[M:%.*]], i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[CMP63:%.*]] = icmp sgt i32 [[M]], 0
+; CHECK-NEXT:    br i1 [[CMP63]], label %[[OUTER1_PH:.*]], label %[[FUNC_EXIT:.*]]
+; CHECK:       [[OUTER1_PH]]:
+; CHECK-NEXT:    [[CMP261:%.*]] = icmp sgt i32 [[N]], 0
+; CHECK-NEXT:    [[WIDE_TRIP_COUNT76:%.*]] = zext i32 [[M]] to i64
+; CHECK-NEXT:    [[WIDE_TRIP_COUNT72:%.*]] = zext i32 [[N]] to i64
+; CHECK-NEXT:    br label %[[OUTER1_BODY_INNER_GUARD:.*]]
+; CHECK:       [[OUTER1_BODY_INNER_GUARD]]:
+; CHECK-NEXT:    [[IV74:%.*]] = phi i64 [ 0, %[[OUTER1_PH]] ], [ [[IV_NEXT75:%.*]], %[[OUTER2_LATCH:.*]] ]
+; CHECK-NEXT:    [[IV66:%.*]] = phi i64 [ [[IV_NEXT67:%.*]], %[[OUTER2_LATCH]] ], [ 0, %[[OUTER1_PH]] ]
+; CHECK-NEXT:    br i1 [[CMP261]], label %[[INNER1_PH:.*]], label %[[OUTER2_LATCH]]
+; CHECK:       [[INNER1_PH]]:
+; CHECK-NEXT:    br label %[[INNER1_BODY:.*]]
+; CHECK:       [[INNER1_BODY]]:
+; CHECK-NEXT:    [[IV70:%.*]] = phi i64 [ [[IV_NEXT71:%.*]], %[[INNER1_BODY]] ], [ 0, %[[INNER1_PH]] ]
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[INNER1_BODY]] ], [ 0, %[[INNER1_PH]] ]
+; CHECK-NEXT:    [[IDX6:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr @a, i64 0, i64 [[IV74]], i64 [[IV70]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[IDX6]], align 4
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 2
+; CHECK-NEXT:    [[IDX10:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr @b, i64 0, i64 [[IV74]], i64 [[IV70]]
+; CHECK-NEXT:    store i32 [[ADD]], ptr [[IDX10]], align 4
+; CHECK-NEXT:    [[IV_NEXT71]] = add nuw nsw i64 [[IV70]], 1
+; CHECK-NEXT:    [[EXITCOND73:%.*]] = icmp eq i64 [[IV_NEXT71]], [[WIDE_TRIP_COUNT72]]
+; CHECK-NEXT:    [[IDX27:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr @a, i64 0, i64 [[IV66]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[IDX27]], align 4
+; CHECK-NEXT:    [[MUL:%.*]] = shl nsw i32 [[TMP1]], 1
+; CHECK-NEXT:    [[IDX31:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr @c, i64 0, i64 [[IV66]], i64 [[IV]]
+; CHECK-NEXT:    store i32 [[MUL]], ptr [[IDX31]], align 4
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[WIDE_TRIP_COUNT72]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[INNER2_EXIT:.*]], label %[[INNER1_BODY]]
+; CHECK:       [[INNER2_EXIT]]:
+; CHECK-NEXT:    br label %[[OUTER2_LATCH]]
+; CHECK:       [[OUTER2_LATCH]]:
+; CHECK-NEXT:    [[IV_NEXT75]] = add nuw nsw i64 [[IV74]], 1
+; CHECK-NEXT:    [[EXITCOND77:%.*]] = icmp eq i64 [[IV_NEXT75]], [[WIDE_TRIP_COUNT76]]
+; CHECK-NEXT:    [[IV_NEXT67]] = add nuw nsw i64 [[IV66]], 1
+; CHECK-NEXT:    [[EXITCOND69:%.*]] = icmp eq i64 [[IV_NEXT67]], [[WIDE_TRIP_COUNT76]]
+; CHECK-NEXT:    br i1 [[EXITCOND69]], label %[[OUTER2_EXIT:.*]], label %[[OUTER1_BODY_INNER_GUARD]]
+; CHECK:       [[OUTER2_EXIT]]:
+; CHECK-NEXT:    br label %[[FUNC_EXIT]]
+; CHECK:       [[FUNC_EXIT]]:
+; CHECK-NEXT:    ret i32 0
+;
 entry:
   %cmp63 = icmp sgt i32 %m, 0
   br i1 %cmp63, label %outer1.ph, label %func_exit
@@ -112,5 +125,5 @@ outer2.exit:
   br label %func_exit
 
 func_exit:
-  ret i32 undef
+  ret i32 0
 }
diff --git a/llvm/test/Transforms/LoopFusion/triple_loop_nest_inner_guard.ll b/llvm/test/Transforms/LoopFusion/triple_loop_nest_inner_guard.ll
index 2ad211685aa47..4fde663289bcb 100644
--- a/llvm/test/Transforms/LoopFusion/triple_loop_nest_inner_guard.ll
+++ b/llvm/test/Transforms/LoopFusion/triple_loop_nest_inner_guard.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
 ; RUN: opt -S -passes=loop-fusion < %s 2>&1 | FileCheck %s
 
 ; Verify that LoopFusion can fuse two triple-loop nests with guarded inner
@@ -7,64 +8,80 @@
 @b = common global [10 x [10 x [10 x i32]]] zeroinitializer
 @c = common global [10 x [10 x [10 x i32]]] zeroinitializer
 
-; CHECK-LABEL: @triple_loop_nest_inner_guard
-; CHECK: br i1 %{{.*}}, label %[[OUTER_PH:outer1.ph]], label %[[FUNC_EXIT:func_exit]]
-
-; CHECK: [[OUTER_PH]]:
-; CHECK: br label %[[OUTER_BODY_MIDDLE_GUARD:outer1.body.middle1.guard]]
-
-; CHECK: [[OUTER_BODY_MIDDLE_GUARD]]:
-; CHECK: br i1 %{{.*}}, label %[[MIDDLE_PH:middle1.ph]], label %[[OUTER_LATCH:outer2.latch]]
-
-; CHECK: [[MIDDLE_PH]]:
-; CHECK-NEXT: br label %[[MIDDLE_BODY_INNER_GUARD:middle1.body.inner1.guard]]
-
-; CHECK: [[MIDDLE_BODY_INNER_GUARD]]:
-; CHECK: br i1 %{{.*}}, label %[[INNER_PH:inner1.ph]], label %[[MIDDLE_LATCH:middle2.latch]]
-
-; CHECK: [[INNER_PH]]:
-; CHECK-NEXT: br label %[[INNER_BODY:inner1.body]]
-
-; CHECK: [[INNER_BODY]]:
-; First loop body.
-; CHECK: load
-; CHECK: add
-; CHECK: store
-; Second loop body.
-; CHECK: load
-; CHECK: mul
-; CHECK: store
-; CHECK: br i1 %{{.*}}, label %[[INNER_EXIT:inner2.exit]], label %[[INNER_BODY:inner1.body]]
-
-; CHECK: [[INNER_EXIT]]:
-; CHECK-NEXT: br label %[[MIDDLE_LATCH:middle2.latch]]
-
-; CHECK: [[MIDDLE_LATCH]]:
-; CHECK: br i1 %{{.*}}, label %[[MIDDLE_EXIT:middle2.exit]], label %[[MIDDLE_BODY_INNER_GUARD]]
-
-; CHECK: [[MIDDLE_EXIT]]:
-; CHECK-NEXT: br label %[[OUTER_LATCH:outer2.latch]]
-
-; CHECK: [[OUTER_LATCH]]:
-; CHECK: br i1 %{{.*}}, label %[[OUTER_EXIT:outer2.exit]], label %[[OUTER_BODY_MIDDLE_GUARD]]
-
-; CHECK: [[OUTER_EXIT]]:
-; CHECK-NEXT: br label %[[FUNC_EXIT:func_exit]]
-
-; CHECK: [[FUNC_EXIT]]:
-; CHECK-NEXT: ret
-
-define i32 @triple_loop_nest_inner_guard(i32 %m, i32 %n, i32 %M, i32 %N) {
+define i32 @triple_loop_nest_inner_guard(i32 %m, i32 %n, i32 %mm) {
+; CHECK-LABEL: define i32 @triple_loop_nest_inner_guard(
+; CHECK-SAME: i32 [[M:%.*]], i32 [[N:%.*]], i32 [[MM:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[CMP101:%.*]] = icmp sgt i32 [[M]], 0
+; CHECK-NEXT:    br i1 [[CMP101]], label %[[OUTER1_PH:.*]], label %[[FUNC_EXIT:.*]]
+; CHECK:       [[OUTER1_PH]]:
+; CHECK-NEXT:    [[CMP298:%.*]] = icmp sgt i32 [[N]], 0
+; CHECK-NEXT:    [[CMP696:%.*]] = icmp sgt i32 [[MM]], 0
+; CHECK-NEXT:    [[WIDE_TRIP_COUNT122:%.*]] = zext i32 [[M]] to i64
+; CHECK-NEXT:    [[WIDE_TRIP_COUNT118:%.*]] = zext i32 [[N]] to i64
+; CHECK-NEXT:    [[WIDE_TRIP_COUNT114:%.*]] = zext i32 [[MM]] to i64
+; CHECK-NEXT:    br label %[[OUTER1_BODY_MIDDLE1_GUARD:.*]]
+; CHECK:       [[OUTER1_BODY_MIDDLE1_GUARD]]:
+; CHECK-NEXT:    [[IV120:%.*]] = phi i64 [ 0, %[[OUTER1_PH]] ], [ [[IV_NEXT121:%.*]], %[[OUTER2_LATCH:.*]] ]
+; CHECK-NEXT:    [[IV108:%.*]] = phi i64 [ [[IV_NEXT109:%.*]], %[[OUTER2_LATCH]] ], [ 0, %[[OUTER1_PH]] ]
+; CHECK-NEXT:    br i1 [[CMP298]], label %[[MIDDLE1_PH:.*]], label %[[OUTER2_LATCH]]
+; CHECK:       [[MIDDLE1_PH]]:
+; CHECK-NEXT:    br label %[[MIDDLE1_BODY_INNER1_GUARD:.*]]
+; CHECK:       [[MIDDLE1_BODY_INNER1_GUARD]]:
+; CHECK-NEXT:    [[IV116:%.*]] = phi i64 [ [[IV_NEXT117:%.*]], %[[MIDDLE2_LATCH:.*]] ], [ 0, %[[MIDDLE1_PH]] ]
+; CHECK-NEXT:    [[IV104:%.*]] = phi i64 [ [[IV_NEXT105:%.*]], %[[MIDDLE2_LATCH]] ], [ 0, %[[MIDDLE1_PH]] ]
+; CHECK-NEXT:    br i1 [[CMP696]], label %[[INNER1_PH:.*]], label %[[MIDDLE2_LATCH]]
+; CHECK:       [[INNER1_PH]]:
+; CHECK-NEXT:    br label %[[INNER1_BODY:.*]]
+; CHECK:       [[INNER1_BODY]]:
+; CHECK-NEXT:    [[IV112:%.*]] = phi i64 [ [[IV_NEXT113:%.*]], %[[INNER1_BODY]] ], [ 0, %[[INNER1_PH]] ]
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[INNER1_BODY]] ], [ 0, %[[INNER1_PH]] ]
+; CHECK-NEXT:    [[IDX12:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr @a, i64 0, i64 [[IV120]], i64 [[IV116]], i64 [[IV112]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[IDX12]], align 4
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 2
+; CHECK-NEXT:    [[IDX18:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr @b, i64 0, i64 [[IV120]], i64 [[IV116]], i64 [[IV112]]
+; CHECK-NEXT:    store i32 [[ADD]], ptr [[IDX18]], align 4
+; CHECK-NEXT:    [[IV_NEXT113]] = add nuw nsw i64 [[IV112]], 1
+; CHECK-NEXT:    [[EXITCOND115:%.*]] = icmp eq i64 [[IV_NEXT113]], [[WIDE_TRIP_COUNT114]]
+; CHECK-NEXT:    [[IDX45:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr @a, i64 0, i64 [[IV108]], i64 [[IV104]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[IDX45]], align 4
+; CHECK-NEXT:    [[MUL:%.*]] = shl nsw i32 [[TMP1]], 1
+; CHECK-NEXT:    [[IDX51:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr @c, i64 0, i64 [[IV108]], i64 [[IV104]], i64 [[IV]]
+; CHECK-NEXT:    store i32 [[MUL]], ptr [[IDX51]], align 4
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[WIDE_TRIP_COUNT114]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[INNER2_EXIT:.*]], label %[[INNER1_BODY]]
+; CHECK:       [[INNER2_EXIT]]:
+; CHECK-NEXT:    br label %[[MIDDLE2_LATCH]]
+; CHECK:       [[MIDDLE2_LATCH]]:
+; CHECK-NEXT:    [[IV_NEXT117]] = add nuw nsw i64 [[IV116]], 1
+; CHECK-NEXT:    [[EXITCOND119:%.*]] = icmp eq i64 [[IV_NEXT117]], [[WIDE_TRIP_COUNT118]]
+; CHECK-NEXT:    [[IV_NEXT105]] = add nuw nsw i64 [[IV104]], 1
+; CHECK-NEXT:    [[EXITCOND107:%.*]] = icmp eq i64 [[IV_NEXT105]], [[WIDE_TRIP_COUNT118]]
+; CHECK-NEXT:    br i1 [[EXITCOND107]], label %[[MIDDLE2_EXIT:.*]], label %[[MIDDLE1_BODY_INNER1_GUARD]]
+; CHECK:       [[MIDDLE2_EXIT]]:
+; CHECK-NEXT:    br label %[[OUTER2_LATCH]]
+; CHECK:       [[OUTER2_LATCH]]:
+; CHECK-NEXT:    [[IV_NEXT121]] = add nuw nsw i64 [[IV120]], 1
+; CHECK-NEXT:    [[EXITCOND123:%.*]] = icmp eq i64 [[IV_NEXT121]], [[WIDE_TRIP_COUNT122]]
+; CHECK-NEXT:    [[IV_NEXT109]] = add nuw nsw i64 [[IV108]], 1
+; CHECK-NEXT:    [[EXITCOND111:%.*]] = icmp eq i64 [[IV_NEXT109]], [[WIDE_TRIP_COUNT122]]
+; CHECK-NEXT:    br i1 [[EXITCOND111]], label %[[OUTER2_EXIT:.*]], label %[[OUTER1_BODY_MIDDLE1_GUARD]]
+; CHECK:       [[OUTER2_EXIT]]:
+; CHECK-NEXT:    br label %[[FUNC_EXIT]]
+; CHECK:       [[FUNC_EXIT]]:
+; CHECK-NEXT:    ret i32 0
+;
 entry:
   %cmp101 = icmp sgt i32 %m, 0
   br i1 %cmp101, label %outer1.ph, label %func_exit
 
 outer1.ph:
   %cmp298 = icmp sgt i32 %n, 0
-  %cmp696 = icmp sgt i32 %M, 0
+  %cmp696 = icmp sgt i32 %mm, 0
   %wide.trip.count122 = zext i32 %m to i64
   %wide.trip.count118 = zext i32 %n to i64
-  %wide.trip.count114 = zext i32 %M to i64
+  %wide.trip.count114 = zext i32 %mm to i64
   br label %outer1.body.middle1.guard
 
 outer1.body.middle1.guard:
@@ -156,5 +173,5 @@ outer2.exit:
   br label %func_exit
 
 func_exit:
-  ret i32 undef
+  ret i32 0
 }

>From 68e696825b203bae8732a4d5856133ffbf2a4dcb Mon Sep 17 00:00:00 2001
From: ergawy <kareem.ergawy at amd.com>
Date: Thu, 16 Apr 2026 08:00:28 -0500
Subject: [PATCH 3/5] [Flang][OpenMP] Clear close on descriptor members for box
 parents in USM

Extend the MapInfoFinalization walk introduced in #185330 so
parent/member close consistency is enforced whenever
unified_shared_memory is in effect, not only when the parent map's
variable is a fir.RecordType. Allocatable (box) roots expand to member
maps the same way as derived-type instances; getDescriptorMapType may
add OMP_MAP_CLOSE to implicit descriptor members while the parent map
does not set close, which led to bad device behavior under
-fopenmp-force-usm with multiple mapped allocatables.

Co-authored-by: Composer (Cursor) <ai at cursor.com>
---
 .../Optimizer/OpenMP/MapInfoFinalization.cpp  | 18 +++----
 .../omp-map-info-finalization-usm.fir         | 24 ++++-----
 .../usm-box-parent-descriptor-close.f90       | 49 +++++++++++++++++++
 3 files changed, 67 insertions(+), 24 deletions(-)
 create mode 100644 offload/test/offloading/fortran/usm-box-parent-descriptor-close.f90

diff --git a/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp b/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp
index bc0f96478ddf4..741d3174c29ee 100644
--- a/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp
+++ b/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp
@@ -1186,21 +1186,15 @@ class MapInfoFinalizationPass
       });
 
       func->walk([&](mlir::omp::MapInfoOp op) {
-        // If a record type is not mapped with the `close` modifier while some
-        // of its members are (e.g. descriptor maps), then in USM mode, the
-        // memory for the record will be allocated in unified memory while the
-        // the members might be allocated in device memory. This creates an
-        // inconsistent map for the record type where some of its members are
-        // allocated in different address spaces.
-        //
-        // This fixes this issue by taking a conservative approach and removing
-        // the `close` flag from members if it is not used for mapping the
-        // parent record.
+        // If a parent map is not mapped with the `close` modifier while some of
+        // its members are (e.g. implicit descriptor maps from
+        // getDescriptorMapType in USM), those members must not keep `close` —
+        // otherwise the runtime can treat unified and device placement
+        // inconsistently.
         if (op.getMembers().empty())
           return;
 
-        mlir::Type varTy = fir::unwrapRefType(op.getVarPtr().getType());
-        if (!mlir::isa<fir::RecordType>(varTy))
+        if (!moduleRequiresUSM(op->getParentOfType<mlir::ModuleOp>()))
           return;
 
         auto mapFlag = op.getMapType();
diff --git a/flang/test/Transforms/omp-map-info-finalization-usm.fir b/flang/test/Transforms/omp-map-info-finalization-usm.fir
index 5f5a0d7213719..24f08474ed1d5 100644
--- a/flang/test/Transforms/omp-map-info-finalization-usm.fir
+++ b/flang/test/Transforms/omp-map-info-finalization-usm.fir
@@ -1,24 +1,24 @@
 // RUN: fir-opt --split-input-file --omp-map-info-finalization %s | FileCheck %s
 
 // Test that the 'close' map flag is cleared from member maps if the parent map
-// (derived type) does not have the 'close' flag. This typically happens in
-// Unified Shared Memory (USM) mode where the parent is in USM (no close) but
-// members (like descriptors) might have been initially tagged with close.
-
+// does not have the 'close' flag. This typically happens in USM mode.
+// In this test, the parent is a fir.array of derived type.
 module attributes {omp.requires = #omp<clause_requires unified_shared_memory>} {
-  func.func @test_usm_close_flag_cleanup(%arg0: !fir.ref<!fir.type<t{a:!fir.box<!fir.heap<!fir.array<?xf32>>>}>>) {
-    %map = omp.map.info var_ptr(%arg0 : !fir.ref<!fir.type<t{a:!fir.box<!fir.heap<!fir.array<?xf32>>>}>>, !fir.type<t{a:!fir.box<!fir.heap<!fir.array<?xf32>>>}>) map_clauses(to) capture(ByRef) -> !fir.ref<!fir.type<t{a:!fir.box<!fir.heap<!fir.array<?xf32>>>}>> {name = "parent"}
+  func.func @test_usm_close_flag_cleanup_array(%arg0: !fir.ref<!fir.array<10x!fir.type<t{a:!fir.box<!fir.heap<!fir.array<?xf32>>>}>>>) {
+    // The implicit descriptor map for the member, with 'close'
+    %member = omp.map.info var_ptr(%arg0 : !fir.ref<!fir.array<10x!fir.type<t{a:!fir.box<!fir.heap<!fir.array<?xf32>>>}>>>, !fir.array<10x!fir.type<t{a:!fir.box<!fir.heap<!fir.array<?xf32>>>}>>) map_clauses(always, close, descriptor, to) capture(ByRef) -> !fir.ref<!fir.array<10x!fir.type<t{a:!fir.box<!fir.heap<!fir.array<?xf32>>>}>>> {name = "parent.a.implicit_map"}
+
+    // The parent map, which is a fir.array, without 'close'
+    %map = omp.map.info var_ptr(%arg0 : !fir.ref<!fir.array<10x!fir.type<t{a:!fir.box<!fir.heap<!fir.array<?xf32>>>}>>>, !fir.array<10x!fir.type<t{a:!fir.box<!fir.heap<!fir.array<?xf32>>>}>>) map_clauses(to) capture(ByRef) members(%member : [0] : !fir.ref<!fir.array<10x!fir.type<t{a:!fir.box<!fir.heap<!fir.array<?xf32>>>}>>>) -> !fir.ref<!fir.array<10x!fir.type<t{a:!fir.box<!fir.heap<!fir.array<?xf32>>>}>>> {name = "parent"}
 
-    omp.target map_entries(%map -> %arg1 : !fir.ref<!fir.type<t{a:!fir.box<!fir.heap<!fir.array<?xf32>>>}>>) {
-      // Simulate usage to trigger implicit map addition
-      %1 = hlfir.designate %arg1{"a"} : (!fir.ref<!fir.type<t{a:!fir.box<!fir.heap<!fir.array<?xf32>>>}>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+    omp.target map_entries(%map -> %arg1 : !fir.ref<!fir.array<10x!fir.type<t{a:!fir.box<!fir.heap<!fir.array<?xf32>>>}>>>) {
       omp.terminator
     }
     return
   }
 }
 
-// CHECK-LABEL: func.func @test_usm_close_flag_cleanup
-// CHECK: %[[MEMBER:.*]] = omp.map.info {{.*}} map_clauses(always, to) {{.*}} {name = "parent.a.implicit_map"}
-// CHECK: %[[PARENT:.*]] = omp.map.info {{.*}} map_clauses(to) {{.*}} members(%[[MEMBER]], {{.*}}) {{.*}} {name = "parent", {{.*}}}
+// CHECK-LABEL: func.func @test_usm_close_flag_cleanup_array
+// CHECK: %[[MEMBER:.*]] = omp.map.info {{.*}} map_clauses(always, to) {{.*}} {name = "parent{{.*}}implicit_map"}
+// CHECK: %[[PARENT:.*]] = omp.map.info {{.*}} map_clauses(to) {{.*}} members(%[[MEMBER]]{{.*}}) {{.*}} {name = "parent"}
 // CHECK-NOT: close
diff --git a/offload/test/offloading/fortran/usm-box-parent-descriptor-close.f90 b/offload/test/offloading/fortran/usm-box-parent-descriptor-close.f90
new file mode 100644
index 0000000000000..6bacc1b4a761b
--- /dev/null
+++ b/offload/test/offloading/fortran/usm-box-parent-descriptor-close.f90
@@ -0,0 +1,49 @@
+! Test for PR fixing close flag on descriptor members for box parents in USM
+! REQUIRES: flang, amdgpu
+! RUN: %libomptarget-compile-fortran-generic -fopenmp-force-usm
+! RUN: env LIBOMPTARGET_INFO=16 HSA_XNACK=1 %libomptarget-run-generic 2>&1 | %fcheck-generic
+
+module m
+    implicit none
+    integer :: ng
+    type :: gt
+        integer :: k
+    end type
+    type(gt), allocatable :: g(:)
+    !$omp declare target(ng, g)
+    type :: f
+        real, allocatable :: a(:)
+    end type
+end module m
+
+program r
+    use m
+    implicit none
+    integer :: i
+    type(f), target :: u(2)
+    integer :: ig
+    real, contiguous, pointer :: p(:)
+
+    ng = 1
+    allocate(g(1))
+    g(1)%k = 1
+
+    do i = 1, 2
+        allocate(u(i)%a(1), source=0.0)
+    end do
+    u(1)%a(1) = 1.0
+    u(2)%a(1) = -1.0
+
+    !$omp target enter data map(to: g, ng, u(1)%a, u(2)%a)
+
+    !$omp target teams distribute private(ig, p)
+    do ig = 1, ng
+        p(1:1) => u(2)%a(1:1)
+        p(1) = 3.14
+    end do
+    !$omp end target teams distribute
+
+    ! CHECK: PluginInterface device {{[0-9]+}} info: Launching kernel
+    ! CHECK: Result: 3.14
+    print *, "Result: ", u(2)%a(1)
+end program r

>From be68b10cbf4d12a2beed2d6cef1653ed2abf81d3 Mon Sep 17 00:00:00 2001
From: ergawy <kareem.ergawy at amd.com>
Date: Thu, 23 Apr 2026 00:39:22 -0500
Subject: [PATCH 4/5] [MLIR][OpenMP] Post-translate declare-target USM
 indirection in OpenMPIRBuilder

When lowering OpenMP to LLVM IR for the target device, record pairs of the
`declare target` device global and the OMPIRBuilder "ref" pointer global
(used for unified shared memory) via `OpenMPIRBuilder`. During
`OpenMPIRBuilder::finalize`, run a post-pass that rewrites the remaining uses
of the original global to load from the ref global and cast the loaded pointer
back to the type of the value it replaces (one shared path handles both
`ConstantExpr` addrspace/bitcast chains and direct instruction uses).

This follows what is done by clang for similar cases:
https://reviews.llvm.org/D63108.

Co-authored-by: Composer
Co-authored-by: Gemini Pro
---
 .../llvm/Frontend/OpenMP/OMPIRBuilder.h       | 20 ++++++
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp     | 68 +++++++++++++++++++
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      | 14 +++-
 .../omptarget-declare-target-usm-ref-ptr.mlir | 24 +++++++
 .../fortran/declare-target-usm-ref-ptr.f90    | 39 +++++++++++
 5 files changed, 162 insertions(+), 3 deletions(-)
 create mode 100644 mlir/test/Target/LLVMIR/omptarget-declare-target-usm-ref-ptr.mlir
 create mode 100644 offload/test/offloading/fortran/declare-target-usm-ref-ptr.f90

diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index dbd8f0c6b8927..3a184da7a0855 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -538,6 +538,26 @@ class OpenMPIRBuilder {
   /// used in the OpenMPIRBuilder generated from OMPKinds.def.
   LLVM_ABI void initialize();
 
+  SmallVector<std::pair<GlobalVariable *, GlobalVariable *>>
+      declareTargetUsmRefPtrPairs;
+
+  /// Replaces all uses of `origGV` with a load from `refPtrGV`.
+  /// This is used for OpenMP `declare target` global variables mapped under
+  /// Unified Shared Memory (USM) where access is routed through a reference
+  /// pointer.
+  Error rewriteDeclareTargetGlobalUsesWithRefPtr(GlobalVariable *origGV,
+                                                 GlobalVariable *refPtrGV);
+
+  /// Registers a mapping between an original `declare target` global variable
+  /// and the corresponding reference pointer global variable generated for
+  /// Unified Shared Memory (USM).
+  void addDeclareTargetUsmRefPair(GlobalVariable *orig, GlobalVariable *refPtr);
+
+  /// Rewrites the uses of all `declare target` global variables registered via
+  /// `addDeclareTargetUsmRefPair` to use their corresponding USM reference
+  /// pointers. This pass is executed at the end of the module translation.
+  Error finalizeDeclareTargetUsmIndirectLoads();
+
   void setConfig(OpenMPIRBuilderConfig C) { Config = C; }
 
   /// Finalize the underlying module, e.g., by outlining regions.
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 125620bd49502..778cc63d74abc 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -743,6 +743,71 @@ CallInst *OpenMPIRBuilder::createRuntimeFunctionCall(FunctionCallee Callee,
   return Call;
 }
 
+void OpenMPIRBuilder::addDeclareTargetUsmRefPair(GlobalVariable *orig,
+                                                 GlobalVariable *refPtr) {
+  declareTargetUsmRefPtrPairs.emplace_back(orig, refPtr);
+}
+
+Error OpenMPIRBuilder::rewriteDeclareTargetGlobalUsesWithRefPtr(
+    GlobalVariable *origGV, GlobalVariable *refPtrGV) {
+  auto replaceUsesWithRefLoad = [refPtrGV](Instruction *inst, Value *replaced) {
+    IRBuilder<> b(inst);
+    Value *rep =
+        b.CreateLoad(refPtrGV->getValueType(), refPtrGV, "decltgt.ref");
+    if (rep->getType() != replaced->getType())
+      rep = b.CreatePointerBitCastOrAddrSpaceCast(rep, replaced->getType(),
+                                                  "decltgt.as");
+    inst->replaceUsesOfWith(replaced, rep);
+  };
+
+  SmallSetVector<User *, 8> users;
+  for (User *u : origGV->users())
+    users.insert(u);
+
+  for (User *u : users) {
+    if (auto *ce = dyn_cast<ConstantExpr>(u)) {
+      const bool isPointerCast =
+          ce->getOpcode() == Instruction::AddrSpaceCast ||
+          (ce->getOpcode() == Instruction::BitCast &&
+           ce->getType()->isPointerTy());
+
+      if (ce->getOperand(0) != origGV || !isPointerCast)
+        continue;
+
+      SmallVector<User *, 8> instUsers;
+      for (User *ceUser : ce->users())
+        if (isa<Instruction>(ceUser))
+          instUsers.push_back(ceUser);
+
+      for (User *ceUser : instUsers) {
+        auto *inst = cast<Instruction>(ceUser);
+        replaceUsesWithRefLoad(inst, ce);
+      }
+
+      if (ce->use_empty())
+        ce->destroyConstant();
+    } else if (auto *insn = dyn_cast<Instruction>(u)) {
+      replaceUsesWithRefLoad(insn, origGV);
+    }
+  }
+
+  if (!origGV->use_empty())
+    return createStringError(inconvertibleErrorCode(),
+                             "expected all uses of '%s' to be replaced",
+                             origGV->getName().str().c_str());
+
+  return Error::success();
+}
+
+Error OpenMPIRBuilder::finalizeDeclareTargetUsmIndirectLoads() {
+  if (!Config.isTargetDevice() || declareTargetUsmRefPtrPairs.empty())
+    return Error::success();
+  for (auto [orig, ref] : declareTargetUsmRefPtrPairs)
+    if (Error Err = rewriteDeclareTargetGlobalUsesWithRefPtr(orig, ref))
+      return Err;
+  return Error::success();
+}
+
 void OpenMPIRBuilder::initialize() { initializeTypes(M); }
 
 static void raiseUserConstantDataAllocasToEntryBlock(IRBuilderBase &Builder,
@@ -948,6 +1013,9 @@ void OpenMPIRBuilder::finalize(Function *Fn) {
     emitUsed("llvm.compiler.used", LLVMCompilerUsed);
   }
 
+  if (Error Err = finalizeDeclareTargetUsmIndirectLoads())
+    report_fatal_error(std::move(Err));
+
   IsFinalized = true;
 }
 
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 8614aed1ab80c..bdf738eacc113 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -30,6 +30,8 @@
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/MDBuilder.h"
 #include "llvm/IR/ReplaceConstant.h"
@@ -7475,12 +7477,18 @@ convertDeclareTargetAttr(Operation *op, mlir::omp::DeclareTargetAttr attribute,
           (attribute.getCaptureClause().getValue() !=
                mlir::omp::DeclareTargetCaptureClause::to ||
            ompBuilder->Config.hasRequiresUnifiedSharedMemory())) {
-        ompBuilder->getAddrOfDeclareTargetVar(
+        llvm::Type *ptrTy = gVal->getType();
+        if (ompBuilder->Config.hasRequiresUnifiedSharedMemory())
+          ptrTy = llvm::PointerType::get(llvmModule->getContext(), 0);
+        llvm::Constant *refPtrConst = ompBuilder->getAddrOfDeclareTargetVar(
             captureClause, deviceClause, isDeclaration, isExternallyVisible,
             ompBuilder->getTargetEntryUniqueInfo(fileInfoCallBack, *vfs),
             mangledName, generatedRefs, /*OpenMPSimd*/ false, targetTriple,
-            gVal->getType(), /*GlobalInitializer*/ nullptr,
-            /*VariableLinkage*/ nullptr);
+            ptrTy, /*GlobalInitializer*/ nullptr, /*VariableLinkage*/ nullptr);
+        if (auto *origGV = llvm::dyn_cast<llvm::GlobalVariable>(gVal))
+          if (auto *refPtrGV =
+                  llvm::dyn_cast_or_null<llvm::GlobalVariable>(refPtrConst))
+            ompBuilder->addDeclareTargetUsmRefPair(origGV, refPtrGV);
       }
     }
   }
diff --git a/mlir/test/Target/LLVMIR/omptarget-declare-target-usm-ref-ptr.mlir b/mlir/test/Target/LLVMIR/omptarget-declare-target-usm-ref-ptr.mlir
new file mode 100644
index 0000000000000..fdbf16914e25c
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/omptarget-declare-target-usm-ref-ptr.mlir
@@ -0,0 +1,24 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+// This tests the replacement of uses of a declare target global variable with
+// the unified shared memory (USM) generated reference pointer in an explicit device function.
+
+module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true, omp.requires = #omp<clause_requires unified_shared_memory>} {
+  // CHECK-DAG: @_QMmEnx_vals_decl_tgt_ref_ptr = weak global ptr null, align 8
+  llvm.mlir.global external @_QMmEnx_vals() {addr_space = 1 : i32, omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (link)>} : i32 {
+    %0 = llvm.mlir.zero : i32
+    llvm.return %0 : i32
+  }
+
+  // CHECK-LABEL: define void @_QMmPget_dims_noarg(ptr %0)
+  llvm.func @_QMmPget_dims_noarg(%arg0: !llvm.ptr) attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to)>} {
+    // CHECK: %[[REF_LOAD:.*]] = load ptr, ptr @_QMmEnx_vals_decl_tgt_ref_ptr, align 8
+    // CHECK: %[[AS_CAST:.*]] = addrspacecast ptr %[[REF_LOAD]] to ptr addrspace(1)
+    // CHECK: %[[VAL:.*]] = load i32, ptr addrspace(1) %[[AS_CAST]], align 4
+    // CHECK: store i32 %[[VAL]], ptr %0, align 4
+    %0 = llvm.mlir.addressof @_QMmEnx_vals : !llvm.ptr<1>
+    %1 = llvm.load %0 : !llvm.ptr<1> -> i32
+    llvm.store %1, %arg0 : i32, !llvm.ptr
+    llvm.return
+  }
+}
\ No newline at end of file
diff --git a/offload/test/offloading/fortran/declare-target-usm-ref-ptr.f90 b/offload/test/offloading/fortran/declare-target-usm-ref-ptr.f90
new file mode 100644
index 0000000000000..7d539a82af91b
--- /dev/null
+++ b/offload/test/offloading/fortran/declare-target-usm-ref-ptr.f90
@@ -0,0 +1,39 @@
+! Test that uses of a declare target global are replaced with its USM reference pointer.
+!
+! REQUIRES: flang, amdgpu
+! RUN: %libomptarget-compile-fortran-generic -fopenmp-force-usm
+! RUN: env LIBOMPTARGET_INFO=16 HSA_XNACK=1 %libomptarget-run-generic 2>&1 | %fcheck-generic
+
+module m
+    implicit none
+    integer :: nx_vals
+    !$omp declare target(nx_vals)
+contains
+    subroutine get_dims_noarg(kv)
+        !$omp declare target
+        integer, intent(out) :: kv
+        kv = nx_vals
+    end subroutine get_dims_noarg
+end module m
+
+program reproducer
+    use m
+    implicit none
+    integer :: kv, kv_debug
+
+    nx_vals = 6
+    !$omp target enter data map(always, to: nx_vals)
+
+    kv_debug = -1
+    !$omp target map(tofrom: kv_debug)
+    call get_dims_noarg(kv)
+    kv_debug = kv
+    !$omp end target
+
+    print *, 'kv_debug after target (host)', kv_debug
+
+    !$omp target exit data map(release: nx_vals)
+end program reproducer
+
+! CHECK: PluginInterface device {{[0-9]+}} info: Launching kernel
+! CHECK: kv_debug after target (host) 6

>From fda7c9f1119853959270c8be6961936f86854f8e Mon Sep 17 00:00:00 2001
From: ergawy <kareem.ergawy at amd.com>
Date: Mon, 27 Apr 2026 01:46:02 -0500
Subject: [PATCH 5/5] Try to fix unit tests

---
 .../Frontend/OpenMPIRBuilderTest.cpp          | 33 +++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
index 5b9b280ec7671..b55dee514c707 100644
--- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -1373,6 +1373,7 @@ TEST_F(OpenMPIRBuilderTest, ParallelForwardAsPointers) {
 TEST_F(OpenMPIRBuilderTest, CanonicalLoopSimple) {
   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
   OpenMPIRBuilder OMPBuilder(*M);
+  OMPBuilder.Config.IsTargetDevice = false;
   OMPBuilder.initialize();
   IRBuilder<> Builder(BB);
   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
@@ -1430,6 +1431,7 @@ TEST_F(OpenMPIRBuilderTest, CanonicalLoopSimple) {
 
 TEST_F(OpenMPIRBuilderTest, CanonicalLoopTripCount) {
   OpenMPIRBuilder OMPBuilder(*M);
+  OMPBuilder.Config.IsTargetDevice = false;
   OMPBuilder.initialize();
   IRBuilder<> Builder(BB);
 
@@ -1499,6 +1501,7 @@ TEST_F(OpenMPIRBuilderTest, CanonicalLoopTripCount) {
 TEST_F(OpenMPIRBuilderTest, CollapseNestedLoops) {
   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
   OpenMPIRBuilder OMPBuilder(*M);
+  OMPBuilder.Config.IsTargetDevice = false;
   OMPBuilder.initialize();
   F->setName("func");
 
@@ -1597,6 +1600,7 @@ TEST_F(OpenMPIRBuilderTest, CollapseNestedLoops) {
 
 TEST_F(OpenMPIRBuilderTest, TileSingleLoop) {
   OpenMPIRBuilder OMPBuilder(*M);
+  OMPBuilder.Config.IsTargetDevice = false;
   CallInst *Call;
   BasicBlock *BodyCode;
   CanonicalLoopInfo *Loop =
@@ -1642,6 +1646,7 @@ TEST_F(OpenMPIRBuilderTest, TileSingleLoop) {
 TEST_F(OpenMPIRBuilderTest, TileNestedLoops) {
   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
   OpenMPIRBuilder OMPBuilder(*M);
+  OMPBuilder.Config.IsTargetDevice = false;
   OMPBuilder.initialize();
   F->setName("func");
 
@@ -1731,6 +1736,7 @@ TEST_F(OpenMPIRBuilderTest, TileNestedLoops) {
 TEST_F(OpenMPIRBuilderTest, TileNestedLoopsWithBounds) {
   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
   OpenMPIRBuilder OMPBuilder(*M);
+  OMPBuilder.Config.IsTargetDevice = false;
   OMPBuilder.initialize();
   F->setName("func");
 
@@ -1867,6 +1873,7 @@ TEST_F(OpenMPIRBuilderTest, TileNestedLoopsWithBounds) {
 TEST_F(OpenMPIRBuilderTest, TileSingleLoopCounts) {
   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
   OpenMPIRBuilder OMPBuilder(*M);
+  OMPBuilder.Config.IsTargetDevice = false;
   OMPBuilder.initialize();
   IRBuilder<> Builder(BB);
 
@@ -1956,6 +1963,7 @@ TEST_F(OpenMPIRBuilderTest, TileSingleLoopCounts) {
 
 TEST_F(OpenMPIRBuilderTest, ApplySimd) {
   OpenMPIRBuilder OMPBuilder(*M);
+  OMPBuilder.Config.IsTargetDevice = false;
   MapVector<Value *, Value *> AlignedVars;
   CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
   ASSERT_NE(CLI, nullptr);
@@ -1991,6 +1999,7 @@ TEST_F(OpenMPIRBuilderTest, ApplySimd) {
 
 TEST_F(OpenMPIRBuilderTest, ApplySimdCustomAligned) {
   OpenMPIRBuilder OMPBuilder(*M);
+  OMPBuilder.Config.IsTargetDevice = false;
   IRBuilder<> Builder(BB);
   const int AlignmentValue = 32;
   llvm::BasicBlock *sourceBlock = Builder.GetInsertBlock();
@@ -2054,6 +2063,7 @@ TEST_F(OpenMPIRBuilderTest, ApplySimdCustomAligned) {
 }
 TEST_F(OpenMPIRBuilderTest, ApplySimdlen) {
   OpenMPIRBuilder OMPBuilder(*M);
+  OMPBuilder.Config.IsTargetDevice = false;
   MapVector<Value *, Value *> AlignedVars;
   CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
   ASSERT_NE(CLI, nullptr);
@@ -2090,6 +2100,7 @@ TEST_F(OpenMPIRBuilderTest, ApplySimdlen) {
 
 TEST_F(OpenMPIRBuilderTest, ApplySafelenOrderConcurrent) {
   OpenMPIRBuilder OMPBuilder(*M);
+  OMPBuilder.Config.IsTargetDevice = false;
   MapVector<Value *, Value *> AlignedVars;
 
   CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
@@ -2128,6 +2139,7 @@ TEST_F(OpenMPIRBuilderTest, ApplySafelenOrderConcurrent) {
 
 TEST_F(OpenMPIRBuilderTest, ApplySafelen) {
   OpenMPIRBuilder OMPBuilder(*M);
+  OMPBuilder.Config.IsTargetDevice = false;
   MapVector<Value *, Value *> AlignedVars;
 
   CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
@@ -2163,6 +2175,7 @@ TEST_F(OpenMPIRBuilderTest, ApplySafelen) {
 
 TEST_F(OpenMPIRBuilderTest, ApplySimdlenSafelen) {
   OpenMPIRBuilder OMPBuilder(*M);
+  OMPBuilder.Config.IsTargetDevice = false;
   MapVector<Value *, Value *> AlignedVars;
 
   CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
@@ -2199,6 +2212,7 @@ TEST_F(OpenMPIRBuilderTest, ApplySimdlenSafelen) {
 
 TEST_F(OpenMPIRBuilderTest, ApplySimdIf) {
   OpenMPIRBuilder OMPBuilder(*M);
+  OMPBuilder.Config.IsTargetDevice = false;
   IRBuilder<> Builder(BB);
   MapVector<Value *, Value *> AlignedVars;
   AllocaInst *Alloc1 = Builder.CreateAlloca(Builder.getInt32Ty());
@@ -2262,6 +2276,7 @@ TEST_F(OpenMPIRBuilderTest, ApplySimdIf) {
 
 TEST_F(OpenMPIRBuilderTest, UnrollLoopFull) {
   OpenMPIRBuilder OMPBuilder(*M);
+  OMPBuilder.Config.IsTargetDevice = false;
 
   CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
   ASSERT_NE(CLI, nullptr);
@@ -2287,6 +2302,7 @@ TEST_F(OpenMPIRBuilderTest, UnrollLoopFull) {
 
 TEST_F(OpenMPIRBuilderTest, UnrollLoopPartial) {
   OpenMPIRBuilder OMPBuilder(*M);
+  OMPBuilder.Config.IsTargetDevice = false;
   CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
   ASSERT_NE(CLI, nullptr);
 
@@ -2321,6 +2337,7 @@ TEST_F(OpenMPIRBuilderTest, UnrollLoopPartial) {
 
 TEST_F(OpenMPIRBuilderTest, UnrollLoopHeuristic) {
   OpenMPIRBuilder OMPBuilder(*M);
+  OMPBuilder.Config.IsTargetDevice = false;
 
   CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
   ASSERT_NE(CLI, nullptr);
@@ -3095,6 +3112,7 @@ TEST_F(OpenMPIRBuilderTest, CriticalDirective) {
 TEST_F(OpenMPIRBuilderTest, OrderedDirectiveDependSource) {
   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
   OpenMPIRBuilder OMPBuilder(*M);
+  OMPBuilder.Config.IsTargetDevice = false;
   OMPBuilder.initialize();
   F->setName("func");
   IRBuilder<> Builder(BB);
@@ -3180,6 +3198,7 @@ TEST_F(OpenMPIRBuilderTest, OrderedDirectiveDependSource) {
 TEST_F(OpenMPIRBuilderTest, OrderedDirectiveDependSink) {
   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
   OpenMPIRBuilder OMPBuilder(*M);
+  OMPBuilder.Config.IsTargetDevice = false;
   OMPBuilder.initialize();
   F->setName("func");
   IRBuilder<> Builder(BB);
@@ -3265,6 +3284,7 @@ TEST_F(OpenMPIRBuilderTest, OrderedDirectiveDependSink) {
 TEST_F(OpenMPIRBuilderTest, OrderedDirectiveThreads) {
   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
   OpenMPIRBuilder OMPBuilder(*M);
+  OMPBuilder.Config.IsTargetDevice = false;
   OMPBuilder.initialize();
   F->setName("func");
   IRBuilder<> Builder(BB);
@@ -3341,6 +3361,7 @@ TEST_F(OpenMPIRBuilderTest, OrderedDirectiveThreads) {
 TEST_F(OpenMPIRBuilderTest, OrderedDirectiveSimd) {
   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
   OpenMPIRBuilder OMPBuilder(*M);
+  OMPBuilder.Config.IsTargetDevice = false;
   OMPBuilder.initialize();
   F->setName("func");
   IRBuilder<> Builder(BB);
@@ -3778,6 +3799,7 @@ TEST_F(OpenMPIRBuilderTest, SingleDirectiveCopyPrivate) {
 
 TEST_F(OpenMPIRBuilderTest, OMPAtomicReadFlt) {
   OpenMPIRBuilder OMPBuilder(*M);
+  OMPBuilder.Config.IsTargetDevice = false;
   OMPBuilder.initialize();
   F->setName("func");
   IRBuilder<> Builder(BB);
@@ -3821,6 +3843,7 @@ TEST_F(OpenMPIRBuilderTest, OMPAtomicReadFlt) {
 
 TEST_F(OpenMPIRBuilderTest, OMPAtomicReadInt) {
   OpenMPIRBuilder OMPBuilder(*M);
+  OMPBuilder.Config.IsTargetDevice = false;
   OMPBuilder.initialize();
   F->setName("func");
   IRBuilder<> Builder(BB);
@@ -3872,6 +3895,7 @@ TEST_F(OpenMPIRBuilderTest, OMPAtomicReadInt) {
 
 TEST_F(OpenMPIRBuilderTest, OMPAtomicWriteFlt) {
   OpenMPIRBuilder OMPBuilder(*M);
+  OMPBuilder.Config.IsTargetDevice = false;
   OMPBuilder.initialize();
   F->setName("func");
   IRBuilder<> Builder(BB);
@@ -3909,6 +3933,7 @@ TEST_F(OpenMPIRBuilderTest, OMPAtomicWriteFlt) {
 
 TEST_F(OpenMPIRBuilderTest, OMPAtomicWriteInt) {
   OpenMPIRBuilder OMPBuilder(*M);
+  OMPBuilder.Config.IsTargetDevice = false;
   OMPBuilder.initialize();
   F->setName("func");
   IRBuilder<> Builder(BB);
@@ -3952,6 +3977,7 @@ TEST_F(OpenMPIRBuilderTest, OMPAtomicWriteInt) {
 
 TEST_F(OpenMPIRBuilderTest, OMPAtomicUpdate) {
   OpenMPIRBuilder OMPBuilder(*M);
+  OMPBuilder.Config.IsTargetDevice = false;
   OMPBuilder.initialize();
   F->setName("func");
   IRBuilder<> Builder(BB);
@@ -4021,6 +4047,7 @@ TEST_F(OpenMPIRBuilderTest, OMPAtomicUpdate) {
 
 TEST_F(OpenMPIRBuilderTest, OMPAtomicUpdateFloat) {
   OpenMPIRBuilder OMPBuilder(*M);
+  OMPBuilder.Config.IsTargetDevice = false;
   OMPBuilder.initialize();
   F->setName("func");
   IRBuilder<> Builder(BB);
@@ -4089,6 +4116,7 @@ TEST_F(OpenMPIRBuilderTest, OMPAtomicUpdateFloat) {
 
 TEST_F(OpenMPIRBuilderTest, OMPAtomicUpdateIntr) {
   OpenMPIRBuilder OMPBuilder(*M);
+  OMPBuilder.Config.IsTargetDevice = false;
   OMPBuilder.initialize();
   F->setName("func");
   IRBuilder<> Builder(BB);
@@ -4158,6 +4186,7 @@ TEST_F(OpenMPIRBuilderTest, OMPAtomicUpdateIntr) {
 
 TEST_F(OpenMPIRBuilderTest, OMPAtomicCapture) {
   OpenMPIRBuilder OMPBuilder(*M);
+  OMPBuilder.Config.IsTargetDevice = false;
   OMPBuilder.initialize();
   F->setName("func");
   IRBuilder<> Builder(BB);
@@ -4210,6 +4239,7 @@ TEST_F(OpenMPIRBuilderTest, OMPAtomicCapture) {
 
 TEST_F(OpenMPIRBuilderTest, OMPAtomicCompare) {
   OpenMPIRBuilder OMPBuilder(*M);
+  OMPBuilder.Config.IsTargetDevice = false;
   OMPBuilder.initialize();
   F->setName("func");
   IRBuilder<> Builder(BB);
@@ -4270,6 +4300,7 @@ TEST_F(OpenMPIRBuilderTest, OMPAtomicCompare) {
 
 TEST_F(OpenMPIRBuilderTest, OMPAtomicCompareCapture) {
   OpenMPIRBuilder OMPBuilder(*M);
+  OMPBuilder.Config.IsTargetDevice = false;
   OMPBuilder.initialize();
   F->setName("func");
   IRBuilder<> Builder(BB);
@@ -4519,6 +4550,7 @@ TEST_F(OpenMPIRBuilderTest, OMPAtomicRWStructType) {
   // Test for issue #165184: atomic read/write on struct types should use
   // element type size, not pointer size.
   OpenMPIRBuilder OMPBuilder(*M);
+  OMPBuilder.Config.IsTargetDevice = false;
   OMPBuilder.initialize();
   F->setName("func");
   IRBuilder<> Builder(BB);
@@ -7866,6 +7898,7 @@ TEST_F(OpenMPIRBuilderTest, CreateIteratorLoopInvalidLoopBody) {
 TEST_F(OpenMPIRBuilderTest, CreateTaskgroup) {
   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
   OpenMPIRBuilder OMPBuilder(*M);
+  OMPBuilder.Config.IsTargetDevice = false;
   OMPBuilder.initialize();
   F->setName("func");
   IRBuilder<> Builder(BB);



More information about the llvm-branch-commits mailing list