[llvm-branch-commits] [flang] [llvm] [mlir] [MLIR][OpenMP] Simplify OpenMP device codegen (PR #137201)

Sergio Afonso via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Mon Aug 25 08:16:52 PDT 2025


https://github.com/skatrak updated https://github.com/llvm/llvm-project/pull/137201

>From 9b5e75950a88e68aa49f5341f488eb6f57f58375 Mon Sep 17 00:00:00 2001
From: Sergio Afonso <safonsof at amd.com>
Date: Tue, 22 Apr 2025 12:04:45 +0100
Subject: [PATCH 1/2] [MLIR][OpenMP] Simplify OpenMP device codegen

After removing host operations from the device MLIR module, it is no longer
necessary to provide special codegen logic to prevent these operations from
causing compiler crashes or miscompilations.

This patch removes these now unnecessary code paths to simplify codegen logic.
Some MLIR tests are now replaced with Flang tests, since the responsibility of
dealing with host operations has been moved earlier in the compilation flow.

MLIR tests holding target device modules are updated to no longer include now
unsupported host operations.
---
 .../OpenMP/target-nesting-in-host-ops.f90     |  87 ++++
 .../Integration/OpenMP/task-target-device.f90 |  37 ++
 .../OpenMP/threadprivate-target-device.f90    |  40 ++
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      | 423 +++++++-----------
 ...arget-constant-indexing-device-region.mlir |  25 +-
 .../LLVMIR/omptarget-debug-loop-loc.mlir      |  36 +-
 .../Target/LLVMIR/omptarget-debug-var-1.mlir  |  19 +-
 .../omptarget-memcpy-align-metadata.mlir      |  61 +--
 .../LLVMIR/omptarget-target-inside-task.mlir  |  43 --
 ...ptarget-threadprivate-device-lowering.mlir |  31 --
 .../Target/LLVMIR/openmp-llvm-invalid.mlir    |  45 ++
 .../openmp-target-nesting-in-host-ops.mlir    | 160 -------
 .../LLVMIR/openmp-task-target-device.mlir     |  26 --
 13 files changed, 422 insertions(+), 611 deletions(-)
 create mode 100644 flang/test/Integration/OpenMP/target-nesting-in-host-ops.f90
 create mode 100644 flang/test/Integration/OpenMP/task-target-device.f90
 create mode 100644 flang/test/Integration/OpenMP/threadprivate-target-device.f90
 delete mode 100644 mlir/test/Target/LLVMIR/omptarget-target-inside-task.mlir
 delete mode 100644 mlir/test/Target/LLVMIR/omptarget-threadprivate-device-lowering.mlir
 delete mode 100644 mlir/test/Target/LLVMIR/openmp-target-nesting-in-host-ops.mlir
 delete mode 100644 mlir/test/Target/LLVMIR/openmp-task-target-device.mlir

diff --git a/flang/test/Integration/OpenMP/target-nesting-in-host-ops.f90 b/flang/test/Integration/OpenMP/target-nesting-in-host-ops.f90
new file mode 100644
index 0000000000000..8c85a3c1784ed
--- /dev/null
+++ b/flang/test/Integration/OpenMP/target-nesting-in-host-ops.f90
@@ -0,0 +1,87 @@
+!===----------------------------------------------------------------------===!
+! This directory can be used to add Integration tests involving multiple
+! stages of the compiler (for eg. from Fortran to LLVM IR). It should not
+! contain executable tests. We should only add tests here sparingly and only
+! if there is no other way to test. Repeat this message in each test that is
+! added to this directory and sub-directories.
+!===----------------------------------------------------------------------===!
+
+!REQUIRES: amdgpu-registered-target
+!RUN: %flang_fc1 -triple amdgcn-amd-amdhsa -emit-llvm -fopenmp -fopenmp-version=50 -fopenmp-is-target-device %s -o - | FileCheck %s
+
+! CHECK-NOT: define void @nested_target_in_parallel
+! CHECK: define weak_odr protected amdgpu_kernel void @__omp_offloading_{{.*}}_nested_target_in_parallel_{{.*}}(ptr %{{.*}}, ptr %{{.*}})
+subroutine nested_target_in_parallel(v)
+  implicit none
+  integer, intent(inout) :: v(10)
+
+  !$omp parallel
+    !$omp target map(tofrom: v)
+    !$omp end target
+  !$omp end parallel
+end subroutine
+
+! CHECK-NOT: define void @nested_target_in_wsloop
+! CHECK: define weak_odr protected amdgpu_kernel void @__omp_offloading_{{.*}}_nested_target_in_wsloop_{{.*}}(ptr %{{.*}}, ptr %{{.*}})
+subroutine nested_target_in_wsloop(v)
+  implicit none
+  integer, intent(inout) :: v(10)
+  integer :: i
+
+  !$omp do
+  do i=1, 10
+    !$omp target map(tofrom: v)
+    !$omp end target
+  end do
+end subroutine
+
+! CHECK-NOT: define void @nested_target_in_parallel_with_private
+! CHECK: define weak_odr protected amdgpu_kernel void @__omp_offloading_{{.*}}_nested_target_in_parallel_with_private_{{.*}}(ptr %{{.*}}, ptr %{{.*}}, ptr %{{.*}})
+subroutine nested_target_in_parallel_with_private(v)
+  implicit none
+  integer, intent(inout) :: v(10)
+  integer :: x
+  x = 10
+
+  !$omp parallel firstprivate(x)
+    !$omp target map(tofrom: v(1:x))
+    !$omp end target
+  !$omp end parallel
+end subroutine
+
+! CHECK-NOT: define void @nested_target_in_task_with_private
+! CHECK: define weak_odr protected amdgpu_kernel void @__omp_offloading_{{.*}}_nested_target_in_task_with_private_{{.*}}(ptr %{{.*}}, ptr %{{.*}}, ptr %{{.*}})
+subroutine nested_target_in_task_with_private(v)
+  implicit none
+  integer, intent(inout) :: v(10)
+  integer :: x
+  x = 10
+
+  !$omp task firstprivate(x)
+    !$omp target map(tofrom: v(1:x))
+    !$omp end target
+  !$omp end task
+end subroutine
+
+! CHECK-NOT: define void @target_and_atomic_update
+! CHECK: define weak_odr protected amdgpu_kernel void @__omp_offloading_{{.*}}_target_and_atomic_update_{{.*}}(ptr %{{.*}})
+subroutine target_and_atomic_update(x, expr)
+  implicit none
+  integer, intent(inout) :: x, expr
+
+  !$omp target
+  !$omp end target
+
+  !$omp atomic update
+  x = x + expr
+end subroutine
+
+! CHECK-NOT: define void @nested_target_in_associate
+! CHECK: define weak_odr protected amdgpu_kernel void @__omp_offloading_{{.*}}_nested_target_in_associate_{{.*}}(ptr %{{.*}}, ptr %{{.*}}, ptr %{{.*}})
+subroutine nested_target_in_associate(x)
+  integer, pointer, contiguous :: x(:)
+  associate(y => x)
+    !$omp target map(tofrom: y)
+    !$omp end target
+  end associate
+end subroutine
diff --git a/flang/test/Integration/OpenMP/task-target-device.f90 b/flang/test/Integration/OpenMP/task-target-device.f90
new file mode 100644
index 0000000000000..b92dee65e3f7f
--- /dev/null
+++ b/flang/test/Integration/OpenMP/task-target-device.f90
@@ -0,0 +1,37 @@
+!===----------------------------------------------------------------------===!
+! This directory can be used to add Integration tests involving multiple
+! stages of the compiler (for eg. from Fortran to LLVM IR). It should not
+! contain executable tests. We should only add tests here sparingly and only
+! if there is no other way to test. Repeat this message in each test that is
+! added to this directory and sub-directories.
+!===----------------------------------------------------------------------===!
+
+!REQUIRES: amdgpu-registered-target
+!RUN: %flang_fc1 -triple amdgcn-amd-amdhsa -emit-llvm -fopenmp -fopenmp-version=50 -fopenmp-is-target-device %s -o - | FileCheck %s
+
+! This tests the fix for https://github.com/llvm/llvm-project/issues/84606
+! We are only interested in ensuring that the -mlir-to-llvmir pass doesn't crash.
+
+! CHECK: define weak_odr protected amdgpu_kernel void @{{.*}}QQmain{{.*}}({{.*}})
+program main
+  implicit none
+  integer, parameter :: N = 5
+  integer, dimension(5) :: a
+  integer :: i
+  integer :: target_a = 0
+
+  !$omp task depend(out:a)
+  do i = 1, N
+    a(i) = i
+  end do
+  !$omp end task
+
+  !$omp target map(tofrom:target_a) map(tofrom:a)
+  do i = 1, N
+    target_a = target_a + i
+    a(i) = a(i) + i
+  end do
+  !$omp end target
+  print*, target_a
+  print*, a
+end program main
diff --git a/flang/test/Integration/OpenMP/threadprivate-target-device.f90 b/flang/test/Integration/OpenMP/threadprivate-target-device.f90
new file mode 100644
index 0000000000000..662d6c6357af0
--- /dev/null
+++ b/flang/test/Integration/OpenMP/threadprivate-target-device.f90
@@ -0,0 +1,40 @@
+!===----------------------------------------------------------------------===!
+! This directory can be used to add Integration tests involving multiple
+! stages of the compiler (for eg. from Fortran to LLVM IR). It should not
+! contain executable tests. We should only add tests here sparingly and only
+! if there is no other way to test. Repeat this message in each test that is
+! added to this directory and sub-directories.
+!===----------------------------------------------------------------------===!
+
+!REQUIRES: amdgpu-registered-target
+!RUN: %flang_fc1 -triple amdgcn-amd-amdhsa -emit-llvm -fopenmp -fopenmp-version=50 -fopenmp-is-target-device %s -o - | FileCheck %s
+
+! The aim of this test is to verify host threadprivate directives do not cause
+! crashes during OpenMP target device codegen when used in conjunction with
+! target code in the same function.
+
+! CHECK: define weak_odr protected amdgpu_kernel void @{{.*}}(ptr %{{.*}}, ptr %[[ARG1:.*]], ptr %[[ARG2:.*]]) #{{[0-9]+}} {
+! CHECK:  %[[ALLOCA_X:.*]] = alloca ptr, align 8, addrspace(5)
+! CHECK:  %[[ASCAST_X:.*]] = addrspacecast ptr addrspace(5) %[[ALLOCA_X]] to ptr
+! CHECK:  store ptr %[[ARG1]], ptr %[[ASCAST_X]], align 8
+
+! CHECK:  %[[ALLOCA_N:.*]] = alloca ptr, align 8, addrspace(5)
+! CHECK:  %[[ASCAST_N:.*]] = addrspacecast ptr addrspace(5) %[[ALLOCA_N]] to ptr
+! CHECK:  store ptr %[[ARG2]], ptr %[[ASCAST_N]], align 8
+
+! CHECK:  %[[LOAD_X:.*]] = load ptr, ptr %[[ASCAST_X]], align 8
+! CHECK:  call void @bar_(ptr %[[LOAD_X]], ptr %[[ASCAST_N]])
+
+module test
+  implicit none
+  integer :: n
+  !$omp threadprivate(n)
+  
+  contains
+  subroutine foo(x)
+    integer, intent(inout) :: x(10)
+    !$omp target map(tofrom: x(1:n))
+      call bar(x, n)
+    !$omp end target
+  end subroutine
+end module
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 6694de8383534..8323ca97a1dc3 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -3499,19 +3499,14 @@ convertOmpThreadprivate(Operation &opInst, llvm::IRBuilderBase &builder,
   LLVM::GlobalOp global =
       addressOfOp.getGlobal(moduleTranslation.symbolTable());
   llvm::GlobalValue *globalValue = moduleTranslation.lookupGlobal(global);
-
-  if (!ompBuilder->Config.isTargetDevice()) {
-    llvm::Type *type = globalValue->getValueType();
-    llvm::TypeSize typeSize =
-        builder.GetInsertBlock()->getModule()->getDataLayout().getTypeStoreSize(
-            type);
-    llvm::ConstantInt *size = builder.getInt64(typeSize.getFixedValue());
-    llvm::Value *callInst = ompBuilder->createCachedThreadPrivate(
-        ompLoc, globalValue, size, global.getSymName() + ".cache");
-    moduleTranslation.mapValue(opInst.getResult(0), callInst);
-  } else {
-    moduleTranslation.mapValue(opInst.getResult(0), globalValue);
-  }
+  llvm::Type *type = globalValue->getValueType();
+  llvm::TypeSize typeSize =
+      builder.GetInsertBlock()->getModule()->getDataLayout().getTypeStoreSize(
+          type);
+  llvm::ConstantInt *size = builder.getInt64(typeSize.getFixedValue());
+  llvm::Value *callInst = ompBuilder->createCachedThreadPrivate(
+      ompLoc, globalValue, size, global.getSymName() + ".cache");
+  moduleTranslation.mapValue(opInst.getResult(0), callInst);
 
   return success();
 }
@@ -5852,33 +5847,154 @@ convertDeclareTargetAttr(Operation *op, mlir::omp::DeclareTargetAttr attribute,
   return success();
 }
 
-// Returns true if the operation is inside a TargetOp or
-// is part of a declare target function.
-static bool isTargetDeviceOp(Operation *op) {
-  // Assumes no reverse offloading
-  if (op->getParentOfType<omp::TargetOp>())
-    return true;
+namespace {
 
-  // Certain operations return results, and whether utilised in host or
-  // target there is a chance an LLVM Dialect operation depends on it
-  // by taking it in as an operand, so we must always lower these in
-  // some manner or result in an ICE (whether they end up in a no-op
-  // or otherwise).
-  if (mlir::isa<omp::ThreadprivateOp>(op))
-    return true;
+/// Implementation of the dialect interface that converts operations belonging
+/// to the OpenMP dialect to LLVM IR.
+class OpenMPDialectLLVMIRTranslationInterface
+    : public LLVMTranslationDialectInterface {
+public:
+  using LLVMTranslationDialectInterface::LLVMTranslationDialectInterface;
 
-  if (mlir::isa<omp::TargetAllocMemOp>(op) ||
-      mlir::isa<omp::TargetFreeMemOp>(op))
-    return true;
+  /// Translates the given operation to LLVM IR using the provided IR builder
+  /// and saving the state in `moduleTranslation`.
+  LogicalResult
+  convertOperation(Operation *op, llvm::IRBuilderBase &builder,
+                   LLVM::ModuleTranslation &moduleTranslation) const final;
 
-  if (auto parentFn = op->getParentOfType<LLVM::LLVMFuncOp>())
+  /// Given an OpenMP MLIR attribute, create the corresponding LLVM-IR,
+  /// runtime calls, or operation amendments
+  LogicalResult
+  amendOperation(Operation *op, ArrayRef<llvm::Instruction *> instructions,
+                 NamedAttribute attribute,
+                 LLVM::ModuleTranslation &moduleTranslation) const final;
+};
+
+} // namespace
+
+LogicalResult OpenMPDialectLLVMIRTranslationInterface::amendOperation(
+    Operation *op, ArrayRef<llvm::Instruction *> instructions,
+    NamedAttribute attribute,
+    LLVM::ModuleTranslation &moduleTranslation) const {
+  return llvm::StringSwitch<llvm::function_ref<LogicalResult(Attribute)>>(
+             attribute.getName())
+      .Case("omp.is_target_device",
+            [&](Attribute attr) {
+              if (auto deviceAttr = dyn_cast<BoolAttr>(attr)) {
+                llvm::OpenMPIRBuilderConfig &config =
+                    moduleTranslation.getOpenMPBuilder()->Config;
+                config.setIsTargetDevice(deviceAttr.getValue());
+                return success();
+              }
+              return failure();
+            })
+      .Case("omp.is_gpu",
+            [&](Attribute attr) {
+              if (auto gpuAttr = dyn_cast<BoolAttr>(attr)) {
+                llvm::OpenMPIRBuilderConfig &config =
+                    moduleTranslation.getOpenMPBuilder()->Config;
+                config.setIsGPU(gpuAttr.getValue());
+                return success();
+              }
+              return failure();
+            })
+      .Case("omp.host_ir_filepath",
+            [&](Attribute attr) {
+              if (auto filepathAttr = dyn_cast<StringAttr>(attr)) {
+                llvm::OpenMPIRBuilder *ompBuilder =
+                    moduleTranslation.getOpenMPBuilder();
+                ompBuilder->loadOffloadInfoMetadata(filepathAttr.getValue());
+                return success();
+              }
+              return failure();
+            })
+      .Case("omp.flags",
+            [&](Attribute attr) {
+              if (auto rtlAttr = dyn_cast<omp::FlagsAttr>(attr))
+                return convertFlagsAttr(op, rtlAttr, moduleTranslation);
+              return failure();
+            })
+      .Case("omp.version",
+            [&](Attribute attr) {
+              if (auto versionAttr = dyn_cast<omp::VersionAttr>(attr)) {
+                llvm::OpenMPIRBuilder *ompBuilder =
+                    moduleTranslation.getOpenMPBuilder();
+                ompBuilder->M.addModuleFlag(llvm::Module::Max, "openmp",
+                                            versionAttr.getVersion());
+                return success();
+              }
+              return failure();
+            })
+      .Case("omp.declare_target",
+            [&](Attribute attr) {
+              if (auto declareTargetAttr =
+                      dyn_cast<omp::DeclareTargetAttr>(attr))
+                return convertDeclareTargetAttr(op, declareTargetAttr,
+                                                moduleTranslation);
+              return failure();
+            })
+      .Case("omp.requires",
+            [&](Attribute attr) {
+              if (auto requiresAttr = dyn_cast<omp::ClauseRequiresAttr>(attr)) {
+                using Requires = omp::ClauseRequires;
+                Requires flags = requiresAttr.getValue();
+                llvm::OpenMPIRBuilderConfig &config =
+                    moduleTranslation.getOpenMPBuilder()->Config;
+                config.setHasRequiresReverseOffload(
+                    bitEnumContainsAll(flags, Requires::reverse_offload));
+                config.setHasRequiresUnifiedAddress(
+                    bitEnumContainsAll(flags, Requires::unified_address));
+                config.setHasRequiresUnifiedSharedMemory(
+                    bitEnumContainsAll(flags, Requires::unified_shared_memory));
+                config.setHasRequiresDynamicAllocators(
+                    bitEnumContainsAll(flags, Requires::dynamic_allocators));
+                return success();
+              }
+              return failure();
+            })
+      .Case("omp.target_triples",
+            [&](Attribute attr) {
+              if (auto triplesAttr = dyn_cast<ArrayAttr>(attr)) {
+                llvm::OpenMPIRBuilderConfig &config =
+                    moduleTranslation.getOpenMPBuilder()->Config;
+                config.TargetTriples.clear();
+                config.TargetTriples.reserve(triplesAttr.size());
+                for (Attribute tripleAttr : triplesAttr) {
+                  if (auto tripleStrAttr = dyn_cast<StringAttr>(tripleAttr))
+                    config.TargetTriples.emplace_back(tripleStrAttr.getValue());
+                  else
+                    return failure();
+                }
+                return success();
+              }
+              return failure();
+            })
+      .Default([](Attribute) {
+        // Fall through for omp attributes that do not require lowering.
+        return success();
+      })(attribute.getValue());
+
+  return failure();
+}
+
+// Returns true if the operation is not inside a TargetOp and it is part of a
+// function that is not declare target or is declare target for the host only.
+static bool isHostDeviceOp(Operation *op) {
+  // Assumes no reverse offloading
+  if (op->getParentOfType<omp::TargetOp>())
+    return false;
+
+  if (auto parentFn = op->getParentOfType<LLVM::LLVMFuncOp>()) {
     if (auto declareTargetIface =
             llvm::dyn_cast<mlir::omp::DeclareTargetInterface>(
                 parentFn.getOperation()))
       if (declareTargetIface.isDeclareTarget() &&
           declareTargetIface.getDeclareTargetDeviceType() !=
               mlir::omp::DeclareTargetDeviceType::host)
-        return true;
+        return false;
+
+    return true;
+  }
 
   return false;
 }
@@ -5964,11 +6080,18 @@ convertTargetFreeMemOp(Operation &opInst, llvm::IRBuilderBase &builder,
 
 /// Given an OpenMP MLIR operation, create the corresponding LLVM IR (including
 /// OpenMP runtime calls).
-static LogicalResult
-convertHostOrTargetOperation(Operation *op, llvm::IRBuilderBase &builder,
-                             LLVM::ModuleTranslation &moduleTranslation) {
+LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation(
+    Operation *op, llvm::IRBuilderBase &builder,
+    LLVM::ModuleTranslation &moduleTranslation) const {
   llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
 
+  if (ompBuilder->Config.isTargetDevice() &&
+      !isa<omp::TargetOp, omp::TargetDataOp, omp::TargetEnterDataOp,
+           omp::TargetExitDataOp, omp::TargetUpdateOp, omp::MapInfoOp,
+           omp::TerminatorOp, omp::YieldOp>(op) &&
+      isHostDeviceOp(op))
+    return op->emitOpError() << "unsupported host op found in device";
+
   // For each loop, introduce one stack frame to hold loop information. Ensure
   // this is only done for the outermost loop wrapper to prevent introducing
   // multiple stack frames for a single loop. Initially set to null, the loop
@@ -6153,238 +6276,6 @@ convertHostOrTargetOperation(Operation *op, llvm::IRBuilderBase &builder,
   return result;
 }
 
-static LogicalResult
-convertTargetDeviceOp(Operation *op, llvm::IRBuilderBase &builder,
-                      LLVM::ModuleTranslation &moduleTranslation) {
-  return convertHostOrTargetOperation(op, builder, moduleTranslation);
-}
-
-static LogicalResult
-convertTargetOpsInNest(Operation *op, llvm::IRBuilderBase &builder,
-                       LLVM::ModuleTranslation &moduleTranslation) {
-  if (isa<omp::TargetOp>(op))
-    return convertOmpTarget(*op, builder, moduleTranslation);
-  if (isa<omp::TargetDataOp>(op))
-    return convertOmpTargetData(op, builder, moduleTranslation);
-  bool interrupted =
-      op->walk<WalkOrder::PreOrder>([&](Operation *oper) {
-          if (isa<omp::TargetOp>(oper)) {
-            if (failed(convertOmpTarget(*oper, builder, moduleTranslation)))
-              return WalkResult::interrupt();
-            return WalkResult::skip();
-          }
-          if (isa<omp::TargetDataOp>(oper)) {
-            if (failed(convertOmpTargetData(oper, builder, moduleTranslation)))
-              return WalkResult::interrupt();
-            return WalkResult::skip();
-          }
-
-          // Non-target ops might nest target-related ops, therefore, we
-          // translate them as non-OpenMP scopes. Translating them is needed by
-          // nested target-related ops since they might need LLVM values defined
-          // in their parent non-target ops.
-          if (isa<omp::OpenMPDialect>(oper->getDialect()) &&
-              oper->getParentOfType<LLVM::LLVMFuncOp>() &&
-              !oper->getRegions().empty()) {
-            if (auto blockArgsIface =
-                    dyn_cast<omp::BlockArgOpenMPOpInterface>(oper))
-              forwardArgs(moduleTranslation, blockArgsIface);
-            else {
-              // Here we map entry block arguments of
-              // non-BlockArgOpenMPOpInterface ops if they can be encountered
-              // inside of a function and they define any of these arguments.
-              if (isa<mlir::omp::AtomicUpdateOp>(oper))
-                for (auto [operand, arg] :
-                     llvm::zip_equal(oper->getOperands(),
-                                     oper->getRegion(0).getArguments())) {
-                  moduleTranslation.mapValue(
-                      arg, builder.CreateLoad(
-                               moduleTranslation.convertType(arg.getType()),
-                               moduleTranslation.lookupValue(operand)));
-                }
-            }
-
-            if (auto loopNest = dyn_cast<omp::LoopNestOp>(oper)) {
-              assert(builder.GetInsertBlock() &&
-                     "No insert block is set for the builder");
-              for (auto iv : loopNest.getIVs()) {
-                // Map iv to an undefined value just to keep the IR validity.
-                moduleTranslation.mapValue(
-                    iv, llvm::PoisonValue::get(
-                            moduleTranslation.convertType(iv.getType())));
-              }
-            }
-
-            for (Region &region : oper->getRegions()) {
-              // Regions are fake in the sense that they are not a truthful
-              // translation of the OpenMP construct being converted (e.g. no
-              // OpenMP runtime calls will be generated). We just need this to
-              // prepare the kernel invocation args.
-              SmallVector<llvm::PHINode *> phis;
-              auto result = convertOmpOpRegions(
-                  region, oper->getName().getStringRef().str() + ".fake.region",
-                  builder, moduleTranslation, &phis);
-              if (failed(handleError(result, *oper)))
-                return WalkResult::interrupt();
-
-              builder.SetInsertPoint(result.get(), result.get()->end());
-            }
-
-            return WalkResult::skip();
-          }
-
-          return WalkResult::advance();
-        }).wasInterrupted();
-  return failure(interrupted);
-}
-
-namespace {
-
-/// Implementation of the dialect interface that converts operations belonging
-/// to the OpenMP dialect to LLVM IR.
-class OpenMPDialectLLVMIRTranslationInterface
-    : public LLVMTranslationDialectInterface {
-public:
-  using LLVMTranslationDialectInterface::LLVMTranslationDialectInterface;
-
-  /// Translates the given operation to LLVM IR using the provided IR builder
-  /// and saving the state in `moduleTranslation`.
-  LogicalResult
-  convertOperation(Operation *op, llvm::IRBuilderBase &builder,
-                   LLVM::ModuleTranslation &moduleTranslation) const final;
-
-  /// Given an OpenMP MLIR attribute, create the corresponding LLVM-IR,
-  /// runtime calls, or operation amendments
-  LogicalResult
-  amendOperation(Operation *op, ArrayRef<llvm::Instruction *> instructions,
-                 NamedAttribute attribute,
-                 LLVM::ModuleTranslation &moduleTranslation) const final;
-};
-
-} // namespace
-
-LogicalResult OpenMPDialectLLVMIRTranslationInterface::amendOperation(
-    Operation *op, ArrayRef<llvm::Instruction *> instructions,
-    NamedAttribute attribute,
-    LLVM::ModuleTranslation &moduleTranslation) const {
-  return llvm::StringSwitch<llvm::function_ref<LogicalResult(Attribute)>>(
-             attribute.getName())
-      .Case("omp.is_target_device",
-            [&](Attribute attr) {
-              if (auto deviceAttr = dyn_cast<BoolAttr>(attr)) {
-                llvm::OpenMPIRBuilderConfig &config =
-                    moduleTranslation.getOpenMPBuilder()->Config;
-                config.setIsTargetDevice(deviceAttr.getValue());
-                return success();
-              }
-              return failure();
-            })
-      .Case("omp.is_gpu",
-            [&](Attribute attr) {
-              if (auto gpuAttr = dyn_cast<BoolAttr>(attr)) {
-                llvm::OpenMPIRBuilderConfig &config =
-                    moduleTranslation.getOpenMPBuilder()->Config;
-                config.setIsGPU(gpuAttr.getValue());
-                return success();
-              }
-              return failure();
-            })
-      .Case("omp.host_ir_filepath",
-            [&](Attribute attr) {
-              if (auto filepathAttr = dyn_cast<StringAttr>(attr)) {
-                llvm::OpenMPIRBuilder *ompBuilder =
-                    moduleTranslation.getOpenMPBuilder();
-                ompBuilder->loadOffloadInfoMetadata(filepathAttr.getValue());
-                return success();
-              }
-              return failure();
-            })
-      .Case("omp.flags",
-            [&](Attribute attr) {
-              if (auto rtlAttr = dyn_cast<omp::FlagsAttr>(attr))
-                return convertFlagsAttr(op, rtlAttr, moduleTranslation);
-              return failure();
-            })
-      .Case("omp.version",
-            [&](Attribute attr) {
-              if (auto versionAttr = dyn_cast<omp::VersionAttr>(attr)) {
-                llvm::OpenMPIRBuilder *ompBuilder =
-                    moduleTranslation.getOpenMPBuilder();
-                ompBuilder->M.addModuleFlag(llvm::Module::Max, "openmp",
-                                            versionAttr.getVersion());
-                return success();
-              }
-              return failure();
-            })
-      .Case("omp.declare_target",
-            [&](Attribute attr) {
-              if (auto declareTargetAttr =
-                      dyn_cast<omp::DeclareTargetAttr>(attr))
-                return convertDeclareTargetAttr(op, declareTargetAttr,
-                                                moduleTranslation);
-              return failure();
-            })
-      .Case("omp.requires",
-            [&](Attribute attr) {
-              if (auto requiresAttr = dyn_cast<omp::ClauseRequiresAttr>(attr)) {
-                using Requires = omp::ClauseRequires;
-                Requires flags = requiresAttr.getValue();
-                llvm::OpenMPIRBuilderConfig &config =
-                    moduleTranslation.getOpenMPBuilder()->Config;
-                config.setHasRequiresReverseOffload(
-                    bitEnumContainsAll(flags, Requires::reverse_offload));
-                config.setHasRequiresUnifiedAddress(
-                    bitEnumContainsAll(flags, Requires::unified_address));
-                config.setHasRequiresUnifiedSharedMemory(
-                    bitEnumContainsAll(flags, Requires::unified_shared_memory));
-                config.setHasRequiresDynamicAllocators(
-                    bitEnumContainsAll(flags, Requires::dynamic_allocators));
-                return success();
-              }
-              return failure();
-            })
-      .Case("omp.target_triples",
-            [&](Attribute attr) {
-              if (auto triplesAttr = dyn_cast<ArrayAttr>(attr)) {
-                llvm::OpenMPIRBuilderConfig &config =
-                    moduleTranslation.getOpenMPBuilder()->Config;
-                config.TargetTriples.clear();
-                config.TargetTriples.reserve(triplesAttr.size());
-                for (Attribute tripleAttr : triplesAttr) {
-                  if (auto tripleStrAttr = dyn_cast<StringAttr>(tripleAttr))
-                    config.TargetTriples.emplace_back(tripleStrAttr.getValue());
-                  else
-                    return failure();
-                }
-                return success();
-              }
-              return failure();
-            })
-      .Default([](Attribute) {
-        // Fall through for omp attributes that do not require lowering.
-        return success();
-      })(attribute.getValue());
-
-  return failure();
-}
-
-/// Given an OpenMP MLIR operation, create the corresponding LLVM IR
-/// (including OpenMP runtime calls).
-LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation(
-    Operation *op, llvm::IRBuilderBase &builder,
-    LLVM::ModuleTranslation &moduleTranslation) const {
-
-  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
-  if (ompBuilder->Config.isTargetDevice()) {
-    if (isTargetDeviceOp(op)) {
-      return convertTargetDeviceOp(op, builder, moduleTranslation);
-    } else {
-      return convertTargetOpsInNest(op, builder, moduleTranslation);
-    }
-  }
-  return convertHostOrTargetOperation(op, builder, moduleTranslation);
-}
-
 void mlir::registerOpenMPDialectTranslation(DialectRegistry &registry) {
   registry.insert<omp::OpenMPDialect>();
   registry.addExtension(+[](MLIRContext *ctx, omp::OpenMPDialect *dialect) {
diff --git a/mlir/test/Target/LLVMIR/omptarget-constant-indexing-device-region.mlir b/mlir/test/Target/LLVMIR/omptarget-constant-indexing-device-region.mlir
index ed66ff2c9ad7e..f21ffc45c8bcc 100644
--- a/mlir/test/Target/LLVMIR/omptarget-constant-indexing-device-region.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-constant-indexing-device-region.mlir
@@ -3,21 +3,16 @@
 module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true} {
   llvm.func @_QQmain() attributes {bindc_name = "main"} {
     %0 = llvm.mlir.addressof @_QFEsp : !llvm.ptr
-    %1 = llvm.mlir.constant(10 : index) : i64
-    %2 = llvm.mlir.constant(1 : index) : i64
-    %3 = llvm.mlir.constant(0 : index) : i64
-    %4 = llvm.mlir.constant(9 : index) : i64
-    %5 = omp.map.bounds lower_bound(%3 : i64) upper_bound(%4 : i64) extent(%1 : i64) stride(%2 : i64) start_idx(%2 : i64)
-    %6 = omp.map.info var_ptr(%0 : !llvm.ptr, !llvm.array<10 x i32>) map_clauses(tofrom) capture(ByRef) bounds(%5) -> !llvm.ptr {name = "sp"}
-    omp.target map_entries(%6 -> %arg0 : !llvm.ptr) {
-      %7 = llvm.mlir.constant(20 : i32) : i32
-      %8 = llvm.mlir.constant(0 : i64) : i64
-      %9 = llvm.getelementptr %arg0[0, %8] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.array<10 x i32>
-      llvm.store %7, %9 : i32, !llvm.ptr
-      %10 = llvm.mlir.constant(10 : i32) : i32
-      %11 = llvm.mlir.constant(4 : i64) : i64
-      %12 = llvm.getelementptr %arg0[0, %11] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.array<10 x i32>
-      llvm.store %10, %12 : i32, !llvm.ptr
+    %1 = omp.map.info var_ptr(%0 : !llvm.ptr, !llvm.array<10 x i32>) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "sp"}
+    omp.target map_entries(%1 -> %arg0 : !llvm.ptr) {
+      %2 = llvm.mlir.constant(20 : i32) : i32
+      %3 = llvm.mlir.constant(0 : i64) : i64
+      %4 = llvm.getelementptr %arg0[0, %3] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.array<10 x i32>
+      llvm.store %2, %4 : i32, !llvm.ptr
+      %5 = llvm.mlir.constant(10 : i32) : i32
+      %6 = llvm.mlir.constant(4 : i64) : i64
+      %7 = llvm.getelementptr %arg0[0, %6] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.array<10 x i32>
+      llvm.store %5, %7 : i32, !llvm.ptr
       omp.terminator
     }
     llvm.return
diff --git a/mlir/test/Target/LLVMIR/omptarget-debug-loop-loc.mlir b/mlir/test/Target/LLVMIR/omptarget-debug-loop-loc.mlir
index c3320382f8d45..aa4c1f0354fdc 100644
--- a/mlir/test/Target/LLVMIR/omptarget-debug-loop-loc.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-debug-loop-loc.mlir
@@ -4,31 +4,25 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo
   omp.private {type = private} @_QFEj_private_i32 : i32 loc(#loc1)
   omp.private {type = private} @_QFEi_private_i32 : i32 loc(#loc1)
   llvm.func @test() {
-    %3 = llvm.mlir.constant(1 : i64) : i64
-    %4 = llvm.alloca %3 x i32 {bindc_name = "j"} : (i64) -> !llvm.ptr<5> loc(#loc4)
-    %5 = llvm.addrspacecast %4 : !llvm.ptr<5> to !llvm.ptr loc(#loc4)
-    %6 = llvm.mlir.constant(1 : i64) : i64
-    %7 = llvm.alloca %6 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr<5> loc(#loc4)
-    %8 = llvm.addrspacecast %7 : !llvm.ptr<5> to !llvm.ptr
-    %9 = llvm.mlir.constant(16383 : index) : i64
-    %10 = llvm.mlir.constant(0 : index) : i64
-    %11 = llvm.mlir.constant(1 : index) : i64
-    %12 = llvm.mlir.constant(16384 : i32) : i32
-    %14 = llvm.mlir.addressof @_QFEarray : !llvm.ptr
-    %18 = omp.map.info var_ptr(%8 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "i"} loc(#loc3)
-    %20 = omp.map.info var_ptr(%5 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "j"} loc(#loc3)
-    %22 = omp.map.bounds lower_bound(%10 : i64) upper_bound(%9 : i64) extent(%9 : i64) stride(%11 : i64) start_idx(%11 : i64) loc(#loc3)
-    %23 = omp.map.info var_ptr(%14 : !llvm.ptr, !llvm.array<16384 x i32>) map_clauses(implicit, tofrom) capture(ByRef) bounds(%22) -> !llvm.ptr {name = "array"} loc(#loc3)
-    %24 = omp.map.info var_ptr(%8 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "i"} loc(#loc3)
-    omp.target map_entries(%18 -> %arg0, %20 -> %arg2, %23 -> %arg4, %24 -> %arg5 : !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr) {
-      %25 = llvm.mlir.constant(1 : i32) : i32
-      %27 = llvm.mlir.constant(16384 : i32) : i32
+    %0 = llvm.mlir.constant(1 : i64) : i64
+    %1 = llvm.alloca %0 x i32 {bindc_name = "j"} : (i64) -> !llvm.ptr<5> loc(#loc4)
+    %2 = llvm.addrspacecast %1 : !llvm.ptr<5> to !llvm.ptr loc(#loc4)
+    %3 = llvm.alloca %0 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr<5> loc(#loc4)
+    %4 = llvm.addrspacecast %3 : !llvm.ptr<5> to !llvm.ptr
+    %5 = llvm.mlir.addressof @_QFEarray : !llvm.ptr
+    %6 = omp.map.info var_ptr(%4 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "i"} loc(#loc3)
+    %7 = omp.map.info var_ptr(%2 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "j"} loc(#loc3)
+    %8 = omp.map.info var_ptr(%5 : !llvm.ptr, !llvm.array<16384 x i32>) map_clauses(implicit, tofrom) capture(ByRef) -> !llvm.ptr {name = "array"} loc(#loc3)
+    %9 = omp.map.info var_ptr(%4 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "i"} loc(#loc3)
+    omp.target map_entries(%6 -> %arg0, %7 -> %arg2, %8 -> %arg4, %9 -> %arg5 : !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr) {
+      %10 = llvm.mlir.constant(1 : i32) : i32
+      %11 = llvm.mlir.constant(16384 : i32) : i32
       omp.teams {
         omp.distribute private(@_QFEi_private_i32 %arg5 -> %arg6 : !llvm.ptr) {
-          omp.loop_nest (%arg7) : i32 = (%25) to (%27) inclusive step (%25) {
+          omp.loop_nest (%arg7) : i32 = (%10) to (%11) inclusive step (%10) {
             omp.parallel {
               omp.wsloop private(@_QFEj_private_i32 %arg2 -> %arg8 : !llvm.ptr) {
-                omp.loop_nest (%arg9) : i32 = (%25) to (%27) inclusive step (%25) {
+                omp.loop_nest (%arg9) : i32 = (%10) to (%11) inclusive step (%10) {
                   llvm.store %arg9, %arg8 : i32, !llvm.ptr loc(#loc9)
                   omp.yield
                 } loc(#loc9)
diff --git a/mlir/test/Target/LLVMIR/omptarget-debug-var-1.mlir b/mlir/test/Target/LLVMIR/omptarget-debug-var-1.mlir
index 8f42995af23a8..f5ed9646cf33c 100644
--- a/mlir/test/Target/LLVMIR/omptarget-debug-var-1.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-debug-var-1.mlir
@@ -32,19 +32,14 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo
   llvm.func @test() {
     %0 = llvm.mlir.constant(1 : i64) : i64
     %1 = llvm.alloca %0 x f32 : (i64) -> !llvm.ptr<5>
-    %4 = llvm.alloca %0 x i32 : (i64) -> !llvm.ptr<5>
+    %2 = llvm.alloca %0 x i32 : (i64) -> !llvm.ptr<5>
     %ascast = llvm.addrspacecast %1 : !llvm.ptr<5> to !llvm.ptr
-    %ascast2 = llvm.addrspacecast %4 : !llvm.ptr<5> to !llvm.ptr
-    %6 = llvm.mlir.constant(9 : index) : i64
-    %7 = llvm.mlir.constant(0 : index) : i64
-    %8 = llvm.mlir.constant(1 : index) : i64
-    %10 = llvm.mlir.constant(10 : index) : i64
-    %11 = llvm.mlir.addressof @_QFEarr : !llvm.ptr
-    %14 = omp.map.info var_ptr(%ascast : !llvm.ptr, f32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr
-    %15 = omp.map.bounds lower_bound(%7 : i64) upper_bound(%6 : i64) extent(%10 : i64) stride(%8 : i64) start_idx(%8 : i64)
-    %16 = omp.map.info var_ptr(%11 : !llvm.ptr, !llvm.array<10 x i32>) map_clauses(tofrom) capture(ByRef) bounds(%15) -> !llvm.ptr
-    %17 = omp.map.info var_ptr(%ascast2 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr
-    omp.target map_entries(%14 -> %arg0, %16 -> %arg1, %17 -> %arg2 : !llvm.ptr, !llvm.ptr, !llvm.ptr) {
+    %ascast2 = llvm.addrspacecast %2 : !llvm.ptr<5> to !llvm.ptr
+    %3 = llvm.mlir.addressof @_QFEarr : !llvm.ptr
+    %4 = omp.map.info var_ptr(%ascast : !llvm.ptr, f32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr
+    %5 = omp.map.info var_ptr(%3 : !llvm.ptr, !llvm.array<10 x i32>) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr
+    %6 = omp.map.info var_ptr(%ascast2 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr
+    omp.target map_entries(%4 -> %arg0, %5 -> %arg1, %6 -> %arg2 : !llvm.ptr, !llvm.ptr, !llvm.ptr) {
       llvm.intr.dbg.declare #var_x = %arg0 : !llvm.ptr
       llvm.intr.dbg.declare #var_arr = %arg1 : !llvm.ptr
       llvm.intr.dbg.declare #var_i = %arg2 : !llvm.ptr
diff --git a/mlir/test/Target/LLVMIR/omptarget-memcpy-align-metadata.mlir b/mlir/test/Target/LLVMIR/omptarget-memcpy-align-metadata.mlir
index 13c18401cafab..ce9a4dcbd55be 100644
--- a/mlir/test/Target/LLVMIR/omptarget-memcpy-align-metadata.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-memcpy-align-metadata.mlir
@@ -7,49 +7,36 @@
 module attributes {llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true} {
   omp.private {type = private} @_QFEk_private_i32 : i32
   llvm.func @_QQmain()  {
-    %0 = llvm.mlir.constant(1 : i32) : i32
-    %7 = llvm.alloca %0 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr<5>
-    %8 = llvm.addrspacecast %7 : !llvm.ptr<5> to !llvm.ptr
-    %12 = llvm.mlir.constant(1 : i64) : i64
-    %13 = llvm.alloca %12 x i32 {bindc_name = "k"} : (i64) -> !llvm.ptr<5>
-    %14 = llvm.addrspacecast %13 : !llvm.ptr<5> to !llvm.ptr
-    %15 = llvm.mlir.constant(1 : i64) : i64
-    %16 = llvm.alloca %15 x i32 {bindc_name = "b"} : (i64) -> !llvm.ptr<5>
-    %17 = llvm.addrspacecast %16 : !llvm.ptr<5> to !llvm.ptr
-    %19 = llvm.mlir.constant(1 : index) : i64
-    %20 = llvm.mlir.constant(0 : index) : i64
-    %22 = llvm.mlir.addressof @_QFEa : !llvm.ptr
-    %25 = llvm.mlir.addressof @_QFECnz : !llvm.ptr
-    %60 = llvm.getelementptr %8[0, 7, %20, 0] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
-    %61 = llvm.load %60 : !llvm.ptr -> i64
-    %62 = llvm.getelementptr %8[0, 7, %20, 1] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
-    %63 = llvm.load %62 : !llvm.ptr -> i64
-    %64 = llvm.getelementptr %8[0, 7, %20, 2] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
-    %65 = llvm.load %64 : !llvm.ptr -> i64
-    %66 = llvm.sub %63, %19 : i64
-    %67 = omp.map.bounds lower_bound(%20 : i64) upper_bound(%66 : i64) extent(%63 : i64) stride(%65 : i64) start_idx(%61 : i64) {stride_in_bytes = true}
-    %68 = llvm.getelementptr %22[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
-    %69 = omp.map.info var_ptr(%22 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) var_ptr_ptr(%68 : !llvm.ptr) bounds(%67) -> !llvm.ptr {name = ""}
-    %70 = omp.map.info var_ptr(%22 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>) map_clauses(to) capture(ByRef) members(%69 : [0] : !llvm.ptr) -> !llvm.ptr {name = "a"}
-    %71 = omp.map.info var_ptr(%17 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "b"}
-    %72 = omp.map.info var_ptr(%14 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "k"}
-    %73 = omp.map.info var_ptr(%25 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "nz"}
-    omp.target map_entries(%70 -> %arg0, %71 -> %arg1, %72 -> %arg2, %73 -> %arg3, %69 -> %arg4 : !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr) {
-      %106 = llvm.mlir.constant(0 : index) : i64
-      %107 = llvm.mlir.constant(13 : i32) : i32
-      %108 = llvm.mlir.constant(1000 : i32) : i32
-      %109 = llvm.mlir.constant(1 : i32) : i32
+    %0 = llvm.mlir.constant(1 : i64) : i64
+    %1 = llvm.alloca %0 x i32 {bindc_name = "k"} : (i64) -> !llvm.ptr<5>
+    %2 = llvm.addrspacecast %1 : !llvm.ptr<5> to !llvm.ptr
+    %3 = llvm.mlir.constant(1 : i64) : i64
+    %4 = llvm.alloca %3 x i32 {bindc_name = "b"} : (i64) -> !llvm.ptr<5>
+    %5 = llvm.addrspacecast %4 : !llvm.ptr<5> to !llvm.ptr
+    %6 = llvm.mlir.addressof @_QFEa : !llvm.ptr
+    %7 = llvm.mlir.addressof @_QFECnz : !llvm.ptr
+    %8 = llvm.getelementptr %6[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+    %9 = omp.map.info var_ptr(%6 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) var_ptr_ptr(%8 : !llvm.ptr) -> !llvm.ptr {name = ""}
+    %10 = omp.map.info var_ptr(%6 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>) map_clauses(to) capture(ByRef) members(%9 : [0] : !llvm.ptr) -> !llvm.ptr {name = "a"}
+    %11 = omp.map.info var_ptr(%5 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "b"}
+    %12 = omp.map.info var_ptr(%2 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "k"}
+    %13 = omp.map.info var_ptr(%7 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "nz"}
+    omp.target map_entries(%10 -> %arg0, %11 -> %arg1, %12 -> %arg2, %13 -> %arg3, %9 -> %arg4 : !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr) {
+      %14 = llvm.mlir.constant(0 : index) : i64
+      %15 = llvm.mlir.constant(13 : i32) : i32
+      %16 = llvm.mlir.constant(1000 : i32) : i32
+      %17 = llvm.mlir.constant(1 : i32) : i32
       omp.teams {
         omp.parallel private(@_QFEk_private_i32 %arg2 -> %arg5 : !llvm.ptr) {
-          %110 = llvm.mlir.constant(1 : i32) : i32
-          %111 = llvm.alloca %110 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr<5>
-          %112 = llvm.addrspacecast %111 : !llvm.ptr<5> to !llvm.ptr
+          %18 = llvm.mlir.constant(1 : i32) : i32
+          %19 = llvm.alloca %18 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr<5>
+          %20 = llvm.addrspacecast %19 : !llvm.ptr<5> to !llvm.ptr
           omp.distribute {
             omp.wsloop {
-              omp.loop_nest (%arg6) : i32 = (%109) to (%108) inclusive step (%109) {
+              omp.loop_nest (%arg6) : i32 = (%17) to (%16) inclusive step (%17) {
                 llvm.store %arg6, %arg5  : i32, !llvm.ptr
                 %115 = llvm.mlir.constant(48 : i32) : i32
-                "llvm.intr.memcpy"(%112, %arg0, %115) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
+                "llvm.intr.memcpy"(%20, %arg0, %115) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
                 omp.yield
               }
             } {omp.composite}
diff --git a/mlir/test/Target/LLVMIR/omptarget-target-inside-task.mlir b/mlir/test/Target/LLVMIR/omptarget-target-inside-task.mlir
deleted file mode 100644
index 16be0773bd14b..0000000000000
--- a/mlir/test/Target/LLVMIR/omptarget-target-inside-task.mlir
+++ /dev/null
@@ -1,43 +0,0 @@
-// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
-
-module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true, omp.is_gpu = true} {
-  llvm.func @omp_target_region_() {
-    %0 = llvm.mlir.constant(20 : i32) : i32
-    %1 = llvm.mlir.constant(10 : i32) : i32
-    %2 = llvm.mlir.constant(1 : i64) : i64
-    %3 = llvm.alloca %2 x i32 {bindc_name = "a", in_type = i32, operandSegmentSizes = array<i32: 0, 0>, uniq_name = "_QFomp_target_regionEa"} : (i64) -> !llvm.ptr<5>
-    %ascast = llvm.addrspacecast %3 : !llvm.ptr<5> to !llvm.ptr
-    %4 = llvm.mlir.constant(1 : i64) : i64
-    %5 = llvm.alloca %4 x i32 {bindc_name = "b", in_type = i32, operandSegmentSizes = array<i32: 0, 0>, uniq_name = "_QFomp_target_regionEb"} : (i64) -> !llvm.ptr<5>
-    %ascast2 = llvm.addrspacecast %5 : !llvm.ptr<5> to !llvm.ptr
-    %6 = llvm.mlir.constant(1 : i64) : i64
-    %7 = llvm.alloca %6 x i32 {bindc_name = "c", in_type = i32, operandSegmentSizes = array<i32: 0, 0>, uniq_name = "_QFomp_target_regionEc"} : (i64) -> !llvm.ptr<5>
-    %ascast3 = llvm.addrspacecast %7 : !llvm.ptr<5> to !llvm.ptr
-    llvm.store %1, %ascast : i32, !llvm.ptr
-    llvm.store %0, %ascast2 : i32, !llvm.ptr
-    omp.task {
-        %map1 = omp.map.info var_ptr(%ascast : !llvm.ptr, i32)   map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""}
-        %map2 = omp.map.info var_ptr(%ascast2 : !llvm.ptr, i32)   map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""}
-        %map3 = omp.map.info var_ptr(%ascast3 : !llvm.ptr, i32)   map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""}
-      omp.target map_entries(%map1 -> %arg0, %map2 -> %arg1, %map3 -> %arg2 : !llvm.ptr, !llvm.ptr, !llvm.ptr) {
-        %8 = llvm.load %arg0 : !llvm.ptr -> i32
-        %9 = llvm.load %arg1 : !llvm.ptr -> i32
-        %10 = llvm.add %8, %9  : i32
-        llvm.store %10, %arg2 : i32, !llvm.ptr
-        omp.terminator
-      }
-      omp.terminator
-    }
-   llvm.return
-  }
-
-  llvm.func @omp_target_no_map() {
-    omp.target {
-      omp.terminator
-    }
-    llvm.return
-  }
-}
-
-// CHECK: define weak_odr protected amdgpu_kernel void @__omp_offloading_{{.*}}_{{.*}}_omp_target_region__l22
-// CHECK: ret void
diff --git a/mlir/test/Target/LLVMIR/omptarget-threadprivate-device-lowering.mlir b/mlir/test/Target/LLVMIR/omptarget-threadprivate-device-lowering.mlir
deleted file mode 100644
index ba182374a9e3b..0000000000000
--- a/mlir/test/Target/LLVMIR/omptarget-threadprivate-device-lowering.mlir
+++ /dev/null
@@ -1,31 +0,0 @@
-// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
-
-// Not intended to be a functional example, the aim of this test is to verify
-// omp.threadprivate does not crash on lowering during the OpenMP target device
-// pass when used in conjunction with target code in the same module.
-
-module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true } {
-  llvm.func @func() attributes {omp.declare_target = #omp.declaretarget<device_type = (host), capture_clause = (to)>} {
-    %0 = llvm.mlir.addressof @_QFEpointer2 : !llvm.ptr
-    %1 = omp.threadprivate %0 : !llvm.ptr -> !llvm.ptr
-    %2 = omp.map.info var_ptr(%1 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>) map_clauses(implicit, to) capture(ByRef) -> !llvm.ptr
-    omp.target map_entries(%2 -> %arg0 : !llvm.ptr) {
-      %3 = llvm.mlir.constant(1 : i32) : i32
-      %4 = llvm.getelementptr %arg0[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
-      llvm.store %3, %4 : i32, !llvm.ptr
-      omp.terminator
-    }
-    llvm.return
-  }
-   llvm.mlir.global internal @_QFEpointer2() {addr_space = 0 : i32} : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> {
-    %0 = llvm.mlir.undef : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
-    llvm.return %0 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
-  }
-}
-
-// CHECK: define weak_odr protected amdgpu_kernel void @{{.*}}(ptr %{{.*}}, ptr %[[ARG1:.*]]) #{{[0-9]+}} {
-// CHECK:  %[[ALLOCA:.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK:  %[[ALLOCA_ASCAST:.*]] = addrspacecast ptr addrspace(5) %[[ALLOCA]] to ptr
-// CHECK:  store ptr %[[ARG1]], ptr %[[ALLOCA_ASCAST]], align 8
-// CHECK:  %[[LOAD_ALLOCA:.*]] = load ptr, ptr %[[ALLOCA_ASCAST]], align 8
-// CHECK:  store i32 1, ptr %[[LOAD_ALLOCA]], align 4
diff --git a/mlir/test/Target/LLVMIR/openmp-llvm-invalid.mlir b/mlir/test/Target/LLVMIR/openmp-llvm-invalid.mlir
index 41bc5c4ba525f..1efea084d0dcf 100644
--- a/mlir/test/Target/LLVMIR/openmp-llvm-invalid.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-llvm-invalid.mlir
@@ -89,3 +89,48 @@ llvm.func @omp_threadprivate() {
   llvm.store %3, %5 : i32, !llvm.ptr
   llvm.return
 }
+
+// -----
+
+module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true} {
+  llvm.func @host_op_in_device(%arg0 : !llvm.ptr) {
+    // expected-error @below {{unsupported host op found in device}}
+    // expected-error @below {{LLVM Translation failed for operation: omp.threadprivate}}
+    %0 = omp.threadprivate %arg0 : !llvm.ptr -> !llvm.ptr
+    llvm.return
+  }
+}
+
+// -----
+
+module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true} {
+  llvm.func @host_op_in_device_nested_target(%arg0 : !llvm.ptr) {
+    // expected-error @below {{unsupported host op found in device}}
+    // expected-error @below {{LLVM Translation failed for operation: omp.parallel}}
+    omp.parallel {
+      omp.target {
+        omp.terminator
+      }
+      omp.terminator
+    }
+    llvm.return
+  }
+}
+
+// -----
+
+module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true} {
+  llvm.func @host_op_in_device_sibling_target(%x: !llvm.ptr, %expr: i32) {
+    omp.target {
+      omp.terminator
+    }
+    // expected-error @below {{unsupported host op found in device}}
+    // expected-error @below {{LLVM Translation failed for operation: omp.atomic.update}}
+    omp.atomic.update %x : !llvm.ptr {
+    ^bb0(%xval: i32):
+      %newval = llvm.add %xval, %expr : i32
+      omp.yield(%newval : i32)
+    }
+    llvm.return
+  }
+}
diff --git a/mlir/test/Target/LLVMIR/openmp-target-nesting-in-host-ops.mlir b/mlir/test/Target/LLVMIR/openmp-target-nesting-in-host-ops.mlir
deleted file mode 100644
index cbf273b887bc7..0000000000000
--- a/mlir/test/Target/LLVMIR/openmp-target-nesting-in-host-ops.mlir
+++ /dev/null
@@ -1,160 +0,0 @@
-// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
-
-module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true} {
-
-  omp.private {type = private} @i32_privatizer : i32
-
-  llvm.func @test_nested_target_in_parallel(%arg0: !llvm.ptr) {
-    omp.parallel {
-    %0 = llvm.mlir.constant(4 : index) : i64
-    %1 = llvm.mlir.constant(1 : index) : i64
-    %4 = omp.map.bounds   lower_bound(%1 : i64) upper_bound(%0 : i64) stride(%1 : i64) start_idx(%1 : i64)
-    %mapv1 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.array<10 x i32>)   map_clauses(tofrom) capture(ByRef) bounds(%4) -> !llvm.ptr {name = ""}
-    omp.target map_entries(%mapv1 -> %map_arg : !llvm.ptr) {
-      omp.terminator
-    }
-      omp.terminator
-    }
-    llvm.return
-  }
-
-// CHECK-LABEL: define void @test_nested_target_in_parallel({{.*}}) {
-// CHECK-NEXT:    br label %omp.parallel.fake.region
-// CHECK:       omp.parallel.fake.region:
-// CHECK-NEXT:    br label %omp.region.cont
-// CHECK:       omp.region.cont:
-// CHECK-NEXT:    ret void
-// CHECK-NEXT:  }
-
-  llvm.func @test_nested_target_in_wsloop(%arg0: !llvm.ptr) {
-    %8 = llvm.mlir.constant(1 : i64) : i64
-    %9 = llvm.alloca %8 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr<5>
-    %ascast = llvm.addrspacecast %9 : !llvm.ptr<5> to !llvm.ptr
-    %16 = llvm.mlir.constant(10 : i32) : i32
-    %17 = llvm.mlir.constant(1 : i32) : i32
-    omp.wsloop private(@i32_privatizer %ascast -> %loop_arg : !llvm.ptr) {
-      omp.loop_nest (%arg1) : i32 = (%17) to (%16) inclusive step (%17) {
-        llvm.store %arg1, %loop_arg : i32, !llvm.ptr
-        %0 = llvm.mlir.constant(4 : index) : i64
-        %1 = llvm.mlir.constant(1 : index) : i64
-        %4 = omp.map.bounds   lower_bound(%1 : i64) upper_bound(%0 : i64) stride(%1 : i64) start_idx(%1 : i64)
-        %mapv1 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.array<10 x i32>)   map_clauses(tofrom) capture(ByRef) bounds(%4) -> !llvm.ptr {name = ""}
-        omp.target map_entries(%mapv1 -> %map_arg : !llvm.ptr) {
-          omp.terminator
-        }
-        omp.yield
-      }
-    }
-    llvm.return
-  }
-
-// CHECK-LABEL: define void @test_nested_target_in_wsloop(ptr %0) {
-// CHECK-NEXT:    %{{.*}} = alloca i32, i64 1, align 4, addrspace(5)
-// CHECK-NEXT:    %{{.*}} = addrspacecast ptr addrspace(5) %{{.*}} to ptr
-// CHECK-NEXT:    br label %omp.wsloop.fake.region
-// CHECK:       omp.wsloop.fake.region:
-// CHECK-NEXT:    br label %omp.loop_nest.fake.region
-// CHECK:       omp.loop_nest.fake.region:
-// CHECK-NEXT:    store i32 poison, ptr %{{.*}}
-// CHECK-NEXT:    br label %omp.region.cont1
-// CHECK:       omp.region.cont1:
-// CHECK-NEXT:    br label %omp.region.cont
-// CHECK:       omp.region.cont:
-// CHECK-NEXT:    ret void
-// CHECK-NEXT:  }
-
-  llvm.func @test_nested_target_in_parallel_with_private(%arg0: !llvm.ptr) {
-    %8 = llvm.mlir.constant(1 : i64) : i64
-    %9 = llvm.alloca %8 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr<5>
-    %ascast = llvm.addrspacecast %9 : !llvm.ptr<5> to !llvm.ptr
-    omp.parallel private(@i32_privatizer %ascast -> %i_priv_arg : !llvm.ptr) {
-        %1 = llvm.mlir.constant(1 : index) : i64
-        // Use the private clause from omp.parallel to make sure block arguments
-        // are handled.
-        %i_val = llvm.load %i_priv_arg : !llvm.ptr -> i64
-        %4 = omp.map.bounds   lower_bound(%1 : i64) upper_bound(%i_val : i64) stride(%1 : i64) start_idx(%1 : i64)
-        %mapv1 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.array<10 x i32>)   map_clauses(tofrom) capture(ByRef) bounds(%4) -> !llvm.ptr {name = ""}
-        omp.target map_entries(%mapv1 -> %map_arg : !llvm.ptr) {
-          omp.terminator
-        }
-        omp.terminator
-    }
-    llvm.return
-  }
-
-  llvm.func @test_nested_target_in_task_with_private(%arg0: !llvm.ptr) {
-    %8 = llvm.mlir.constant(1 : i64) : i64
-    %9 = llvm.alloca %8 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr<5>
-    %ascast = llvm.addrspacecast %9 : !llvm.ptr<5> to !llvm.ptr
-    omp.task private(@i32_privatizer %ascast -> %i_priv_arg : !llvm.ptr) {
-        %1 = llvm.mlir.constant(1 : index) : i64
-        // Use the private clause from omp.task to make sure block arguments
-        // are handled.
-        %i_val = llvm.load %i_priv_arg : !llvm.ptr -> i64
-        %4 = omp.map.bounds   lower_bound(%1 : i64) upper_bound(%i_val : i64) stride(%1 : i64) start_idx(%1 : i64)
-        %mapv1 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.array<10 x i32>)   map_clauses(tofrom) capture(ByRef) bounds(%4) -> !llvm.ptr {name = ""}
-        omp.target map_entries(%mapv1 -> %map_arg : !llvm.ptr) {
-          omp.terminator
-        }
-        omp.terminator
-    }
-    llvm.return
-  }
-
-  llvm.func @test_target_and_atomic_update(%x: !llvm.ptr, %expr : i32) {
-    omp.target {
-      omp.terminator
-    }
-
-    omp.atomic.update %x : !llvm.ptr {
-    ^bb0(%xval: i32):
-      %newval = llvm.add %xval, %expr : i32
-      omp.yield(%newval : i32)
-    }
-
-    llvm.return
-  }
-
-// CHECK-LABEL: define void @test_nested_target_in_parallel_with_private({{.*}}) {
-// CHECK:        br label %omp.parallel.fake.region
-// CHECK:       omp.parallel.fake.region:
-// CHECK:         br label %omp.region.cont
-// CHECK:       omp.region.cont:
-// CHECK-NEXT:    ret void
-// CHECK-NEXT:  }
-
-// CHECK-LABEL: define {{.*}} amdgpu_kernel void @__omp_offloading_{{.*}}_nested_target_in_parallel_{{.*}} {
-// CHECK:         call i32 @__kmpc_target_init
-// CHECK:       user_code.entry:
-// CHECK:         call void @__kmpc_target_deinit()
-// CHECK:         ret void
-// CHECK:       }
-
-// CHECK-LABEL: define {{.*}} amdgpu_kernel void @__omp_offloading_{{.*}}_test_nested_target_in_wsloop_{{.*}} {
-// CHECK:         call i32 @__kmpc_target_init
-// CHECK:       user_code.entry:
-// CHECK:         call void @__kmpc_target_deinit()
-// CHECK:         ret void
-// CHECK:       }
-
-// CHECK-LABEL: define {{.*}} amdgpu_kernel void @__omp_offloading_{{.*}}_test_nested_target_in_parallel_with_private_{{.*}} {
-// CHECK:         call i32 @__kmpc_target_init
-// CHECK:       user_code.entry:
-// CHECK:         call void @__kmpc_target_deinit()
-// CHECK:         ret void
-// CHECK:       }
-
-// CHECK-LABEL: define {{.*}} amdgpu_kernel void @__omp_offloading_{{.*}}_test_nested_target_in_task_with_private_{{.*}} {
-// CHECK:         call i32 @__kmpc_target_init
-// CHECK:       user_code.entry:
-// CHECK:         call void @__kmpc_target_deinit()
-// CHECK:         ret void
-// CHECK:       }
-
-// CHECK-LABEL: define {{.*}} amdgpu_kernel void @__omp_offloading_{{.*}}_test_target_and_atomic_update_{{.*}} {
-// CHECK:         call i32 @__kmpc_target_init
-// CHECK:       user_code.entry:
-// CHECK:         call void @__kmpc_target_deinit()
-// CHECK:         ret void
-// CHECK:       }
-}
diff --git a/mlir/test/Target/LLVMIR/openmp-task-target-device.mlir b/mlir/test/Target/LLVMIR/openmp-task-target-device.mlir
deleted file mode 100644
index 2ce2424cf9541..0000000000000
--- a/mlir/test/Target/LLVMIR/openmp-task-target-device.mlir
+++ /dev/null
@@ -1,26 +0,0 @@
-// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
-
-// This tests the fix for https://github.com/llvm/llvm-project/issues/84606
-// We are only interested in ensuring that the -mlir-to-llmvir pass doesn't crash.
-// CHECK: {{.*}} = add i32 {{.*}}, 5
-module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true} {
-  llvm.func @_QQmain() attributes {fir.bindc_name = "main", omp.declare_target = #omp.declaretarget<device_type = (host), capture_clause = (to)>} {
-    %0 = llvm.mlir.constant(0 : i32) : i32
-    %1 = llvm.mlir.constant(1 : i64) : i64
-    %2 = llvm.alloca %1 x i32 {bindc_name = "a"} : (i64) -> !llvm.ptr<5>
-    %3 = llvm.addrspacecast %2 : !llvm.ptr<5> to !llvm.ptr
-    omp.task {
-      llvm.store %0, %3 : i32, !llvm.ptr
-      omp.terminator
-    }
-    %4 = omp.map.info var_ptr(%3 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "a"}
-    omp.target map_entries(%4 -> %arg0 : !llvm.ptr) {
-      %5 = llvm.mlir.constant(5 : i32) : i32
-      %6 = llvm.load %arg0  : !llvm.ptr -> i32
-      %7 = llvm.add %6, %5  : i32
-      llvm.store %7, %arg0  : i32, !llvm.ptr
-      omp.terminator
-    }
-    llvm.return
-  }
-}

>From a5261d9cddd93ee7056896bbc9ee262e4c0a0aea Mon Sep 17 00:00:00 2001
From: Sergio Afonso <safonsof at amd.com>
Date: Mon, 25 Aug 2025 13:37:59 +0100
Subject: [PATCH 2/2] Make omp.target[_{enter,exit}]_data and omp.target_update
 host-only ops

---
 .../OpenMP/target-use-device-nested.f90       | 46 +++++++++++++++++++
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp     | 11 -----
 .../Frontend/OpenMPIRBuilderTest.cpp          | 21 ---------
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      | 34 ++++----------
 .../openmp-target-use-device-nested.mlir      | 46 -------------------
 5 files changed, 56 insertions(+), 102 deletions(-)
 create mode 100644 flang/test/Integration/OpenMP/target-use-device-nested.f90
 delete mode 100644 mlir/test/Target/LLVMIR/openmp-target-use-device-nested.mlir

diff --git a/flang/test/Integration/OpenMP/target-use-device-nested.f90 b/flang/test/Integration/OpenMP/target-use-device-nested.f90
new file mode 100644
index 0000000000000..9bb4c39842731
--- /dev/null
+++ b/flang/test/Integration/OpenMP/target-use-device-nested.f90
@@ -0,0 +1,46 @@
+!===----------------------------------------------------------------------===!
+! This directory can be used to add Integration tests involving multiple
+! stages of the compiler (for eg. from Fortran to LLVM IR). It should not
+! contain executable tests. We should only add tests here sparingly and only
+! if there is no other way to test. Repeat this message in each test that is
+! added to this directory and sub-directories.
+!===----------------------------------------------------------------------===!
+
+! This test checks that target code nested inside a target data region which
+! has only use_device_ptr mapping correctly generates code on the device pass.
+
+!REQUIRES: amdgpu-registered-target
+!RUN: %flang_fc1 -triple amdgcn-amd-amdhsa -emit-llvm -fopenmp -fopenmp-version=50 -fopenmp-is-target-device %s -o - | FileCheck %s
+
+program main
+  use iso_c_binding
+  implicit none
+  type(c_ptr) :: a
+  !$omp target data use_device_ptr(a)
+    !$omp target map(tofrom: a)
+      call foo(a)
+    !$omp end target
+  !$omp end target data
+end program
+
+! CHECK:         define weak_odr protected amdgpu_kernel void @__omp_offloading{{.*}}main_
+! CHECK-NEXT:       entry:
+! CHECK-NEXT:         %[[VAL_3:.*]] = alloca ptr, align 8, addrspace(5)
+! CHECK-NEXT:         %[[ASCAST:.*]] = addrspacecast ptr addrspace(5) %[[VAL_3]] to ptr
+! CHECK-NEXT:         store ptr %[[VAL_4:.*]], ptr %[[ASCAST]], align 8
+! CHECK-NEXT:         %[[VAL_5:.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @__omp_offloading_{{.*}}_kernel_environment to ptr), ptr %[[VAL_6:.*]])
+! CHECK-NEXT:         %[[VAL_7:.*]] = icmp eq i32 %[[VAL_5]], -1
+! CHECK-NEXT:         br i1 %[[VAL_7]], label %[[VAL_8:.*]], label %[[VAL_9:.*]]
+! CHECK:            user_code.entry:                                  ; preds = %[[VAL_10:.*]]
+! CHECK-NEXT:         %[[VAL_11:.*]] = load ptr, ptr %[[ASCAST]], align 8
+! CHECK-NEXT:         br label %[[AFTER_ALLOC:.*]]
+
+! CHECK:            [[AFTER_ALLOC]]:
+! CHECK-NEXT:         br label %[[VAL_12:.*]]
+
+! CHECK:            [[VAL_12]]:
+! CHECK-NEXT:         br label %[[TARGET_REG_ENTRY:.*]]
+
+! CHECK:            [[TARGET_REG_ENTRY]]:                                       ; preds = %[[VAL_12]]
+! CHECK-NEXT:         call void @{{.*}}foo{{.*}}(ptr %[[VAL_11]])
+! CHECK-NEXT:         br label
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 50ab206e2db8e..8d72194aa80fc 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -7080,17 +7080,6 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTargetData(
     return InsertPointTy();
 
   Builder.restoreIP(CodeGenIP);
-  // Disable TargetData CodeGen on Device pass.
-  if (Config.IsTargetDevice.value_or(false)) {
-    if (BodyGenCB) {
-      InsertPointOrErrorTy AfterIP =
-          BodyGenCB(Builder.saveIP(), BodyGenTy::NoPriv);
-      if (!AfterIP)
-        return AfterIP.takeError();
-      Builder.restoreIP(*AfterIP);
-    }
-    return Builder.saveIP();
-  }
 
   bool IsStandAlone = !BodyGenCB;
   MapInfosTy *MapInfo;
diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
index b7a060bb3563d..a4558e55a7704 100644
--- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -6281,27 +6281,6 @@ TEST_F(OpenMPIRBuilderTest, TargetDataRegion) {
   EXPECT_TRUE(TargetDataCall->getOperand(2)->getType()->isIntegerTy(32));
   EXPECT_TRUE(TargetDataCall->getOperand(8)->getType()->isPointerTy());
 
-  // Check that BodyGenCB is still made when IsTargetDevice is set to true.
-  OMPBuilder.Config.setIsTargetDevice(true);
-  bool CheckDevicePassBodyGen = false;
-  auto BodyTargetCB = [&](InsertPointTy CodeGenIP, BodyGenTy BodyGenType) {
-    CheckDevicePassBodyGen = true;
-    Builder.restoreIP(CodeGenIP);
-    CallInst *TargetDataCall =
-        dyn_cast<CallInst>(BB->back().getPrevNode()->getPrevNode());
-    // Make sure no begin_mapper call is present for device pass.
-    EXPECT_EQ(TargetDataCall, nullptr);
-    return Builder.saveIP();
-  };
-  ASSERT_EXPECTED_INIT(
-      OpenMPIRBuilder::InsertPointTy, TargetDataIP2,
-      OMPBuilder.createTargetData(Loc, AllocaIP, Builder.saveIP(),
-                                  Builder.getInt64(DeviceID),
-                                  /* IfCond= */ nullptr, Info, GenMapInfoCB,
-                                  CustomMapperCB, nullptr, BodyTargetCB));
-  Builder.restoreIP(TargetDataIP2);
-  EXPECT_TRUE(CheckDevicePassBodyGen);
-
   Builder.CreateRetVoid();
   EXPECT_FALSE(verifyModule(*M, &errs()));
 }
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 8323ca97a1dc3..efd94613a25fb 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -4510,8 +4510,8 @@ convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder,
   llvm::OpenMPIRBuilder::TargetDataInfo info(/*RequiresDevicePointerInfo=*/true,
                                              /*SeparateBeginEndCalls=*/true);
   bool isTargetDevice = ompBuilder->Config.isTargetDevice();
-  bool isOffloadEntry =
-      isTargetDevice || !ompBuilder->Config.TargetTriples.empty();
+  assert(!isTargetDevice && "target data/enter/exit/update are host ops");
+  bool isOffloadEntry = !ompBuilder->Config.TargetTriples.empty();
 
   LogicalResult result =
       llvm::TypeSwitch<Operation *, LogicalResult>(op)
@@ -4687,30 +4687,17 @@ convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder,
       if (info.DevicePtrInfoMap.empty()) {
         // For host device we still need to do the mapping for codegen,
         // otherwise it may try to lookup a missing value.
-        if (!ompBuilder->Config.IsTargetDevice.value_or(false)) {
-          mapUseDevice(llvm::OpenMPIRBuilder::DeviceInfoTy::Address,
-                       blockArgIface.getUseDeviceAddrBlockArgs(),
-                       useDeviceAddrVars, mapData);
-          mapUseDevice(llvm::OpenMPIRBuilder::DeviceInfoTy::Pointer,
-                       blockArgIface.getUseDevicePtrBlockArgs(),
-                       useDevicePtrVars, mapData);
-        }
+        mapUseDevice(llvm::OpenMPIRBuilder::DeviceInfoTy::Address,
+                     blockArgIface.getUseDeviceAddrBlockArgs(),
+                     useDeviceAddrVars, mapData);
+        mapUseDevice(llvm::OpenMPIRBuilder::DeviceInfoTy::Pointer,
+                     blockArgIface.getUseDevicePtrBlockArgs(), useDevicePtrVars,
+                     mapData);
       }
       break;
     case BodyGenTy::NoPriv:
       // If device info is available then region has already been generated
       if (info.DevicePtrInfoMap.empty()) {
-        // For device pass, if use_device_ptr(addr) mappings were present,
-        // we need to link them here before codegen.
-        if (ompBuilder->Config.IsTargetDevice.value_or(false)) {
-          mapUseDevice(llvm::OpenMPIRBuilder::DeviceInfoTy::Address,
-                       blockArgIface.getUseDeviceAddrBlockArgs(),
-                       useDeviceAddrVars, mapData);
-          mapUseDevice(llvm::OpenMPIRBuilder::DeviceInfoTy::Pointer,
-                       blockArgIface.getUseDevicePtrBlockArgs(),
-                       useDevicePtrVars, mapData);
-        }
-
         if (failed(inlineConvertOmpRegions(region, "omp.data.region", builder,
                                            moduleTranslation)))
           return llvm::make_error<PreviouslyReportedError>();
@@ -6086,9 +6073,8 @@ LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation(
   llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
 
   if (ompBuilder->Config.isTargetDevice() &&
-      !isa<omp::TargetOp, omp::TargetDataOp, omp::TargetEnterDataOp,
-           omp::TargetExitDataOp, omp::TargetUpdateOp, omp::MapInfoOp,
-           omp::TerminatorOp, omp::YieldOp>(op) &&
+      !isa<omp::TargetOp, omp::MapInfoOp, omp::TerminatorOp, omp::YieldOp>(
+          op) &&
       isHostDeviceOp(op))
     return op->emitOpError() << "unsupported host op found in device";
 
diff --git a/mlir/test/Target/LLVMIR/openmp-target-use-device-nested.mlir b/mlir/test/Target/LLVMIR/openmp-target-use-device-nested.mlir
deleted file mode 100644
index 9c6b06e3aab96..0000000000000
--- a/mlir/test/Target/LLVMIR/openmp-target-use-device-nested.mlir
+++ /dev/null
@@ -1,46 +0,0 @@
-// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
-
-// This tests check that target code nested inside a target data region which
-// has only use_device_ptr mapping corectly generates code on the device pass.
-
-// CHECK:         define weak_odr protected amdgpu_kernel void @__omp_offloading{{.*}}main_
-// CHECK-NEXT:       entry:
-// CHECK-NEXT:         %[[VAL_3:.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT:         %[[ASCAST:.*]] = addrspacecast ptr addrspace(5) %[[VAL_3]] to ptr
-// CHECK-NEXT:         store ptr %[[VAL_4:.*]], ptr %[[ASCAST]], align 8
-// CHECK-NEXT:         %[[VAL_5:.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_{{.*}}_kernel_environment, ptr %[[VAL_6:.*]])
-// CHECK-NEXT:         %[[VAL_7:.*]] = icmp eq i32 %[[VAL_5]], -1
-// CHECK-NEXT:         br i1 %[[VAL_7]], label %[[VAL_8:.*]], label %[[VAL_9:.*]]
-// CHECK:            user_code.entry:                                  ; preds = %[[VAL_10:.*]]
-// CHECK-NEXT:         %[[VAL_11:.*]] = load ptr, ptr %[[ASCAST]], align 8
-// CHECK-NEXT:         br label %[[AFTER_ALLOC:.*]]
-
-// CHECK:            [[AFTER_ALLOC]]:
-// CHECK-NEXT:         br label %[[VAL_12:.*]]
-
-// CHECK:            [[VAL_12]]:
-// CHECK-NEXT:         br label %[[TARGET_REG_ENTRY:.*]]
-
-// CHECK:            [[TARGET_REG_ENTRY]]:                                       ; preds = %[[VAL_12]]
-// CHECK-NEXT:         %[[VAL_13:.*]] = load ptr, ptr %[[VAL_11]], align 8
-// CHECK-NEXT:         store i32 999, ptr %[[VAL_13]], align 4
-// CHECK-NEXT:         br label %[[VAL_14:.*]]
-module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true } {
-  llvm.func @_QQmain() attributes {fir.bindc_name = "main"} {
-    %0 = llvm.mlir.constant(1 : i64) : i64
-    %a = llvm.alloca %0 x !llvm.ptr : (i64) -> !llvm.ptr<5>
-    %ascast = llvm.addrspacecast %a : !llvm.ptr<5> to !llvm.ptr
-    %map = omp.map.info var_ptr(%ascast : !llvm.ptr, !llvm.ptr)   map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""}
-    omp.target_data use_device_ptr(%map -> %arg0 : !llvm.ptr)  {
-      %map1 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.ptr)   map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""}
-      omp.target map_entries(%map1 -> %arg1 : !llvm.ptr){
-        %1 = llvm.mlir.constant(999 : i32) : i32
-        %2 = llvm.load %arg1 : !llvm.ptr -> !llvm.ptr
-        llvm.store %1, %2 : i32, !llvm.ptr
-        omp.terminator
-      }
-      omp.terminator
-    }
-    llvm.return
-  }
-}



More information about the llvm-branch-commits mailing list