[llvm-branch-commits] [flang] [mlir] [MLIR][OpenMP] Simplify OpenMP device codegen (PR #137201)

Mon May 5 07:02:22 PDT 2025

https://github.com/skatrak updated https://github.com/llvm/llvm-project/pull/137201

>From 22f22aa0ca2c98dfcc48a70f2f7e0a5b68d7b1d9 Mon Sep 17 00:00:00 2001
From: Sergio Afonso <safonsof at amd.com>
Date: Tue, 22 Apr 2025 12:04:45 +0100
Subject: [PATCH] [MLIR][OpenMP] Simplify OpenMP device codegen

After removing host operations from the device MLIR module, it is no longer
necessary to provide special codegen logic to prevent these operations from
causing compiler crashes or miscompilations.

This patch removes these now-unnecessary code paths to simplify codegen logic.
Some MLIR tests are now replaced with Flang tests, since the responsibility of
dealing with host operations has been moved earlier in the compilation flow.

MLIR tests holding target device modules are updated so that they no longer
include host operations, which are now unsupported there.
---
 .../OpenMP/target-nesting-in-host-ops.f90     |  87 ++++
 .../Integration/OpenMP/task-target-device.f90 |  37 ++
 .../OpenMP/threadprivate-target-device.f90    |  40 ++
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      | 423 +++++++-----------
 ...arget-constant-indexing-device-region.mlir |  25 +-
 .../Target/LLVMIR/omptarget-debug-var-1.mlir  |  19 +-
 .../omptarget-memcpy-align-metadata.mlir      |  61 +--
 .../LLVMIR/omptarget-target-inside-task.mlir  |  43 --
 ...ptarget-threadprivate-device-lowering.mlir |  31 --
 .../Target/LLVMIR/openmp-llvm-invalid.mlir    |  45 ++
 .../openmp-target-nesting-in-host-ops.mlir    | 160 -------
 .../LLVMIR/openmp-task-target-device.mlir     |  26 --
 12 files changed, 409 insertions(+), 588 deletions(-)
 create mode 100644 flang/test/Integration/OpenMP/target-nesting-in-host-ops.f90
 create mode 100644 flang/test/Integration/OpenMP/task-target-device.f90
 create mode 100644 flang/test/Integration/OpenMP/threadprivate-target-device.f90
 delete mode 100644 mlir/test/Target/LLVMIR/omptarget-target-inside-task.mlir
 delete mode 100644 mlir/test/Target/LLVMIR/omptarget-threadprivate-device-lowering.mlir
 delete mode 100644 mlir/test/Target/LLVMIR/openmp-target-nesting-in-host-ops.mlir
 delete mode 100644 mlir/test/Target/LLVMIR/openmp-task-target-device.mlir

diff --git a/flang/test/Integration/OpenMP/target-nesting-in-host-ops.f90 b/flang/test/Integration/OpenMP/target-nesting-in-host-ops.f90
new file mode 100644
index 0000000000000..8c85a3c1784ed
--- /dev/null
+++ b/flang/test/Integration/OpenMP/target-nesting-in-host-ops.f90
@@ -0,0 +1,87 @@
+!===----------------------------------------------------------------------===!
+! This directory can be used to add Integration tests involving multiple
+! stages of the compiler (for eg. from Fortran to LLVM IR). It should not
+! contain executable tests. We should only add tests here sparingly and only
+! if there is no other way to test. Repeat this message in each test that is
+! added to this directory and sub-directories.
+!===----------------------------------------------------------------------===!
+
+!REQUIRES: amdgpu-registered-target
+!RUN: %flang_fc1 -triple amdgcn-amd-amdhsa -emit-llvm -fopenmp -fopenmp-version=50 -fopenmp-is-target-device %s -o - | FileCheck %s
+
+! CHECK-NOT: define void @nested_target_in_parallel
+! CHECK: define weak_odr protected amdgpu_kernel void @__omp_offloading_{{.*}}_nested_target_in_parallel_{{.*}}(ptr %{{.*}}, ptr %{{.*}})
+subroutine nested_target_in_parallel(v)
+  implicit none
+  integer, intent(inout) :: v(10)
+
+  !$omp parallel
+    !$omp target map(tofrom: v)
+    !$omp end target
+  !$omp end parallel
+end subroutine
+
+! CHECK-NOT: define void @nested_target_in_wsloop
+! CHECK: define weak_odr protected amdgpu_kernel void @__omp_offloading_{{.*}}_nested_target_in_wsloop_{{.*}}(ptr %{{.*}}, ptr %{{.*}})
+subroutine nested_target_in_wsloop(v)
+  implicit none
+  integer, intent(inout) :: v(10)
+  integer :: i
+
+  !$omp do
+  do i=1, 10
+    !$omp target map(tofrom: v)
+    !$omp end target
+  end do
+end subroutine
+
+! CHECK-NOT: define void @nested_target_in_parallel_with_private
+! CHECK: define weak_odr protected amdgpu_kernel void @__omp_offloading_{{.*}}_nested_target_in_parallel_with_private_{{.*}}(ptr %{{.*}}, ptr %{{.*}}, ptr %{{.*}})
+subroutine nested_target_in_parallel_with_private(v)
+  implicit none
+  integer, intent(inout) :: v(10)
+  integer :: x
+  x = 10
+
+  !$omp parallel firstprivate(x)
+    !$omp target map(tofrom: v(1:x))
+    !$omp end target
+  !$omp end parallel
+end subroutine
+
+! CHECK-NOT: define void @nested_target_in_task_with_private
+! CHECK: define weak_odr protected amdgpu_kernel void @__omp_offloading_{{.*}}_nested_target_in_task_with_private_{{.*}}(ptr %{{.*}}, ptr %{{.*}}, ptr %{{.*}})
+subroutine nested_target_in_task_with_private(v)
+  implicit none
+  integer, intent(inout) :: v(10)
+  integer :: x
+  x = 10
+
+  !$omp task firstprivate(x)
+    !$omp target map(tofrom: v(1:x))
+    !$omp end target
+  !$omp end task
+end subroutine
+
+! CHECK-NOT: define void @target_and_atomic_update
+! CHECK: define weak_odr protected amdgpu_kernel void @__omp_offloading_{{.*}}_target_and_atomic_update_{{.*}}(ptr %{{.*}})
+subroutine target_and_atomic_update(x, expr)
+  implicit none
+  integer, intent(inout) :: x, expr
+
+  !$omp target
+  !$omp end target
+
+  !$omp atomic update
+  x = x + expr
+end subroutine
+
+! CHECK-NOT: define void @nested_target_in_associate
+! CHECK: define weak_odr protected amdgpu_kernel void @__omp_offloading_{{.*}}_nested_target_in_associate_{{.*}}(ptr %{{.*}}, ptr %{{.*}}, ptr %{{.*}})
+subroutine nested_target_in_associate(x)
+  integer, pointer, contiguous :: x(:)
+  associate(y => x)
+    !$omp target map(tofrom: y)
+    !$omp end target
+  end associate
+end subroutine
diff --git a/flang/test/Integration/OpenMP/task-target-device.f90 b/flang/test/Integration/OpenMP/task-target-device.f90
new file mode 100644
index 0000000000000..b92dee65e3f7f
--- /dev/null
+++ b/flang/test/Integration/OpenMP/task-target-device.f90
@@ -0,0 +1,37 @@
+!===----------------------------------------------------------------------===!
+! This directory can be used to add Integration tests involving multiple
+! stages of the compiler (for eg. from Fortran to LLVM IR). It should not
+! contain executable tests. We should only add tests here sparingly and only
+! if there is no other way to test. Repeat this message in each test that is
+! added to this directory and sub-directories.
+!===----------------------------------------------------------------------===!
+
+!REQUIRES: amdgpu-registered-target
+!RUN: %flang_fc1 -triple amdgcn-amd-amdhsa -emit-llvm -fopenmp -fopenmp-version=50 -fopenmp-is-target-device %s -o - | FileCheck %s
+
+! This tests the fix for https://github.com/llvm/llvm-project/issues/84606
+! We are only interested in ensuring that the -mlir-to-llvmir pass doesn't crash.
+
+! CHECK: define weak_odr protected amdgpu_kernel void @{{.*}}QQmain{{.*}}({{.*}})
+program main
+  implicit none
+  integer, parameter :: N = 5
+  integer, dimension(5) :: a
+  integer :: i
+  integer :: target_a = 0
+
+  !$omp task depend(out:a)
+  do i = 1, N
+    a(i) = i
+  end do
+  !$omp end task
+
+  !$omp target map(tofrom:target_a) map(tofrom:a)
+  do i = 1, N
+    target_a = target_a + i
+    a(i) = a(i) + i
+  end do
+  !$omp end target
+  print*, target_a
+  print*, a
+end program main
diff --git a/flang/test/Integration/OpenMP/threadprivate-target-device.f90 b/flang/test/Integration/OpenMP/threadprivate-target-device.f90
new file mode 100644
index 0000000000000..662d6c6357af0
--- /dev/null
+++ b/flang/test/Integration/OpenMP/threadprivate-target-device.f90
@@ -0,0 +1,40 @@
+!===----------------------------------------------------------------------===!
+! This directory can be used to add Integration tests involving multiple
+! stages of the compiler (for eg. from Fortran to LLVM IR). It should not
+! contain executable tests. We should only add tests here sparingly and only
+! if there is no other way to test. Repeat this message in each test that is
+! added to this directory and sub-directories.
+!===----------------------------------------------------------------------===!
+
+!REQUIRES: amdgpu-registered-target
+!RUN: %flang_fc1 -triple amdgcn-amd-amdhsa -emit-llvm -fopenmp -fopenmp-version=50 -fopenmp-is-target-device %s -o - | FileCheck %s
+
+! The aim of this test is to verify host threadprivate directives do not cause
+! crashes during OpenMP target device codegen when used in conjunction with
+! target code in the same function.
+
+! CHECK: define weak_odr protected amdgpu_kernel void @{{.*}}(ptr %{{.*}}, ptr %[[ARG1:.*]], ptr %[[ARG2:.*]]) #{{[0-9]+}} {
+! CHECK:  %[[ALLOCA_X:.*]] = alloca ptr, align 8, addrspace(5)
+! CHECK:  %[[ASCAST_X:.*]] = addrspacecast ptr addrspace(5) %[[ALLOCA_X]] to ptr
+! CHECK:  store ptr %[[ARG1]], ptr %[[ASCAST_X]], align 8
+
+! CHECK:  %[[ALLOCA_N:.*]] = alloca ptr, align 8, addrspace(5)
+! CHECK:  %[[ASCAST_N:.*]] = addrspacecast ptr addrspace(5) %[[ALLOCA_N]] to ptr
+! CHECK:  store ptr %[[ARG2]], ptr %[[ASCAST_N]], align 8
+
+! CHECK:  %[[LOAD_X:.*]] = load ptr, ptr %[[ASCAST_X]], align 8
+! CHECK:  call void @bar_(ptr %[[LOAD_X]], ptr %[[ASCAST_N]])
+
+module test
+  implicit none
+  integer :: n
+  !$omp threadprivate(n)
+  
+  contains
+  subroutine foo(x)
+    integer, intent(inout) :: x(10)
+    !$omp target map(tofrom: x(1:n))
+      call bar(x, n)
+    !$omp end target
+  end subroutine
+end module
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 69591415a9cf3..d5f2919ba421a 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -3076,19 +3076,14 @@ convertOmpThreadprivate(Operation &opInst, llvm::IRBuilderBase &builder,
   LLVM::GlobalOp global =
       addressOfOp.getGlobal(moduleTranslation.symbolTable());
   llvm::GlobalValue *globalValue = moduleTranslation.lookupGlobal(global);
-
-  if (!ompBuilder->Config.isTargetDevice()) {
-    llvm::Type *type = globalValue->getValueType();
-    llvm::TypeSize typeSize =
-        builder.GetInsertBlock()->getModule()->getDataLayout().getTypeStoreSize(
-            type);
-    llvm::ConstantInt *size = builder.getInt64(typeSize.getFixedValue());
-    llvm::Value *callInst = ompBuilder->createCachedThreadPrivate(
-        ompLoc, globalValue, size, global.getSymName() + ".cache");
-    moduleTranslation.mapValue(opInst.getResult(0), callInst);
-  } else {
-    moduleTranslation.mapValue(opInst.getResult(0), globalValue);
-  }
+  llvm::Type *type = globalValue->getValueType();
+  llvm::TypeSize typeSize =
+      builder.GetInsertBlock()->getModule()->getDataLayout().getTypeStoreSize(
+          type);
+  llvm::ConstantInt *size = builder.getInt64(typeSize.getFixedValue());
+  llvm::Value *callInst = ompBuilder->createCachedThreadPrivate(
+      ompLoc, globalValue, size, global.getSymName() + ".cache");
+  moduleTranslation.mapValue(opInst.getResult(0), callInst);
 
   return success();
 }
@@ -5385,40 +5380,172 @@ convertDeclareTargetAttr(Operation *op, mlir::omp::DeclareTargetAttr attribute,
   return success();
 }
 
-// Returns true if the operation is inside a TargetOp or
-// is part of a declare target function.
-static bool isTargetDeviceOp(Operation *op) {
+namespace {
+
+/// Implementation of the dialect interface that converts operations belonging
+/// to the OpenMP dialect to LLVM IR.
+class OpenMPDialectLLVMIRTranslationInterface
+    : public LLVMTranslationDialectInterface {
+public:
+  using LLVMTranslationDialectInterface::LLVMTranslationDialectInterface;
+
+  /// Translates the given operation to LLVM IR using the provided IR builder
+  /// and saving the state in `moduleTranslation`.
+  LogicalResult
+  convertOperation(Operation *op, llvm::IRBuilderBase &builder,
+                   LLVM::ModuleTranslation &moduleTranslation) const final;
+
+  /// Given an OpenMP MLIR attribute, create the corresponding LLVM-IR,
+  /// runtime calls, or operation amendments
+  LogicalResult
+  amendOperation(Operation *op, ArrayRef<llvm::Instruction *> instructions,
+                 NamedAttribute attribute,
+                 LLVM::ModuleTranslation &moduleTranslation) const final;
+};
+
+} // namespace
+
+LogicalResult OpenMPDialectLLVMIRTranslationInterface::amendOperation(
+    Operation *op, ArrayRef<llvm::Instruction *> instructions,
+    NamedAttribute attribute,
+    LLVM::ModuleTranslation &moduleTranslation) const {
+  return llvm::StringSwitch<llvm::function_ref<LogicalResult(Attribute)>>(
+             attribute.getName())
+      .Case("omp.is_target_device",
+            [&](Attribute attr) {
+              if (auto deviceAttr = dyn_cast<BoolAttr>(attr)) {
+                llvm::OpenMPIRBuilderConfig &config =
+                    moduleTranslation.getOpenMPBuilder()->Config;
+                config.setIsTargetDevice(deviceAttr.getValue());
+                return success();
+              }
+              return failure();
+            })
+      .Case("omp.is_gpu",
+            [&](Attribute attr) {
+              if (auto gpuAttr = dyn_cast<BoolAttr>(attr)) {
+                llvm::OpenMPIRBuilderConfig &config =
+                    moduleTranslation.getOpenMPBuilder()->Config;
+                config.setIsGPU(gpuAttr.getValue());
+                return success();
+              }
+              return failure();
+            })
+      .Case("omp.host_ir_filepath",
+            [&](Attribute attr) {
+              if (auto filepathAttr = dyn_cast<StringAttr>(attr)) {
+                llvm::OpenMPIRBuilder *ompBuilder =
+                    moduleTranslation.getOpenMPBuilder();
+                ompBuilder->loadOffloadInfoMetadata(filepathAttr.getValue());
+                return success();
+              }
+              return failure();
+            })
+      .Case("omp.flags",
+            [&](Attribute attr) {
+              if (auto rtlAttr = dyn_cast<omp::FlagsAttr>(attr))
+                return convertFlagsAttr(op, rtlAttr, moduleTranslation);
+              return failure();
+            })
+      .Case("omp.version",
+            [&](Attribute attr) {
+              if (auto versionAttr = dyn_cast<omp::VersionAttr>(attr)) {
+                llvm::OpenMPIRBuilder *ompBuilder =
+                    moduleTranslation.getOpenMPBuilder();
+                ompBuilder->M.addModuleFlag(llvm::Module::Max, "openmp",
+                                            versionAttr.getVersion());
+                return success();
+              }
+              return failure();
+            })
+      .Case("omp.declare_target",
+            [&](Attribute attr) {
+              if (auto declareTargetAttr =
+                      dyn_cast<omp::DeclareTargetAttr>(attr))
+                return convertDeclareTargetAttr(op, declareTargetAttr,
+                                                moduleTranslation);
+              return failure();
+            })
+      .Case("omp.requires",
+            [&](Attribute attr) {
+              if (auto requiresAttr = dyn_cast<omp::ClauseRequiresAttr>(attr)) {
+                using Requires = omp::ClauseRequires;
+                Requires flags = requiresAttr.getValue();
+                llvm::OpenMPIRBuilderConfig &config =
+                    moduleTranslation.getOpenMPBuilder()->Config;
+                config.setHasRequiresReverseOffload(
+                    bitEnumContainsAll(flags, Requires::reverse_offload));
+                config.setHasRequiresUnifiedAddress(
+                    bitEnumContainsAll(flags, Requires::unified_address));
+                config.setHasRequiresUnifiedSharedMemory(
+                    bitEnumContainsAll(flags, Requires::unified_shared_memory));
+                config.setHasRequiresDynamicAllocators(
+                    bitEnumContainsAll(flags, Requires::dynamic_allocators));
+                return success();
+              }
+              return failure();
+            })
+      .Case("omp.target_triples",
+            [&](Attribute attr) {
+              if (auto triplesAttr = dyn_cast<ArrayAttr>(attr)) {
+                llvm::OpenMPIRBuilderConfig &config =
+                    moduleTranslation.getOpenMPBuilder()->Config;
+                config.TargetTriples.clear();
+                config.TargetTriples.reserve(triplesAttr.size());
+                for (Attribute tripleAttr : triplesAttr) {
+                  if (auto tripleStrAttr = dyn_cast<StringAttr>(tripleAttr))
+                    config.TargetTriples.emplace_back(tripleStrAttr.getValue());
+                  else
+                    return failure();
+                }
+                return success();
+              }
+              return failure();
+            })
+      .Default([](Attribute) {
+        // Fall through for omp attributes that do not require lowering.
+        return success();
+      })(attribute.getValue());
+
+  return failure();
+}
+
+// Returns true if the operation is part of a function but not inside a
+// TargetOp, and that function is not declare target for a non-host device.
+static bool isHostDeviceOp(Operation *op) {
   // Assumes no reverse offloading
   if (op->getParentOfType<omp::TargetOp>())
-    return true;
-
-  // Certain operations return results, and whether utilised in host or
-  // target there is a chance an LLVM Dialect operation depends on it
-  // by taking it in as an operand, so we must always lower these in
-  // some manner or result in an ICE (whether they end up in a no-op
-  // or otherwise).
-  if (mlir::isa<omp::ThreadprivateOp>(op))
-    return true;
+    return false;
 
-  if (auto parentFn = op->getParentOfType<LLVM::LLVMFuncOp>())
+  if (auto parentFn = op->getParentOfType<LLVM::LLVMFuncOp>()) {
     if (auto declareTargetIface =
             llvm::dyn_cast<mlir::omp::DeclareTargetInterface>(
                 parentFn.getOperation()))
       if (declareTargetIface.isDeclareTarget() &&
           declareTargetIface.getDeclareTargetDeviceType() !=
               mlir::omp::DeclareTargetDeviceType::host)
-        return true;
+        return false;
+
+    return true;
+  }
 
   return false;
 }
 
-/// Given an OpenMP MLIR operation, create the corresponding LLVM IR (including
-/// OpenMP runtime calls).
-static LogicalResult
-convertHostOrTargetOperation(Operation *op, llvm::IRBuilderBase &builder,
-                             LLVM::ModuleTranslation &moduleTranslation) {
+/// Given an OpenMP MLIR operation, create the corresponding LLVM IR
+/// (including OpenMP runtime calls).
+LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation(
+    Operation *op, llvm::IRBuilderBase &builder,
+    LLVM::ModuleTranslation &moduleTranslation) const {
   llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
 
+  if (ompBuilder->Config.isTargetDevice() &&
+      !isa<omp::TargetOp, omp::TargetDataOp, omp::TargetEnterDataOp,
+           omp::TargetExitDataOp, omp::TargetUpdateOp, omp::MapInfoOp,
+           omp::TerminatorOp, omp::YieldOp>(op) &&
+      isHostDeviceOp(op))
+    return op->emitOpError() << "unsupported host op found in device";
+
   // For each loop, introduce one stack frame to hold loop information. Ensure
   // this is only done for the outermost loop wrapper to prevent introducing
   // multiple stack frames for a single loop. Initially set to null, the loop
@@ -5571,238 +5698,6 @@ convertHostOrTargetOperation(Operation *op, llvm::IRBuilderBase &builder,
   return result;
 }
 
-static LogicalResult
-convertTargetDeviceOp(Operation *op, llvm::IRBuilderBase &builder,
-                      LLVM::ModuleTranslation &moduleTranslation) {
-  return convertHostOrTargetOperation(op, builder, moduleTranslation);
-}
-
-static LogicalResult
-convertTargetOpsInNest(Operation *op, llvm::IRBuilderBase &builder,
-                       LLVM::ModuleTranslation &moduleTranslation) {
-  if (isa<omp::TargetOp>(op))
-    return convertOmpTarget(*op, builder, moduleTranslation);
-  if (isa<omp::TargetDataOp>(op))
-    return convertOmpTargetData(op, builder, moduleTranslation);
-  bool interrupted =
-      op->walk<WalkOrder::PreOrder>([&](Operation *oper) {
-          if (isa<omp::TargetOp>(oper)) {
-            if (failed(convertOmpTarget(*oper, builder, moduleTranslation)))
-              return WalkResult::interrupt();
-            return WalkResult::skip();
-          }
-          if (isa<omp::TargetDataOp>(oper)) {
-            if (failed(convertOmpTargetData(oper, builder, moduleTranslation)))
-              return WalkResult::interrupt();
-            return WalkResult::skip();
-          }
-
-          // Non-target ops might nest target-related ops, therefore, we
-          // translate them as non-OpenMP scopes. Translating them is needed by
-          // nested target-related ops since they might need LLVM values defined
-          // in their parent non-target ops.
-          if (isa<omp::OpenMPDialect>(oper->getDialect()) &&
-              oper->getParentOfType<LLVM::LLVMFuncOp>() &&
-              !oper->getRegions().empty()) {
-            if (auto blockArgsIface =
-                    dyn_cast<omp::BlockArgOpenMPOpInterface>(oper))
-              forwardArgs(moduleTranslation, blockArgsIface);
-            else {
-              // Here we map entry block arguments of
-              // non-BlockArgOpenMPOpInterface ops if they can be encountered
-              // inside of a function and they define any of these arguments.
-              if (isa<mlir::omp::AtomicUpdateOp>(oper))
-                for (auto [operand, arg] :
-                     llvm::zip_equal(oper->getOperands(),
-                                     oper->getRegion(0).getArguments())) {
-                  moduleTranslation.mapValue(
-                      arg, builder.CreateLoad(
-                               moduleTranslation.convertType(arg.getType()),
-                               moduleTranslation.lookupValue(operand)));
-                }
-            }
-
-            if (auto loopNest = dyn_cast<omp::LoopNestOp>(oper)) {
-              assert(builder.GetInsertBlock() &&
-                     "No insert block is set for the builder");
-              for (auto iv : loopNest.getIVs()) {
-                // Map iv to an undefined value just to keep the IR validity.
-                moduleTranslation.mapValue(
-                    iv, llvm::PoisonValue::get(
-                            moduleTranslation.convertType(iv.getType())));
-              }
-            }
-
-            for (Region &region : oper->getRegions()) {
-              // Regions are fake in the sense that they are not a truthful
-              // translation of the OpenMP construct being converted (e.g. no
-              // OpenMP runtime calls will be generated). We just need this to
-              // prepare the kernel invocation args.
-              SmallVector<llvm::PHINode *> phis;
-              auto result = convertOmpOpRegions(
-                  region, oper->getName().getStringRef().str() + ".fake.region",
-                  builder, moduleTranslation, &phis);
-              if (failed(handleError(result, *oper)))
-                return WalkResult::interrupt();
-
-              builder.SetInsertPoint(result.get(), result.get()->end());
-            }
-
-            return WalkResult::skip();
-          }
-
-          return WalkResult::advance();
-        }).wasInterrupted();
-  return failure(interrupted);
-}
-
-namespace {
-
-/// Implementation of the dialect interface that converts operations belonging
-/// to the OpenMP dialect to LLVM IR.
-class OpenMPDialectLLVMIRTranslationInterface
-    : public LLVMTranslationDialectInterface {
-public:
-  using LLVMTranslationDialectInterface::LLVMTranslationDialectInterface;
-
-  /// Translates the given operation to LLVM IR using the provided IR builder
-  /// and saving the state in `moduleTranslation`.
-  LogicalResult
-  convertOperation(Operation *op, llvm::IRBuilderBase &builder,
-                   LLVM::ModuleTranslation &moduleTranslation) const final;
-
-  /// Given an OpenMP MLIR attribute, create the corresponding LLVM-IR,
-  /// runtime calls, or operation amendments
-  LogicalResult
-  amendOperation(Operation *op, ArrayRef<llvm::Instruction *> instructions,
-                 NamedAttribute attribute,
-                 LLVM::ModuleTranslation &moduleTranslation) const final;
-};
-
-} // namespace
-
-LogicalResult OpenMPDialectLLVMIRTranslationInterface::amendOperation(
-    Operation *op, ArrayRef<llvm::Instruction *> instructions,
-    NamedAttribute attribute,
-    LLVM::ModuleTranslation &moduleTranslation) const {
-  return llvm::StringSwitch<llvm::function_ref<LogicalResult(Attribute)>>(
-             attribute.getName())
-      .Case("omp.is_target_device",
-            [&](Attribute attr) {
-              if (auto deviceAttr = dyn_cast<BoolAttr>(attr)) {
-                llvm::OpenMPIRBuilderConfig &config =
-                    moduleTranslation.getOpenMPBuilder()->Config;
-                config.setIsTargetDevice(deviceAttr.getValue());
-                return success();
-              }
-              return failure();
-            })
-      .Case("omp.is_gpu",
-            [&](Attribute attr) {
-              if (auto gpuAttr = dyn_cast<BoolAttr>(attr)) {
-                llvm::OpenMPIRBuilderConfig &config =
-                    moduleTranslation.getOpenMPBuilder()->Config;
-                config.setIsGPU(gpuAttr.getValue());
-                return success();
-              }
-              return failure();
-            })
-      .Case("omp.host_ir_filepath",
-            [&](Attribute attr) {
-              if (auto filepathAttr = dyn_cast<StringAttr>(attr)) {
-                llvm::OpenMPIRBuilder *ompBuilder =
-                    moduleTranslation.getOpenMPBuilder();
-                ompBuilder->loadOffloadInfoMetadata(filepathAttr.getValue());
-                return success();
-              }
-              return failure();
-            })
-      .Case("omp.flags",
-            [&](Attribute attr) {
-              if (auto rtlAttr = dyn_cast<omp::FlagsAttr>(attr))
-                return convertFlagsAttr(op, rtlAttr, moduleTranslation);
-              return failure();
-            })
-      .Case("omp.version",
-            [&](Attribute attr) {
-              if (auto versionAttr = dyn_cast<omp::VersionAttr>(attr)) {
-                llvm::OpenMPIRBuilder *ompBuilder =
-                    moduleTranslation.getOpenMPBuilder();
-                ompBuilder->M.addModuleFlag(llvm::Module::Max, "openmp",
-                                            versionAttr.getVersion());
-                return success();
-              }
-              return failure();
-            })
-      .Case("omp.declare_target",
-            [&](Attribute attr) {
-              if (auto declareTargetAttr =
-                      dyn_cast<omp::DeclareTargetAttr>(attr))
-                return convertDeclareTargetAttr(op, declareTargetAttr,
-                                                moduleTranslation);
-              return failure();
-            })
-      .Case("omp.requires",
-            [&](Attribute attr) {
-              if (auto requiresAttr = dyn_cast<omp::ClauseRequiresAttr>(attr)) {
-                using Requires = omp::ClauseRequires;
-                Requires flags = requiresAttr.getValue();
-                llvm::OpenMPIRBuilderConfig &config =
-                    moduleTranslation.getOpenMPBuilder()->Config;
-                config.setHasRequiresReverseOffload(
-                    bitEnumContainsAll(flags, Requires::reverse_offload));
-                config.setHasRequiresUnifiedAddress(
-                    bitEnumContainsAll(flags, Requires::unified_address));
-                config.setHasRequiresUnifiedSharedMemory(
-                    bitEnumContainsAll(flags, Requires::unified_shared_memory));
-                config.setHasRequiresDynamicAllocators(
-                    bitEnumContainsAll(flags, Requires::dynamic_allocators));
-                return success();
-              }
-              return failure();
-            })
-      .Case("omp.target_triples",
-            [&](Attribute attr) {
-              if (auto triplesAttr = dyn_cast<ArrayAttr>(attr)) {
-                llvm::OpenMPIRBuilderConfig &config =
-                    moduleTranslation.getOpenMPBuilder()->Config;
-                config.TargetTriples.clear();
-                config.TargetTriples.reserve(triplesAttr.size());
-                for (Attribute tripleAttr : triplesAttr) {
-                  if (auto tripleStrAttr = dyn_cast<StringAttr>(tripleAttr))
-                    config.TargetTriples.emplace_back(tripleStrAttr.getValue());
-                  else
-                    return failure();
-                }
-                return success();
-              }
-              return failure();
-            })
-      .Default([](Attribute) {
-        // Fall through for omp attributes that do not require lowering.
-        return success();
-      })(attribute.getValue());
-
-  return failure();
-}
-
-/// Given an OpenMP MLIR operation, create the corresponding LLVM IR
-/// (including OpenMP runtime calls).
-LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation(
-    Operation *op, llvm::IRBuilderBase &builder,
-    LLVM::ModuleTranslation &moduleTranslation) const {
-
-  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
-  if (ompBuilder->Config.isTargetDevice()) {
-    if (isTargetDeviceOp(op)) {
-      return convertTargetDeviceOp(op, builder, moduleTranslation);
-    } else {
-      return convertTargetOpsInNest(op, builder, moduleTranslation);
-    }
-  }
-  return convertHostOrTargetOperation(op, builder, moduleTranslation);
-}
-
 void mlir::registerOpenMPDialectTranslation(DialectRegistry &registry) {
   registry.insert<omp::OpenMPDialect>();
   registry.addExtension(+[](MLIRContext *ctx, omp::OpenMPDialect *dialect) {
diff --git a/mlir/test/Target/LLVMIR/omptarget-constant-indexing-device-region.mlir b/mlir/test/Target/LLVMIR/omptarget-constant-indexing-device-region.mlir
index ed66ff2c9ad7e..f21ffc45c8bcc 100644
--- a/mlir/test/Target/LLVMIR/omptarget-constant-indexing-device-region.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-constant-indexing-device-region.mlir
@@ -3,21 +3,16 @@
 module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true} {
   llvm.func @_QQmain() attributes {bindc_name = "main"} {
     %0 = llvm.mlir.addressof @_QFEsp : !llvm.ptr
-    %1 = llvm.mlir.constant(10 : index) : i64
-    %2 = llvm.mlir.constant(1 : index) : i64
-    %3 = llvm.mlir.constant(0 : index) : i64
-    %4 = llvm.mlir.constant(9 : index) : i64
-    %5 = omp.map.bounds lower_bound(%3 : i64) upper_bound(%4 : i64) extent(%1 : i64) stride(%2 : i64) start_idx(%2 : i64)
-    %6 = omp.map.info var_ptr(%0 : !llvm.ptr, !llvm.array<10 x i32>) map_clauses(tofrom) capture(ByRef) bounds(%5) -> !llvm.ptr {name = "sp"}
-    omp.target map_entries(%6 -> %arg0 : !llvm.ptr) {
-      %7 = llvm.mlir.constant(20 : i32) : i32
-      %8 = llvm.mlir.constant(0 : i64) : i64
-      %9 = llvm.getelementptr %arg0[0, %8] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.array<10 x i32>
-      llvm.store %7, %9 : i32, !llvm.ptr
-      %10 = llvm.mlir.constant(10 : i32) : i32
-      %11 = llvm.mlir.constant(4 : i64) : i64
-      %12 = llvm.getelementptr %arg0[0, %11] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.array<10 x i32>
-      llvm.store %10, %12 : i32, !llvm.ptr
+    %1 = omp.map.info var_ptr(%0 : !llvm.ptr, !llvm.array<10 x i32>) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "sp"}
+    omp.target map_entries(%1 -> %arg0 : !llvm.ptr) {
+      %2 = llvm.mlir.constant(20 : i32) : i32
+      %3 = llvm.mlir.constant(0 : i64) : i64
+      %4 = llvm.getelementptr %arg0[0, %3] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.array<10 x i32>
+      llvm.store %2, %4 : i32, !llvm.ptr
+      %5 = llvm.mlir.constant(10 : i32) : i32
+      %6 = llvm.mlir.constant(4 : i64) : i64
+      %7 = llvm.getelementptr %arg0[0, %6] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.array<10 x i32>
+      llvm.store %5, %7 : i32, !llvm.ptr
       omp.terminator
     }
     llvm.return
diff --git a/mlir/test/Target/LLVMIR/omptarget-debug-var-1.mlir b/mlir/test/Target/LLVMIR/omptarget-debug-var-1.mlir
index ea92589bbd031..5d845f8eaa1ac 100644
--- a/mlir/test/Target/LLVMIR/omptarget-debug-var-1.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-debug-var-1.mlir
@@ -30,19 +30,14 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo
   llvm.func @test() {
     %0 = llvm.mlir.constant(1 : i64) : i64
     %1 = llvm.alloca %0 x f32 : (i64) -> !llvm.ptr<5>
-    %4 = llvm.alloca %0 x i32 : (i64) -> !llvm.ptr<5>
+    %2 = llvm.alloca %0 x i32 : (i64) -> !llvm.ptr<5>
     %ascast = llvm.addrspacecast %1 : !llvm.ptr<5> to !llvm.ptr
-    %ascast2 = llvm.addrspacecast %4 : !llvm.ptr<5> to !llvm.ptr
-    %6 = llvm.mlir.constant(9 : index) : i64
-    %7 = llvm.mlir.constant(0 : index) : i64
-    %8 = llvm.mlir.constant(1 : index) : i64
-    %10 = llvm.mlir.constant(10 : index) : i64
-    %11 = llvm.mlir.addressof @_QFEarr : !llvm.ptr
-    %14 = omp.map.info var_ptr(%ascast : !llvm.ptr, f32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr
-    %15 = omp.map.bounds lower_bound(%7 : i64) upper_bound(%6 : i64) extent(%10 : i64) stride(%8 : i64) start_idx(%8 : i64)
-    %16 = omp.map.info var_ptr(%11 : !llvm.ptr, !llvm.array<10 x i32>) map_clauses(tofrom) capture(ByRef) bounds(%15) -> !llvm.ptr
-    %17 = omp.map.info var_ptr(%ascast2 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr
-    omp.target map_entries(%14 -> %arg0, %16 -> %arg1, %17 -> %arg2 : !llvm.ptr, !llvm.ptr, !llvm.ptr) {
+    %ascast2 = llvm.addrspacecast %2 : !llvm.ptr<5> to !llvm.ptr
+    %3 = llvm.mlir.addressof @_QFEarr : !llvm.ptr
+    %4 = omp.map.info var_ptr(%ascast : !llvm.ptr, f32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr
+    %5 = omp.map.info var_ptr(%3 : !llvm.ptr, !llvm.array<10 x i32>) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr
+    %6 = omp.map.info var_ptr(%ascast2 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr
+    omp.target map_entries(%4 -> %arg0, %5 -> %arg1, %6 -> %arg2 : !llvm.ptr, !llvm.ptr, !llvm.ptr) {
       llvm.intr.dbg.declare #var_x = %arg0 : !llvm.ptr
       llvm.intr.dbg.declare #var_arr = %arg1 : !llvm.ptr
       llvm.intr.dbg.declare #var_i = %arg2 : !llvm.ptr
diff --git a/mlir/test/Target/LLVMIR/omptarget-memcpy-align-metadata.mlir b/mlir/test/Target/LLVMIR/omptarget-memcpy-align-metadata.mlir
index 633df96866885..74a76685e0c51 100644
--- a/mlir/test/Target/LLVMIR/omptarget-memcpy-align-metadata.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-memcpy-align-metadata.mlir
@@ -7,49 +7,36 @@
 module attributes {llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true} {
   omp.private {type = private} @_QFEk_private_i32 : i32
   llvm.func @_QQmain()  {
-    %0 = llvm.mlir.constant(1 : i32) : i32
-    %7 = llvm.alloca %0 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr<5>
-    %8 = llvm.addrspacecast %7 : !llvm.ptr<5> to !llvm.ptr
-    %12 = llvm.mlir.constant(1 : i64) : i64
-    %13 = llvm.alloca %12 x i32 {bindc_name = "k"} : (i64) -> !llvm.ptr<5>
-    %14 = llvm.addrspacecast %13 : !llvm.ptr<5> to !llvm.ptr
-    %15 = llvm.mlir.constant(1 : i64) : i64
-    %16 = llvm.alloca %15 x i32 {bindc_name = "b"} : (i64) -> !llvm.ptr<5>
-    %17 = llvm.addrspacecast %16 : !llvm.ptr<5> to !llvm.ptr
-    %19 = llvm.mlir.constant(1 : index) : i64
-    %20 = llvm.mlir.constant(0 : index) : i64
-    %22 = llvm.mlir.addressof @_QFEa : !llvm.ptr
-    %25 = llvm.mlir.addressof @_QFECnz : !llvm.ptr
-    %60 = llvm.getelementptr %8[0, 7, %20, 0] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
-    %61 = llvm.load %60 : !llvm.ptr -> i64
-    %62 = llvm.getelementptr %8[0, 7, %20, 1] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
-    %63 = llvm.load %62 : !llvm.ptr -> i64
-    %64 = llvm.getelementptr %8[0, 7, %20, 2] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
-    %65 = llvm.load %64 : !llvm.ptr -> i64
-    %66 = llvm.sub %63, %19 : i64
-    %67 = omp.map.bounds lower_bound(%20 : i64) upper_bound(%66 : i64) extent(%63 : i64) stride(%65 : i64) start_idx(%61 : i64) {stride_in_bytes = true}
-    %68 = llvm.getelementptr %22[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
-    %69 = omp.map.info var_ptr(%22 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) var_ptr_ptr(%68 : !llvm.ptr) bounds(%67) -> !llvm.ptr {name = ""}
-    %70 = omp.map.info var_ptr(%22 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>) map_clauses(to) capture(ByRef) members(%69 : [0] : !llvm.ptr) -> !llvm.ptr {name = "a"}
-    %71 = omp.map.info var_ptr(%17 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "b"}
-    %72 = omp.map.info var_ptr(%14 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "k"}
-    %73 = omp.map.info var_ptr(%25 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "nz"}
-    omp.target map_entries(%70 -> %arg0, %71 -> %arg1, %72 -> %arg2, %73 -> %arg3, %69 -> %arg4 : !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr) {
-      %106 = llvm.mlir.constant(0 : index) : i64
-      %107 = llvm.mlir.constant(13 : i32) : i32
-      %108 = llvm.mlir.constant(1000 : i32) : i32
-      %109 = llvm.mlir.constant(1 : i32) : i32
+    %0 = llvm.mlir.constant(1 : i64) : i64
+    %1 = llvm.alloca %0 x i32 {bindc_name = "k"} : (i64) -> !llvm.ptr<5>
+    %2 = llvm.addrspacecast %1 : !llvm.ptr<5> to !llvm.ptr
+    %3 = llvm.mlir.constant(1 : i64) : i64
+    %4 = llvm.alloca %3 x i32 {bindc_name = "b"} : (i64) -> !llvm.ptr<5>
+    %5 = llvm.addrspacecast %4 : !llvm.ptr<5> to !llvm.ptr
+    %6 = llvm.mlir.addressof @_QFEa : !llvm.ptr
+    %7 = llvm.mlir.addressof @_QFECnz : !llvm.ptr
+    %8 = llvm.getelementptr %6[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+    %9 = omp.map.info var_ptr(%6 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) var_ptr_ptr(%8 : !llvm.ptr) -> !llvm.ptr {name = ""}
+    %10 = omp.map.info var_ptr(%6 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>) map_clauses(to) capture(ByRef) members(%9 : [0] : !llvm.ptr) -> !llvm.ptr {name = "a"}
+    %11 = omp.map.info var_ptr(%5 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "b"}
+    %12 = omp.map.info var_ptr(%2 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "k"}
+    %13 = omp.map.info var_ptr(%7 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "nz"}
+    omp.target map_entries(%10 -> %arg0, %11 -> %arg1, %12 -> %arg2, %13 -> %arg3, %9 -> %arg4 : !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr) {
+      %14 = llvm.mlir.constant(0 : index) : i64
+      %15 = llvm.mlir.constant(13 : i32) : i32
+      %16 = llvm.mlir.constant(1000 : i32) : i32
+      %17 = llvm.mlir.constant(1 : i32) : i32
       omp.teams {
         omp.parallel private(@_QFEk_private_i32 %arg2 -> %arg5 : !llvm.ptr) {
-          %110 = llvm.mlir.constant(1 : i32) : i32
-          %111 = llvm.alloca %110 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr<5>
-          %112 = llvm.addrspacecast %111 : !llvm.ptr<5> to !llvm.ptr
+          %18 = llvm.mlir.constant(1 : i32) : i32
+          %19 = llvm.alloca %18 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr<5>
+          %20 = llvm.addrspacecast %19 : !llvm.ptr<5> to !llvm.ptr
           omp.distribute {
             omp.wsloop {
-              omp.loop_nest (%arg6) : i32 = (%109) to (%108) inclusive step (%109) {
+              omp.loop_nest (%arg6) : i32 = (%17) to (%16) inclusive step (%17) {
                 llvm.store %arg6, %arg5  : i32, !llvm.ptr
                 %115 = llvm.mlir.constant(48 : i32) : i32
-                "llvm.intr.memcpy"(%112, %arg0, %115) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
+                "llvm.intr.memcpy"(%20, %arg0, %115) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
                 omp.yield
               }
             } {omp.composite}
diff --git a/mlir/test/Target/LLVMIR/omptarget-target-inside-task.mlir b/mlir/test/Target/LLVMIR/omptarget-target-inside-task.mlir
deleted file mode 100644
index 16be0773bd14b..0000000000000
--- a/mlir/test/Target/LLVMIR/omptarget-target-inside-task.mlir
+++ /dev/null
@@ -1,43 +0,0 @@
-// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
-
-module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true, omp.is_gpu = true} {
-  llvm.func @omp_target_region_() {
-    %0 = llvm.mlir.constant(20 : i32) : i32
-    %1 = llvm.mlir.constant(10 : i32) : i32
-    %2 = llvm.mlir.constant(1 : i64) : i64
-    %3 = llvm.alloca %2 x i32 {bindc_name = "a", in_type = i32, operandSegmentSizes = array<i32: 0, 0>, uniq_name = "_QFomp_target_regionEa"} : (i64) -> !llvm.ptr<5>
-    %ascast = llvm.addrspacecast %3 : !llvm.ptr<5> to !llvm.ptr
-    %4 = llvm.mlir.constant(1 : i64) : i64
-    %5 = llvm.alloca %4 x i32 {bindc_name = "b", in_type = i32, operandSegmentSizes = array<i32: 0, 0>, uniq_name = "_QFomp_target_regionEb"} : (i64) -> !llvm.ptr<5>
-    %ascast2 = llvm.addrspacecast %5 : !llvm.ptr<5> to !llvm.ptr
-    %6 = llvm.mlir.constant(1 : i64) : i64
-    %7 = llvm.alloca %6 x i32 {bindc_name = "c", in_type = i32, operandSegmentSizes = array<i32: 0, 0>, uniq_name = "_QFomp_target_regionEc"} : (i64) -> !llvm.ptr<5>
-    %ascast3 = llvm.addrspacecast %7 : !llvm.ptr<5> to !llvm.ptr
-    llvm.store %1, %ascast : i32, !llvm.ptr
-    llvm.store %0, %ascast2 : i32, !llvm.ptr
-    omp.task {
-        %map1 = omp.map.info var_ptr(%ascast : !llvm.ptr, i32)   map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""}
-        %map2 = omp.map.info var_ptr(%ascast2 : !llvm.ptr, i32)   map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""}
-        %map3 = omp.map.info var_ptr(%ascast3 : !llvm.ptr, i32)   map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""}
-      omp.target map_entries(%map1 -> %arg0, %map2 -> %arg1, %map3 -> %arg2 : !llvm.ptr, !llvm.ptr, !llvm.ptr) {
-        %8 = llvm.load %arg0 : !llvm.ptr -> i32
-        %9 = llvm.load %arg1 : !llvm.ptr -> i32
-        %10 = llvm.add %8, %9  : i32
-        llvm.store %10, %arg2 : i32, !llvm.ptr
-        omp.terminator
-      }
-      omp.terminator
-    }
-   llvm.return
-  }
-
-  llvm.func @omp_target_no_map() {
-    omp.target {
-      omp.terminator
-    }
-    llvm.return
-  }
-}
-
-// CHECK: define weak_odr protected amdgpu_kernel void @__omp_offloading_{{.*}}_{{.*}}_omp_target_region__l22
-// CHECK: ret void
diff --git a/mlir/test/Target/LLVMIR/omptarget-threadprivate-device-lowering.mlir b/mlir/test/Target/LLVMIR/omptarget-threadprivate-device-lowering.mlir
deleted file mode 100644
index ba182374a9e3b..0000000000000
--- a/mlir/test/Target/LLVMIR/omptarget-threadprivate-device-lowering.mlir
+++ /dev/null
@@ -1,31 +0,0 @@
-// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
-
-// Not intended to be a functional example, the aim of this test is to verify
-// omp.threadprivate does not crash on lowering during the OpenMP target device
-// pass when used in conjunction with target code in the same module.
-
-module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true } {
-  llvm.func @func() attributes {omp.declare_target = #omp.declaretarget<device_type = (host), capture_clause = (to)>} {
-    %0 = llvm.mlir.addressof @_QFEpointer2 : !llvm.ptr
-    %1 = omp.threadprivate %0 : !llvm.ptr -> !llvm.ptr
-    %2 = omp.map.info var_ptr(%1 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>) map_clauses(implicit, to) capture(ByRef) -> !llvm.ptr
-    omp.target map_entries(%2 -> %arg0 : !llvm.ptr) {
-      %3 = llvm.mlir.constant(1 : i32) : i32
-      %4 = llvm.getelementptr %arg0[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
-      llvm.store %3, %4 : i32, !llvm.ptr
-      omp.terminator
-    }
-    llvm.return
-  }
-   llvm.mlir.global internal @_QFEpointer2() {addr_space = 0 : i32} : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> {
-    %0 = llvm.mlir.undef : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
-    llvm.return %0 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
-  }
-}
-
-// CHECK: define weak_odr protected amdgpu_kernel void @{{.*}}(ptr %{{.*}}, ptr %[[ARG1:.*]]) #{{[0-9]+}} {
-// CHECK:  %[[ALLOCA:.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK:  %[[ALLOCA_ASCAST:.*]] = addrspacecast ptr addrspace(5) %[[ALLOCA]] to ptr
-// CHECK:  store ptr %[[ARG1]], ptr %[[ALLOCA_ASCAST]], align 8
-// CHECK:  %[[LOAD_ALLOCA:.*]] = load ptr, ptr %[[ALLOCA_ASCAST]], align 8
-// CHECK:  store i32 1, ptr %[[LOAD_ALLOCA]], align 4
diff --git a/mlir/test/Target/LLVMIR/openmp-llvm-invalid.mlir b/mlir/test/Target/LLVMIR/openmp-llvm-invalid.mlir
index 41bc5c4ba525f..1efea084d0dcf 100644
--- a/mlir/test/Target/LLVMIR/openmp-llvm-invalid.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-llvm-invalid.mlir
@@ -89,3 +89,48 @@ llvm.func @omp_threadprivate() {
   llvm.store %3, %5 : i32, !llvm.ptr
   llvm.return
 }
+
+// -----
+
+module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true} {
+  llvm.func @host_op_in_device(%arg0 : !llvm.ptr) {
+    // expected-error @below {{unsupported host op found in device}}
+    // expected-error @below {{LLVM Translation failed for operation: omp.threadprivate}}
+    %0 = omp.threadprivate %arg0 : !llvm.ptr -> !llvm.ptr
+    llvm.return
+  }
+}
+
+// -----
+
+module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true} {
+  llvm.func @host_op_in_device_nested_target(%arg0 : !llvm.ptr) {
+    // expected-error @below {{unsupported host op found in device}}
+    // expected-error @below {{LLVM Translation failed for operation: omp.parallel}}
+    omp.parallel {
+      omp.target {
+        omp.terminator
+      }
+      omp.terminator
+    }
+    llvm.return
+  }
+}
+
+// -----
+
+module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true} {
+  llvm.func @host_op_in_device_sibling_target(%x: !llvm.ptr, %expr: i32) {
+    omp.target {
+      omp.terminator
+    }
+    // expected-error @below {{unsupported host op found in device}}
+    // expected-error @below {{LLVM Translation failed for operation: omp.atomic.update}}
+    omp.atomic.update %x : !llvm.ptr {
+    ^bb0(%xval: i32):
+      %newval = llvm.add %xval, %expr : i32
+      omp.yield(%newval : i32)
+    }
+    llvm.return
+  }
+}
diff --git a/mlir/test/Target/LLVMIR/openmp-target-nesting-in-host-ops.mlir b/mlir/test/Target/LLVMIR/openmp-target-nesting-in-host-ops.mlir
deleted file mode 100644
index cbf273b887bc7..0000000000000
--- a/mlir/test/Target/LLVMIR/openmp-target-nesting-in-host-ops.mlir
+++ /dev/null
@@ -1,160 +0,0 @@
-// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
-
-module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true} {
-
-  omp.private {type = private} @i32_privatizer : i32
-
-  llvm.func @test_nested_target_in_parallel(%arg0: !llvm.ptr) {
-    omp.parallel {
-    %0 = llvm.mlir.constant(4 : index) : i64
-    %1 = llvm.mlir.constant(1 : index) : i64
-    %4 = omp.map.bounds   lower_bound(%1 : i64) upper_bound(%0 : i64) stride(%1 : i64) start_idx(%1 : i64)
-    %mapv1 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.array<10 x i32>)   map_clauses(tofrom) capture(ByRef) bounds(%4) -> !llvm.ptr {name = ""}
-    omp.target map_entries(%mapv1 -> %map_arg : !llvm.ptr) {
-      omp.terminator
-    }
-      omp.terminator
-    }
-    llvm.return
-  }
-
-// CHECK-LABEL: define void @test_nested_target_in_parallel({{.*}}) {
-// CHECK-NEXT:    br label %omp.parallel.fake.region
-// CHECK:       omp.parallel.fake.region:
-// CHECK-NEXT:    br label %omp.region.cont
-// CHECK:       omp.region.cont:
-// CHECK-NEXT:    ret void
-// CHECK-NEXT:  }
-
-  llvm.func @test_nested_target_in_wsloop(%arg0: !llvm.ptr) {
-    %8 = llvm.mlir.constant(1 : i64) : i64
-    %9 = llvm.alloca %8 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr<5>
-    %ascast = llvm.addrspacecast %9 : !llvm.ptr<5> to !llvm.ptr
-    %16 = llvm.mlir.constant(10 : i32) : i32
-    %17 = llvm.mlir.constant(1 : i32) : i32
-    omp.wsloop private(@i32_privatizer %ascast -> %loop_arg : !llvm.ptr) {
-      omp.loop_nest (%arg1) : i32 = (%17) to (%16) inclusive step (%17) {
-        llvm.store %arg1, %loop_arg : i32, !llvm.ptr
-        %0 = llvm.mlir.constant(4 : index) : i64
-        %1 = llvm.mlir.constant(1 : index) : i64
-        %4 = omp.map.bounds   lower_bound(%1 : i64) upper_bound(%0 : i64) stride(%1 : i64) start_idx(%1 : i64)
-        %mapv1 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.array<10 x i32>)   map_clauses(tofrom) capture(ByRef) bounds(%4) -> !llvm.ptr {name = ""}
-        omp.target map_entries(%mapv1 -> %map_arg : !llvm.ptr) {
-          omp.terminator
-        }
-        omp.yield
-      }
-    }
-    llvm.return
-  }
-
-// CHECK-LABEL: define void @test_nested_target_in_wsloop(ptr %0) {
-// CHECK-NEXT:    %{{.*}} = alloca i32, i64 1, align 4, addrspace(5)
-// CHECK-NEXT:    %{{.*}} = addrspacecast ptr addrspace(5) %{{.*}} to ptr
-// CHECK-NEXT:    br label %omp.wsloop.fake.region
-// CHECK:       omp.wsloop.fake.region:
-// CHECK-NEXT:    br label %omp.loop_nest.fake.region
-// CHECK:       omp.loop_nest.fake.region:
-// CHECK-NEXT:    store i32 poison, ptr %{{.*}}
-// CHECK-NEXT:    br label %omp.region.cont1
-// CHECK:       omp.region.cont1:
-// CHECK-NEXT:    br label %omp.region.cont
-// CHECK:       omp.region.cont:
-// CHECK-NEXT:    ret void
-// CHECK-NEXT:  }
-
-  llvm.func @test_nested_target_in_parallel_with_private(%arg0: !llvm.ptr) {
-    %8 = llvm.mlir.constant(1 : i64) : i64
-    %9 = llvm.alloca %8 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr<5>
-    %ascast = llvm.addrspacecast %9 : !llvm.ptr<5> to !llvm.ptr
-    omp.parallel private(@i32_privatizer %ascast -> %i_priv_arg : !llvm.ptr) {
-        %1 = llvm.mlir.constant(1 : index) : i64
-        // Use the private clause from omp.parallel to make sure block arguments
-        // are handled.
-        %i_val = llvm.load %i_priv_arg : !llvm.ptr -> i64
-        %4 = omp.map.bounds   lower_bound(%1 : i64) upper_bound(%i_val : i64) stride(%1 : i64) start_idx(%1 : i64)
-        %mapv1 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.array<10 x i32>)   map_clauses(tofrom) capture(ByRef) bounds(%4) -> !llvm.ptr {name = ""}
-        omp.target map_entries(%mapv1 -> %map_arg : !llvm.ptr) {
-          omp.terminator
-        }
-        omp.terminator
-    }
-    llvm.return
-  }
-
-  llvm.func @test_nested_target_in_task_with_private(%arg0: !llvm.ptr) {
-    %8 = llvm.mlir.constant(1 : i64) : i64
-    %9 = llvm.alloca %8 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr<5>
-    %ascast = llvm.addrspacecast %9 : !llvm.ptr<5> to !llvm.ptr
-    omp.task private(@i32_privatizer %ascast -> %i_priv_arg : !llvm.ptr) {
-        %1 = llvm.mlir.constant(1 : index) : i64
-        // Use the private clause from omp.task to make sure block arguments
-        // are handled.
-        %i_val = llvm.load %i_priv_arg : !llvm.ptr -> i64
-        %4 = omp.map.bounds   lower_bound(%1 : i64) upper_bound(%i_val : i64) stride(%1 : i64) start_idx(%1 : i64)
-        %mapv1 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.array<10 x i32>)   map_clauses(tofrom) capture(ByRef) bounds(%4) -> !llvm.ptr {name = ""}
-        omp.target map_entries(%mapv1 -> %map_arg : !llvm.ptr) {
-          omp.terminator
-        }
-        omp.terminator
-    }
-    llvm.return
-  }
-
-  llvm.func @test_target_and_atomic_update(%x: !llvm.ptr, %expr : i32) {
-    omp.target {
-      omp.terminator
-    }
-
-    omp.atomic.update %x : !llvm.ptr {
-    ^bb0(%xval: i32):
-      %newval = llvm.add %xval, %expr : i32
-      omp.yield(%newval : i32)
-    }
-
-    llvm.return
-  }
-
-// CHECK-LABEL: define void @test_nested_target_in_parallel_with_private({{.*}}) {
-// CHECK:        br label %omp.parallel.fake.region
-// CHECK:       omp.parallel.fake.region:
-// CHECK:         br label %omp.region.cont
-// CHECK:       omp.region.cont:
-// CHECK-NEXT:    ret void
-// CHECK-NEXT:  }
-
-// CHECK-LABEL: define {{.*}} amdgpu_kernel void @__omp_offloading_{{.*}}_nested_target_in_parallel_{{.*}} {
-// CHECK:         call i32 @__kmpc_target_init
-// CHECK:       user_code.entry:
-// CHECK:         call void @__kmpc_target_deinit()
-// CHECK:         ret void
-// CHECK:       }
-
-// CHECK-LABEL: define {{.*}} amdgpu_kernel void @__omp_offloading_{{.*}}_test_nested_target_in_wsloop_{{.*}} {
-// CHECK:         call i32 @__kmpc_target_init
-// CHECK:       user_code.entry:
-// CHECK:         call void @__kmpc_target_deinit()
-// CHECK:         ret void
-// CHECK:       }
-
-// CHECK-LABEL: define {{.*}} amdgpu_kernel void @__omp_offloading_{{.*}}_test_nested_target_in_parallel_with_private_{{.*}} {
-// CHECK:         call i32 @__kmpc_target_init
-// CHECK:       user_code.entry:
-// CHECK:         call void @__kmpc_target_deinit()
-// CHECK:         ret void
-// CHECK:       }
-
-// CHECK-LABEL: define {{.*}} amdgpu_kernel void @__omp_offloading_{{.*}}_test_nested_target_in_task_with_private_{{.*}} {
-// CHECK:         call i32 @__kmpc_target_init
-// CHECK:       user_code.entry:
-// CHECK:         call void @__kmpc_target_deinit()
-// CHECK:         ret void
-// CHECK:       }
-
-// CHECK-LABEL: define {{.*}} amdgpu_kernel void @__omp_offloading_{{.*}}_test_target_and_atomic_update_{{.*}} {
-// CHECK:         call i32 @__kmpc_target_init
-// CHECK:       user_code.entry:
-// CHECK:         call void @__kmpc_target_deinit()
-// CHECK:         ret void
-// CHECK:       }
-}
diff --git a/mlir/test/Target/LLVMIR/openmp-task-target-device.mlir b/mlir/test/Target/LLVMIR/openmp-task-target-device.mlir
deleted file mode 100644
index 2ce2424cf9541..0000000000000
--- a/mlir/test/Target/LLVMIR/openmp-task-target-device.mlir
+++ /dev/null
@@ -1,26 +0,0 @@
-// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
-
-// This tests the fix for https://github.com/llvm/llvm-project/issues/84606
-// We are only interested in ensuring that the -mlir-to-llmvir pass doesn't crash.
-// CHECK: {{.*}} = add i32 {{.*}}, 5
-module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true} {
-  llvm.func @_QQmain() attributes {fir.bindc_name = "main", omp.declare_target = #omp.declaretarget<device_type = (host), capture_clause = (to)>} {
-    %0 = llvm.mlir.constant(0 : i32) : i32
-    %1 = llvm.mlir.constant(1 : i64) : i64
-    %2 = llvm.alloca %1 x i32 {bindc_name = "a"} : (i64) -> !llvm.ptr<5>
-    %3 = llvm.addrspacecast %2 : !llvm.ptr<5> to !llvm.ptr
-    omp.task {
-      llvm.store %0, %3 : i32, !llvm.ptr
-      omp.terminator
-    }
-    %4 = omp.map.info var_ptr(%3 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "a"}
-    omp.target map_entries(%4 -> %arg0 : !llvm.ptr) {
-      %5 = llvm.mlir.constant(5 : i32) : i32
-      %6 = llvm.load %arg0  : !llvm.ptr -> i32
-      %7 = llvm.add %6, %5  : i32
-      llvm.store %7, %arg0  : i32, !llvm.ptr
-      omp.terminator
-    }
-    llvm.return
-  }
-}