[llvm-branch-commits] [flang] [mlir] [MLIR][OpenMP] Simplify OpenMP	device codegen (PR #137201)
    via llvm-branch-commits 
    llvm-branch-commits at lists.llvm.org
       
    Thu Apr 24 08:42:51 PDT 2025
    
    
  
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-flang-openmp
@llvm/pr-subscribers-mlir-llvm
@llvm/pr-subscribers-mlir
Author: Sergio Afonso (skatrak)
<details>
<summary>Changes</summary>
After removing host operations from the device MLIR module, it is no longer necessary to provide special codegen logic to prevent these operations from causing compiler crashes or miscompilations.
This patch removes these now unnecessary code paths to simplify codegen logic. Some MLIR tests are now replaced with Flang tests, since the responsibility of dealing with host operations has been moved earlier in the compilation flow.
MLIR tests holding target device modules are updated to no longer include now unsupported host operations.
---
Patch is 52.28 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/137201.diff
12 Files Affected:
- (added) flang/test/Integration/OpenMP/target-nesting-in-host-ops.f90 (+87) 
- (added) flang/test/Integration/OpenMP/task-target-device.f90 (+37) 
- (added) flang/test/Integration/OpenMP/threadprivate-target-device.f90 (+40) 
- (modified) mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp (+159-264) 
- (modified) mlir/test/Target/LLVMIR/omptarget-constant-indexing-device-region.mlir (+10-15) 
- (modified) mlir/test/Target/LLVMIR/omptarget-debug-var-1.mlir (+6-11) 
- (modified) mlir/test/Target/LLVMIR/omptarget-memcpy-align-metadata.mlir (+24-37) 
- (removed) mlir/test/Target/LLVMIR/omptarget-target-inside-task.mlir (-40) 
- (removed) mlir/test/Target/LLVMIR/omptarget-threadprivate-device-lowering.mlir (-30) 
- (modified) mlir/test/Target/LLVMIR/openmp-llvm-invalid.mlir (+45) 
- (removed) mlir/test/Target/LLVMIR/openmp-target-nesting-in-host-ops.mlir (-156) 
- (removed) mlir/test/Target/LLVMIR/openmp-task-target-device.mlir (-26) 
``````````diff
diff --git a/flang/test/Integration/OpenMP/target-nesting-in-host-ops.f90 b/flang/test/Integration/OpenMP/target-nesting-in-host-ops.f90
new file mode 100644
index 0000000000000..8c85a3c1784ed
--- /dev/null
+++ b/flang/test/Integration/OpenMP/target-nesting-in-host-ops.f90
@@ -0,0 +1,87 @@
+!===----------------------------------------------------------------------===!
+! This directory can be used to add Integration tests involving multiple
+! stages of the compiler (for eg. from Fortran to LLVM IR). It should not
+! contain executable tests. We should only add tests here sparingly and only
+! if there is no other way to test. Repeat this message in each test that is
+! added to this directory and sub-directories.
+!===----------------------------------------------------------------------===!
+
+!REQUIRES: amdgpu-registered-target
+!RUN: %flang_fc1 -triple amdgcn-amd-amdhsa -emit-llvm -fopenmp -fopenmp-version=50 -fopenmp-is-target-device %s -o - | FileCheck %s
+
+! CHECK-NOT: define void @nested_target_in_parallel
+! CHECK: define weak_odr protected amdgpu_kernel void @__omp_offloading_{{.*}}_nested_target_in_parallel_{{.*}}(ptr %{{.*}}, ptr %{{.*}})
+subroutine nested_target_in_parallel(v)
+  implicit none
+  integer, intent(inout) :: v(10)
+
+  !$omp parallel
+    !$omp target map(tofrom: v)
+    !$omp end target
+  !$omp end parallel
+end subroutine
+
+! CHECK-NOT: define void @nested_target_in_wsloop
+! CHECK: define weak_odr protected amdgpu_kernel void @__omp_offloading_{{.*}}_nested_target_in_wsloop_{{.*}}(ptr %{{.*}}, ptr %{{.*}})
+subroutine nested_target_in_wsloop(v)
+  implicit none
+  integer, intent(inout) :: v(10)
+  integer :: i
+
+  !$omp do
+  do i=1, 10
+    !$omp target map(tofrom: v)
+    !$omp end target
+  end do
+end subroutine
+
+! CHECK-NOT: define void @nested_target_in_parallel_with_private
+! CHECK: define weak_odr protected amdgpu_kernel void @__omp_offloading_{{.*}}_nested_target_in_parallel_with_private_{{.*}}(ptr %{{.*}}, ptr %{{.*}}, ptr %{{.*}})
+subroutine nested_target_in_parallel_with_private(v)
+  implicit none
+  integer, intent(inout) :: v(10)
+  integer :: x
+  x = 10
+
+  !$omp parallel firstprivate(x)
+    !$omp target map(tofrom: v(1:x))
+    !$omp end target
+  !$omp end parallel
+end subroutine
+
+! CHECK-NOT: define void @nested_target_in_task_with_private
+! CHECK: define weak_odr protected amdgpu_kernel void @__omp_offloading_{{.*}}_nested_target_in_task_with_private_{{.*}}(ptr %{{.*}}, ptr %{{.*}}, ptr %{{.*}})
+subroutine nested_target_in_task_with_private(v)
+  implicit none
+  integer, intent(inout) :: v(10)
+  integer :: x
+  x = 10
+
+  !$omp task firstprivate(x)
+    !$omp target map(tofrom: v(1:x))
+    !$omp end target
+  !$omp end task
+end subroutine
+
+! CHECK-NOT: define void @target_and_atomic_update
+! CHECK: define weak_odr protected amdgpu_kernel void @__omp_offloading_{{.*}}_target_and_atomic_update_{{.*}}(ptr %{{.*}})
+subroutine target_and_atomic_update(x, expr)
+  implicit none
+  integer, intent(inout) :: x, expr
+
+  !$omp target
+  !$omp end target
+
+  !$omp atomic update
+  x = x + expr
+end subroutine
+
+! CHECK-NOT: define void @nested_target_in_associate
+! CHECK: define weak_odr protected amdgpu_kernel void @__omp_offloading_{{.*}}_nested_target_in_associate_{{.*}}(ptr %{{.*}}, ptr %{{.*}}, ptr %{{.*}})
+subroutine nested_target_in_associate(x)
+  integer, pointer, contiguous :: x(:)
+  associate(y => x)
+    !$omp target map(tofrom: y)
+    !$omp end target
+  end associate
+end subroutine
diff --git a/flang/test/Integration/OpenMP/task-target-device.f90 b/flang/test/Integration/OpenMP/task-target-device.f90
new file mode 100644
index 0000000000000..b92dee65e3f7f
--- /dev/null
+++ b/flang/test/Integration/OpenMP/task-target-device.f90
@@ -0,0 +1,37 @@
+!===----------------------------------------------------------------------===!
+! This directory can be used to add Integration tests involving multiple
+! stages of the compiler (for eg. from Fortran to LLVM IR). It should not
+! contain executable tests. We should only add tests here sparingly and only
+! if there is no other way to test. Repeat this message in each test that is
+! added to this directory and sub-directories.
+!===----------------------------------------------------------------------===!
+
+!REQUIRES: amdgpu-registered-target
+!RUN: %flang_fc1 -triple amdgcn-amd-amdhsa -emit-llvm -fopenmp -fopenmp-version=50 -fopenmp-is-target-device %s -o - | FileCheck %s
+
+! This tests the fix for https://github.com/llvm/llvm-project/issues/84606
+! We are only interested in ensuring that the -mlir-to-llmvir pass doesn't crash.
+
+! CHECK: define weak_odr protected amdgpu_kernel void @{{.*}}QQmain{{.*}}({{.*}})
+program main
+  implicit none
+  integer, parameter :: N = 5
+  integer, dimension(5) :: a
+  integer :: i
+  integer :: target_a = 0
+
+  !$omp task depend(out:a)
+  do i = 1, N
+    a(i) = i
+  end do
+  !$omp end task
+
+  !$omp target map(tofrom:target_a) map(tofrom:a)
+  do i = 1, N
+    target_a = target_a + i
+    a(i) = a(i) + i
+  end do
+  !$omp end target
+  print*, target_a
+  print*, a
+end program main
diff --git a/flang/test/Integration/OpenMP/threadprivate-target-device.f90 b/flang/test/Integration/OpenMP/threadprivate-target-device.f90
new file mode 100644
index 0000000000000..662d6c6357af0
--- /dev/null
+++ b/flang/test/Integration/OpenMP/threadprivate-target-device.f90
@@ -0,0 +1,40 @@
+!===----------------------------------------------------------------------===!
+! This directory can be used to add Integration tests involving multiple
+! stages of the compiler (for eg. from Fortran to LLVM IR). It should not
+! contain executable tests. We should only add tests here sparingly and only
+! if there is no other way to test. Repeat this message in each test that is
+! added to this directory and sub-directories.
+!===----------------------------------------------------------------------===!
+
+!REQUIRES: amdgpu-registered-target
+!RUN: %flang_fc1 -triple amdgcn-amd-amdhsa -emit-llvm -fopenmp -fopenmp-version=50 -fopenmp-is-target-device %s -o - | FileCheck %s
+
+! The aim of this test is to verify host threadprivate directives do not cause
+! crashes during OpenMP target device codegen when used in conjunction with
+! target code in the same function.
+
+! CHECK: define weak_odr protected amdgpu_kernel void @{{.*}}(ptr %{{.*}}, ptr %[[ARG1:.*]], ptr %[[ARG2:.*]]) #{{[0-9]+}} {
+! CHECK:  %[[ALLOCA_X:.*]] = alloca ptr, align 8, addrspace(5)
+! CHECK:  %[[ASCAST_X:.*]] = addrspacecast ptr addrspace(5) %[[ALLOCA_X]] to ptr
+! CHECK:  store ptr %[[ARG1]], ptr %[[ASCAST_X]], align 8
+
+! CHECK:  %[[ALLOCA_N:.*]] = alloca ptr, align 8, addrspace(5)
+! CHECK:  %[[ASCAST_N:.*]] = addrspacecast ptr addrspace(5) %[[ALLOCA_N]] to ptr
+! CHECK:  store ptr %[[ARG2]], ptr %[[ASCAST_N]], align 8
+
+! CHECK:  %[[LOAD_X:.*]] = load ptr, ptr %[[ASCAST_X]], align 8
+! CHECK:  call void @bar_(ptr %[[LOAD_X]], ptr %[[ASCAST_N]])
+
+module test
+  implicit none
+  integer :: n
+  !$omp threadprivate(n)
+  
+  contains
+  subroutine foo(x)
+    integer, intent(inout) :: x(10)
+    !$omp target map(tofrom: x(1:n))
+      call bar(x, n)
+    !$omp end target
+  end subroutine
+end module
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 6d80c66e3596e..e2368a294b548 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -3016,19 +3016,14 @@ convertOmpThreadprivate(Operation &opInst, llvm::IRBuilderBase &builder,
   LLVM::GlobalOp global =
       addressOfOp.getGlobal(moduleTranslation.symbolTable());
   llvm::GlobalValue *globalValue = moduleTranslation.lookupGlobal(global);
-
-  if (!ompBuilder->Config.isTargetDevice()) {
-    llvm::Type *type = globalValue->getValueType();
-    llvm::TypeSize typeSize =
-        builder.GetInsertBlock()->getModule()->getDataLayout().getTypeStoreSize(
-            type);
-    llvm::ConstantInt *size = builder.getInt64(typeSize.getFixedValue());
-    llvm::Value *callInst = ompBuilder->createCachedThreadPrivate(
-        ompLoc, globalValue, size, global.getSymName() + ".cache");
-    moduleTranslation.mapValue(opInst.getResult(0), callInst);
-  } else {
-    moduleTranslation.mapValue(opInst.getResult(0), globalValue);
-  }
+  llvm::Type *type = globalValue->getValueType();
+  llvm::TypeSize typeSize =
+      builder.GetInsertBlock()->getModule()->getDataLayout().getTypeStoreSize(
+          type);
+  llvm::ConstantInt *size = builder.getInt64(typeSize.getFixedValue());
+  llvm::Value *callInst = ompBuilder->createCachedThreadPrivate(
+      ompLoc, globalValue, size, global.getSymName() + ".cache");
+  moduleTranslation.mapValue(opInst.getResult(0), callInst);
 
   return success();
 }
@@ -5325,40 +5320,172 @@ convertDeclareTargetAttr(Operation *op, mlir::omp::DeclareTargetAttr attribute,
   return success();
 }
 
-// Returns true if the operation is inside a TargetOp or
-// is part of a declare target function.
-static bool isTargetDeviceOp(Operation *op) {
+namespace {
+
+/// Implementation of the dialect interface that converts operations belonging
+/// to the OpenMP dialect to LLVM IR.
+class OpenMPDialectLLVMIRTranslationInterface
+    : public LLVMTranslationDialectInterface {
+public:
+  using LLVMTranslationDialectInterface::LLVMTranslationDialectInterface;
+
+  /// Translates the given operation to LLVM IR using the provided IR builder
+  /// and saving the state in `moduleTranslation`.
+  LogicalResult
+  convertOperation(Operation *op, llvm::IRBuilderBase &builder,
+                   LLVM::ModuleTranslation &moduleTranslation) const final;
+
+  /// Given an OpenMP MLIR attribute, create the corresponding LLVM-IR,
+  /// runtime calls, or operation amendments
+  LogicalResult
+  amendOperation(Operation *op, ArrayRef<llvm::Instruction *> instructions,
+                 NamedAttribute attribute,
+                 LLVM::ModuleTranslation &moduleTranslation) const final;
+};
+
+} // namespace
+
+LogicalResult OpenMPDialectLLVMIRTranslationInterface::amendOperation(
+    Operation *op, ArrayRef<llvm::Instruction *> instructions,
+    NamedAttribute attribute,
+    LLVM::ModuleTranslation &moduleTranslation) const {
+  return llvm::StringSwitch<llvm::function_ref<LogicalResult(Attribute)>>(
+             attribute.getName())
+      .Case("omp.is_target_device",
+            [&](Attribute attr) {
+              if (auto deviceAttr = dyn_cast<BoolAttr>(attr)) {
+                llvm::OpenMPIRBuilderConfig &config =
+                    moduleTranslation.getOpenMPBuilder()->Config;
+                config.setIsTargetDevice(deviceAttr.getValue());
+                return success();
+              }
+              return failure();
+            })
+      .Case("omp.is_gpu",
+            [&](Attribute attr) {
+              if (auto gpuAttr = dyn_cast<BoolAttr>(attr)) {
+                llvm::OpenMPIRBuilderConfig &config =
+                    moduleTranslation.getOpenMPBuilder()->Config;
+                config.setIsGPU(gpuAttr.getValue());
+                return success();
+              }
+              return failure();
+            })
+      .Case("omp.host_ir_filepath",
+            [&](Attribute attr) {
+              if (auto filepathAttr = dyn_cast<StringAttr>(attr)) {
+                llvm::OpenMPIRBuilder *ompBuilder =
+                    moduleTranslation.getOpenMPBuilder();
+                ompBuilder->loadOffloadInfoMetadata(filepathAttr.getValue());
+                return success();
+              }
+              return failure();
+            })
+      .Case("omp.flags",
+            [&](Attribute attr) {
+              if (auto rtlAttr = dyn_cast<omp::FlagsAttr>(attr))
+                return convertFlagsAttr(op, rtlAttr, moduleTranslation);
+              return failure();
+            })
+      .Case("omp.version",
+            [&](Attribute attr) {
+              if (auto versionAttr = dyn_cast<omp::VersionAttr>(attr)) {
+                llvm::OpenMPIRBuilder *ompBuilder =
+                    moduleTranslation.getOpenMPBuilder();
+                ompBuilder->M.addModuleFlag(llvm::Module::Max, "openmp",
+                                            versionAttr.getVersion());
+                return success();
+              }
+              return failure();
+            })
+      .Case("omp.declare_target",
+            [&](Attribute attr) {
+              if (auto declareTargetAttr =
+                      dyn_cast<omp::DeclareTargetAttr>(attr))
+                return convertDeclareTargetAttr(op, declareTargetAttr,
+                                                moduleTranslation);
+              return failure();
+            })
+      .Case("omp.requires",
+            [&](Attribute attr) {
+              if (auto requiresAttr = dyn_cast<omp::ClauseRequiresAttr>(attr)) {
+                using Requires = omp::ClauseRequires;
+                Requires flags = requiresAttr.getValue();
+                llvm::OpenMPIRBuilderConfig &config =
+                    moduleTranslation.getOpenMPBuilder()->Config;
+                config.setHasRequiresReverseOffload(
+                    bitEnumContainsAll(flags, Requires::reverse_offload));
+                config.setHasRequiresUnifiedAddress(
+                    bitEnumContainsAll(flags, Requires::unified_address));
+                config.setHasRequiresUnifiedSharedMemory(
+                    bitEnumContainsAll(flags, Requires::unified_shared_memory));
+                config.setHasRequiresDynamicAllocators(
+                    bitEnumContainsAll(flags, Requires::dynamic_allocators));
+                return success();
+              }
+              return failure();
+            })
+      .Case("omp.target_triples",
+            [&](Attribute attr) {
+              if (auto triplesAttr = dyn_cast<ArrayAttr>(attr)) {
+                llvm::OpenMPIRBuilderConfig &config =
+                    moduleTranslation.getOpenMPBuilder()->Config;
+                config.TargetTriples.clear();
+                config.TargetTriples.reserve(triplesAttr.size());
+                for (Attribute tripleAttr : triplesAttr) {
+                  if (auto tripleStrAttr = dyn_cast<StringAttr>(tripleAttr))
+                    config.TargetTriples.emplace_back(tripleStrAttr.getValue());
+                  else
+                    return failure();
+                }
+                return success();
+              }
+              return failure();
+            })
+      .Default([](Attribute) {
+        // Fall through for omp attributes that do not require lowering.
+        return success();
+      })(attribute.getValue());
+
+  return failure();
+}
+
+// Returns true if the operation is not inside a TargetOp, it is part of a
+// function and that function is not declare target.
+static bool isHostDeviceOp(Operation *op) {
   // Assumes no reverse offloading
   if (op->getParentOfType<omp::TargetOp>())
-    return true;
-
-  // Certain operations return results, and whether utilised in host or
-  // target there is a chance an LLVM Dialect operation depends on it
-  // by taking it in as an operand, so we must always lower these in
-  // some manner or result in an ICE (whether they end up in a no-op
-  // or otherwise).
-  if (mlir::isa<omp::ThreadprivateOp>(op))
-    return true;
+    return false;
 
-  if (auto parentFn = op->getParentOfType<LLVM::LLVMFuncOp>())
+  if (auto parentFn = op->getParentOfType<LLVM::LLVMFuncOp>()) {
     if (auto declareTargetIface =
             llvm::dyn_cast<mlir::omp::DeclareTargetInterface>(
                 parentFn.getOperation()))
       if (declareTargetIface.isDeclareTarget() &&
           declareTargetIface.getDeclareTargetDeviceType() !=
               mlir::omp::DeclareTargetDeviceType::host)
-        return true;
+        return false;
+
+    return true;
+  }
 
   return false;
 }
 
-/// Given an OpenMP MLIR operation, create the corresponding LLVM IR (including
-/// OpenMP runtime calls).
-static LogicalResult
-convertHostOrTargetOperation(Operation *op, llvm::IRBuilderBase &builder,
-                             LLVM::ModuleTranslation &moduleTranslation) {
+/// Given an OpenMP MLIR operation, create the corresponding LLVM IR
+/// (including OpenMP runtime calls).
+LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation(
+    Operation *op, llvm::IRBuilderBase &builder,
+    LLVM::ModuleTranslation &moduleTranslation) const {
   llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
 
+  if (ompBuilder->Config.isTargetDevice() &&
+      !isa<omp::TargetOp, omp::TargetDataOp, omp::TargetEnterDataOp,
+           omp::TargetExitDataOp, omp::TargetUpdateOp, omp::MapInfoOp,
+           omp::TerminatorOp, omp::YieldOp>(op) &&
+      isHostDeviceOp(op))
+    return op->emitOpError() << "unsupported host op found in device";
+
   // For each loop, introduce one stack frame to hold loop information. Ensure
   // this is only done for the outermost loop wrapper to prevent introducing
   // multiple stack frames for a single loop. Initially set to null, the loop
@@ -5508,238 +5635,6 @@ convertHostOrTargetOperation(Operation *op, llvm::IRBuilderBase &builder,
   return result;
 }
 
-static LogicalResult
-convertTargetDeviceOp(Operation *op, llvm::IRBuilderBase &builder,
-                      LLVM::ModuleTranslation &moduleTranslation) {
-  return convertHostOrTargetOperation(op, builder, moduleTranslation);
-}
-
-static LogicalResult
-convertTargetOpsInNest(Operation *op, llvm::IRBuilderBase &builder,
-                       LLVM::ModuleTranslation &moduleTranslation) {
-  if (isa<omp::TargetOp>(op))
-    return convertOmpTarget(*op, builder, moduleTranslation);
-  if (isa<omp::TargetDataOp>(op))
-    return convertOmpTargetData(op, builder, moduleTranslation);
-  bool interrupted =
-      op->walk<WalkOrder::PreOrder>([&](Operation *oper) {
-          if (isa<omp::TargetOp>(oper)) {
-            if (failed(convertOmpTarget(*oper, builder, moduleTranslation)))
-              return WalkResult::interrupt();
-            return WalkResult::skip();
-          }
-          if (isa<omp::TargetDataOp>(oper)) {
-            if (failed(convertOmpTargetData(oper, builder, moduleTranslation)))
-              return WalkResult::interrupt();
-            return WalkResult::skip();
-          }
-
-          // Non-target ops might nest target-related ops, therefore, we
-          // translate them as non-OpenMP scopes. Translating them is needed by
-          // nested target-related ops since they might need LLVM values defined
-          // in their parent non-target ops.
-          if (isa<omp::OpenMPDialect>(oper->getDialect()) &&
-              oper->getParentOfType<LLVM::LLVMFuncOp>() &&
-              !oper->getRegions().empty()) {
-            if (auto blockArgsIface =
-                    dyn_cast<omp::BlockArgOpenMPOpInterface>(oper))
-              forwardArgs(moduleTranslation, blockArgsIface);
-            else {
-              // Here we map entry block arguments of
-              // non-BlockArgOpenMPOpInterface ops if they can be encountered
-              // inside of a function and they define any of these arguments.
-              if (isa<mlir::omp::AtomicUpdateOp>(oper))
-                for (auto [operand, arg] :
-                     llvm::zip_equal(oper->getOperands(),
-                                     oper->getRegion(0).getArguments())) {
-                  moduleTranslation.mapValue(
-                      arg, builder.CreateLoad(
-                               moduleTranslation.convertType(arg.getType()),
-                               moduleTranslation.lookupValue(operand)));
-                }
-            }
-
-            if (auto loopNest = dyn_cast<omp::LoopNestOp>(oper)) {
-              assert(builder.GetInsertBlock() &&
-                     "No insert block is set for the builder");
-              for (auto iv : loopNest.getIVs()) {
-                // Map iv to an undefined value just to keep the IR validity.
-                moduleTranslation.mapValue(
-                    iv, llvm::PoisonValue::get(
-                            moduleTranslation.convertType(iv.getType())));
-              }
-            }
-
-            for (Region ®ion : oper->getRe...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/137201
    
    
More information about the llvm-branch-commits
mailing list