[Mlir-commits] [mlir] 5fb4383 - [mlir][OpenMP] Lower device clause for target data/enter/exit/update (#174665)

llvmlistbot at llvm.org llvmlistbot at llvm.org
Wed Jan 7 09:19:19 PST 2026


Author: Chi-Chun, Chen
Date: 2026-01-07T11:19:14-06:00
New Revision: 5fb43838afcd8412f473a3165639badd256acd71

URL: https://github.com/llvm/llvm-project/commit/5fb43838afcd8412f473a3165639badd256acd71
DIFF: https://github.com/llvm/llvm-project/commit/5fb43838afcd8412f473a3165639badd256acd71.diff

LOG: [mlir][OpenMP] Lower device clause for target data/enter/exit/update (#174665)

Extend OpenMP device clause lowering for target data, target enter data,
target exit data, and target update to accept non-constant values.
Previously, only constant device IDs could be lowered to LLVM IR.

Add Flang tests to validate device clause handling and mark the feature
as supported in the OpenMPSupport documentation. New tests cover:
- target teams
- target teams distribute
- target teams distribute parallel do
- target teams distribute parallel do simd
- target data

Tests for target update and target enter/exit data were
already present in Flang.

Added: 
    

Modified: 
    flang/docs/OpenMPSupport.md
    flang/test/Lower/OpenMP/target-spmd.f90
    flang/test/Lower/OpenMP/target.f90
    mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
    mlir/test/Target/LLVMIR/omptarget-device.mlir
    mlir/test/Target/LLVMIR/omptarget-llvm.mlir

Removed: 
    


################################################################################
diff  --git a/flang/docs/OpenMPSupport.md b/flang/docs/OpenMPSupport.md
index 21966c5489108..63f4dbea98180 100644
--- a/flang/docs/OpenMPSupport.md
+++ b/flang/docs/OpenMPSupport.md
@@ -37,9 +37,9 @@ Note : No distinction is made between the support in Parser/Semantics, MLIR, Low
 | simd construct                                             | P      | Implicit linearization is skipped if iv is a pointer or allocatable|
 | declare simd construct                                     | N      | |
 | do simd construct                                          | P      | linear clause is not supported |
-| target data construct                                      | P      | device clause not supported |
+| target data construct                                      | Y      | |
 | target construct                                           | Y      | |
-| target update construct                                    | P      | device clause not supported |
+| target update construct                                    | Y      | |
 | declare target directive                                   | Y      | |
 | teams construct                                            | Y      | |
 | distribute construct                                       | Y      | |
@@ -52,14 +52,14 @@ Note : No distinction is made between the support in Parser/Semantics, MLIR, Low
 | cancel construct                                           | Y      | |
 | cancellation point construct                               | Y      | |
 | parallel do simd construct                                 | P      | linear clause not supported |
-| target teams construct                                     | P      | device clause not supported |
+| target teams construct                                     | Y      | |
 | teams distribute construct                                 | Y      | |
 | teams distribute simd construct                            | P      | linear clause is not supported |
-| target teams distribute construct                          | P      | device clause is not supported |
+| target teams distribute construct                          | Y      | |
 | teams distribute parallel loop construct                   | Y      | |
-| target teams distribute parallel loop construct            | P      | device clause is not supported |
+| target teams distribute parallel loop construct            | Y      | |
 | teams distribute parallel loop simd construct              | P      | linear clause is not supported |
-| target teams distribute parallel loop simd construct       | P      | device and linear clauses are not supported |
+| target teams distribute parallel loop simd construct       | P      | linear clause is not supported |
 
 ## Extensions
 ### ATOMIC construct

diff  --git a/flang/test/Lower/OpenMP/target-spmd.f90 b/flang/test/Lower/OpenMP/target-spmd.f90
index 43613819ccc8e..320bd1d26ccd3 100644
--- a/flang/test/Lower/OpenMP/target-spmd.f90
+++ b/flang/test/Lower/OpenMP/target-spmd.f90
@@ -189,3 +189,214 @@ subroutine target_teams_distribute_parallel_do_simd_spmd()
   end do
   !$omp end target teams distribute parallel do simd
 end subroutine target_teams_distribute_parallel_do_simd_spmd
+
+!===============================================================================
+! Target teams `device` clause
+!===============================================================================
+
+!CHECK-LABEL: func.func @_QPomp_target_teams_device() {
+subroutine omp_target_teams_device
+  integer            :: dev32
+  integer(kind=8)    :: dev64
+  integer(kind=2)    :: dev16
+
+  dev32 = 1
+  dev64 = 2_8
+  dev16 = 3_2
+
+  !$omp target teams device(dev32)
+  !$omp end target teams
+  ! CHECK: %[[DEV32:.*]] = fir.load %{{.*}} : !fir.ref<i32>
+  ! CHECK: omp.target device(%[[DEV32]] : i32)
+
+  !$omp target teams device(dev64)
+  !$omp end target teams
+  ! CHECK: %[[DEV64:.*]] = fir.load %{{.*}} : !fir.ref<i64>
+  ! CHECK: omp.target device(%[[DEV64]] : i64)
+
+  !$omp target teams device(dev16)
+  !$omp end target teams
+  ! CHECK: %[[DEV16:.*]] = fir.load %{{.*}} : !fir.ref<i16>
+  ! CHECK: omp.target device(%[[DEV16]] : i16)
+
+  !$omp target teams device(2)
+  !$omp end target teams
+  ! CHECK: %[[C2:.*]] = arith.constant 2 : i32
+  ! CHECK: omp.target device(%[[C2]] : i32)
+
+  !$omp target teams device(5_8)
+  !$omp end target teams
+  ! CHECK: %[[C5:.*]] = arith.constant 5 : i64
+  ! CHECK: omp.target device(%[[C5]] : i64)
+
+end subroutine omp_target_teams_device
+
+!===============================================================================
+! Target teams distribute `device` clause
+!===============================================================================
+
+!CHECK-LABEL: func.func @_QPomp_target_teams_distribute_device() {
+subroutine omp_target_teams_distribute_device
+  integer            :: dev32
+  integer(kind=8)    :: dev64
+  integer(kind=2)    :: dev16
+  integer            :: i
+
+  dev32 = 1
+  dev64 = 2_8
+  dev16 = 3_2
+
+  !$omp target teams distribute device(dev32)
+  do i = 1, 1
+  end do
+  !$omp end target teams distribute
+  ! CHECK: %[[DEV32:.*]] = fir.load %{{.*}} : !fir.ref<i32>
+  ! CHECK: omp.target device(%[[DEV32]] : i32)
+  ! CHECK: omp.teams
+  ! CHECK: omp.distribute
+  ! CHECK: omp.loop_nest
+
+  !$omp target teams distribute device(dev64)
+  do i = 1, 1
+  end do
+  !$omp end target teams distribute
+  ! CHECK: %[[DEV64:.*]] = fir.load %{{.*}} : !fir.ref<i64>
+  ! CHECK: omp.target device(%[[DEV64]] : i64)
+
+  !$omp target teams distribute device(dev16)
+  do i = 1, 1
+  end do
+  !$omp end target teams distribute
+  ! CHECK: %[[DEV16:.*]] = fir.load %{{.*}} : !fir.ref<i16>
+  ! CHECK: omp.target device(%[[DEV16]] : i16)
+
+  !$omp target teams distribute device(2)
+  do i = 1, 1
+  end do
+  !$omp end target teams distribute
+  ! CHECK: %[[C2:.*]] = arith.constant 2 : i32
+  ! CHECK: omp.target device(%[[C2]] : i32)
+
+  !$omp target teams distribute device(5_8)
+  do i = 1, 1
+  end do
+  !$omp end target teams distribute
+  ! CHECK: %[[C5:.*]] = arith.constant 5 : i64
+  ! CHECK: omp.target device(%[[C5]] : i64)
+
+end subroutine omp_target_teams_distribute_device
+
+!===============================================================================
+! Target teams distribute parallel loop `device` clause
+!===============================================================================
+
+!CHECK-LABEL: func.func @_QPomp_target_teams_distribute_parallel_do_device() {
+subroutine omp_target_teams_distribute_parallel_do_device
+  integer            :: dev32
+  integer(kind=8)    :: dev64
+  integer(kind=2)    :: dev16
+  integer            :: i
+
+  dev32 = 1
+  dev64 = 2_8
+  dev16 = 3_2
+
+  !$omp target teams distribute parallel do device(dev32)
+  do i = 1, 1
+  end do
+  !$omp end target teams distribute parallel do
+  ! CHECK: %[[DEV32:.*]] = fir.load %{{.*}} : !fir.ref<i32>
+  ! CHECK: omp.target device(%[[DEV32]] : i32)
+  ! CHECK: omp.teams
+  ! CHECK: omp.parallel
+  ! CHECK: omp.distribute
+  ! CHECK: omp.wsloop
+  ! CHECK: omp.loop_nest
+
+  !$omp target teams distribute parallel do device(dev64)
+  do i = 1, 1
+  end do
+  !$omp end target teams distribute parallel do
+  ! CHECK: %[[DEV64:.*]] = fir.load %{{.*}} : !fir.ref<i64>
+  ! CHECK: omp.target device(%[[DEV64]] : i64)
+
+  !$omp target teams distribute parallel do device(dev16)
+  do i = 1, 1
+  end do
+  !$omp end target teams distribute parallel do
+  ! CHECK: %[[DEV16:.*]] = fir.load %{{.*}} : !fir.ref<i16>
+  ! CHECK: omp.target device(%[[DEV16]] : i16)
+
+  !$omp target teams distribute parallel do device(2)
+  do i = 1, 1
+  end do
+  !$omp end target teams distribute parallel do
+  ! CHECK: %[[C2:.*]] = arith.constant 2 : i32
+  ! CHECK: omp.target device(%[[C2]] : i32)
+
+  !$omp target teams distribute parallel do device(5_8)
+  do i = 1, 1
+  end do
+  !$omp end target teams distribute parallel do
+  ! CHECK: %[[C5:.*]] = arith.constant 5 : i64
+  ! CHECK: omp.target device(%[[C5]] : i64)
+
+end subroutine omp_target_teams_distribute_parallel_do_device
+
+!===============================================================================
+! Target teams distribute parallel loop simd `device` clause
+!===============================================================================
+
+!CHECK-LABEL: func.func @_QPomp_target_teams_distribute_parallel_do_simd_device() {
+subroutine omp_target_teams_distribute_parallel_do_simd_device
+  integer            :: dev32
+  integer(kind=8)    :: dev64
+  integer(kind=2)    :: dev16
+  integer            :: i
+
+  dev32 = 1
+  dev64 = 2_8
+  dev16 = 3_2
+
+  !$omp target teams distribute parallel do simd device(dev32)
+  do i = 1, 1
+  end do
+  !$omp end target teams distribute parallel do simd
+  ! CHECK: %[[DEV32:.*]] = fir.load %{{.*}} : !fir.ref<i32>
+  ! CHECK: omp.target device(%[[DEV32]] : i32)
+  ! CHECK: omp.teams
+  ! CHECK: omp.parallel
+  ! CHECK: omp.distribute
+  ! CHECK: omp.wsloop
+  ! CHECK: omp.simd
+  ! CHECK: omp.loop_nest
+
+  !$omp target teams distribute parallel do simd device(dev64)
+  do i = 1, 1
+  end do
+  !$omp end target teams distribute parallel do simd
+  ! CHECK: %[[DEV64:.*]] = fir.load %{{.*}} : !fir.ref<i64>
+  ! CHECK: omp.target device(%[[DEV64]] : i64)
+
+  !$omp target teams distribute parallel do simd device(dev16)
+  do i = 1, 1
+  end do
+  !$omp end target teams distribute parallel do simd
+  ! CHECK: %[[DEV16:.*]] = fir.load %{{.*}} : !fir.ref<i16>
+  ! CHECK: omp.target device(%[[DEV16]] : i16)
+
+  !$omp target teams distribute parallel do simd device(2)
+  do i = 1, 1
+  end do
+  !$omp end target teams distribute parallel do simd
+  ! CHECK: %[[C2:.*]] = arith.constant 2 : i32
+  ! CHECK: omp.target device(%[[C2]] : i32)
+
+  !$omp target teams distribute parallel do simd device(5_8)
+  do i = 1, 1
+  end do
+  !$omp end target teams distribute parallel do simd
+  ! CHECK: %[[C5:.*]] = arith.constant 5 : i64
+  ! CHECK: omp.target device(%[[C5]] : i64)
+
+end subroutine omp_target_teams_distribute_parallel_do_simd_device

diff  --git a/flang/test/Lower/OpenMP/target.f90 b/flang/test/Lower/OpenMP/target.f90
index 55a6b7a595ed1..e7168f3944037 100644
--- a/flang/test/Lower/OpenMP/target.f90
+++ b/flang/test/Lower/OpenMP/target.f90
@@ -734,4 +734,47 @@ subroutine omp_target_device
   ! CHECK: %[[C5:.*]] = arith.constant 5 : i64
   ! CHECK: omp.target device(%[[C5]] : i64)
 
-end subroutine omp_target_device
\ No newline at end of file
+end subroutine omp_target_device
+
+!===============================================================================
+! Target data `device` clause
+!===============================================================================
+
+!CHECK-LABEL: func.func @_QPomp_target_data_device() {
+subroutine omp_target_data_device
+  integer            :: dev32
+  integer(kind=8)    :: dev64
+  integer(kind=2)    :: dev16
+  integer            :: x
+
+  dev32 = 1
+  dev64 = 2_8
+  dev16 = 3_2
+  x = 0
+
+  !$omp target data device(dev32) map(tofrom: x)
+  !$omp end target data
+  ! CHECK: %[[DEV32:.*]] = fir.load %{{.*}} : !fir.ref<i32>
+  ! CHECK: omp.target_data device(%[[DEV32]] : i32)
+
+  !$omp target data device(dev64) map(tofrom: x)
+  !$omp end target data
+  ! CHECK: %[[DEV64:.*]] = fir.load %{{.*}} : !fir.ref<i64>
+  ! CHECK: omp.target_data device(%[[DEV64]] : i64)
+
+  !$omp target data device(dev16) map(tofrom: x)
+  !$omp end target data
+  ! CHECK: %[[DEV16:.*]] = fir.load %{{.*}} : !fir.ref<i16>
+  ! CHECK: omp.target_data device(%[[DEV16]] : i16)
+
+  !$omp target data device(2) map(tofrom: x)
+  !$omp end target data
+  ! CHECK: %[[C2:.*]] = arith.constant 2 : i32
+  ! CHECK: omp.target_data device(%[[C2]] : i32)
+
+  !$omp target data device(5_8) map(tofrom: x)
+  !$omp end target data
+  ! CHECK: %[[C5:.*]] = arith.constant 5 : i64
+  ! CHECK: omp.target_data device(%[[C5]] : i64)
+
+end subroutine omp_target_data_device

diff  --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 55df986ad3d11..aad46ba094f7b 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -349,10 +349,6 @@ static LogicalResult checkImplementationStatus(Operation &op) {
     if (!op.getDependVars().empty() || op.getDependKinds())
       result = todo("depend");
   };
-  auto checkDevice = [&todo](auto op, LogicalResult &result) {
-    if (op.getDevice())
-      result = todo("device");
-  };
   auto checkHint = [](auto op, LogicalResult &) {
     if (op.getHint())
       op.emitWarning("hint clause discarded");
@@ -456,11 +452,7 @@ static LogicalResult checkImplementationStatus(Operation &op) {
             omp::AtomicCaptureOp>([&](auto op) { checkHint(op, result); })
       .Case<omp::TargetEnterDataOp, omp::TargetExitDataOp>(
           [&](auto op) { checkDepend(op, result); })
-      .Case<omp::TargetUpdateOp>([&](auto op) {
-        checkDepend(op, result);
-        checkDevice(op, result);
-      })
-      .Case<omp::TargetDataOp>([&](auto op) { checkDevice(op, result); })
+      .Case<omp::TargetUpdateOp>([&](auto op) { checkDepend(op, result); })
       .Case([&](omp::TargetOp op) {
         checkAllocate(op, result);
         checkBare(op, result);
@@ -5052,7 +5044,7 @@ static LogicalResult
 convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder,
                      LLVM::ModuleTranslation &moduleTranslation) {
   llvm::Value *ifCond = nullptr;
-  int64_t deviceID = llvm::omp::OMP_DEVICEID_UNDEF;
+  llvm::Value *deviceID = builder.getInt64(llvm::omp::OMP_DEVICEID_UNDEF);
   SmallVector<Value> mapVars;
   SmallVector<Value> useDevicePtrVars;
   SmallVector<Value> useDeviceAddrVars;
@@ -5068,6 +5060,11 @@ convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder,
   bool isOffloadEntry =
       isTargetDevice || !ompBuilder->Config.TargetTriples.empty();
 
+  auto getDeviceID = [&](mlir::Value dev) -> llvm::Value * {
+    llvm::Value *v = moduleTranslation.lookupValue(dev);
+    return builder.CreateIntCast(v, builder.getInt64Ty(), /*isSigned=*/true);
+  };
+
   LogicalResult result =
       llvm::TypeSwitch<Operation *, LogicalResult>(op)
           .Case([&](omp::TargetDataOp dataOp) {
@@ -5077,10 +5074,8 @@ convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder,
             if (auto ifVar = dataOp.getIfExpr())
               ifCond = moduleTranslation.lookupValue(ifVar);
 
-            if (auto devId = dataOp.getDevice())
-              if (auto constOp = devId.getDefiningOp<LLVM::ConstantOp>())
-                if (auto intAttr = dyn_cast<IntegerAttr>(constOp.getValue()))
-                  deviceID = intAttr.getInt();
+            if (mlir::Value devId = dataOp.getDevice())
+              deviceID = getDeviceID(devId);
 
             mapVars = dataOp.getMapVars();
             useDevicePtrVars = dataOp.getUseDevicePtrVars();
@@ -5094,10 +5089,9 @@ convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder,
             if (auto ifVar = enterDataOp.getIfExpr())
               ifCond = moduleTranslation.lookupValue(ifVar);
 
-            if (auto devId = enterDataOp.getDevice())
-              if (auto constOp = devId.getDefiningOp<LLVM::ConstantOp>())
-                if (auto intAttr = dyn_cast<IntegerAttr>(constOp.getValue()))
-                  deviceID = intAttr.getInt();
+            if (mlir::Value devId = enterDataOp.getDevice())
+              deviceID = getDeviceID(devId);
+
             RTLFn =
                 enterDataOp.getNowait()
                     ? llvm::omp::OMPRTL___tgt_target_data_begin_nowait_mapper
@@ -5113,10 +5107,8 @@ convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder,
             if (auto ifVar = exitDataOp.getIfExpr())
               ifCond = moduleTranslation.lookupValue(ifVar);
 
-            if (auto devId = exitDataOp.getDevice())
-              if (auto constOp = devId.getDefiningOp<LLVM::ConstantOp>())
-                if (auto intAttr = dyn_cast<IntegerAttr>(constOp.getValue()))
-                  deviceID = intAttr.getInt();
+            if (mlir::Value devId = exitDataOp.getDevice())
+              deviceID = getDeviceID(devId);
 
             RTLFn = exitDataOp.getNowait()
                         ? llvm::omp::OMPRTL___tgt_target_data_end_nowait_mapper
@@ -5132,10 +5124,8 @@ convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder,
             if (auto ifVar = updateDataOp.getIfExpr())
               ifCond = moduleTranslation.lookupValue(ifVar);
 
-            if (auto devId = updateDataOp.getDevice())
-              if (auto constOp = devId.getDefiningOp<LLVM::ConstantOp>())
-                if (auto intAttr = dyn_cast<IntegerAttr>(constOp.getValue()))
-                  deviceID = intAttr.getInt();
+            if (mlir::Value devId = updateDataOp.getDevice())
+              deviceID = getDeviceID(devId);
 
             RTLFn =
                 updateDataOp.getNowait()
@@ -5288,13 +5278,13 @@ convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder,
   llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP = [&]() {
     if (isa<omp::TargetDataOp>(op))
       return ompBuilder->createTargetData(ompLoc, allocaIP, builder.saveIP(),
-                                          builder.getInt64(deviceID), ifCond,
-                                          info, genMapInfoCB, customMapperCB,
+                                          deviceID, ifCond, info, genMapInfoCB,
+                                          customMapperCB,
                                           /*MapperFunc=*/nullptr, bodyGenCB,
                                           /*DeviceAddrCB=*/nullptr);
-    return ompBuilder->createTargetData(
-        ompLoc, allocaIP, builder.saveIP(), builder.getInt64(deviceID), ifCond,
-        info, genMapInfoCB, customMapperCB, &RTLFn);
+    return ompBuilder->createTargetData(ompLoc, allocaIP, builder.saveIP(),
+                                        deviceID, ifCond, info, genMapInfoCB,
+                                        customMapperCB, &RTLFn);
   }();
 
   if (failed(handleError(afterIP, *op)))

diff  --git a/mlir/test/Target/LLVMIR/omptarget-device.mlir b/mlir/test/Target/LLVMIR/omptarget-device.mlir
index b4c9744cc0c87..ce82f55561714 100644
--- a/mlir/test/Target/LLVMIR/omptarget-device.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-device.mlir
@@ -1,7 +1,7 @@
-// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+// RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s
 
 module attributes {omp.is_target_device = false, omp.target_triples = ["nvptx64-nvidia-cuda"]} {
-  llvm.func @foo(%d16 : i16, %d32 : i32, %d64 : i64) {
+  llvm.func @_QPopenmp_target(%d16 : i16, %d32 : i32, %d64 : i64) {
     %x  = llvm.mlir.constant(0 : i32) : i32
 
     // Constant i16 -> i64 in the runtime call.
@@ -47,7 +47,7 @@ module attributes {omp.is_target_device = false, omp.target_triples = ["nvptx64-
   }
 }
 
-// CHECK-LABEL: define void @foo(i16 %{{.*}}, i32 %{{.*}}, i64 %{{.*}}) {
+// CHECK-LABEL: define void @_QPopenmp_target(i16 %{{.*}}, i32 %{{.*}}, i64 %{{.*}}) {
 // CHECK: br label %entry
 // CHECK: entry:
 
@@ -65,4 +65,210 @@ module attributes {omp.is_target_device = false, omp.target_triples = ["nvptx64-
 // CHECK: call i32 @__tgt_target_kernel(ptr {{.*}}, i64 %[[D32_I64]], i32 {{.*}}, i32 {{.*}}, ptr {{.*}}, ptr {{.*}})
 
 // Variable i64
-// CHECK: call i32 @__tgt_target_kernel(ptr {{.*}}, i64 %{{.*}}, i32 {{.*}}, i32 {{.*}}, ptr {{.*}}, ptr {{.*}})
\ No newline at end of file
+// CHECK: call i32 @__tgt_target_kernel(ptr {{.*}}, i64 %{{.*}}, i32 {{.*}}, i32 {{.*}}, ptr {{.*}}, ptr {{.*}})
+
+// -----
+
+module attributes {omp.is_target_device = false, omp.target_triples = ["nvptx64-nvidia-cuda"]} {
+  llvm.func @_QPopenmp_target_data(%d16 : i16, %d32 : i32, %d64 : i64) {
+    %one = llvm.mlir.constant(1 : i64) : i64
+    %buf = llvm.alloca %one x i32 : (i64) -> !llvm.ptr
+    %map = omp.map.info var_ptr(%buf : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""}
+
+    // Constant i16 -> i64 in the runtime call.
+    %c1_i16 = llvm.mlir.constant(1 : i16) : i16
+    omp.target_data device(%c1_i16 : i16) map_entries(%map : !llvm.ptr) {
+      omp.terminator
+    }
+
+    // Constant i32 -> i64 in the runtime call.
+    %c2_i32 = llvm.mlir.constant(2 : i32) : i32
+    omp.target_data device(%c2_i32 : i32) map_entries(%map : !llvm.ptr) {
+      omp.terminator
+    }
+
+    // Constant i64 stays i64 in the runtime call.
+    %c3_i64 = llvm.mlir.constant(3 : i64) : i64
+    omp.target_data device(%c3_i64 : i64) map_entries(%map : !llvm.ptr) {
+      omp.terminator
+    }
+
+    // Variable i16 -> cast to i64.
+    omp.target_data device(%d16 : i16) map_entries(%map : !llvm.ptr) {
+      omp.terminator
+    }
+
+    // Variable i32 -> cast to i64.
+    omp.target_data device(%d32 : i32) map_entries(%map : !llvm.ptr) {
+      omp.terminator
+    }
+
+    // Variable i64 stays i64.
+    omp.target_data device(%d64 : i64) map_entries(%map : !llvm.ptr) {
+      omp.terminator
+    }
+
+    llvm.return
+  }
+}
+
+// CHECK-LABEL: define void @_QPopenmp_target_data(i16 %{{.*}}, i32 %{{.*}}, i64 %{{.*}}) {
+// CHECK: br label %entry
+// CHECK: entry:
+
+// ---- Constant cases (device id is 2nd argument) ----
+// CHECK-DAG: call void @__tgt_target_data_begin_mapper(ptr {{.*}}, i64 1, i32 1, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr null)
+// CHECK-DAG: call void @__tgt_target_data_end_mapper(ptr {{.*}}, i64 1, i32 1, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr null)
+
+// CHECK-DAG: call void @__tgt_target_data_begin_mapper(ptr {{.*}}, i64 2, i32 1, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr null)
+// CHECK-DAG: call void @__tgt_target_data_end_mapper(ptr {{.*}}, i64 2, i32 1, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr null)
+
+// CHECK-DAG: call void @__tgt_target_data_begin_mapper(ptr {{.*}}, i64 3, i32 1, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr null)
+// CHECK-DAG: call void @__tgt_target_data_end_mapper(ptr {{.*}}, i64 3, i32 1, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr null)
+
+// Variable i16 -> i64
+// CHECK: %[[D16_I64:.*]] = sext i16 %{{.*}} to i64
+// CHECK: call void @__tgt_target_data_begin_mapper(ptr {{.*}}, i64 %[[D16_I64]], i32 1, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr null)
+// CHECK: call void @__tgt_target_data_end_mapper(ptr {{.*}}, i64 %[[D16_I64]], i32 1, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr null)
+
+// Variable i32 -> i64
+// CHECK: %[[D32_I64:.*]] = sext i32 %{{.*}} to i64
+// CHECK: call void @__tgt_target_data_begin_mapper(ptr {{.*}}, i64 %[[D32_I64]], i32 1, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr null)
+// CHECK: call void @__tgt_target_data_end_mapper(ptr {{.*}}, i64 %[[D32_I64]], i32 1, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr null)
+
+// Variable i64
+// CHECK: call void @__tgt_target_data_begin_mapper(ptr {{.*}}, i64 %{{.*}}, i32 1, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr null)
+// CHECK: call void @__tgt_target_data_end_mapper(ptr {{.*}}, i64 %{{.*}}, i32 1, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr null)
+
+// -----
+
+module attributes {omp.is_target_device = false, omp.target_triples = ["nvptx64-nvidia-cuda"]} {
+  llvm.func @_QPomp_target_enter_exit(%d16 : i16, %d32 : i32, %d64 : i64) {
+    %c1 = llvm.mlir.constant(1 : i64) : i64
+    %var = llvm.alloca %c1 x i32 : (i64) -> !llvm.ptr
+
+    %m_to = omp.map.info var_ptr(%var : !llvm.ptr, i32) map_clauses(to) capture(ByRef) -> !llvm.ptr {name = "var"}
+    %m_from = omp.map.info var_ptr(%var : !llvm.ptr, i32) map_clauses(from) capture(ByRef) -> !llvm.ptr {name = "var"}
+
+    // Constant i16 -> i64 in the runtime call.
+    %c1_i16 = llvm.mlir.constant(1 : i16) : i16
+    omp.target_enter_data device(%c1_i16 : i16) map_entries(%m_to : !llvm.ptr)
+
+    // Constant i32 -> i64 in the runtime call.
+    %c2_i32 = llvm.mlir.constant(2 : i32) : i32
+    omp.target_enter_data device(%c2_i32 : i32) map_entries(%m_to : !llvm.ptr)
+
+    // Constant i64 stays i64 in the runtime call.
+    %c3_i64 = llvm.mlir.constant(3 : i64) : i64
+    omp.target_enter_data device(%c3_i64 : i64) map_entries(%m_to : !llvm.ptr)
+
+    // ---- Variable cases (enter) ----
+    omp.target_enter_data device(%d16 : i16) map_entries(%m_to : !llvm.ptr)
+    omp.target_enter_data device(%d32 : i32) map_entries(%m_to : !llvm.ptr)
+    omp.target_enter_data device(%d64 : i64) map_entries(%m_to : !llvm.ptr)
+
+    // ---- Constant cases (exit) ----
+    omp.target_exit_data device(%c1_i16 : i16) map_entries(%m_from : !llvm.ptr)
+    omp.target_exit_data device(%c2_i32 : i32) map_entries(%m_from : !llvm.ptr)
+    omp.target_exit_data device(%c3_i64 : i64) map_entries(%m_from : !llvm.ptr)
+
+    // ---- Variable cases (exit) ----
+    omp.target_exit_data device(%d16 : i16) map_entries(%m_from : !llvm.ptr)
+    omp.target_exit_data device(%d32 : i32) map_entries(%m_from : !llvm.ptr)
+    omp.target_exit_data device(%d64 : i64) map_entries(%m_from : !llvm.ptr)
+
+    llvm.return
+  }
+}
+
+// CHECK-LABEL: define void @_QPomp_target_enter_exit(i16 %{{.*}}, i32 %{{.*}}, i64 %{{.*}}) {
+// CHECK: br label %entry
+// CHECK: entry:
+
+// ---- Constant enter cases (device id is 2nd argument) ----
+// CHECK-DAG: call void @__tgt_target_data_begin_mapper(ptr {{.*}}, i64 1, i32 {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}})
+// CHECK-DAG: call void @__tgt_target_data_begin_mapper(ptr {{.*}}, i64 2, i32 {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}})
+// CHECK-DAG: call void @__tgt_target_data_begin_mapper(ptr {{.*}}, i64 3, i32 {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}})
+
+// ---- Variable enter cases ----
+// Variable i16 -> i64
+// CHECK: %[[D16_I64_BEGIN:.*]] = sext i16 %{{.*}} to i64
+// CHECK: call void @__tgt_target_data_begin_mapper(ptr {{.*}}, i64 %[[D16_I64_BEGIN]], i32 {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}})
+
+// Variable i32 -> i64
+// CHECK: %[[D32_I64_BEGIN:.*]] = sext i32 %{{.*}} to i64
+// CHECK: call void @__tgt_target_data_begin_mapper(ptr {{.*}}, i64 %[[D32_I64_BEGIN]], i32 {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}})
+
+// Variable i64 stays i64
+// CHECK: call void @__tgt_target_data_begin_mapper(ptr {{.*}}, i64 %{{.*}}, i32 {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}})
+
+// ---- Constant exit cases (device id is 2nd argument) ----
+// CHECK-DAG: call void @__tgt_target_data_end_mapper(ptr {{.*}}, i64 1, i32 {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}})
+// CHECK-DAG: call void @__tgt_target_data_end_mapper(ptr {{.*}}, i64 2, i32 {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}})
+// CHECK-DAG: call void @__tgt_target_data_end_mapper(ptr {{.*}}, i64 3, i32 {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}})
+
+// ---- Variable exit cases ----
+// Variable i16 -> i64
+// CHECK: %[[D16_I64_END:.*]] = sext i16 %{{.*}} to i64
+// CHECK: call void @__tgt_target_data_end_mapper(ptr {{.*}}, i64 %[[D16_I64_END]], i32 {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}})
+
+// Variable i32 -> i64
+// CHECK: %[[D32_I64_END:.*]] = sext i32 %{{.*}} to i64
+// CHECK: call void @__tgt_target_data_end_mapper(ptr {{.*}}, i64 %[[D32_I64_END]], i32 {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}})
+
+// Variable i64 stays i64
+// CHECK: call void @__tgt_target_data_end_mapper(ptr {{.*}}, i64 %{{.*}}, i32 {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}})
+
+// CHECK: ret void
+// CHECK: }
+
+// -----
+
+module attributes {omp.is_target_device = false, omp.target_triples = ["nvptx64-nvidia-cuda"]} {
+  llvm.func @target_update_dev_clause(%d16 : i16, %d32 : i32, %d64 : i64) {
+    %c1 = llvm.mlir.constant(1 : i64) : i64
+    %var = llvm.alloca %c1 x i32 : (i64) -> !llvm.ptr
+    %m = omp.map.info var_ptr(%var : !llvm.ptr, i32) map_clauses(to) capture(ByRef) -> !llvm.ptr {name = "var"}
+
+    // ---- Constant cases ----
+    %c1_i16 = llvm.mlir.constant(1 : i16) : i16
+    omp.target_update device(%c1_i16 : i16) map_entries(%m : !llvm.ptr)
+
+    %c2_i32 = llvm.mlir.constant(2 : i32) : i32
+    omp.target_update device(%c2_i32 : i32) map_entries(%m : !llvm.ptr)
+
+    %c3_i64 = llvm.mlir.constant(3 : i64) : i64
+    omp.target_update device(%c3_i64 : i64) map_entries(%m : !llvm.ptr)
+
+    // ---- Variable cases ----
+    omp.target_update device(%d16 : i16) map_entries(%m : !llvm.ptr)
+    omp.target_update device(%d32 : i32) map_entries(%m : !llvm.ptr)
+    omp.target_update device(%d64 : i64) map_entries(%m : !llvm.ptr)
+
+    llvm.return
+  }
+}
+
+// CHECK-LABEL: define void @target_update_dev_clause(i16 %{{.*}}, i32 %{{.*}}, i64 %{{.*}}) {
+// CHECK: br label %entry
+// CHECK: entry:
+
+// ---- Constant cases (device id is 2nd argument) ----
+// CHECK-DAG: call void @__tgt_target_data_update_mapper(ptr {{.*}}, i64 1, i32 {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}})
+// CHECK-DAG: call void @__tgt_target_data_update_mapper(ptr {{.*}}, i64 2, i32 {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}})
+// CHECK-DAG: call void @__tgt_target_data_update_mapper(ptr {{.*}}, i64 3, i32 {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}})
+
+// ---- Variable cases ----
+// Variable i16 -> i64
+// CHECK: %[[D16_I64:.*]] = sext i16 %{{.*}} to i64
+// CHECK: call void @__tgt_target_data_update_mapper(ptr {{.*}}, i64 %[[D16_I64]], i32 {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}})
+
+// Variable i32 -> i64
+// CHECK: %[[D32_I64:.*]] = sext i32 %{{.*}} to i64
+// CHECK: call void @__tgt_target_data_update_mapper(ptr {{.*}}, i64 %[[D32_I64]], i32 {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}})
+
+// Variable i64 stays i64
+// CHECK: call void @__tgt_target_data_update_mapper(ptr {{.*}}, i64 %{{.*}}, i32 {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}}, ptr {{.*}})
+
+// CHECK: ret void
+// CHECK: }

diff  --git a/mlir/test/Target/LLVMIR/omptarget-llvm.mlir b/mlir/test/Target/LLVMIR/omptarget-llvm.mlir
index e289d5d013eaa..0b4d63125f82f 100644
--- a/mlir/test/Target/LLVMIR/omptarget-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-llvm.mlir
@@ -156,6 +156,7 @@ module attributes {omp.target_triples = ["amdgcn-amd-amdhsa"]} {
 // CHECK:         %[[VAL_8:.*]] = load i32, ptr %[[VAL_7]], align 4
 // CHECK:         %[[VAL_9:.*]] = icmp slt i32 %[[VAL_8]], 10
 // CHECK:         %[[VAL_10:.*]] = load i32, ptr %[[VAL_6]], align 4
+// CHECK:         %[[DEV_I64_BEGIN:.*]] = sext i32 %[[VAL_10]] to i64
 // CHECK:         br label %[[VAL_11:.*]]
 // CHECK:       entry:                                            ; preds = %[[VAL_12:.*]]
 // CHECK:         br i1 %[[VAL_9]], label %[[VAL_13:.*]], label %[[VAL_14:.*]]
@@ -176,7 +177,7 @@ module attributes {omp.target_triples = ["amdgcn-amd-amdhsa"]} {
 // CHECK:         store ptr null, ptr %[[VAL_22]], align 8
 // CHECK:         %[[VAL_23:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_3]], i32 0, i32 0
 // CHECK:         %[[VAL_24:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_4]], i32 0, i32 0
-// CHECK:         call void @__tgt_target_data_begin_mapper(ptr @3, i64 -1, i32 2, ptr %[[VAL_23]], ptr %[[VAL_24]], ptr @.offload_sizes, ptr @.offload_maptypes, ptr @.offload_mapnames, ptr null)
+// CHECK:         call void @__tgt_target_data_begin_mapper(ptr @3, i64 %[[DEV_I64_BEGIN]], i32 2, ptr %[[VAL_23]], ptr %[[VAL_24]], ptr @.offload_sizes, ptr @.offload_maptypes, ptr @.offload_mapnames, ptr null)
 // CHECK:         br label %[[VAL_25:.*]]
 // CHECK:       omp_if.else:                                      ; preds = %[[VAL_11]]
 // CHECK:         br label %[[VAL_25]]
@@ -184,6 +185,7 @@ module attributes {omp.target_triples = ["amdgcn-amd-amdhsa"]} {
 // CHECK:         %[[VAL_26:.*]] = load i32, ptr %[[VAL_7]], align 4
 // CHECK:         %[[VAL_27:.*]] = icmp sgt i32 %[[VAL_26]], 10
 // CHECK:         %[[VAL_28:.*]] = load i32, ptr %[[VAL_6]], align 4
+// CHECK:         %[[DEV_I64_END:.*]] = sext i32 %[[VAL_28]] to i64
 // CHECK:         br i1 %[[VAL_27]], label %[[VAL_29:.*]], label %[[VAL_30:.*]]
 // CHECK:       omp_if.then2:                                     ; preds = %[[VAL_25]]
 // CHECK:         %[[ARR_OFFSET3:.*]] = getelementptr inbounds [1024 x i32], ptr %[[VAL_16]], i64 0, i64 0
@@ -202,7 +204,7 @@ module attributes {omp.target_triples = ["amdgcn-amd-amdhsa"]} {
 // CHECK:         store ptr null, ptr %[[VAL_36]], align 8
 // CHECK:         %[[VAL_37:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_0]], i32 0, i32 0
 // CHECK:         %[[VAL_38:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_1]], i32 0, i32 0
-// CHECK:         call void @__tgt_target_data_end_mapper(ptr @3, i64 -1, i32 2, ptr %[[VAL_37]], ptr %[[VAL_38]], ptr @.offload_sizes.1, ptr @.offload_maptypes.2, ptr @.offload_mapnames.3, ptr null)
+// CHECK:         call void @__tgt_target_data_end_mapper(ptr @3, i64 %[[DEV_I64_END]], i32 2, ptr %[[VAL_37]], ptr %[[VAL_38]], ptr @.offload_sizes.1, ptr @.offload_maptypes.2, ptr @.offload_mapnames.3, ptr null)
 // CHECK:         br label %[[VAL_39:.*]]
 // CHECK:       omp_if.else8:                                     ; preds = %[[VAL_25]]
 // CHECK:         br label %[[VAL_39]]


        


More information about the Mlir-commits mailing list