[Mlir-commits] [llvm] [mlir] [MLIR][OpenMP] Fix and simplify bounds offset calculation for 1-D GEP offsets (PR #165486)

Thu Oct 30 09:20:48 PDT 2025

https://github.com/agozillon updated https://github.com/llvm/llvm-project/pull/165486

>From e4080a13f67769eebc47cd4d6c3709ff8455122f Mon Sep 17 00:00:00 2001
From: agozillon <Andrew.Gozillon at amd.com>
Date: Tue, 28 Oct 2025 15:11:35 -0500
Subject: [PATCH] [MLIR][OpenMP] Fix and simplify bounds offset calculation for
 1-D GEP offsets

Currently this is being calculated incorrectly and will result in incorrect index
offsets in more complicated array slices. This PR tries to address it by refactoring
and changing the calculation.
---
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      | 55 ++++++-----------
 ...rget-record-type-with-ptr-member-host.mlir |  3 +-
 .../fortran/descriptor-array-slice-map.f90    | 61 +++++++++++++++++++
 3 files changed, 81 insertions(+), 38 deletions(-)
 create mode 100644 offload/test/offloading/fortran/descriptor-array-slice-map.f90

diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index f28454075f1d3..8edec990eaaba 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -4084,12 +4084,13 @@ static omp::MapInfoOp getFirstOrLastMappedMemberPtr(omp::MapInfoOp mapInfo,
 ///
 /// Fortran
 ///     map(tofrom: array(2:5, 3:2))
-///   or
-/// C++
-///   map(tofrom: array[1:4][2:3])
+///
 /// We must calculate the initial pointer offset to pass across, this function
 /// performs this using bounds.
 ///
+/// TODO/WARNING: This only supports Fortran's column major indexing currently
+/// as is noted in the note below and comments in the function, we must extend
+/// this function when we add a C++ frontend.
 /// NOTE: which while specified in row-major order it currently needs to be
 /// flipped for Fortran's column order array allocation and access (as
 /// opposed to C++'s row-major, hence the backwards processing where order is
@@ -4125,46 +4126,28 @@ calculateBoundsOffset(LLVM::ModuleTranslation &moduleTranslation,
     // with a pointer that's being treated like an array and we have the
     // underlying type e.g. an i32, or f64 etc, e.g. a fortran descriptor base
     // address (pointer pointing to the actual data) so we must caclulate the
-    // offset using a single index which the following two loops attempts to
-    // compute.
-
-    // Calculates the size offset we need to make per row e.g. first row or
-    // column only needs to be offset by one, but the next would have to be
-    // the previous row/column offset multiplied by the extent of current row.
+    // offset using a single index which the following loop attempts to
+    // compute using the standard column-major algorithm e.g for a 3D array:
     //
-    // For example ([1][10][100]):
+    // ((((c_idx * b_len) + b_idx) * a_len) + a_idx)
     //
-    //  - First row/column we move by 1 for each index increment
-    //  - Second row/column we move by 1 (first row/column) * 10 (extent/size of
-    //  current) for 10 for each index increment
-    //  - Third row/column we would move by 10 (second row/column) *
-    //  (extent/size of current) 100 for 1000 for each index increment
-    std::vector<llvm::Value *> dimensionIndexSizeOffset{builder.getInt64(1)};
-    for (size_t i = 1; i < bounds.size(); ++i) {
-      if (auto boundOp = dyn_cast_if_present<omp::MapBoundsOp>(
-              bounds[i].getDefiningOp())) {
-        dimensionIndexSizeOffset.push_back(builder.CreateMul(
-            moduleTranslation.lookupValue(boundOp.getExtent()),
-            dimensionIndexSizeOffset[i - 1]));
-      }
-    }
-
-    // Now that we have calculated how much we move by per index, we must
-    // multiply each lower bound offset in indexes by the size offset we
-    // have calculated in the previous and accumulate the results to get
-    // our final resulting offset.
+    // It is of note that it's doing column-major rather than row-major at the
+    // moment, but having a way for the frontend to indicate which major format
+    // to use or standardizing/canonicalizing the order of the bounds to compute
+    // the offset may be useful in the future when there's other frontends with
+    // different formats.
+    std::vector<llvm::Value *> dimensionIndexSizeOffset;
     for (int i = bounds.size() - 1; i >= 0; --i) {
       if (auto boundOp = dyn_cast_if_present<omp::MapBoundsOp>(
               bounds[i].getDefiningOp())) {
-        if (idx.empty())
-          idx.emplace_back(builder.CreateMul(
-              moduleTranslation.lookupValue(boundOp.getLowerBound()),
-              dimensionIndexSizeOffset[i]));
+        if (i == ((int)bounds.size() - 1))
+          idx.emplace_back(
+              moduleTranslation.lookupValue(boundOp.getLowerBound()));
         else
           idx.back() = builder.CreateAdd(
-              idx.back(), builder.CreateMul(moduleTranslation.lookupValue(
-                                                boundOp.getLowerBound()),
-                                            dimensionIndexSizeOffset[i]));
+              builder.CreateMul(idx.back(), moduleTranslation.lookupValue(
+                                                boundOp.getExtent())),
+              moduleTranslation.lookupValue(boundOp.getLowerBound()));
       }
     }
   }
diff --git a/mlir/test/Target/LLVMIR/omptarget-record-type-with-ptr-member-host.mlir b/mlir/test/Target/LLVMIR/omptarget-record-type-with-ptr-member-host.mlir
index a1e415c35e4b6..9640f03311af7 100644
--- a/mlir/test/Target/LLVMIR/omptarget-record-type-with-ptr-member-host.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-record-type-with-ptr-member-host.mlir
@@ -81,9 +81,8 @@ module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-a
 // CHECK: %[[ARR_SECT_SIZE:.*]] = mul i64 %[[ARR_SECT_SIZE1]], 4
 // CHECK: %[[LFULL_ARR:.*]] = load ptr, ptr @full_arr, align 8
 // CHECK: %[[FULL_ARR_PTR:.*]] = getelementptr inbounds float, ptr %[[LFULL_ARR]], i64 0
-// CHECK: %[[ARR_SECT_OFFSET1:.*]] = mul i64 %[[ARR_SECT_OFFSET2]], 1
 // CHECK: %[[LARR_SECT:.*]] = load ptr, ptr @sect_arr, align 8
-// CHECK: %[[ARR_SECT_PTR:.*]] = getelementptr inbounds i32, ptr %[[LARR_SECT]], i64 %[[ARR_SECT_OFFSET1]]
+// CHECK: %[[ARR_SECT_PTR:.*]] = getelementptr inbounds i32, ptr %[[LARR_SECT]], i64 %[[ARR_SECT_OFFSET2]]
 // CHECK: %[[SCALAR_PTR_LOAD:.*]] = load ptr, ptr %[[SCALAR_BASE]], align 8
 // CHECK: %[[FULL_ARR_DESC_SIZE:.*]] = sdiv exact i64 48, ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
 // CHECK: %[[FULL_ARR_SIZE_CMP:.*]] = icmp eq ptr %[[FULL_ARR_PTR]], null
diff --git a/offload/test/offloading/fortran/descriptor-array-slice-map.f90 b/offload/test/offloading/fortran/descriptor-array-slice-map.f90
new file mode 100644
index 0000000000000..69abb320adc35
--- /dev/null
+++ b/offload/test/offloading/fortran/descriptor-array-slice-map.f90
@@ -0,0 +1,61 @@
+! Offloading test which aims to test that an allocatable/descriptor type map
+! will allow the appropriate slicing behaviour.
+! REQUIRES: flang, amdgpu
+
+subroutine slice_writer(n, a, b, c)
+    implicit none
+    integer, intent(in) :: n
+    real(8), intent(in) :: a(n)
+    real(8), intent(in) :: b(n)
+    real(8), intent(out) :: c(n)
+    integer :: i
+
+    !$omp target teams distribute parallel do
+    do i=1,n
+       c(i) = b(i) + a(i)
+    end do
+end subroutine slice_writer
+
+! RUN: %libomptarget-compile-fortran-run-and-check-generic
+program main
+    implicit none
+    real(kind=8), allocatable :: a(:,:,:)
+    integer :: i, j, k, idx, idx1, idx2, idx3
+
+    i=50
+    j=100
+    k=2
+
+    allocate(a(1:i,1:j,1:k))
+
+    do idx1=1, i
+        do idx2=1, j
+            do idx3=1, k
+                a(idx1,idx2,idx3) = idx2
+            end do
+        end do
+    end do
+
+    do idx=1,k
+        !$omp target enter data map(alloc: a(1:i,:, idx))
+
+        !$omp target update to(a(1:i, 1:30, idx), &
+        !$omp&                 a(1:i, 61:100, idx))
+
+        call slice_writer(i, a(:, 1, idx), a(:, 61, idx), a(:, 31, idx))
+        call slice_writer(i, a(:, 30, idx), a(:, 100, idx), a(:, 60, idx))
+
+        !$omp target update from(a(1:i, 31:60, idx))
+        !$omp target exit data map(delete: a(1:i, :, idx))
+
+        print *, a(1, 31, idx), a(2, 31, idx), a(i, 31, idx)
+        print *, a(1, 60, idx), a(2, 60, idx), a(i, 60, idx)
+    enddo
+
+    deallocate(a)
+end program
+
+! CHECK: 62. 62. 62.
+! CHECK: 130. 130. 130.
+! CHECK: 62. 62. 62.
+! CHECK: 130. 130. 130.