[flang-commits] [flang] daa1820 - [Flang][OpenMP] Fix copyin allocatable lowering to MLIR (#122097)

via flang-commits flang-commits at lists.llvm.org
Wed Jan 22 21:44:05 PST 2025


Author: Kaviya Rajendiran
Date: 2025-01-23T11:14:00+05:30
New Revision: daa18205c6f0a3b5dd62ba2e65948e1a9182a60f

URL: https://github.com/llvm/llvm-project/commit/daa18205c6f0a3b5dd62ba2e65948e1a9182a60f
DIFF: https://github.com/llvm/llvm-project/commit/daa18205c6f0a3b5dd62ba2e65948e1a9182a60f.diff

LOG: [Flang][OpenMP] Fix copyin allocatable lowering to MLIR (#122097)

Fixes https://github.com/llvm/llvm-project/issues/113191

Issue: [flang][OpenMP] Runtime segfault when an allocatable variable is
used with copyin

Rootcause: The value of the threadprivate variable is not being copied
from the primary thread to the other threads within a parallel region.
As a result it tries to access a null pointer inside a parallel region
which causes segfault.

Fix: When allocatables used with copyin clause need to ensure that, on
entry to any parallel region each thread’s copy of a variable will
acquire the allocation status of the primary thread, before copying the
value of a threadprivate variable of the primary thread to the
threadprivate variable of each other member of the team.

Added: 
    

Modified: 
    flang/lib/Lower/Bridge.cpp
    flang/test/Lower/OpenMP/copyin.f90

Removed: 
    


################################################################################
diff  --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index 700ca56141a324..d92dc0cf9abd62 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -1290,9 +1290,30 @@ class FirConverter : public Fortran::lower::AbstractConverter {
       auto loadVal = builder->create<fir::LoadOp>(loc, rhs);
       builder->create<fir::StoreOp>(loc, loadVal, lhs);
     } else if (isAllocatable &&
-               (flags.test(Fortran::semantics::Symbol::Flag::OmpFirstPrivate) ||
-                flags.test(Fortran::semantics::Symbol::Flag::OmpCopyIn))) {
-      // For firstprivate and copyin allocatable variables, RHS must be copied
+               flags.test(Fortran::semantics::Symbol::Flag::OmpCopyIn)) {
+      // For copyin allocatable variables, RHS must be copied to lhs
+      // only when rhs is allocated.
+      hlfir::Entity temp =
+          hlfir::derefPointersAndAllocatables(loc, *builder, rhs);
+      mlir::Value addr = hlfir::genVariableRawAddress(loc, *builder, temp);
+      mlir::Value isAllocated = builder->genIsNotNullAddr(loc, addr);
+      builder->genIfThenElse(loc, isAllocated)
+          .genThen([&]() { copyData(lhs, rhs); })
+          .genElse([&]() {
+            fir::ExtendedValue hexv = symBoxToExtendedValue(dst);
+            hexv.match(
+                [&](const fir::MutableBoxValue &new_box) -> void {
+                  // if the allocation status of original list item is
+                  // unallocated, unallocate the copy if it is allocated, else
+                  // do nothing.
+                  Fortran::lower::genDeallocateIfAllocated(*this, new_box, loc);
+                },
+                [&](const auto &) -> void {});
+          })
+          .end();
+    } else if (isAllocatable &&
+               flags.test(Fortran::semantics::Symbol::Flag::OmpFirstPrivate)) {
+      // For firstprivate allocatable variables, RHS must be copied
       // only when LHS is allocated.
       hlfir::Entity temp =
           hlfir::derefPointersAndAllocatables(loc, *builder, lhs);

diff  --git a/flang/test/Lower/OpenMP/copyin.f90 b/flang/test/Lower/OpenMP/copyin.f90
index 9e9ccf8e3d9142..ec4c544495c609 100644
--- a/flang/test/Lower/OpenMP/copyin.f90
+++ b/flang/test/Lower/OpenMP/copyin.f90
@@ -395,12 +395,34 @@ subroutine pointer()
 ! CHECK:             %[[VAL_4:.*]] = omp.threadprivate %[[VAL_1]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
 ! CHECK:             %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_4]] {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFallocatableEp"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
 ! CHECK:             %[[VAL_6:.*]] = fir.load %[[VAL_3]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
-! CHECK:             hlfir.assign %[[VAL_6]] to %[[VAL_5]]#0 realloc : !fir.box<!fir.heap<!fir.array<?xi32>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK:             %[[VAL_7:.*]] = fir.box_addr %[[VAL_6]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.heap<!fir.array<?xi32>>
+! CHECK:             %[[VAL_8:.*]] = fir.convert %[[VAL_7]] : (!fir.heap<!fir.array<?xi32>>) -> i64
+! CHECK:             %[[C0_I64:.*]] = arith.constant 0 : i64
+! CHECK:             %[[VAL_9:.*]] = arith.cmpi ne, %[[VAL_8]], %[[C0_I64]] : i64
+! CHECK:             fir.if %[[VAL_9]] {
+! CHECK:               %[[VAL_10:.*]] = fir.load %[[VAL_3]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK:             hlfir.assign %[[VAL_10]] to %[[VAL_5]]#0 realloc : !fir.box<!fir.heap<!fir.array<?xi32>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK:             } else {
+! CHECK:              %[[VAL_10:.*]] = fir.load %[[VAL_5]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK:              %[[VAL_11:.*]] = fir.box_addr %[[VAL_10]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.heap<!fir.array<?xi32>>
+! CHECK:              %[[VAL_12:.*]] = fir.convert %[[VAL_11]] : (!fir.heap<!fir.array<?xi32>>) -> i64
+! CHECK:              %[[C0_I64_0:.*]] = arith.constant 0 : i64
+! CHECK:              %[[VAL_13:.*]] = arith.cmpi ne, %[[VAL_12]], %[[C0_I64_0]] : i64
+! CHECK:              fir.if %[[VAL_13]] {
+! CHECK:               %[[VAL_14:.*]] = fir.load %[[VAL_5]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK:                %[[VAL_15:.*]] = fir.box_addr %[[VAL_14]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.heap<!fir.array<?xi32>>
+! CHECK:                fir.freemem %[[VAL_15]] : !fir.heap<!fir.array<?xi32>>
+! CHECK:                %[[VAL_16:.*]] = fir.zero_bits !fir.heap<!fir.array<?xi32>>
+! CHECK:                %[[C0:.*]] = arith.constant 0 : index
+! CHECK:                %[[VAL_17:.*]] = fir.shape %[[C0]] : (index) -> !fir.shape<1>
+! CHECK:                %[[VAL_18:.*]] = fir.embox %[[VAL_16]](%[[VAL_17]]) : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.heap<!fir.array<?xi32>>>
+! CHECK:                fir.store %[[VAL_18]] to %[[VAL_5]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK:              }
 ! CHECK:             omp.barrier
-! CHECK:             %[[VAL_7:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
-! CHECK:             %[[VAL_8:.*]] = fir.box_addr %[[VAL_7]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.heap<!fir.array<?xi32>>
-! CHECK:             %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (!fir.heap<!fir.array<?xi32>>) -> !fir.ref<!fir.array<?xi32>>
-! CHECK:             fir.call @_QPsub8(%[[VAL_9]]) fastmath<contract> : (!fir.ref<!fir.array<?xi32>>) -> ()
+! CHECK:             %[[VAL_19:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK:             %[[VAL_20:.*]] = fir.box_addr %[[VAL_19]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.heap<!fir.array<?xi32>>
+! CHECK:             %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (!fir.heap<!fir.array<?xi32>>) -> !fir.ref<!fir.array<?xi32>>
+! CHECK:             fir.call @_QPsub8(%[[VAL_21]]) fastmath<contract> : (!fir.ref<!fir.array<?xi32>>) -> ()
 ! CHECK:             omp.terminator
 ! CHECK:           }
 ! CHECK:           return
@@ -422,7 +444,7 @@ subroutine allocatable()
 ! CHECK:           omp.parallel {
 ! CHECK:             %[[VAL_4:.*]] = omp.threadprivate %[[VAL_1]]#1 : !fir.ref<!fir.box<!fir.heap<i32>>> -> !fir.ref<!fir.box<!fir.heap<i32>>>
 ! CHECK:             %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_4]] {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFallocatable2Ea"} : (!fir.ref<!fir.box<!fir.heap<i32>>>) -> (!fir.ref<!fir.box<!fir.heap<i32>>>, !fir.ref<!fir.box<!fir.heap<i32>>>)
-! CHECK:             %[[VAL_6:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref<!fir.box<!fir.heap<i32>>>
+! CHECK:             %[[VAL_6:.*]] = fir.load %[[VAL_3]]#0 : !fir.ref<!fir.box<!fir.heap<i32>>>
 ! CHECK:             %[[VAL_7:.*]] = fir.box_addr %[[VAL_6]] : (!fir.box<!fir.heap<i32>>) -> !fir.heap<i32>
 ! CHECK:             %[[VAL_8:.*]] = fir.convert %[[VAL_7]] : (!fir.heap<i32>) -> i64
 ! CHECK:             %[[VAL_9:.*]] = arith.constant 0 : i64
@@ -432,10 +454,23 @@ subroutine allocatable()
 ! CHECK:               %[[VAL_12:.*]] = fir.box_addr %[[VAL_11]] : (!fir.box<!fir.heap<i32>>) -> !fir.heap<i32>
 ! CHECK:               %[[VAL_13:.*]] = fir.load %[[VAL_12]] : !fir.heap<i32>
 ! CHECK:               hlfir.assign %[[VAL_13]] to %[[VAL_5]]#0 realloc : i32, !fir.ref<!fir.box<!fir.heap<i32>>>
-! CHECK:             }
+! CHECK:             } else {
+! CHECK:               %[[VAL_11:.*]] = fir.load %[[VAL_5]]#1 : !fir.ref<!fir.box<!fir.heap<i32>>>
+! CHECK:               %[[VAL_15:.*]] = fir.box_addr  %[[VAL_11]] : (!fir.box<!fir.heap<i32>>) -> !fir.heap<i32>
+! CHECK:               %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (!fir.heap<i32>) -> i64
+! CHECK:               %[[C0_I64_0:.*]] = arith.constant 0 : i64
+! CHECK:               %[[VAL_17:.*]] = arith.cmpi ne, %[[VAL_16]], %[[C0_I64_0]] : i64
+! CHECK:               fir.if %[[VAL_17]] {
+! CHECK:                 %[[VAL_18:.*]] = fir.load %[[VAL_5]]#1 : !fir.ref<!fir.box<!fir.heap<i32>>>
+! CHECK:                 %[[VAL_19:.*]] = fir.box_addr %[[VAL_18]] : (!fir.box<!fir.heap<i32>>) -> !fir.heap<i32>
+! CHECK:                 fir.freemem %[[VAL_19]] : !fir.heap<i32>
+! CHECK:                 %[[VAL_20:.*]] = fir.zero_bits !fir.heap<i32>
+! CHECK:                 %[[VAL_21:.*]] = fir.embox %[[VAL_20]] : (!fir.heap<i32>) -> !fir.box<!fir.heap<i32>>
+! CHECK:                 fir.store %[[VAL_21]] to %[[VAL_5]]#1 : !fir.ref<!fir.box<!fir.heap<i32>>>
+! CHECK:               }
 ! CHECK:             omp.barrier
-! CHECK:             %[[VAL_14:.*]] = arith.constant 1 : i32
-! CHECK:             hlfir.assign %[[VAL_14]] to %[[VAL_5]]#0 realloc : i32, !fir.ref<!fir.box<!fir.heap<i32>>>
+! CHECK:             %[[VAL_22:.*]] = arith.constant 1 : i32
+! CHECK:             hlfir.assign %[[VAL_22]] to %[[VAL_5]]#0 realloc : i32, !fir.ref<!fir.box<!fir.heap<i32>>>
 ! CHECK:             omp.terminator
 ! CHECK:           }
 ! CHECK:           return
@@ -448,3 +483,62 @@ subroutine allocatable2()
     a = 1
   !$omp end parallel
 end subroutine
+
+! CHECK:    func.func @_QPallocatable3() {
+! CHECK:      %[[VAL_0:.*]] = fir.address_of(@_QFallocatable3Ea) : !fir.ref<!fir.box<!fir.heap<i32>>>
+! CHECK:      %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFallocatable3Ea"} : (!fir.ref<!fir.box<!fir.heap<i32>>>) -> (!fir.ref<!fir.box<!fir.heap<i32>>>, !fir.ref<!fir.box<!fir.heap<i32>>>)
+! CHECK:      %[[VAL_2:.*]] = omp.threadprivate %[[VAL_1]]#1 : !fir.ref<!fir.box<!fir.heap<i32>>> -> !fir.ref<!fir.box<!fir.heap<i32>>>
+! CHECK:       %[[VAL_3:.*]]:2  = hlfir.declare %[[VAL_2]] {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFallocatable3Ea"} : (!fir.ref<!fir.box<!fir.heap<i32>>>) -> (!fir.ref<!fir.box<!fir.heap<i32>>>, !fir.ref<!fir.box<!fir.heap<i32>>>)
+! CHECK:      %[[VAL_4:.*]] = fir.allocmem i32 {fir.must_be_heap = true, uniq_name = "_QFallocatable3Ea.alloc"}
+! CHECK:      %[[VAL_5:.*]] = fir.embox %[[VAL_4]] : (!fir.heap<i32>) -> !fir.box<!fir.heap<i32>>
+! CHECK:      fir.store %[[VAL_5]] to %[[VAL_3]]#1 : !fir.ref<!fir.box<!fir.heap<i32>>>
+! CHECK:      %[[C10_I32:.*]] = arith.constant 10 : i32
+! CHECK:      hlfir.assign %[[C10_I32]] to %[[VAL_3]]#0 realloc : i32, !fir.ref<!fir.box<!fir.heap<i32>>>
+! CHECK:      omp.parallel {
+! CHECK:        %[[VAL_6:.*]] = omp.threadprivate %[[VAL_1]]#1 : !fir.ref<!fir.box<!fir.heap<i32>>> -> !fir.ref<!fir.box<!fir.heap<i32>>>
+! CHECK:        %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]] {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFallocatable3Ea"} : (!fir.ref<!fir.box<!fir.heap<i32>>>) -> (!fir.ref<!fir.box<!fir.heap<i32>>>, !fir.ref<!fir.box<!fir.heap<i32>>>)
+! CHECK:        %[[VAL_8:.*]] = fir.load %[[VAL_3]]#0 : !fir.ref<!fir.box<!fir.heap<i32>>>
+! CHECK:        %[[VAL_9:.*]] = fir.box_addr %[[VAL_8]] : (!fir.box<!fir.heap<i32>>) -> !fir.heap<i32>
+! CHECK:        %[[VAL_10:.*]] = fir.convert %[[VAL_9]] : (!fir.heap<i32>) -> i64
+! CHECK:        %[[C10_I64:.*]] = arith.constant 0 : i64
+! CHECK:        %[[VAL_11:.*]] = arith.cmpi ne, %[[VAL_10]], %[[C10_I64]] : i64
+! CHECK:        fir.if %[[VAL_11]] {
+! CHECK:         %[[VAL_12:.*]] = fir.load %[[VAL_3]]#0 : !fir.ref<!fir.box<!fir.heap<i32>>>
+! CHECK:         %[[VAL_13:.*]] = fir.box_addr %[[VAL_12]] : (!fir.box<!fir.heap<i32>>) -> !fir.heap<i32>
+! CHECK:          %[[VAL_14:.*]] = fir.load %[[VAL_13]] : !fir.heap<i32>
+! CHECK:          hlfir.assign %[[VAL_14]] to %[[VAL_7]]#0 realloc : i32, !fir.ref<!fir.box<!fir.heap<i32>>>
+! CHECK:        } else {
+! CHECK:           %[[VAL_12:.*]] = fir.load %[[VAL_7]]#1 : !fir.ref<!fir.box<!fir.heap<i32>>>
+! CHECK:           %[[VAL_15:.*]] = fir.box_addr %[[VAL_12]] : (!fir.box<!fir.heap<i32>>) -> !fir.heap<i32>
+! CHECK:           %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (!fir.heap<i32>) -> i64
+! CHECK:           %[[C0_I64_0:.*]] = arith.constant 0 : i64
+! CHECK:           %[[VAL_17:.*]] = arith.cmpi ne, %[[VAL_16]], %[[C0_I64_0]] : i64
+! CHECK:           fir.if %[[VAL_17]] {
+! CHECK:             %[[VAL_18:.*]] = fir.load %[[VAL_7]]#1 : !fir.ref<!fir.box<!fir.heap<i32>>>
+! CHECK:             %[[VAL_19:.*]] = fir.box_addr %[[VAL_18]] : (!fir.box<!fir.heap<i32>>) -> !fir.heap<i32>
+! CHECK:             fir.freemem %[[VAL_19]] : !fir.heap<i32>
+! CHECK:             %[[VAL_20:.*]] = fir.zero_bits !fir.heap<i32>
+! CHECK:             %[[VAL_21:.*]] = fir.embox %[[VAL_20]] : (!fir.heap<i32>) -> !fir.box<!fir.heap<i32>>
+! CHECK:             fir.store %[[VAL_21]] to %[[VAL_7]]#1 : !fir.ref<!fir.box<!fir.heap<i32>>>
+! CHECK:           }
+! CHECK:          }
+! CHECK:        omp.barrier
+! CHECK:        %[[VAL_22:.*]] = fir.load %7#0 : !fir.ref<!fir.box<!fir.heap<i32>>>
+! CHECK:        %[[VAL_23:.*]] = fir.box_addr %[[VAL_22]] : (!fir.box<!fir.heap<i32>>) -> !fir.heap<i32>
+! CHECK:       %[[VAL_24:.*]] = fir.load %[[VAL_23]] : !fir.heap<i32>
+! CHECK:        %[[C1_I32:.*]] = arith.constant 1 : i32
+! CHECK:        %[[VAL_25:.*]]= arith.addi %[[VAL_24]], %[[C1_I32]] : i32
+! CHECK:        hlfir.assign %[[VAL_25]]to %[[VAL_7]]#0 realloc : i32, !fir.ref<!fir.box<!fir.heap<i32>>>
+! CHECK:        omp.terminator
+! CHECK:      }
+! CHECK:      return
+! CHECK:    }
+subroutine allocatable3()
+  integer, allocatable, save :: a
+  !$omp threadprivate(a)
+  allocate(a)
+  a = 10
+  !$omp parallel copyin(a)
+    a = a + 1
+  !$omp end parallel
+end subroutine


        


More information about the flang-commits mailing list