[flang-commits] [flang] fb69fcd - [Flang][OpenMP] Fix COPYIN of derived types with allocatable components at -O3 (#196063)

via flang-commits flang-commits at lists.llvm.org
Mon May 11 05:33:56 PDT 2026


Author: CHANDRA GHALE
Date: 2026-05-11T18:03:51+05:30
New Revision: fb69fcdb7e283185d0806c7cd30957cfbcc50aa5

URL: https://github.com/llvm/llvm-project/commit/fb69fcdb7e283185d0806c7cd30957cfbcc50aa5
DIFF: https://github.com/llvm/llvm-project/commit/fb69fcdb7e283185d0806c7cd30957cfbcc50aa5.diff

LOG: [Flang][OpenMP] Fix COPYIN of derived types with allocatable components at -O3 (#196063)

COPYIN of threadprivate derived types with allocatable components
segfaults at -O3 because the OpenMP runtime zero-fills per-thread
storage, leaving allocatable component descriptors with invalid
metadata. This patch skips the copy on the master thread (where source
and destination alias) and uses temporary_lhs assignment on worker
threads so the runtime initializes descriptors before the deep copy.

Assisted-by: Claude Opus 4.6

Fixes :
[https://github.com/llvm/llvm-project/issues/196134](https://github.com/llvm/llvm-project/issues/196134)
Minimal reprducing test-case : 
```
program repro_o3_segv
  use omp_lib
  implicit none

  integer, parameter :: NT = 4
  integer :: i, rc
  integer :: nThreads(NT), nThreads1(NT)

  type structure_1
    integer, allocatable :: a(:)
  end type
  type structure_2
    type(structure_1) :: struc1
  end type

  type(structure_2), save :: struc2(2)
!$omp threadprivate(struc2)

  rc = 0
  nThreads  = -999
  nThreads1 = -999

  ! Keep this: dynamic teams can change codegen/runtime behavior.
  call omp_set_dynamic(.true.)
  call omp_set_num_threads(NT)

  allocate(struc2(1)%struc1%a(2))
  struc2(1)%struc1%a(1) = 1
  struc2(1)%struc1%a(2) = 2

!$omp parallel copyin(struc2)
  if (omp_get_thread_num() == NT-1) then
    ! Keep branch shape from original.
    ! struc2(1)%struc1%a(1) = 3
  end if
!$omp barrier

  allocate(struc2(2)%struc1%a(3))
  nThreads(omp_get_thread_num()+1) = struc2(1)%struc1%a(1)
  struc2(2)%struc1%a(1) = omp_get_thread_num()+1
  struc2(2)%struc1%a(2) = omp_get_thread_num()+2
!$omp end parallel

  do i = 1, NT
    if (nThreads(i) /= 1) rc = 1
  end do

  struc2(2)%struc1%a(2) = -1

!$omp parallel copyin(struc2)
  nThreads(omp_get_thread_num()+1)  = struc2(2)%struc1%a(1)
  nThreads1(omp_get_thread_num()+1) = struc2(2)%struc1%a(2)
!$omp end parallel

  do i = 1, NT
    if (nThreads1(i) /= -1) rc = 1
  end do

  if (rc /= 0) stop 1

  deallocate(struc2(1)%struc1%a)
  deallocate(struc2(2)%struc1%a)
end program
```
[>./flang -fopenmp -fopenmp-version=50 copyin_derrived_alloct.f90 -O3
> ./a.out 
Segmentation fault (core dumped)

Co-authored-by: Chandra Ghale <ghale at pe34genoa.hpc.amslabs.hpecorp.net>

Added: 
    flang/test/Lower/OpenMP/copyin-derived-allocatable-comp.f90

Modified: 
    flang/lib/Lower/Bridge.cpp

Removed: 
    


################################################################################
diff  --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index c5709e1cd94d4..08bb9def8ad5f 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -1548,6 +1548,42 @@ class FirConverter : public Fortran::lower::AbstractConverter {
       builder->genIfThen(loc, isAllocated)
           .genThen([&]() { copyData(lhs, rhs); })
           .end();
+    } else if (!isAllocatable &&
+               flags.test(Fortran::semantics::Symbol::Flag::OmpCopyIn) &&
+               hlfir::mayHaveAllocatableComponent(lhs.getType())) {
+      // For copyin of derived types with allocatable components where the
+      // variable itself is not allocatable: the threadprivate copy's
+      // allocatable component descriptors may be uninitialized (e.g.,
+      // zero-filled by the OpenMP runtime). Use temporary_lhs semantics
+      // which routes through AssignTemporary at runtime. AssignTemporary
+      // first initializes the LHS descriptor metadata (setting
+      // attribute=CFI_attribute_allocatable, base_addr=nullptr for
+      // allocatable components via the Initialize runtime call), then
+      // performs the assignment with MaybeReallocate semantics for proper
+      // deep copy of allocatable components.
+      //
+      // On the master thread, the LHS and RHS resolve to the same
+      // threadprivate storage, so we must skip the temporary_lhs path
+      // (which would destroy the source data via Initialize before the
+      // copy). Use a runtime address comparison to distinguish threads.
+      mlir::Value lhsAddr =
+          fir::ConvertOp::create(*builder, loc, builder->getIndexType(), lhs);
+      mlir::Value rhsAddr =
+          fir::ConvertOp::create(*builder, loc, builder->getIndexType(), rhs);
+      mlir::Value sameAddr = mlir::arith::CmpIOp::create(
+          *builder, loc, mlir::arith::CmpIPredicate::eq, lhsAddr, rhsAddr);
+      builder->genIfThenElse(loc, sameAddr)
+          .genThen([&]() {
+            // Master thread: lhs and rhs are the same, nothing to do.
+          })
+          .genElse([&]() {
+            hlfir::Entity r = hlfir::loadTrivialScalar(loc, *builder, rhs);
+            hlfir::AssignOp::create(*builder, loc, r, lhs,
+                                    /*realloc=*/false,
+                                    /*keep_lhs_length_if_realloc=*/false,
+                                    /*temporary_lhs=*/true);
+          })
+          .end();
     } else {
       copyData(lhs, rhs);
     }

diff  --git a/flang/test/Lower/OpenMP/copyin-derived-allocatable-comp.f90 b/flang/test/Lower/OpenMP/copyin-derived-allocatable-comp.f90
new file mode 100644
index 0000000000000..c3ca8cff569da
--- /dev/null
+++ b/flang/test/Lower/OpenMP/copyin-derived-allocatable-comp.f90
@@ -0,0 +1,88 @@
+! Test lowering of COPYIN clause for derived types with allocatable components.
+! Threadprivate copies may have uninitialized allocatable component descriptors
+! (zero-filled by the OpenMP runtime). For non-master threads, use
+! temporary_lhs semantics so that AssignTemporary initializes descriptors
+! before performing the deep copy. The master thread is skipped via a runtime
+! address comparison (lhs == rhs means same threadprivate storage).
+
+! RUN: bbc -fopenmp -emit-hlfir %s -o - | FileCheck %s
+! RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s
+
+! CHECK-LABEL: func.func @_QPcopyin_derived_alloc_comp
+subroutine copyin_derived_alloc_comp()
+  type inner
+    integer, allocatable :: a(:)
+  end type inner
+  type outer
+    type(inner) :: s
+  end type outer
+  type(outer), save :: x(2)
+  !$omp threadprivate(x)
+
+  allocate(x(1)%s%a(3))
+  x(1)%s%a = 42
+
+! CHECK:      omp.parallel {
+! CHECK:        omp.threadprivate
+! CHECK:        hlfir.declare
+! CHECK:        fir.convert {{.*}} -> index
+! CHECK:        fir.convert {{.*}} -> index
+! CHECK:        arith.cmpi eq, {{.*}} : index
+! CHECK:        fir.if
+! CHECK:        } else {
+! CHECK:          hlfir.assign {{.*}} temporary_lhs
+! CHECK:        }
+! CHECK:        omp.barrier
+  !$omp parallel copyin(x)
+    call sub(x(1)%s%a)
+  !$omp end parallel
+  deallocate(x(1)%s%a)
+end subroutine
+
+! CHECK-LABEL: func.func @_QPcopyin_scalar_derived_alloc_comp
+subroutine copyin_scalar_derived_alloc_comp()
+  type dt
+    integer, allocatable :: a(:)
+  end type dt
+  type(dt), save :: y
+  !$omp threadprivate(y)
+  allocate(y%a(5))
+  y%a = 10
+
+! CHECK:      omp.parallel {
+! CHECK:        omp.threadprivate
+! CHECK:        hlfir.declare
+! CHECK:        fir.convert {{.*}} -> index
+! CHECK:        fir.convert {{.*}} -> index
+! CHECK:        arith.cmpi eq, {{.*}} : index
+! CHECK:        fir.if
+! CHECK:        } else {
+! CHECK:          hlfir.assign {{.*}} temporary_lhs
+! CHECK:        }
+! CHECK:        omp.barrier
+  !$omp parallel copyin(y)
+    call sub(y%a)
+  !$omp end parallel
+  deallocate(y%a)
+end subroutine
+
+! Derived type WITHOUT allocatable components: plain assign, no address check.
+! CHECK-LABEL: func.func @_QPcopyin_no_alloc_comp
+subroutine copyin_no_alloc_comp()
+  type simple
+    integer :: val
+  end type simple
+  type(simple), save :: z
+  !$omp threadprivate(z)
+! CHECK:      omp.parallel {
+! CHECK:        omp.threadprivate
+! CHECK:        hlfir.declare
+! CHECK-NOT:    arith.cmpi
+! CHECK:        hlfir.assign
+! CHECK-NOT:    temporary_lhs
+! CHECK:        omp.barrier
+  !$omp parallel copyin(z)
+    call sub2(z%val)
+  !$omp end parallel
+end subroutine
+


        


More information about the flang-commits mailing list