[flang-commits] [flang] [Flang][OpenMP] Fix COPYIN of derived types with allocatable components at -O3 (PR #196063)
CHANDRA GHALE via flang-commits
flang-commits at lists.llvm.org
Wed May 6 05:55:47 PDT 2026
https://github.com/chandraghale created https://github.com/llvm/llvm-project/pull/196063
COPYIN of threadprivate derived types with allocatable components segfaults at -O3 because the OpenMP runtime zero-fills per-thread storage, leaving allocatable component descriptors with invalid metadata. This patch skips the copy on the master thread (where source and destination alias) and uses temporary_lhs assignment on worker threads so the runtime initializes descriptors before the deep copy.
Assisted-by: Claude Opus 4.6
Minimal reprducing test-case :
```
program repro_o3_segv
use omp_lib
implicit none
integer, parameter :: NT = 4
integer :: i, rc
integer :: nThreads(NT), nThreads1(NT)
type structure_1
integer, allocatable :: a(:)
end type
type structure_2
type(structure_1) :: struc1
end type
type(structure_2), save :: struc2(2)
!$omp threadprivate(struc2)
rc = 0
nThreads = -999
nThreads1 = -999
! Keep this: dynamic teams can change codegen/runtime behavior.
call omp_set_dynamic(.true.)
call omp_set_num_threads(NT)
allocate(struc2(1)%struc1%a(2))
struc2(1)%struc1%a(1) = 1
struc2(1)%struc1%a(2) = 2
!$omp parallel copyin(struc2)
if (omp_get_thread_num() == NT-1) then
! Keep branch shape from original.
! struc2(1)%struc1%a(1) = 3
end if
!$omp barrier
allocate(struc2(2)%struc1%a(3))
nThreads(omp_get_thread_num()+1) = struc2(1)%struc1%a(1)
struc2(2)%struc1%a(1) = omp_get_thread_num()+1
struc2(2)%struc1%a(2) = omp_get_thread_num()+2
!$omp end parallel
do i = 1, NT
if (nThreads(i) /= 1) rc = 1
end do
struc2(2)%struc1%a(2) = -1
!$omp parallel copyin(struc2)
nThreads(omp_get_thread_num()+1) = struc2(2)%struc1%a(1)
nThreads1(omp_get_thread_num()+1) = struc2(2)%struc1%a(2)
!$omp end parallel
do i = 1, NT
if (nThreads1(i) /= -1) rc = 1
end do
if (rc /= 0) stop 1
deallocate(struc2(1)%struc1%a)
deallocate(struc2(2)%struc1%a)
end program
```
[>./flang -fopenmp -fopenmp-version=50 copyin_derrived_alloct.f90 -O3
> ./a.out
Segmentation fault (core dumped)
>From 794d4a3d414f542ec067aceaada7827eb98a7ed9 Mon Sep 17 00:00:00 2001
From: Chandra Ghale <ghale at pe34genoa.hpc.amslabs.hpecorp.net>
Date: Wed, 6 May 2026 07:47:53 -0500
Subject: [PATCH] Fix COPYIN of derived types with allocatable components at
-O3
---
flang/lib/Lower/Bridge.cpp | 36 ++++++++
.../copyin-derived-allocatable-comp.f90 | 88 +++++++++++++++++++
2 files changed, 124 insertions(+)
create mode 100644 flang/test/Lower/OpenMP/copyin-derived-allocatable-comp.f90
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index c5709e1cd94d4..08bb9def8ad5f 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -1548,6 +1548,42 @@ class FirConverter : public Fortran::lower::AbstractConverter {
builder->genIfThen(loc, isAllocated)
.genThen([&]() { copyData(lhs, rhs); })
.end();
+ } else if (!isAllocatable &&
+ flags.test(Fortran::semantics::Symbol::Flag::OmpCopyIn) &&
+ hlfir::mayHaveAllocatableComponent(lhs.getType())) {
+ // For copyin of derived types with allocatable components where the
+ // variable itself is not allocatable: the threadprivate copy's
+ // allocatable component descriptors may be uninitialized (e.g.,
+ // zero-filled by the OpenMP runtime). Use temporary_lhs semantics
+ // which routes through AssignTemporary at runtime. AssignTemporary
+ // first initializes the LHS descriptor metadata (setting
+ // attribute=CFI_attribute_allocatable, base_addr=nullptr for
+ // allocatable components via the Initialize runtime call), then
+ // performs the assignment with MaybeReallocate semantics for proper
+ // deep copy of allocatable components.
+ //
+ // On the master thread, the LHS and RHS resolve to the same
+ // threadprivate storage, so we must skip the temporary_lhs path
+ // (which would destroy the source data via Initialize before the
+ // copy). Use a runtime address comparison to distinguish threads.
+ mlir::Value lhsAddr =
+ fir::ConvertOp::create(*builder, loc, builder->getIndexType(), lhs);
+ mlir::Value rhsAddr =
+ fir::ConvertOp::create(*builder, loc, builder->getIndexType(), rhs);
+ mlir::Value sameAddr = mlir::arith::CmpIOp::create(
+ *builder, loc, mlir::arith::CmpIPredicate::eq, lhsAddr, rhsAddr);
+ builder->genIfThenElse(loc, sameAddr)
+ .genThen([&]() {
+ // Master thread: lhs and rhs are the same, nothing to do.
+ })
+ .genElse([&]() {
+ hlfir::Entity r = hlfir::loadTrivialScalar(loc, *builder, rhs);
+ hlfir::AssignOp::create(*builder, loc, r, lhs,
+ /*realloc=*/false,
+ /*keep_lhs_length_if_realloc=*/false,
+ /*temporary_lhs=*/true);
+ })
+ .end();
} else {
copyData(lhs, rhs);
}
diff --git a/flang/test/Lower/OpenMP/copyin-derived-allocatable-comp.f90 b/flang/test/Lower/OpenMP/copyin-derived-allocatable-comp.f90
new file mode 100644
index 0000000000000..c3ca8cff569da
--- /dev/null
+++ b/flang/test/Lower/OpenMP/copyin-derived-allocatable-comp.f90
@@ -0,0 +1,88 @@
+! Test lowering of COPYIN clause for derived types with allocatable components.
+! Threadprivate copies may have uninitialized allocatable component descriptors
+! (zero-filled by the OpenMP runtime). For non-master threads, use
+! temporary_lhs semantics so that AssignTemporary initializes descriptors
+! before performing the deep copy. The master thread is skipped via a runtime
+! address comparison (lhs == rhs means same threadprivate storage).
+
+! RUN: bbc -fopenmp -emit-hlfir %s -o - | FileCheck %s
+! RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s
+
+! CHECK-LABEL: func.func @_QPcopyin_derived_alloc_comp
+subroutine copyin_derived_alloc_comp()
+ type inner
+ integer, allocatable :: a(:)
+ end type inner
+ type outer
+ type(inner) :: s
+ end type outer
+ type(outer), save :: x(2)
+ !$omp threadprivate(x)
+
+ allocate(x(1)%s%a(3))
+ x(1)%s%a = 42
+
+! CHECK: omp.parallel {
+! CHECK: omp.threadprivate
+! CHECK: hlfir.declare
+! CHECK: fir.convert {{.*}} -> index
+! CHECK: fir.convert {{.*}} -> index
+! CHECK: arith.cmpi eq, {{.*}} : index
+! CHECK: fir.if
+! CHECK: } else {
+! CHECK: hlfir.assign {{.*}} temporary_lhs
+! CHECK: }
+! CHECK: omp.barrier
+ !$omp parallel copyin(x)
+ call sub(x(1)%s%a)
+ !$omp end parallel
+ deallocate(x(1)%s%a)
+end subroutine
+
+! CHECK-LABEL: func.func @_QPcopyin_scalar_derived_alloc_comp
+subroutine copyin_scalar_derived_alloc_comp()
+ type dt
+ integer, allocatable :: a(:)
+ end type dt
+ type(dt), save :: y
+ !$omp threadprivate(y)
+ allocate(y%a(5))
+ y%a = 10
+
+! CHECK: omp.parallel {
+! CHECK: omp.threadprivate
+! CHECK: hlfir.declare
+! CHECK: fir.convert {{.*}} -> index
+! CHECK: fir.convert {{.*}} -> index
+! CHECK: arith.cmpi eq, {{.*}} : index
+! CHECK: fir.if
+! CHECK: } else {
+! CHECK: hlfir.assign {{.*}} temporary_lhs
+! CHECK: }
+! CHECK: omp.barrier
+ !$omp parallel copyin(y)
+ call sub(y%a)
+ !$omp end parallel
+ deallocate(y%a)
+end subroutine
+
+! Derived type WITHOUT allocatable components: plain assign, no address check.
+! CHECK-LABEL: func.func @_QPcopyin_no_alloc_comp
+subroutine copyin_no_alloc_comp()
+ type simple
+ integer :: val
+ end type simple
+ type(simple), save :: z
+ !$omp threadprivate(z)
+! CHECK: omp.parallel {
+! CHECK: omp.threadprivate
+! CHECK: hlfir.declare
+! CHECK-NOT: arith.cmpi
+! CHECK: hlfir.assign
+! CHECK-NOT: temporary_lhs
+! CHECK: omp.barrier
+ !$omp parallel copyin(z)
+ call sub2(z%val)
+ !$omp end parallel
+end subroutine
+
More information about the flang-commits
mailing list