[flang-commits] [flang] [mlir] [OpenMP][LLVM] Clone `omp.private` op in the parent module (PR #96024)

Kareem Ergawy via flang-commits flang-commits at lists.llvm.org
Thu Jun 20 01:30:39 PDT 2024


https://github.com/ergawy updated https://github.com/llvm/llvm-project/pull/96024

>From 254a34472fc4bc69577e542ae05f5417c99845ee Mon Sep 17 00:00:00 2001
From: ergawy <kareem.ergawy at amd.com>
Date: Wed, 19 Jun 2024 00:15:42 -0500
Subject: [PATCH 1/4] [OpenMP][LLVM] Clone `omp.private` op in the parent
 module

Fixes a crash uncovered by test 0007_0019.f90 in the Fujitsu test suite.

Previously, in the `PrivCB`, we cloned the `omp.private` op without
inserting it in the parent module of the original op. This causes issues
whenever there is an op that needs to lookup the parent module (e.g. for
symbol lookup). This PR fixes the issue by cloning in the parent module
instead of creating an orphaned op.
---
 ...rivatization-lower-allocatable-to-llvm.f90 | 23 -----------
 .../delayed-privatization-lower-to-llvm.f90   | 40 +++++++++++++++++++
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      |  6 ++-
 3 files changed, 45 insertions(+), 24 deletions(-)
 delete mode 100644 flang/test/Lower/OpenMP/delayed-privatization-lower-allocatable-to-llvm.f90
 create mode 100644 flang/test/Lower/OpenMP/delayed-privatization-lower-to-llvm.f90

diff --git a/flang/test/Lower/OpenMP/delayed-privatization-lower-allocatable-to-llvm.f90 b/flang/test/Lower/OpenMP/delayed-privatization-lower-allocatable-to-llvm.f90
deleted file mode 100644
index ac9a6d8746cf2..0000000000000
--- a/flang/test/Lower/OpenMP/delayed-privatization-lower-allocatable-to-llvm.f90
+++ /dev/null
@@ -1,23 +0,0 @@
-! Tests the OMPIRBuilder can handle multiple privatization regions that contain
-! multiple BBs (for example, for allocatables).
-
-! RUN: %flang -S -emit-llvm -fopenmp -mmlir --openmp-enable-delayed-privatization \
-! RUN:   -o - %s 2>&1 | FileCheck %s
-
-subroutine foo(x)
-  integer, allocatable :: x, y
-!$omp parallel private(x, y)
-  x = y
-!$omp end parallel
-end
-
-! CHECK-LABEL: define void @foo_
-! CHECK:         ret void
-! CHECK-NEXT:  }
-
-! CHECK-LABEL: define internal void @foo_..omp_par
-! CHECK-DAG:     call ptr @malloc
-! CHECK-DAG:     call ptr @malloc
-! CHECK-DAG:     call void @free
-! CHECK-DAG:     call void @free
-! CHECK:       }
diff --git a/flang/test/Lower/OpenMP/delayed-privatization-lower-to-llvm.f90 b/flang/test/Lower/OpenMP/delayed-privatization-lower-to-llvm.f90
new file mode 100644
index 0000000000000..a60b843f10e28
--- /dev/null
+++ b/flang/test/Lower/OpenMP/delayed-privatization-lower-to-llvm.f90
@@ -0,0 +1,40 @@
+! Tests the OMPIRBuilder can handle multiple privatization regions that contain
+! multiple BBs (for example, for allocatables).
+
+! RUN: %flang -S -emit-llvm -fopenmp -mmlir --openmp-enable-delayed-privatization \
+! RUN:   -o - %s 2>&1 | FileCheck %s
+
+! CHECK: @[[SOURCE:.*]] = linkonce constant [{{.*}} x i8] c"{{.*}}delayed-privatization-lower-to-llvm.f90\00", comdat
+
+subroutine lower_allocatable(x)
+  integer, allocatable :: x, y
+!$omp parallel private(x, y)
+  x = y
+!$omp end parallel
+end
+
+! CHECK-LABEL: define void @lower_allocatable_
+! CHECK:         ret void
+! CHECK-NEXT:  }
+
+! CHECK-LABEL: define internal void @lower_allocatable_..omp_par
+! CHECK-DAG:     call ptr @malloc
+! CHECK-DAG:     call ptr @malloc
+! CHECK-DAG:     call void @free
+! CHECK-DAG:     call void @free
+! CHECK:       }
+
+subroutine lower_region_with_if_print
+  real(kind=8), dimension(1,1) :: u1
+  !$omp parallel firstprivate(u1) 
+    if (any(u1/=1)) print *,"if branch"
+  !$omp end parallel
+end subroutine
+
+! CHECK-LABEL: define void @lower_region_with_if_print_
+! CHECK:         ret void
+! CHECK-NEXT:  }
+
+! CHECK-LABEL: define internal void @lower_region_with_if_print_..omp_par
+! CHECK:         call i1 @_FortranAAny(ptr %{{[^[:space:]]+}}, ptr @[[SOURCE]], i32 {{.*}}, i32 {{.*}})
+! CHECK:       }
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index cbfc64972f38b..abfe7a2210eb4 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -1288,7 +1288,11 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
           // region. The privatizer is processed in-place (see below) before it
           // gets inlined in the parallel region and therefore processing the
           // original op is dangerous.
-          return {privVar, privatizer.clone()};
+
+          mlir::IRRewriter opCloner(&moduleTranslation.getContext());
+          opCloner.setInsertionPoint(privatizer);
+          return {privVar, llvm::cast<mlir::omp::PrivateClauseOp>(
+                               opCloner.clone(*privatizer))};
         }
       }
 

>From 7ca9599fad743dbb8a66f223173a576434581272 Mon Sep 17 00:00:00 2001
From: ergawy <kareem.ergawy at amd.com>
Date: Thu, 20 Jun 2024 01:50:52 -0500
Subject: [PATCH 2/4] more testing

---
 .../delayed-privatization-lower-to-llvm.f90   |  2 +-
 mlir/test/Target/LLVMIR/openmp-private.mlir   | 50 +++++++++++++++++++
 2 files changed, 51 insertions(+), 1 deletion(-)

diff --git a/flang/test/Lower/OpenMP/delayed-privatization-lower-to-llvm.f90 b/flang/test/Lower/OpenMP/delayed-privatization-lower-to-llvm.f90
index a60b843f10e28..74d06d2cef0d9 100644
--- a/flang/test/Lower/OpenMP/delayed-privatization-lower-to-llvm.f90
+++ b/flang/test/Lower/OpenMP/delayed-privatization-lower-to-llvm.f90
@@ -27,7 +27,7 @@ subroutine lower_allocatable(x)
 subroutine lower_region_with_if_print
   real(kind=8), dimension(1,1) :: u1
   !$omp parallel firstprivate(u1) 
-    if (any(u1/=1)) print *,"if branch"
+    if (any(u1/=1)) u1 = u1 + 1
   !$omp end parallel
 end subroutine
 
diff --git a/mlir/test/Target/LLVMIR/openmp-private.mlir b/mlir/test/Target/LLVMIR/openmp-private.mlir
index 58bda87c3b7be..851968590294a 100644
--- a/mlir/test/Target/LLVMIR/openmp-private.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-private.mlir
@@ -140,3 +140,53 @@ omp.private {type = private} @multi_block.privatizer : !llvm.ptr alloc {
   llvm.store %1, %arg2 : f32, !llvm.ptr
   omp.yield(%arg2 : !llvm.ptr)
 }
+
+// Tests fix for Fujitsu test suite test: 0007_0019.f90: the
+// `llvm.mlir.addressof` op needs access to the parent module when lowering
+// from the LLVM dialect to LLVM IR. If such op is used inside an `omp.private`
+// op instance that was not created/cloned inside the module, we would get a
+// seg fault due to trying to access a null pointer.
+
+// CHECK-LABEL: define internal void @lower_region_with_addressof..omp_par
+// CHECK:         omp.par.region:
+// CHECK:           br label %[[PAR_REG_BEG:.*]]
+// CHECK:         [[PAR_REG_BEG]]:
+// CHECK:           %[[PRIVATIZER_GEP:.*]] = getelementptr double, ptr @_QQfoo, i64 111
+// CHECK:           call void @bar(ptr %[[PRIVATIZER_GEP]])
+// CHECK:           call void @bar(ptr getelementptr (double, ptr @_QQfoo, i64 222))
+llvm.func @lower_region_with_addressof() {
+  %0 = llvm.mlir.constant(1 : i64) : i64
+  %1 = llvm.alloca %0 x !llvm.array<1 x array<1 x f64>> {bindc_name = "u1"} : (i64) -> !llvm.ptr
+  %2 = llvm.mlir.constant(0 : index) : i64
+  %3 = llvm.mlir.constant(4 : i32) : i32
+  %4 = llvm.mlir.constant(1.000000e+00 : f64) : f64
+  %5 = llvm.mlir.constant(1 : index) : i64
+  omp.parallel private(@_QFlower_region_with_addressof_privatizer %1 -> %arg0 : !llvm.ptr) {
+    %c0 = llvm.mlir.constant(111 : i64) : i64
+    %7 = llvm.getelementptr %arg0[%c0] : (!llvm.ptr, i64) -> !llvm.ptr, f64
+    llvm.call @bar(%7) : (!llvm.ptr) -> ()
+
+    %c1 = llvm.mlir.constant(222 : i64) : i64
+    %8 = llvm.mlir.addressof @_QQfoo: !llvm.ptr
+    %9 = llvm.getelementptr %8[%c1] : (!llvm.ptr, i64) -> !llvm.ptr, f64
+    llvm.call @bar(%9) : (!llvm.ptr) -> ()
+    omp.terminator
+  }
+
+  llvm.return
+}
+
+omp.private {type = private} @_QFlower_region_with_addressof_privatizer : !llvm.ptr alloc {
+^bb0(%arg0: !llvm.ptr):
+  %0 = llvm.mlir.constant(1 : i64) : i64
+    %1 = llvm.alloca %0 x !llvm.array<1 x array<1 x f64>> {bindc_name = "u1", pinned} : (i64) -> !llvm.ptr
+  %2 = llvm.mlir.addressof @_QQfoo: !llvm.ptr
+  omp.yield(%2 : !llvm.ptr)
+}
+
+llvm.mlir.global linkonce constant @_QQfoo() {addr_space = 0 : i32} : !llvm.array<3 x i8> {
+  %0 = llvm.mlir.constant("foo") : !llvm.array<3 x i8>
+  llvm.return %0 : !llvm.array<3 x i8>
+}
+
+llvm.func @bar(!llvm.ptr)

>From 88680a312d3fe96960bb8dd7a3000459a381cb1d Mon Sep 17 00:00:00 2001
From: ergawy <kareem.ergawy at amd.com>
Date: Thu, 20 Jun 2024 02:40:28 -0500
Subject: [PATCH 3/4] simplify test

---
 mlir/test/Target/LLVMIR/openmp-private.mlir | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mlir/test/Target/LLVMIR/openmp-private.mlir b/mlir/test/Target/LLVMIR/openmp-private.mlir
index 851968590294a..afef15ca80855 100644
--- a/mlir/test/Target/LLVMIR/openmp-private.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-private.mlir
@@ -156,7 +156,7 @@ omp.private {type = private} @multi_block.privatizer : !llvm.ptr alloc {
 // CHECK:           call void @bar(ptr getelementptr (double, ptr @_QQfoo, i64 222))
 llvm.func @lower_region_with_addressof() {
   %0 = llvm.mlir.constant(1 : i64) : i64
-  %1 = llvm.alloca %0 x !llvm.array<1 x array<1 x f64>> {bindc_name = "u1"} : (i64) -> !llvm.ptr
+  %1 = llvm.alloca %0 x f64 {bindc_name = "u1"} : (i64) -> !llvm.ptr
   %2 = llvm.mlir.constant(0 : index) : i64
   %3 = llvm.mlir.constant(4 : i32) : i32
   %4 = llvm.mlir.constant(1.000000e+00 : f64) : f64
@@ -179,7 +179,7 @@ llvm.func @lower_region_with_addressof() {
 omp.private {type = private} @_QFlower_region_with_addressof_privatizer : !llvm.ptr alloc {
 ^bb0(%arg0: !llvm.ptr):
   %0 = llvm.mlir.constant(1 : i64) : i64
-    %1 = llvm.alloca %0 x !llvm.array<1 x array<1 x f64>> {bindc_name = "u1", pinned} : (i64) -> !llvm.ptr
+    %1 = llvm.alloca %0 x f64 {bindc_name = "u1", pinned} : (i64) -> !llvm.ptr
   %2 = llvm.mlir.addressof @_QQfoo: !llvm.ptr
   omp.yield(%2 : !llvm.ptr)
 }

>From bf02b8e6291aa30179a7c13b20de8be7754eae13 Mon Sep 17 00:00:00 2001
From: ergawy <kareem.ergawy at amd.com>
Date: Thu, 20 Jun 2024 03:30:27 -0500
Subject: [PATCH 4/4] unique clone name

---
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      | 21 ++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index abfe7a2210eb4..4d632f5ffd1cf 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -1289,10 +1289,25 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
           // gets inlined in the parallel region and therefore processing the
           // original op is dangerous.
 
-          mlir::IRRewriter opCloner(&moduleTranslation.getContext());
+          MLIRContext &context = moduleTranslation.getContext();
+          mlir::IRRewriter opCloner(&context);
           opCloner.setInsertionPoint(privatizer);
-          return {privVar, llvm::cast<mlir::omp::PrivateClauseOp>(
-                               opCloner.clone(*privatizer))};
+          auto clone = llvm::cast<mlir::omp::PrivateClauseOp>(
+              opCloner.clone(*privatizer));
+
+          // Unique the clone name to avoid clashes in the symbol table.
+          unsigned counter = 0;
+          SmallString<256> cloneName = SymbolTable::generateSymbolName<256>(
+              privatizer.getSymName(),
+              [&](llvm::StringRef candidate) {
+                return SymbolTable::lookupNearestSymbolFrom(
+                           opInst, StringAttr::get(&context, candidate)) !=
+                       nullptr;
+              },
+              counter);
+
+          clone.setSymName(cloneName);
+          return {privVar, clone};
         }
       }
 



More information about the flang-commits mailing list