[llvm-branch-commits] [flang] [flang] Introduce HLFIR lowerings to omp.workshare_loop_nest (PR #104748)
Ivan R. Ivanov via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Mon Nov 18 20:02:56 PST 2024
https://github.com/ivanradanov updated https://github.com/llvm/llvm-project/pull/104748
>From a0f2307ce374355449877178cea70049b9e861d5 Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov <ivanov.i.aa at m.titech.ac.jp>
Date: Sun, 4 Aug 2024 17:33:52 +0900
Subject: [PATCH] Add workshare loop wrapper lowerings
Bufferize test
Bufferize test
Bufferize test
Add test for should use workshare lowering
Add integration test for workshare
One more integration test
Add test for cfg workshare bufferization
Fix tests
Test coverage for all changes
Integration tests
bufferize fix
---
.../HLFIR/Transforms/BufferizeHLFIR.cpp | 4 +-
.../Transforms/OptimizedBufferization.cpp | 10 +-
flang/test/HLFIR/bufferize-workshare.fir | 57 ++++++
.../OpenMP/workshare-array-array-assign.f90 | 34 ++++
.../Integration/OpenMP/workshare-axpy.f90 | 57 ++++++
.../OpenMP/workshare-scalar-array-assign.f90 | 45 +++++
.../OpenMP/workshare-scalar-array-mul.f90 | 65 +++++++
.../OpenMP/should-use-workshare-lowering.mlir | 162 ++++++++++++++++++
8 files changed, 430 insertions(+), 4 deletions(-)
create mode 100644 flang/test/HLFIR/bufferize-workshare.fir
create mode 100644 flang/test/Integration/OpenMP/workshare-array-array-assign.f90
create mode 100644 flang/test/Integration/OpenMP/workshare-axpy.f90
create mode 100644 flang/test/Integration/OpenMP/workshare-scalar-array-assign.f90
create mode 100644 flang/test/Integration/OpenMP/workshare-scalar-array-mul.f90
create mode 100644 flang/test/Transforms/OpenMP/should-use-workshare-lowering.mlir
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp b/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp
index 07794828fce267..1848dbe2c7a2c2 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp
@@ -26,6 +26,7 @@
#include "flang/Optimizer/HLFIR/HLFIRDialect.h"
#include "flang/Optimizer/HLFIR/HLFIROps.h"
#include "flang/Optimizer/HLFIR/Passes.h"
+#include "flang/Optimizer/OpenMP/Passes.h"
#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
#include "mlir/IR/Dominance.h"
#include "mlir/IR/PatternMatch.h"
@@ -792,7 +793,8 @@ struct ElementalOpConversion
// Generate a loop nest looping around the fir.elemental shape and clone
// fir.elemental region inside the inner loop.
hlfir::LoopNest loopNest =
- hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered());
+ hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered(),
+ flangomp::shouldUseWorkshareLowering(elemental));
auto insPt = builder.saveInsertionPoint();
builder.setInsertionPointToStart(loopNest.body);
auto yield = hlfir::inlineElementalOp(loc, builder, elemental,
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
index 166649d955dabd..a0160b233e3cd1 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
@@ -20,6 +20,7 @@
#include "flang/Optimizer/HLFIR/HLFIRDialect.h"
#include "flang/Optimizer/HLFIR/HLFIROps.h"
#include "flang/Optimizer/HLFIR/Passes.h"
+#include "flang/Optimizer/OpenMP/Passes.h"
#include "flang/Optimizer/Transforms/Utils.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/IR/Dominance.h"
@@ -482,7 +483,8 @@ llvm::LogicalResult ElementalAssignBufferization::matchAndRewrite(
// Generate a loop nest looping around the hlfir.elemental shape and clone
// hlfir.elemental region inside the inner loop
hlfir::LoopNest loopNest =
- hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered());
+ hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered(),
+ flangomp::shouldUseWorkshareLowering(elemental));
builder.setInsertionPointToStart(loopNest.body);
auto yield = hlfir::inlineElementalOp(loc, builder, elemental,
loopNest.oneBasedIndices);
@@ -553,7 +555,8 @@ llvm::LogicalResult BroadcastAssignBufferization::matchAndRewrite(
llvm::SmallVector<mlir::Value> extents =
hlfir::getIndexExtents(loc, builder, shape);
hlfir::LoopNest loopNest =
- hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true);
+ hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true,
+ flangomp::shouldUseWorkshareLowering(assign));
builder.setInsertionPointToStart(loopNest.body);
auto arrayElement =
hlfir::getElementAt(loc, builder, lhs, loopNest.oneBasedIndices);
@@ -651,7 +654,8 @@ llvm::LogicalResult VariableAssignBufferization::matchAndRewrite(
llvm::SmallVector<mlir::Value> extents =
hlfir::getIndexExtents(loc, builder, shape);
hlfir::LoopNest loopNest =
- hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true);
+ hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true,
+ flangomp::shouldUseWorkshareLowering(assign));
builder.setInsertionPointToStart(loopNest.body);
auto rhsArrayElement =
hlfir::getElementAt(loc, builder, rhs, loopNest.oneBasedIndices);
diff --git a/flang/test/HLFIR/bufferize-workshare.fir b/flang/test/HLFIR/bufferize-workshare.fir
new file mode 100644
index 00000000000000..af5abb381937ec
--- /dev/null
+++ b/flang/test/HLFIR/bufferize-workshare.fir
@@ -0,0 +1,57 @@
+// RUN: fir-opt --bufferize-hlfir %s | FileCheck %s
+
+// CHECK-LABEL: func.func @simple(
+// CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<!fir.array<42xi32>>) {
+// CHECK: omp.parallel {
+// CHECK: omp.workshare {
+// CHECK: %[[VAL_1:.*]] = arith.constant 42 : index
+// CHECK: %[[VAL_2:.*]] = arith.constant 1 : i32
+// CHECK: %[[VAL_3:.*]] = fir.shape %[[VAL_1]] : (index) -> !fir.shape<1>
+// CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_0]](%[[VAL_3]]) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>)
+// CHECK: %[[VAL_5:.*]] = fir.allocmem !fir.array<42xi32> {bindc_name = ".tmp.array", uniq_name = ""}
+// CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]](%[[VAL_3]]) {uniq_name = ".tmp.array"} : (!fir.heap<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.heap<!fir.array<42xi32>>, !fir.heap<!fir.array<42xi32>>)
+// CHECK: %[[VAL_7:.*]] = arith.constant true
+// CHECK: %[[VAL_8:.*]] = arith.constant 1 : index
+// CHECK: omp.workshare.loop_wrapper {
+// CHECK: omp.loop_nest (%[[VAL_9:.*]]) : index = (%[[VAL_8]]) to (%[[VAL_1]]) inclusive step (%[[VAL_8]]) {
+// CHECK: %[[VAL_10:.*]] = hlfir.designate %[[VAL_4]]#0 (%[[VAL_9]]) : (!fir.ref<!fir.array<42xi32>>, index) -> !fir.ref<i32>
+// CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_10]] : !fir.ref<i32>
+// CHECK: %[[VAL_12:.*]] = arith.subi %[[VAL_11]], %[[VAL_2]] : i32
+// CHECK: %[[VAL_13:.*]] = hlfir.designate %[[VAL_6]]#0 (%[[VAL_9]]) : (!fir.heap<!fir.array<42xi32>>, index) -> !fir.ref<i32>
+// CHECK: hlfir.assign %[[VAL_12]] to %[[VAL_13]] temporary_lhs : i32, !fir.ref<i32>
+// CHECK: omp.yield
+// CHECK: }
+// CHECK: }
+// CHECK: %[[VAL_14:.*]] = fir.undefined tuple<!fir.heap<!fir.array<42xi32>>, i1>
+// CHECK: %[[VAL_15:.*]] = fir.insert_value %[[VAL_14]], %[[VAL_7]], [1 : index] : (tuple<!fir.heap<!fir.array<42xi32>>, i1>, i1) -> tuple<!fir.heap<!fir.array<42xi32>>, i1>
+// CHECK: %[[VAL_16:.*]] = fir.insert_value %[[VAL_15]], %[[VAL_6]]#0, [0 : index] : (tuple<!fir.heap<!fir.array<42xi32>>, i1>, !fir.heap<!fir.array<42xi32>>) -> tuple<!fir.heap<!fir.array<42xi32>>, i1>
+// CHECK: hlfir.assign %[[VAL_6]]#0 to %[[VAL_4]]#0 : !fir.heap<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>
+// CHECK: fir.freemem %[[VAL_6]]#0 : !fir.heap<!fir.array<42xi32>>
+// CHECK: omp.terminator
+// CHECK: }
+// CHECK: omp.terminator
+// CHECK: }
+// CHECK: return
+// CHECK: }
+func.func @simple(%arg: !fir.ref<!fir.array<42xi32>>) {
+ omp.parallel {
+ omp.workshare {
+ %c42 = arith.constant 42 : index
+ %c1_i32 = arith.constant 1 : i32
+ %shape = fir.shape %c42 : (index) -> !fir.shape<1>
+ %array:2 = hlfir.declare %arg(%shape) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>)
+ %elemental = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<42xi32> {
+ ^bb0(%i: index):
+ %ref = hlfir.designate %array#0 (%i) : (!fir.ref<!fir.array<42xi32>>, index) -> !fir.ref<i32>
+ %val = fir.load %ref : !fir.ref<i32>
+ %sub = arith.subi %val, %c1_i32 : i32
+ hlfir.yield_element %sub : i32
+ }
+ hlfir.assign %elemental to %array#0 : !hlfir.expr<42xi32>, !fir.ref<!fir.array<42xi32>>
+ hlfir.destroy %elemental : !hlfir.expr<42xi32>
+ omp.terminator
+ }
+ omp.terminator
+ }
+ return
+}
diff --git a/flang/test/Integration/OpenMP/workshare-array-array-assign.f90 b/flang/test/Integration/OpenMP/workshare-array-array-assign.f90
new file mode 100644
index 00000000000000..e9ec5d9175beb5
--- /dev/null
+++ b/flang/test/Integration/OpenMP/workshare-array-array-assign.f90
@@ -0,0 +1,34 @@
+!===----------------------------------------------------------------------===!
+! This directory can be used to add Integration tests involving multiple
+! stages of the compiler (for eg. from Fortran to LLVM IR). It should not
+! contain executable tests. We should only add tests here sparingly and only
+! if there is no other way to test. Repeat this message in each test that is
+! added to this directory and sub-directories.
+!===----------------------------------------------------------------------===!
+
+!RUN: %flang_fc1 -emit-hlfir -fopenmp -O3 %s -o - | FileCheck %s --check-prefix HLFIR
+!RUN: %flang_fc1 -emit-fir -fopenmp -O3 %s -o - | FileCheck %s --check-prefix FIR
+
+subroutine sb1(x, y)
+ integer :: x(:)
+ integer :: y(:)
+ !$omp parallel workshare
+ x = y
+ !$omp end parallel workshare
+end subroutine
+
+! HLFIR: omp.parallel {
+! HLFIR: omp.workshare {
+! HLFIR: hlfir.assign
+! HLFIR: omp.terminator
+! HLFIR: }
+! HLFIR: omp.terminator
+! HLFIR: }
+
+! FIR: omp.parallel {
+! FIR: omp.wsloop nowait {
+! FIR: omp.loop_nest
+! FIR: }
+! FIR: omp.barrier
+! FIR: omp.terminator
+! FIR: }
diff --git a/flang/test/Integration/OpenMP/workshare-axpy.f90 b/flang/test/Integration/OpenMP/workshare-axpy.f90
new file mode 100644
index 00000000000000..0c4524f8552906
--- /dev/null
+++ b/flang/test/Integration/OpenMP/workshare-axpy.f90
@@ -0,0 +1,57 @@
+!===----------------------------------------------------------------------===!
+! This directory can be used to add Integration tests involving multiple
+! stages of the compiler (for eg. from Fortran to LLVM IR). It should not
+! contain executable tests. We should only add tests here sparingly and only
+! if there is no other way to test. Repeat this message in each test that is
+! added to this directory and sub-directories.
+!===----------------------------------------------------------------------===!
+
+!RUN: %flang_fc1 -emit-hlfir -fopenmp -O3 %s -o - | FileCheck %s --check-prefix HLFIR
+!RUN: %flang_fc1 -emit-fir -fopenmp -O3 %s -o - | FileCheck %s --check-prefix FIR
+
+subroutine sb1(a, x, y, z)
+ integer :: a
+ integer :: x(:)
+ integer :: y(:)
+ integer :: z(:)
+ !$omp parallel workshare
+ z = a * x + y
+ !$omp end parallel workshare
+end subroutine
+
+! HLFIR: func.func @_QPsb1
+! HLFIR: omp.parallel {
+! HLFIR: omp.workshare {
+! HLFIR: hlfir.elemental {{.*}} unordered : (!fir.shape<1>) -> !hlfir.expr<?xi32> {
+! HLFIR: hlfir.elemental {{.*}} unordered : (!fir.shape<1>) -> !hlfir.expr<?xi32> {
+! HLFIR: hlfir.assign
+! HLFIR: hlfir.destroy
+! HLFIR: hlfir.destroy
+! HLFIR-NOT: omp.barrier
+! HLFIR: omp.terminator
+! HLFIR: }
+! HLFIR-NOT: omp.barrier
+! HLFIR: omp.terminator
+! HLFIR: }
+! HLFIR: return
+! HLFIR: }
+! HLFIR:}
+
+
+! FIR: func.func private @_workshare_copy_heap_Uxi32(%{{[a-z0-9]+}}: !fir.ref<!fir.heap<!fir.array<?xi32>>>, %{{[a-z0-9]+}}: !fir.ref<!fir.heap<!fir.array<?xi32>>>
+! FIR: func.func private @_workshare_copy_i32(%{{[a-z0-9]+}}: !fir.ref<i32>, %{{[a-z0-9]+}}: !fir.ref<i32>
+
+! FIR: func.func @_QPsb1
+! FIR: omp.parallel {
+! FIR: omp.single copyprivate(%9 -> @_workshare_copy_i32 : !fir.ref<i32>, %10 -> @_workshare_copy_heap_Uxi32 : !fir.ref<!fir.heap<!fir.array<?xi32>>>) {
+! FIR: fir.allocmem
+! FIR: omp.wsloop {
+! FIR: omp.loop_nest
+! FIR: omp.single nowait {
+! FIR: fir.call @_FortranAAssign
+! FIR: fir.freemem
+! FIR: omp.terminator
+! FIR: }
+! FIR: omp.barrier
+! FIR: omp.terminator
+! FIR: }
diff --git a/flang/test/Integration/OpenMP/workshare-scalar-array-assign.f90 b/flang/test/Integration/OpenMP/workshare-scalar-array-assign.f90
new file mode 100644
index 00000000000000..6c180cd639997c
--- /dev/null
+++ b/flang/test/Integration/OpenMP/workshare-scalar-array-assign.f90
@@ -0,0 +1,45 @@
+!===----------------------------------------------------------------------===!
+! This directory can be used to add Integration tests involving multiple
+! stages of the compiler (for eg. from Fortran to LLVM IR). It should not
+! contain executable tests. We should only add tests here sparingly and only
+! if there is no other way to test. Repeat this message in each test that is
+! added to this directory and sub-directories.
+!===----------------------------------------------------------------------===!
+
+!RUN: %flang_fc1 -emit-hlfir -fopenmp -O3 %s -o - | FileCheck %s --check-prefix HLFIR
+!RUN: %flang_fc1 -emit-fir -fopenmp -O3 %s -o - | FileCheck %s --check-prefix FIR
+
+subroutine sb1(a, x)
+ integer :: a
+ integer :: x(:)
+ !$omp parallel workshare
+ x = a
+ !$omp end parallel workshare
+end subroutine
+
+! HLFIR: omp.parallel {
+! HLFIR: omp.workshare {
+! HLFIR: %[[SCALAR:.*]] = fir.load %1#0 : !fir.ref<i32>
+! HLFIR: hlfir.assign %[[SCALAR]] to
+! HLFIR: omp.terminator
+! HLFIR: }
+! HLFIR: omp.terminator
+! HLFIR: }
+
+! FIR: omp.parallel {
+! FIR: %[[SCALAR_ALLOCA:.*]] = fir.alloca i32
+! FIR: omp.single copyprivate(%[[SCALAR_ALLOCA]] -> @_workshare_copy_i32 : !fir.ref<i32>) {
+! FIR: %[[SCALAR_LOAD:.*]] = fir.load %{{.*}} : !fir.ref<i32>
+! FIR: fir.store %[[SCALAR_LOAD]] to %[[SCALAR_ALLOCA]] : !fir.ref<i32>
+! FIR: omp.terminator
+! FIR: }
+! FIR: %[[SCALAR_RELOAD:.*]] = fir.load %[[SCALAR_ALLOCA]] : !fir.ref<i32>
+! FIR: %6:3 = fir.box_dims %3, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
+! FIR: omp.wsloop nowait {
+! FIR: omp.loop_nest (%arg2) : index = (%c1) to (%6#1) inclusive step (%c1) {
+! FIR: fir.store %[[SCALAR_RELOAD]]
+! FIR: omp.yield
+! FIR: }
+! FIR: }
+! FIR: omp.barrier
+! FIR: omp.terminator
diff --git a/flang/test/Integration/OpenMP/workshare-scalar-array-mul.f90 b/flang/test/Integration/OpenMP/workshare-scalar-array-mul.f90
new file mode 100644
index 00000000000000..9b8ef66b48f47d
--- /dev/null
+++ b/flang/test/Integration/OpenMP/workshare-scalar-array-mul.f90
@@ -0,0 +1,65 @@
+!===----------------------------------------------------------------------===!
+! This directory can be used to add Integration tests involving multiple
+! stages of the compiler (for eg. from Fortran to LLVM IR). It should not
+! contain executable tests. We should only add tests here sparingly and only
+! if there is no other way to test. Repeat this message in each test that is
+! added to this directory and sub-directories.
+!===----------------------------------------------------------------------===!
+
+!RUN: %flang_fc1 -emit-hlfir -fopenmp -O3 %s -o - | FileCheck %s --check-prefix HLFIR-O3
+!RUN: %flang_fc1 -emit-fir -fopenmp -O3 %s -o - | FileCheck %s --check-prefix FIR-O3
+
+!RUN: %flang_fc1 -emit-hlfir -fopenmp -O0 %s -o - | FileCheck %s --check-prefix HLFIR-O0
+!RUN: %flang_fc1 -emit-fir -fopenmp -O0 %s -o - | FileCheck %s --check-prefix FIR-O0
+
+program test
+ real :: arr_01(10)
+ !$omp parallel workshare
+ arr_01 = arr_01*2
+ !$omp end parallel workshare
+end program
+
+! HLFIR-O3: omp.parallel {
+! HLFIR-O3: omp.workshare {
+! HLFIR-O3: hlfir.elemental
+! HLFIR-O3: hlfir.assign
+! HLFIR-O3: hlfir.destroy
+! HLFIR-O3: omp.terminator
+! HLFIR-O3: omp.terminator
+
+! FIR-O3: omp.parallel {
+! FIR-O3: omp.wsloop nowait {
+! FIR-O3: omp.loop_nest
+! FIR-O3: omp.barrier
+! FIR-O3: omp.terminator
+
+! HLFIR-O0: omp.parallel {
+! HLFIR-O0: omp.workshare {
+! HLFIR-O0: hlfir.elemental
+! HLFIR-O0: hlfir.assign
+! HLFIR-O0: hlfir.destroy
+! HLFIR-O0: omp.terminator
+! HLFIR-O0: omp.terminator
+
+! Check the copyprivate copy function
+! FIR-O0: func.func private @_workshare_copy_heap_{{.*}}(%[[DST:.*]]: {{.*}}, %[[SRC:.*]]: {{.*}})
+! FIR-O0: fir.load %[[SRC]]
+! FIR-O0: fir.store {{.*}} to %[[DST]]
+
+! Check that we properly handle the temporary array
+! FIR-O0: omp.parallel {
+! FIR-O0: %[[CP:.*]] = fir.alloca !fir.heap<!fir.array<10xf32>>
+! FIR-O0: omp.single copyprivate(%[[CP]] -> @_workshare_copy_heap_
+! FIR-O0: fir.allocmem
+! FIR-O0: fir.store
+! FIR-O0: omp.terminator
+! FIR-O0: fir.load %[[CP]]
+! FIR-O0: omp.wsloop {
+! FIR-O0: omp.loop_nest
+! FIR-O0: omp.yield
+! FIR-O0: omp.single nowait {
+! FIR-O0: fir.call @_FortranAAssign
+! FIR-O0: fir.freemem
+! FIR-O0: omp.terminator
+! FIR-O0: omp.barrier
+! FIR-O0: omp.terminator
diff --git a/flang/test/Transforms/OpenMP/should-use-workshare-lowering.mlir b/flang/test/Transforms/OpenMP/should-use-workshare-lowering.mlir
new file mode 100644
index 00000000000000..91b08123cce422
--- /dev/null
+++ b/flang/test/Transforms/OpenMP/should-use-workshare-lowering.mlir
@@ -0,0 +1,162 @@
+// RUN: fir-opt --bufferize-hlfir %s | FileCheck %s
+
+// Checks that we correctly identify when to use the lowering to
+// omp.workshare.loop_wrapper
+
+// CHECK-LABEL: @should_parallelize_0
+// CHECK: omp.workshare.loop_wrapper
+func.func @should_parallelize_0(%arg: !fir.ref<!fir.array<42xi32>>, %idx : index) {
+ omp.workshare {
+ %c42 = arith.constant 42 : index
+ %c1_i32 = arith.constant 1 : i32
+ %shape = fir.shape %c42 : (index) -> !fir.shape<1>
+ %array:2 = hlfir.declare %arg(%shape) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>)
+ %elemental = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<42xi32> {
+ ^bb0(%i: index):
+ hlfir.yield_element %c1_i32 : i32
+ }
+ hlfir.assign %elemental to %array#0 : !hlfir.expr<42xi32>, !fir.ref<!fir.array<42xi32>>
+ hlfir.destroy %elemental : !hlfir.expr<42xi32>
+ omp.terminator
+ }
+ return
+}
+
+// CHECK-LABEL: @should_parallelize_1
+// CHECK: omp.workshare.loop_wrapper
+func.func @should_parallelize_1(%arg: !fir.ref<!fir.array<42xi32>>, %idx : index) {
+ omp.parallel {
+ omp.workshare {
+ %c42 = arith.constant 42 : index
+ %c1_i32 = arith.constant 1 : i32
+ %shape = fir.shape %c42 : (index) -> !fir.shape<1>
+ %array:2 = hlfir.declare %arg(%shape) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>)
+ %elemental = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<42xi32> {
+ ^bb0(%i: index):
+ hlfir.yield_element %c1_i32 : i32
+ }
+ hlfir.assign %elemental to %array#0 : !hlfir.expr<42xi32>, !fir.ref<!fir.array<42xi32>>
+ hlfir.destroy %elemental : !hlfir.expr<42xi32>
+ omp.terminator
+ }
+ omp.terminator
+ }
+ return
+}
+
+
+// CHECK-LABEL: @should_not_parallelize_0
+// CHECK-NOT: omp.workshare.loop_wrapper
+func.func @should_not_parallelize_0(%arg: !fir.ref<!fir.array<42xi32>>, %idx : index) {
+ omp.workshare {
+ omp.single {
+ %c42 = arith.constant 42 : index
+ %c1_i32 = arith.constant 1 : i32
+ %shape = fir.shape %c42 : (index) -> !fir.shape<1>
+ %array:2 = hlfir.declare %arg(%shape) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>)
+ %elemental = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<42xi32> {
+ ^bb0(%i: index):
+ hlfir.yield_element %c1_i32 : i32
+ }
+ hlfir.assign %elemental to %array#0 : !hlfir.expr<42xi32>, !fir.ref<!fir.array<42xi32>>
+ hlfir.destroy %elemental : !hlfir.expr<42xi32>
+ omp.terminator
+ }
+ omp.terminator
+ }
+ return
+}
+
+// CHECK-LABEL: @should_not_parallelize_1
+// CHECK-NOT: omp.workshare.loop_wrapper
+func.func @should_not_parallelize_1(%arg: !fir.ref<!fir.array<42xi32>>, %idx : index) {
+ omp.workshare {
+ omp.critical {
+ %c42 = arith.constant 42 : index
+ %c1_i32 = arith.constant 1 : i32
+ %shape = fir.shape %c42 : (index) -> !fir.shape<1>
+ %array:2 = hlfir.declare %arg(%shape) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>)
+ %elemental = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<42xi32> {
+ ^bb0(%i: index):
+ hlfir.yield_element %c1_i32 : i32
+ }
+ hlfir.assign %elemental to %array#0 : !hlfir.expr<42xi32>, !fir.ref<!fir.array<42xi32>>
+ hlfir.destroy %elemental : !hlfir.expr<42xi32>
+ omp.terminator
+ }
+ omp.terminator
+ }
+ return
+}
+
+// CHECK-LABEL: @should_not_parallelize_2
+// CHECK-NOT: omp.workshare.loop_wrapper
+func.func @should_not_parallelize_2(%arg: !fir.ref<!fir.array<42xi32>>, %idx : index) {
+ omp.workshare {
+ omp.parallel {
+ %c42 = arith.constant 42 : index
+ %c1_i32 = arith.constant 1 : i32
+ %shape = fir.shape %c42 : (index) -> !fir.shape<1>
+ %array:2 = hlfir.declare %arg(%shape) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>)
+ %elemental = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<42xi32> {
+ ^bb0(%i: index):
+ hlfir.yield_element %c1_i32 : i32
+ }
+ hlfir.assign %elemental to %array#0 : !hlfir.expr<42xi32>, !fir.ref<!fir.array<42xi32>>
+ hlfir.destroy %elemental : !hlfir.expr<42xi32>
+ omp.terminator
+ }
+ omp.terminator
+ }
+ return
+}
+
+// CHECK-LABEL: @should_not_parallelize_3
+// CHECK-NOT: omp.workshare.loop_wrapper
+func.func @should_not_parallelize_3(%arg: !fir.ref<!fir.array<42xi32>>, %idx : index) {
+ omp.workshare {
+ omp.parallel {
+ omp.workshare {
+ omp.parallel {
+ %c42 = arith.constant 42 : index
+ %c1_i32 = arith.constant 1 : i32
+ %shape = fir.shape %c42 : (index) -> !fir.shape<1>
+ %array:2 = hlfir.declare %arg(%shape) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>)
+ %elemental = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<42xi32> {
+ ^bb0(%i: index):
+ hlfir.yield_element %c1_i32 : i32
+ }
+ hlfir.assign %elemental to %array#0 : !hlfir.expr<42xi32>, !fir.ref<!fir.array<42xi32>>
+ hlfir.destroy %elemental : !hlfir.expr<42xi32>
+ omp.terminator
+ }
+ omp.terminator
+ }
+ omp.terminator
+ }
+ omp.terminator
+ }
+ return
+}
+
+// CHECK-LABEL: @should_not_parallelize_4
+// CHECK-NOT: omp.workshare.loop_wrapper
+func.func @should_not_parallelize_4(%arg: !fir.ref<!fir.array<42xi32>>, %idx : index) {
+ omp.workshare {
+ ^bb1:
+ %c42 = arith.constant 42 : index
+ %c1_i32 = arith.constant 1 : i32
+ %shape = fir.shape %c42 : (index) -> !fir.shape<1>
+ %array:2 = hlfir.declare %arg(%shape) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>)
+ %elemental = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<42xi32> {
+ ^bb0(%i: index):
+ hlfir.yield_element %c1_i32 : i32
+ }
+ hlfir.assign %elemental to %array#0 : !hlfir.expr<42xi32>, !fir.ref<!fir.array<42xi32>>
+ hlfir.destroy %elemental : !hlfir.expr<42xi32>
+ cf.br ^bb2
+ ^bb2:
+ omp.terminator
+ }
+ return
+}
More information about the llvm-branch-commits
mailing list