[flang-commits] [flang] [mlir] [flang][OpenMP] Uncotionally create `after_alloca` block in `allocate… (PR #123168)
Kareem Ergawy via flang-commits
flang-commits at lists.llvm.org
Thu Jan 16 00:32:34 PST 2025
https://github.com/ergawy created https://github.com/llvm/llvm-project/pull/123168
…PrivateVars`
While https://github.com/llvm/llvm-project/pull/122866 fixed some issues, it introduced a regression in worksharing loops. The new bug comes from the fact that we now conditionally created the `after_alloca` block based on the number of predecessors of the alloca insertion point. This is unneccessary, we can just alway create the block. If we do this, we respect the post condtions expected after calling `allocatePrivateVars` (i.e. that the `afterAlloca` block has a single predecessor.
>From d207e8f3f878a595520d49deaec83495a8aec8cf Mon Sep 17 00:00:00 2001
From: ergawy <kareem.ergawy at amd.com>
Date: Thu, 16 Jan 2025 02:27:16 -0600
Subject: [PATCH] [flang][OpenMP] Uncotionally create `after_alloca` block in
`allocatePrivateVars`
While https://github.com/llvm/llvm-project/pull/122866 fixed some
issues, it introduced a regression in worksharing loops. The new bug
comes from the fact that we now conditionally created the `after_alloca`
block based on the number of predecessors of the alloca insertion point.
This is unneccessary, we can just alway create the block. If we do this,
we respect the post condtions expected after calling
`allocatePrivateVars` (i.e. that the `afterAlloca` block has a single
predecessor.
---
.../parallel-private-reduction-worstcase.f90 | 5 +-
.../OpenMP/OpenMPToLLVMIRTranslation.cpp | 8 ++-
mlir/test/Target/LLVMIR/openmp-llvm.mlir | 4 +-
.../openmp-parallel-reduction-multiblock.mlir | 4 +-
.../openmp-reduction-array-sections.mlir | 7 ++-
.../LLVMIR/openmp-reduction-init-arg.mlir | 6 ++-
.../LLVMIR/openmp-reduction-sections.mlir | 8 ++-
.../Target/LLVMIR/openmp-simd-private.mlir | 3 ++
.../openmp-target-use-device-nested.mlir | 3 ++
.../openmp-wsloop-test-block-structure.mlir | 54 +++++++++++++++++++
10 files changed, 89 insertions(+), 13 deletions(-)
create mode 100644 mlir/test/Target/LLVMIR/openmp-wsloop-test-block-structure.mlir
diff --git a/flang/test/Integration/OpenMP/parallel-private-reduction-worstcase.f90 b/flang/test/Integration/OpenMP/parallel-private-reduction-worstcase.f90
index fe3a326702e52a..4fa1c34e4646bb 100644
--- a/flang/test/Integration/OpenMP/parallel-private-reduction-worstcase.f90
+++ b/flang/test/Integration/OpenMP/parallel-private-reduction-worstcase.f90
@@ -96,9 +96,12 @@ subroutine worst_case(a, b, c, d)
! CHECK: omp.region.cont13: ; preds = %omp.private.copy16
! CHECK-NEXT: %{{.*}} = phi ptr
+! CHECK-NEXT: br label %omp.region.after_alloca
+
+! CHECK: omp.region.after_alloca:
! CHECK-NEXT: br label %omp.par.region
-! CHECK: omp.par.region: ; preds = %omp.region.cont13
+! CHECK: omp.par.region: ; preds = %omp.region.after_alloca
! CHECK-NEXT: br label %omp.reduction.init
! CHECK: omp.reduction.init: ; preds = %omp.par.region
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index fdc9cee5b5dca7..5d6223960d4286 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -1350,11 +1350,9 @@ allocatePrivateVars(llvm::IRBuilderBase &builder,
// Allocate private vars
llvm::BranchInst *allocaTerminator =
llvm::cast<llvm::BranchInst>(allocaIP.getBlock()->getTerminator());
- if (allocaTerminator->getNumSuccessors() != 1) {
- splitBB(llvm::OpenMPIRBuilder::InsertPointTy(
- allocaIP.getBlock(), allocaTerminator->getIterator()),
- true, "omp.region.after_alloca");
- }
+ splitBB(llvm::OpenMPIRBuilder::InsertPointTy(allocaIP.getBlock(),
+ allocaTerminator->getIterator()),
+ true, "omp.region.after_alloca");
llvm::IRBuilderBase::InsertPointGuard guard(builder);
// Update the allocaTerminator in case the alloca block was split above.
diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
index a5e64fc3327545..390ecabaef21b3 100644
--- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
@@ -2766,7 +2766,9 @@ llvm.func @task(%arg0 : !llvm.ptr) {
// CHECK: %[[VAL_19:.*]] = load i32, ptr %[[VAL_14]], align 4
// CHECK: store i32 %[[VAL_19]], ptr %[[VAL_15]], align 4
// CHECK: br label %[[VAL_20:.*]]
-// CHECK: task.body: ; preds = %omp.private.copy
+// CHECK: [[VAL_20]]:
+// CHECK: br label %task.body
+// CHECK: task.body: ; preds = %[[VAL_20]]
// CHECK: br label %omp.task.region
// CHECK: omp.task.region: ; preds = %task.body
// CHECK: call void @foo(ptr %[[VAL_15]])
diff --git a/mlir/test/Target/LLVMIR/openmp-parallel-reduction-multiblock.mlir b/mlir/test/Target/LLVMIR/openmp-parallel-reduction-multiblock.mlir
index 75161bac2faf42..d2e394b2cf6a8d 100644
--- a/mlir/test/Target/LLVMIR/openmp-parallel-reduction-multiblock.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-parallel-reduction-multiblock.mlir
@@ -56,8 +56,10 @@ llvm.func @missordered_blocks_(%arg0: !llvm.ptr {fir.bindc_name = "x"}, %arg1: !
// CHECK: %[[VAL_20:.*]] = alloca ptr, align 8
// CHECK: %[[VAL_21:.*]] = alloca ptr, align 8
// CHECK: %[[VAL_22:.*]] = alloca [2 x ptr], align 8
+// CHECK: br label %[[AFTER_ALLOC:omp.region.after_alloca]]
+// CHECK: [[AFTER_ALLOC]]: ; preds = %[[PAR_ENTRY]]
// CHECK: br label %[[VAL_23:omp.par.region]]
-// CHECK: [[VAL_23]]: ; preds = %[[PAR_ENTRY]]
+// CHECK: [[VAL_23]]: ; preds = %[[AFTER_ALLOC]]
// CHECK: br label %[[VAL_42:.*]]
// CHECK: [[RED_INIT:omp.reduction.init]]:
// CHECK: br label %[[VAL_25:omp.reduction.neutral]]
diff --git a/mlir/test/Target/LLVMIR/openmp-reduction-array-sections.mlir b/mlir/test/Target/LLVMIR/openmp-reduction-array-sections.mlir
index 912d5568c5f262..d6ed3086969fb0 100644
--- a/mlir/test/Target/LLVMIR/openmp-reduction-array-sections.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-reduction-array-sections.mlir
@@ -91,9 +91,12 @@ llvm.func @sectionsreduction_(%arg0: !llvm.ptr {fir.bindc_name = "x"}) attribute
// CHECK: %[[VAL_14:.*]] = alloca [1 x ptr], align 8
// CHECK: br label %[[VAL_15:.*]]
-// CHECK: omp.par.region: ; preds = %[[PAR_ENTRY]]
+// CHECK: [[VAL_15]]:
+// CHECK: br label %[[PAR_REG:omp.par.region]]
+
+// CHECK: [[PAR_REG]]: ; preds = %[[VAL_15]]
// CHECK: br label %[[VAL_18:.*]]
-// CHECK: omp.par.region1: ; preds = %[[VAL_15]]
+// CHECK: omp.par.region1: ; preds = %[[PAR_REG]]
// CHECK: %[[VAL_19:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, i64 1, align 8
// CHECK: br label %[[VAL_22:.*]]
diff --git a/mlir/test/Target/LLVMIR/openmp-reduction-init-arg.mlir b/mlir/test/Target/LLVMIR/openmp-reduction-init-arg.mlir
index 7f2424381e846e..8d329bd8ff817c 100644
--- a/mlir/test/Target/LLVMIR/openmp-reduction-init-arg.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-reduction-init-arg.mlir
@@ -63,7 +63,11 @@ module {
// CHECK: %[[VAL_23:.*]] = alloca ptr, align 8
// CHECK: %[[VAL_24:.*]] = alloca [2 x ptr], align 8
// CHECK: br label %[[VAL_25:.*]]
-// CHECK: omp.par.region: ; preds = %[[PAR_ENTRY]]
+
+// CHECK: [[VAL_25]]:
+// CHECK: br label %[[PAR_REG:omp.par.region]]
+
+// CHECK: [[PAR_REG]]: ; preds = %[[VAL_25]]
// CHECK: br label %[[INIT_LABEL:.*]]
// CHECK: [[INIT_LABEL]]:
// CHECK: %[[VAL_20:.*]] = load { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[VAL_13]], align 8
diff --git a/mlir/test/Target/LLVMIR/openmp-reduction-sections.mlir b/mlir/test/Target/LLVMIR/openmp-reduction-sections.mlir
index 05af32622246a6..de3b997feb674f 100644
--- a/mlir/test/Target/LLVMIR/openmp-reduction-sections.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-reduction-sections.mlir
@@ -50,9 +50,13 @@ llvm.func @sections_(%arg0: !llvm.ptr {fir.bindc_name = "x"}) attributes {fir.in
// CHECK: %[[VAL_20:.*]] = alloca float, align 4
// CHECK: %[[VAL_21:.*]] = alloca [1 x ptr], align 8
// CHECK: br label %[[VAL_22:.*]]
-// CHECK: omp.par.region: ; preds = %[[PAR_ENTRY]]
+
+// CHECK: [[VAL_22]]:
+// CHECK: br label %[[PAR_REG:omp.par.region]]
+
+// CHECK: [[PAR_REG]]: ; preds = %[[VAL_22]]
// CHECK: br label %[[VAL_25:.*]]
-// CHECK: omp.par.region1: ; preds = %[[VAL_22]]
+// CHECK: omp.par.region1: ; preds = %[[PAR_REG]]
// CHECK: br label %[[VAL_26:.*]]
// CHECK: [[RED_INIT:omp.reduction.init]]:
diff --git a/mlir/test/Target/LLVMIR/openmp-simd-private.mlir b/mlir/test/Target/LLVMIR/openmp-simd-private.mlir
index 09d76f8edd0074..61542aa1aa4d77 100644
--- a/mlir/test/Target/LLVMIR/openmp-simd-private.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-simd-private.mlir
@@ -12,6 +12,9 @@ omp.private {type = private} @i_privatizer : !llvm.ptr alloc {
// CHECK: %{{.*}} = alloca i32, i64 1, align 4
// CHECK: %[[DUMMY:.*]] = alloca float, i64 1, align 4
// CHECK: %[[PRIV_I:.*]] = alloca i32, i64 1, align 4
+// CHECK: br label %[[LATE_ALLOC:.*]]
+
+// CHECK: [[LATE_ALLOC]]:
// CHECK: br label %[[AFTER_ALLOC:.*]]
// CHECK: [[AFTER_ALLOC]]:
diff --git a/mlir/test/Target/LLVMIR/openmp-target-use-device-nested.mlir b/mlir/test/Target/LLVMIR/openmp-target-use-device-nested.mlir
index 3872d908e7a201..ff580e5fea634b 100644
--- a/mlir/test/Target/LLVMIR/openmp-target-use-device-nested.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-target-use-device-nested.mlir
@@ -12,6 +12,9 @@
// CHECK-NEXT: br i1 %[[VAL_7]], label %[[VAL_8:.*]], label %[[VAL_9:.*]]
// CHECK: user_code.entry: ; preds = %[[VAL_10:.*]]
// CHECK-NEXT: %[[VAL_11:.*]] = load ptr, ptr %[[VAL_3]], align 8
+// CHECK-NEXT: br label %[[AFTER_ALLOC:.*]]
+
+// CHECK: [[AFTER_ALLOC]]:
// CHECK-NEXT: br label %[[VAL_12:.*]]
// CHECK: [[VAL_12]]:
diff --git a/mlir/test/Target/LLVMIR/openmp-wsloop-test-block-structure.mlir b/mlir/test/Target/LLVMIR/openmp-wsloop-test-block-structure.mlir
new file mode 100644
index 00000000000000..19ae425e20403b
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/openmp-wsloop-test-block-structure.mlir
@@ -0,0 +1,54 @@
+// RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s
+
+// Tests regression uncovered by "1009/1009_0029.f90" (from the Fujitsu test
+// suite). This test replicates a simplified version of the block structure
+// produced by the Fujitsu test.
+
+llvm.func @test_block_structure() {
+ %i1 = llvm.mlir.constant(1 : index) : i1
+ %i64 = llvm.mlir.constant(1 : index) : i64
+ llvm.br ^bb1(%i64, %i64 : i64, i64)
+
+^bb1(%20: i64, %21: i64): // 2 preds: ^bb0, ^bb5
+ llvm.cond_br %i1, ^bb2, ^bb6
+
+^bb2: // pred: ^bb1
+ llvm.br ^bb3(%i64, %i64 : i64, i64)
+
+^bb3(%25: i64, %26: i64): // 2 preds: ^bb2, ^bb4
+ llvm.cond_br %i1, ^bb4, ^bb5
+
+^bb4: // pred: ^bb3
+ omp.wsloop {
+ omp.loop_nest (%arg0) : i64 = (%i64) to (%i64) inclusive step (%i64) {
+ omp.yield
+ }
+ }
+ llvm.br ^bb1(%i64, %i64 : i64, i64)
+
+^bb5: // pred: ^bb3
+ llvm.br ^bb1(%i64, %i64 : i64, i64)
+
+^bb6: // pred: ^bb1
+ llvm.return
+}
+
+// CHECK: define void @test_block_structure
+// CHECK: br label %[[AFTER_ALLOCA:.*]]
+
+// CHECK: [[AFTER_ALLOCA:]]:
+// CHECK: br label %[[BB1:.*]]
+
+// CHECK: [[BB1:]]:
+// CHECK: %{{.*}} = phi i64
+// CHECK: br i1 true, label %[[BB2:.*]], label %{{.*}}
+
+// CHECK: [[BB2]]:
+// CHECK: br label %[[BB3:.*]]
+
+// CHECK: [[BB3]]:
+// CHECK: %{{.*}} = phi i64
+// CHECK: br i1 true, label %[[BB4:.*]], label %{{.*}}
+
+// CHECK: [[BB4]]:
+// CHECK: br label %omp_loop.preheader
More information about the flang-commits
mailing list