[llvm-branch-commits] [flang] [llvm] [mlir] [OpenMP][OMPIRBuilder] Hoist static parallel region allocas to the entry block on the CPU (PR #174314)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Sat Jan 3 23:27:52 PST 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-mlir-llvm
@llvm/pr-subscribers-flang-openmp
Author: Kareem Ergawy (ergawy)
<details>
<summary>Changes</summary>
As a follow-up to #171597, this PR hoists static allocas in a parallel region to the entry block of the region's outlined function. #171597 introduced the main mechanism for this and enabled it for the GPU; this PR enables it for the CPU (host) as well.
---
Full diff: https://github.com/llvm/llvm-project/pull/174314.diff
6 Files Affected:
- (modified) flang/test/Integration/OpenMP/copyprivate.f90 (+1-1)
- (modified) flang/test/Integration/OpenMP/private-global.f90 (+4-3)
- (modified) llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp (+1-1)
- (modified) mlir/test/Target/LLVMIR/openmp-llvm.mlir (+6-3)
- (modified) mlir/test/Target/LLVMIR/openmp-parallel-reduction-init.mlir (+1-1)
- (modified) mlir/test/Target/LLVMIR/openmp-reduction-array-sections.mlir (+1-1)
``````````diff
diff --git a/flang/test/Integration/OpenMP/copyprivate.f90 b/flang/test/Integration/OpenMP/copyprivate.f90
index e0e4abe015438..43c8612d0a1da 100644
--- a/flang/test/Integration/OpenMP/copyprivate.f90
+++ b/flang/test/Integration/OpenMP/copyprivate.f90
@@ -37,9 +37,9 @@
!CHECK: %[[TID_ADDR:.*]] = alloca i32, align 4
!CHECK: %[[I:.*]] = alloca i32, align 4
!CHECK: %[[J:.*]] = alloca i32, align 4
+!CHECK: %[[DID_IT:.*]] = alloca i32
!CHECK: br label %[[OMP_REDUCTION_INIT:.*]]
-!CHECK: %[[DID_IT:.*]] = alloca i32
!CHECK: store i32 0, ptr %[[DID_IT]]
!CHECK: %[[THREAD_NUM1:.*]] = call i32 @__kmpc_global_thread_num(ptr @[[LOC:.*]])
!CHECK: %[[RET:.*]] = call i32 @__kmpc_single({{.*}})
diff --git a/flang/test/Integration/OpenMP/private-global.f90 b/flang/test/Integration/OpenMP/private-global.f90
index 8f8de8cdedd3b..978a8fa3c8205 100644
--- a/flang/test/Integration/OpenMP/private-global.f90
+++ b/flang/test/Integration/OpenMP/private-global.f90
@@ -22,15 +22,16 @@ program bug
! CHECK: store i32 %[[VAL_10]], ptr %[[VAL_9]], align 4
! CHECK: %[[VAL_12:.*]] = load i32, ptr %[[VAL_9]], align 4
! CHECK: %[[PRIV_BOX_ALLOC:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, align 8
-! ...
-! check that we use the private copy of table for the assignment
-! CHECK: omp.par.region1:
! CHECK: %[[ELEMENTAL_TMP:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, align 8
+! CHECK: %[[ELEMENTAL_TMP_2:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, align 8
! CHECK: %[[TABLE_BOX_ADDR:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, align 8
! CHECK: %[[BOXED_FIFTY:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8 }, align 8
! CHECK: %[[FIFTY:.*]] = alloca i32, i64 1, align 4
! CHECK: %[[INTERMEDIATE:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, align 8
! CHECK: %[[TABLE_BOX_ADDR2:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, i64 1, align 8
+! ...
+! check that we use the private copy of table for the assignment
+! CHECK: omp.par.region1:
! CHECK: call void @llvm.memcpy.p0.p0.i32(ptr %[[INTERMEDIATE]], ptr %[[PRIV_BOX_ALLOC]], i32 {{4[48]}}, i1 false)
! CHECK: store i32 50, ptr %[[FIFTY]], align 4
! CHECK: %[[FIFTY_BOX_VAL:.*]] = insertvalue { ptr, i64, i32, i8, i8, i8, i8 } { ptr undef, i64 4, i32 20240719, i8 0, i8 9, i8 0, i8 0 }, ptr %[[FIFTY]], 0
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index f764b644edc69..5e4d4c7e49776 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -1732,7 +1732,7 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createParallel(
hostParallelCallback(this, OutlinedFn, OuterFn, Ident, IfCondition,
PrivTID, PrivTIDAddr, ToBeDeletedVec);
};
- // TODO: fix-up allocations on the host as well?
+ OI.FixUpNonEntryAllocas = true;
}
OI.OuterAllocaBB = OuterAllocaBlock;
diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
index 6011dc6604478..0b8a9765a4b87 100644
--- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
@@ -3513,12 +3513,16 @@ llvm.func @distribute_wsloop(%lb : i32, %ub : i32, %step : i32) {
// CHECK: call void{{.*}}@__kmpc_fork_call({{.*}}, ptr @[[OUTLINED_PARALLEL:.*]],
// CHECK: define internal void @[[OUTLINED_PARALLEL]]
-// CHECK: distribute.alloca:
+// CHECK: omp.par.entry:
+// CHECK: %[[TID_LOCAL:.*]] = alloca i32, align 4
// CHECK: %[[LASTITER:.*]] = alloca i32
// CHECK: %[[LB:.*]] = alloca i32
// CHECK: %[[UB:.*]] = alloca i32
// CHECK: %[[STRIDE:.*]] = alloca i32
-// CHECK: br label %[[AFTER_ALLOCA:.*]]
+// CHECK: %[[DIST_UB:.*]] = alloca i32
+
+// CHECK: distribute.alloca:
+// CHECK-NEXT: br label %[[AFTER_ALLOCA:.*]]
// CHECK: [[AFTER_ALLOCA]]:
// CHECK: br label %[[DISTRIBUTE_BODY:.*]]
@@ -3539,7 +3543,6 @@ llvm.func @distribute_wsloop(%lb : i32, %ub : i32, %step : i32) {
// CHECK: store i32 %[[TRIPCOUNT]], ptr %[[UB]]
// CHECK: store i32 1, ptr %[[STRIDE]]
// CHECK: %[[TID:.*]] = call i32 @__kmpc_global_thread_num({{.*}})
-// CHECK: %[[DIST_UB:.*]] = alloca i32
// CHECK: call void @__kmpc_dist_for_static_init_{{.*}}(ptr @{{.*}}, i32 %[[TID]], i32 34, ptr %[[LASTITER]], ptr %[[LB]], ptr %[[UB]], ptr %[[DIST_UB]], ptr %[[STRIDE]], i32 1, i32 0)
// -----
diff --git a/mlir/test/Target/LLVMIR/openmp-parallel-reduction-init.mlir b/mlir/test/Target/LLVMIR/openmp-parallel-reduction-init.mlir
index 7e90ba0f0d937..4a93ed39eb811 100644
--- a/mlir/test/Target/LLVMIR/openmp-parallel-reduction-init.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-parallel-reduction-init.mlir
@@ -36,12 +36,12 @@ llvm.func @use_reduction() attributes {fir.bindc_name = "test"} {
// CHECK: omp.par.entry:
// CHECK: %[[RED_REGION_ALLOC:.*]] = alloca { ptr }, i64 1, align 8
+// CHECK: %[[PAR_REG_VAL:.*]] = alloca { ptr }, i64 1, align 8
// CHECK: omp.par.region:
// CHECK: br label %omp.par.region1
// CHECK: omp.par.region1:
-// CHECK: %[[PAR_REG_VAL:.*]] = alloca { ptr }, i64 1, align 8
// CHECK: br label %omp.reduction.init
// CHECK: omp.reduction.init:
diff --git a/mlir/test/Target/LLVMIR/openmp-reduction-array-sections.mlir b/mlir/test/Target/LLVMIR/openmp-reduction-array-sections.mlir
index 13f52f054869e..bd3b77587b8a2 100644
--- a/mlir/test/Target/LLVMIR/openmp-reduction-array-sections.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-reduction-array-sections.mlir
@@ -89,6 +89,7 @@ llvm.func @sectionsreduction_(%arg0: !llvm.ptr {fir.bindc_name = "x"}) attribute
// CHECK: %[[VAL_20:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, i64 1, align 8
// CHECK: %[[VAL_21:.*]] = alloca ptr, align 8
// CHECK: %[[VAL_14:.*]] = alloca [1 x ptr], align 8
+// CHECK: %[[VAL_19:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, i64 1, align 8
// CHECK: br label %[[VAL_15:.*]]
// CHECK: [[VAL_15]]:
@@ -97,7 +98,6 @@ llvm.func @sectionsreduction_(%arg0: !llvm.ptr {fir.bindc_name = "x"}) attribute
// CHECK: [[PAR_REG]]: ; preds = %[[VAL_15]]
// CHECK: br label %[[VAL_18:.*]]
// CHECK: omp.par.region1: ; preds = %[[PAR_REG]]
-// CHECK: %[[VAL_19:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, i64 1, align 8
// CHECK: br label %[[VAL_22:.*]]
// CHECK: omp.reduction.init: ; preds = %[[VAL_16:.*]]
``````````
</details>
https://github.com/llvm/llvm-project/pull/174314
More information about the llvm-branch-commits
mailing list