[llvm-branch-commits] [flang] [llvm] [mlir] [OpenMP][OMPIRBuilder] Hoist static parallel region allocas to the entry block on the CPU (PR #174314)

Kareem Ergawy via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Sat Jan 3 23:27:25 PST 2026


https://github.com/ergawy created https://github.com/llvm/llvm-project/pull/174314


This PR is a follow-up to #171597: it hoists static allocas in a parallel region to the entry block of the region's outlined function. While #171597 introduced the main mechanism for this and enabled it for the GPU, this PR enables it for the CPU (host) as well.

>From 0322a513996dd163dbb6f12b44690f1d0cedd4e4 Mon Sep 17 00:00:00 2001
From: ergawy <kareem.ergawy at amd.com>
Date: Sun, 4 Jan 2026 01:00:48 -0600
Subject: [PATCH] [OpenMP][OMPIRBuilder] Hoist static parallel region allocas
 to the entry block on the CPU

This PR is a follow-up to #171597: it hoists static allocas in a
parallel region to the entry block of the region's outlined function.
While #171597 introduced the main mechanism for this and enabled it for
the GPU, this PR enables it for the CPU (host) as well.
---
 flang/test/Integration/OpenMP/copyprivate.f90            | 2 +-
 flang/test/Integration/OpenMP/private-global.f90         | 7 ++++---
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp                | 2 +-
 mlir/test/Target/LLVMIR/openmp-llvm.mlir                 | 9 ++++++---
 .../Target/LLVMIR/openmp-parallel-reduction-init.mlir    | 2 +-
 .../Target/LLVMIR/openmp-reduction-array-sections.mlir   | 2 +-
 6 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/flang/test/Integration/OpenMP/copyprivate.f90 b/flang/test/Integration/OpenMP/copyprivate.f90
index e0e4abe015438..43c8612d0a1da 100644
--- a/flang/test/Integration/OpenMP/copyprivate.f90
+++ b/flang/test/Integration/OpenMP/copyprivate.f90
@@ -37,9 +37,9 @@
 !CHECK:         %[[TID_ADDR:.*]] = alloca i32, align 4
 !CHECK:         %[[I:.*]] = alloca i32, align 4
 !CHECK:         %[[J:.*]] = alloca i32, align 4
+!CHECK:         %[[DID_IT:.*]] = alloca i32
 !CHECK:         br label %[[OMP_REDUCTION_INIT:.*]]
 
-!CHECK:         %[[DID_IT:.*]] = alloca i32
 !CHECK:         store i32 0, ptr %[[DID_IT]]
 !CHECK:         %[[THREAD_NUM1:.*]] = call i32 @__kmpc_global_thread_num(ptr @[[LOC:.*]])
 !CHECK:         %[[RET:.*]] = call i32 @__kmpc_single({{.*}})
diff --git a/flang/test/Integration/OpenMP/private-global.f90 b/flang/test/Integration/OpenMP/private-global.f90
index 8f8de8cdedd3b..978a8fa3c8205 100644
--- a/flang/test/Integration/OpenMP/private-global.f90
+++ b/flang/test/Integration/OpenMP/private-global.f90
@@ -22,15 +22,16 @@ program bug
 ! CHECK:         store i32 %[[VAL_10]], ptr %[[VAL_9]], align 4
 ! CHECK:         %[[VAL_12:.*]] = load i32, ptr %[[VAL_9]], align 4
 ! CHECK:         %[[PRIV_BOX_ALLOC:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, align 8
-! ...
-! check that we use the private copy of table for the assignment
-! CHECK:       omp.par.region1:
 ! CHECK:         %[[ELEMENTAL_TMP:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, align 8
+! CHECK:         %[[ELEMENTAL_TMP_2:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, align 8
 ! CHECK:         %[[TABLE_BOX_ADDR:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, align 8
 ! CHECK:         %[[BOXED_FIFTY:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8 }, align 8
 ! CHECK:         %[[FIFTY:.*]] = alloca i32, i64 1, align 4
 ! CHECK:         %[[INTERMEDIATE:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, align 8
 ! CHECK:         %[[TABLE_BOX_ADDR2:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, i64 1, align 8
+! ...
+! check that we use the private copy of table for the assignment
+! CHECK:       omp.par.region1:
 ! CHECK:         call void @llvm.memcpy.p0.p0.i32(ptr %[[INTERMEDIATE]], ptr %[[PRIV_BOX_ALLOC]], i32 {{4[48]}}, i1 false)
 ! CHECK:         store i32 50, ptr %[[FIFTY]], align 4
 ! CHECK:         %[[FIFTY_BOX_VAL:.*]] = insertvalue { ptr, i64, i32, i8, i8, i8, i8 } { ptr undef, i64 4, i32 20240719, i8 0, i8 9, i8 0, i8 0 }, ptr %[[FIFTY]], 0
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index f764b644edc69..5e4d4c7e49776 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -1732,7 +1732,7 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createParallel(
       hostParallelCallback(this, OutlinedFn, OuterFn, Ident, IfCondition,
                            PrivTID, PrivTIDAddr, ToBeDeletedVec);
     };
-    // TODO: fix-up allocations on the host as well?
+    OI.FixUpNonEntryAllocas = true;
   }
 
   OI.OuterAllocaBB = OuterAllocaBlock;
diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
index 6011dc6604478..0b8a9765a4b87 100644
--- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
@@ -3513,12 +3513,16 @@ llvm.func @distribute_wsloop(%lb : i32, %ub : i32, %step : i32) {
 // CHECK:         call void{{.*}}@__kmpc_fork_call({{.*}}, ptr @[[OUTLINED_PARALLEL:.*]],
 
 // CHECK:       define internal void @[[OUTLINED_PARALLEL]]
-// CHECK:       distribute.alloca:
+// CHECK:       omp.par.entry:
+// CHECK:         %[[TID_LOCAL:.*]] = alloca i32, align 4
 // CHECK:         %[[LASTITER:.*]] = alloca i32
 // CHECK:         %[[LB:.*]] = alloca i32
 // CHECK:         %[[UB:.*]] = alloca i32
 // CHECK:         %[[STRIDE:.*]] = alloca i32
-// CHECK:         br label %[[AFTER_ALLOCA:.*]]
+// CHECK:         %[[DIST_UB:.*]] = alloca i32
+
+// CHECK:       distribute.alloca:
+// CHECK-NEXT:    br label %[[AFTER_ALLOCA:.*]]
 
 // CHECK:       [[AFTER_ALLOCA]]:
 // CHECK:         br label %[[DISTRIBUTE_BODY:.*]]
@@ -3539,7 +3543,6 @@ llvm.func @distribute_wsloop(%lb : i32, %ub : i32, %step : i32) {
 // CHECK:         store i32 %[[TRIPCOUNT]], ptr %[[UB]]
 // CHECK:         store i32 1, ptr %[[STRIDE]]
 // CHECK:         %[[TID:.*]] = call i32 @__kmpc_global_thread_num({{.*}})
-// CHECK:         %[[DIST_UB:.*]] = alloca i32
 // CHECK:         call void @__kmpc_dist_for_static_init_{{.*}}(ptr @{{.*}}, i32 %[[TID]], i32 34, ptr %[[LASTITER]], ptr %[[LB]], ptr %[[UB]], ptr %[[DIST_UB]], ptr %[[STRIDE]], i32 1, i32 0)
 
 // -----
diff --git a/mlir/test/Target/LLVMIR/openmp-parallel-reduction-init.mlir b/mlir/test/Target/LLVMIR/openmp-parallel-reduction-init.mlir
index 7e90ba0f0d937..4a93ed39eb811 100644
--- a/mlir/test/Target/LLVMIR/openmp-parallel-reduction-init.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-parallel-reduction-init.mlir
@@ -36,12 +36,12 @@ llvm.func @use_reduction() attributes {fir.bindc_name = "test"} {
 
 // CHECK: omp.par.entry:
 // CHECK:   %[[RED_REGION_ALLOC:.*]] = alloca { ptr }, i64 1, align 8
+// CHECK:   %[[PAR_REG_VAL:.*]] = alloca { ptr }, i64 1, align 8
 
 // CHECK: omp.par.region:
 // CHECK:   br label %omp.par.region1
 
 // CHECK: omp.par.region1:
-// CHECK:   %[[PAR_REG_VAL:.*]] = alloca { ptr }, i64 1, align 8
 // CHECK:   br label %omp.reduction.init
 
 // CHECK: omp.reduction.init:
diff --git a/mlir/test/Target/LLVMIR/openmp-reduction-array-sections.mlir b/mlir/test/Target/LLVMIR/openmp-reduction-array-sections.mlir
index 13f52f054869e..bd3b77587b8a2 100644
--- a/mlir/test/Target/LLVMIR/openmp-reduction-array-sections.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-reduction-array-sections.mlir
@@ -89,6 +89,7 @@ llvm.func @sectionsreduction_(%arg0: !llvm.ptr {fir.bindc_name = "x"}) attribute
 // CHECK:         %[[VAL_20:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, i64 1, align 8
 // CHECK:         %[[VAL_21:.*]] = alloca ptr, align 8
 // CHECK:         %[[VAL_14:.*]] = alloca [1 x ptr], align 8
+// CHECK:         %[[VAL_19:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, i64 1, align 8
 // CHECK:         br label %[[VAL_15:.*]]
 
 // CHECK:       [[VAL_15]]:
@@ -97,7 +98,6 @@ llvm.func @sectionsreduction_(%arg0: !llvm.ptr {fir.bindc_name = "x"}) attribute
 // CHECK:       [[PAR_REG]]:                                   ; preds = %[[VAL_15]]
 // CHECK:         br label %[[VAL_18:.*]]
 // CHECK:       omp.par.region1:                                  ; preds = %[[PAR_REG]]
-// CHECK:         %[[VAL_19:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, i64 1, align 8
 // CHECK:         br label %[[VAL_22:.*]]
 
 // CHECK:       omp.reduction.init:                               ; preds = %[[VAL_16:.*]]



More information about the llvm-branch-commits mailing list