[flang-commits] [flang] [llvm] [mlir] [MLIR][OpenMP] Handle privatization for global values in MLIR->LLVM translation (PR #104407)
Kareem Ergawy via flang-commits
flang-commits at lists.llvm.org
Wed Aug 14 23:19:12 PDT 2024
https://github.com/ergawy created https://github.com/llvm/llvm-project/pull/104407
Fix for https://github.com/llvm/llvm-project/issues/102939.
The issue occurs because the CodeExtractor component only collects inputs
(to the parallel region) that are defined in the same function in which the
parallel region is present. However, this is problematic because if we are
privatizing a global value (e.g. a `target` variable which is emitted as a
global), then we miss finding that input and we do not privatize the
variable.
This commit attempts to fix the issue by adding a flag to the
CodeExtractor so that we can collect global inputs.
>From c7eb7d375eb4be713331e49488dec07d60fb52f4 Mon Sep 17 00:00:00 2001
From: ergawy <kareem.ergawy at amd.com>
Date: Wed, 14 Aug 2024 03:40:59 -0500
Subject: [PATCH 1/2] [MLIR][OpenMP] Fix MLIR->LLVM value matching in
privatization logic
Fixes #102935
Updates matching logic for finding the LLVM value that corresponds to
an MLIR value. We need that matching to find the delayed privatizer for
an LLVM value being privatized.
The issue occurs when there is an "indirect" correspondence between
MLIR and LLVM values: in some cases the values we are trying to match
stem from a pair of load/store ops that point to the same memref. This
PR adds such matching logic.
---
.../OpenMP/OpenMPToLLVMIRTranslation.cpp | 33 ++++++++++++++-
.../Target/LLVMIR/openmp-firstprivate.mlir | 42 +++++++++++++++++++
2 files changed, 74 insertions(+), 1 deletion(-)
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 458d05d5059db7..75133ec82cc9f0 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -1444,12 +1444,43 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
auto privateVars = opInst.getPrivateVars();
auto privateSyms = opInst.getPrivateSyms();
+ // Try to find a privatizer that corresponds to the LLVM value being
+ // privatized.
for (auto [privVar, privatizerAttr] :
llvm::zip_equal(privateVars, *privateSyms)) {
// Find the MLIR private variable corresponding to the LLVM value
// being privatized.
llvm::Value *llvmPrivVar = moduleTranslation.lookupValue(privVar);
- if (llvmPrivVar != &vPtr)
+
+ // Check if the LLVM value being privatized matches the LLVM value
+ // mapped to privVar. In some cases, this is not trivial ...
+ auto isMatch = [](llvm::Value *vPtr, llvm::Value *llvmPrivVar) {
+ // If both values are trivially equal, we found a match.
+ if (llvmPrivVar == nullptr)
+ return false;
+
+ if (llvmPrivVar == vPtr)
+ return true;
+
+ auto *vPtrLoad = llvm::dyn_cast_if_present<llvm::LoadInst>(vPtr);
+
+ if (vPtrLoad == nullptr)
+ return false;
+
+ // Otherwise, we check if both vPtr and llvmPrivVar refer to the
+ // same memory (through a load/store pair).
+ for (auto &use : llvmPrivVar->uses()) {
+ auto *llvmPrivVarStore =
+ llvm::dyn_cast_if_present<llvm::StoreInst>(use.getUser());
+ if (llvmPrivVarStore && (vPtrLoad->getPointerOperand() ==
+ llvmPrivVarStore->getPointerOperand()))
+ return true;
+ }
+
+ return false;
+ };
+
+ if (!isMatch(&vPtr, llvmPrivVar))
continue;
SymbolRefAttr privSym = llvm::cast<SymbolRefAttr>(privatizerAttr);
diff --git a/mlir/test/Target/LLVMIR/openmp-firstprivate.mlir b/mlir/test/Target/LLVMIR/openmp-firstprivate.mlir
index 65ae98b2a74c6e..b06ad96f4592c5 100644
--- a/mlir/test/Target/LLVMIR/openmp-firstprivate.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-firstprivate.mlir
@@ -114,3 +114,45 @@ omp.private {type = firstprivate} @multi_block.privatizer : !llvm.ptr alloc {
llvm.store %arg2, %arg3 : f32, !llvm.ptr
omp.yield(%arg3 : !llvm.ptr)
}
+
+// -----
+
+// Verifies fix for https://github.com/llvm/llvm-project/issues/102935.
+//
+// The issue happens since we previously failed to match MLIR values to their
+// corresponding LLVM values in some cases (e.g. char strings with non-const
+// length).
+llvm.func @non_const_len_char_test(%n: !llvm.ptr {fir.bindc_name = "n"}) {
+ %n_val = llvm.load %n : !llvm.ptr -> i64
+ %orig_alloc = llvm.alloca %n_val x i8 {bindc_name = "str"} : (i64) -> !llvm.ptr
+ %orig_val = llvm.mlir.undef : !llvm.struct<(ptr, i64)>
+ %orig_val_init = llvm.insertvalue %orig_alloc, %orig_val[0] : !llvm.struct<(ptr, i64)>
+ omp.parallel private(@non_const_len_char %orig_val_init -> %priv_arg : !llvm.struct<(ptr, i64)>) {
+ %dummy = llvm.extractvalue %priv_arg[0] : !llvm.struct<(ptr, i64)>
+ omp.terminator
+ }
+ llvm.return
+}
+
+omp.private {type = firstprivate} @non_const_len_char : !llvm.struct<(ptr, i64)> alloc {
+^bb0(%orig_val: !llvm.struct<(ptr, i64)>):
+ %str_len = llvm.extractvalue %orig_val[1] : !llvm.struct<(ptr, i64)>
+ %priv_alloc = llvm.alloca %str_len x i8 {bindc_name = "str", pinned} : (i64) -> !llvm.ptr
+ %priv_val = llvm.mlir.undef : !llvm.struct<(ptr, i64)>
+ %priv_val_init = llvm.insertvalue %priv_alloc, %priv_val[0] : !llvm.struct<(ptr, i64)>
+ omp.yield(%priv_val_init : !llvm.struct<(ptr, i64)>)
+} copy {
+^bb0(%orig_val: !llvm.struct<(ptr, i64)>, %priv_val: !llvm.struct<(ptr, i64)>):
+ llvm.call @foo() : () -> ()
+ omp.yield(%priv_val : !llvm.struct<(ptr, i64)>)
+}
+
+llvm.func @foo()
+
+// CHECK-LABEL: @non_const_len_char_test..omp_par({{.*}})
+// CHECK-NEXT: omp.par.entry:
+// Verify that we found the privatizer by checking that we properly inlined the
+// bodies of the alloc and copy regions.
+// CHECK: %[[STR_LEN:.*]] = extractvalue { ptr, i64 } %{{.*}}, 1
+// CHECK: %{{.*}} = alloca i8, i64 %[[STR_LEN]], align 1
+// CHECK: call void @foo()
>From 840faaa60a41fdb740b96e4554f44f491aaeb9ee Mon Sep 17 00:00:00 2001
From: ergawy <kareem.ergawy at amd.com>
Date: Thu, 15 Aug 2024 01:16:14 -0500
Subject: [PATCH 2/2] [MLIR][OpenMP] Handle privatization for global values in
MLIR->LLVM translation
Fix for https://github.com/llvm/llvm-project/issues/102939.
The issue occurs because the CodeExtractor component only collects inputs
(to the parallel region) that are defined in the same function in which the
parallel region is present. However, this is problematic because if we are
privatizing a global value (e.g. a `target` variable which is emitted as a
global), then we miss finding that input and we do not privatize the
variable.
This commit attempts to fix the issue by adding a flag to the
CodeExtractor so that we can collect global inputs.
---
...privatization-allocatable-firstprivate.f90 | 5 +-
.../llvm/Transforms/Utils/CodeExtractor.h | 3 +-
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 4 +-
llvm/lib/Transforms/Utils/CodeExtractor.cpp | 7 ++-
.../Target/LLVMIR/openmp-firstprivate.mlir | 46 +++++++++++++++++++
5 files changed, 58 insertions(+), 7 deletions(-)
diff --git a/flang/test/Lower/OpenMP/delayed-privatization-allocatable-firstprivate.f90 b/flang/test/Lower/OpenMP/delayed-privatization-allocatable-firstprivate.f90
index 833976ff284a86..5f09371bbaba2e 100644
--- a/flang/test/Lower/OpenMP/delayed-privatization-allocatable-firstprivate.f90
+++ b/flang/test/Lower/OpenMP/delayed-privatization-allocatable-firstprivate.f90
@@ -57,6 +57,5 @@ end program compilation_to_obj
! LLVM: @[[GLOB_VAR:[^[:space:]]+]]t = internal global
! LLVM: define internal void @_QQmain..omp_par
-! LLVM: %[[LOCAL_VAR:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, align 8
-! LLVM-NEXT: %[[GLOB_VAL:.*]] = load { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr @[[GLOB_VAR]]t, align 8
-! LLVM-NEXT: store { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] } %[[GLOB_VAL]], ptr %[[LOCAL_VAR]], align 8
+! LLVM: %[[GLOB_VAL:.*]] = load { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr @[[GLOB_VAR]]t, align 8
+! LLVM-NEXT: store { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] } %[[GLOB_VAL]], ptr %{{.*}}, align 8
diff --git a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h
index 333ed6774d6c7e..9fd5f52d212519 100644
--- a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h
+++ b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h
@@ -198,7 +198,8 @@ class CodeExtractorAnalysisCache {
/// sets, before extraction occurs. These modifications won't have any
/// significant impact on the cost however.
void findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs,
- const ValueSet &Allocas) const;
+ const ValueSet &Allocas,
+ bool CollectGlobalInputs = false) const;
/// Check if life time marker nodes can be hoisted/sunk into the outline
/// region.
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 83fec194d73904..6df68d36e0c20a 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -1542,7 +1542,9 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel(
BasicBlock *CommonExit = nullptr;
SetVector<Value *> Inputs, Outputs, SinkingCands, HoistingCands;
Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
- Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands);
+
+ Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands,
+ /*CollectGlobalInputs=*/true);
LLVM_DEBUG(dbgs() << "Before privatization: " << *OuterFn << "\n");
diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
index 94c7f161fc4c73..2c4a98134b91f8 100644
--- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -644,14 +644,17 @@ bool CodeExtractor::isEligible() const {
}
void CodeExtractor::findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs,
- const ValueSet &SinkCands) const {
+ const ValueSet &SinkCands,
+ bool CollectGlobalInputs) const {
for (BasicBlock *BB : Blocks) {
// If a used value is defined outside the region, it's an input. If an
// instruction is used outside the region, it's an output.
for (Instruction &II : *BB) {
for (auto &OI : II.operands()) {
Value *V = OI;
- if (!SinkCands.count(V) && definedInCaller(Blocks, V))
+ if (!SinkCands.count(V) &&
+ (definedInCaller(Blocks, V) ||
+ (CollectGlobalInputs && llvm::isa<llvm::GlobalVariable>(V))))
Inputs.insert(V);
}
diff --git a/mlir/test/Target/LLVMIR/openmp-firstprivate.mlir b/mlir/test/Target/LLVMIR/openmp-firstprivate.mlir
index b06ad96f4592c5..02ce6b5b19ceaf 100644
--- a/mlir/test/Target/LLVMIR/openmp-firstprivate.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-firstprivate.mlir
@@ -156,3 +156,49 @@ llvm.func @foo()
// CHECK: %[[STR_LEN:.*]] = extractvalue { ptr, i64 } %{{.*}}, 1
// CHECK: %{{.*}} = alloca i8, i64 %[[STR_LEN]], align 1
// CHECK: call void @foo()
+
+// -----
+
+// Verifies fix for https://github.com/llvm/llvm-project/issues/102939.
+//
+// The issue occurs because the CodeExtractor component only collects inputs
+// (to the parallel region) that are defined in the same function in which the
+// parallel region is present. However, this is problematic because if we are
+// privatizing a global value (e.g. a `target` variable which is emitted as a
+// global), then we miss finding that input and we do not privatize the
+// variable.
+
+omp.private {type = firstprivate} @global_privatizer : !llvm.ptr alloc {
+^bb0(%arg0: !llvm.ptr):
+ %0 = llvm.mlir.constant(1 : i64) : i64
+ %1 = llvm.alloca %0 x f32 {bindc_name = "global", pinned} : (i64) -> !llvm.ptr
+ omp.yield(%1 : !llvm.ptr)
+} copy {
+^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
+ %0 = llvm.load %arg0 : !llvm.ptr -> f32
+ llvm.store %0, %arg1 : f32, !llvm.ptr
+ omp.yield(%arg1 : !llvm.ptr)
+}
+
+llvm.func @global_accessor() {
+ %global_addr = llvm.mlir.addressof @global : !llvm.ptr
+ omp.parallel private(@global_privatizer %global_addr -> %arg0 : !llvm.ptr) {
+ %1 = llvm.mlir.constant(3.140000e+00 : f32) : f32
+ llvm.store %1, %arg0 : f32, !llvm.ptr
+ omp.terminator
+ }
+ llvm.return
+}
+
+llvm.mlir.global internal @global() {addr_space = 0 : i32} : f32 {
+ %0 = llvm.mlir.zero : f32
+ llvm.return %0 : f32
+}
+
+// CHECK-LABEL: @global_accessor..omp_par({{.*}})
+// CHECK-NEXT: omp.par.entry:
+// Verify that we found the privatizer by checking that we properly inlined the
+// bodies of the alloc and copy regions.
+// CHECK: %[[PRIV_ALLOC:.*]] = alloca float, i64 1, align 4
+// CHECK: %[[GLOB_VAL:.*]] = load float, ptr @global, align 4
+// CHECK: store float %[[GLOB_VAL]], ptr %[[PRIV_ALLOC]], align 4
More information about the flang-commits
mailing list