[flang-commits] [flang] [llvm] [mlir] [MLIR][OpenMP] Handle privatization for global values in MLIR->LLVM translation (PR #104407)

via flang-commits flang-commits at lists.llvm.org
Wed Aug 14 23:19:45 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-flang-fir-hlfir

Author: Kareem Ergawy (ergawy)

<details>
<summary>Changes</summary>

Fix for https://github.com/llvm/llvm-project/issues/102939.

The issues occurs because the CodeExtractor component only collect inputs
(to the parallel regions) that are defined in the same function in which the
parallel regions is present. Howerver, this is problematic because if we are
privatizing a global value (e.g. a `target` variable which is emitted as a
global), then we miss finding that input and we do not privatize the
variable.

This commit attempts to fix the issue by adding a flag to the
CodeExtractor so that we can collect global inputs.

---
Full diff: https://github.com/llvm/llvm-project/pull/104407.diff


6 Files Affected:

- (modified) flang/test/Lower/OpenMP/delayed-privatization-allocatable-firstprivate.f90 (+2-3) 
- (modified) llvm/include/llvm/Transforms/Utils/CodeExtractor.h (+2-1) 
- (modified) llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp (+3-1) 
- (modified) llvm/lib/Transforms/Utils/CodeExtractor.cpp (+5-2) 
- (modified) mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp (+32-1) 
- (modified) mlir/test/Target/LLVMIR/openmp-firstprivate.mlir (+88) 


``````````diff
diff --git a/flang/test/Lower/OpenMP/delayed-privatization-allocatable-firstprivate.f90 b/flang/test/Lower/OpenMP/delayed-privatization-allocatable-firstprivate.f90
index 833976ff284a86..5f09371bbaba2e 100644
--- a/flang/test/Lower/OpenMP/delayed-privatization-allocatable-firstprivate.f90
+++ b/flang/test/Lower/OpenMP/delayed-privatization-allocatable-firstprivate.f90
@@ -57,6 +57,5 @@ end program compilation_to_obj
 ! LLVM: @[[GLOB_VAR:[^[:space:]]+]]t = internal global
 
 ! LLVM: define internal void @_QQmain..omp_par
-! LLVM: %[[LOCAL_VAR:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, align 8
-! LLVM-NEXT: %[[GLOB_VAL:.*]] = load { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr @[[GLOB_VAR]]t, align 8
-! LLVM-NEXT: store { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] } %[[GLOB_VAL]], ptr %[[LOCAL_VAR]], align 8
+! LLVM:      %[[GLOB_VAL:.*]] = load { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr @[[GLOB_VAR]]t, align 8
+! LLVM-NEXT: store { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] } %[[GLOB_VAL]], ptr %{{.*}}, align 8
diff --git a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h
index 333ed6774d6c7e..9fd5f52d212519 100644
--- a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h
+++ b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h
@@ -198,7 +198,8 @@ class CodeExtractorAnalysisCache {
     /// sets, before extraction occurs. These modifications won't have any
     /// significant impact on the cost however.
     void findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs,
-                           const ValueSet &Allocas) const;
+                           const ValueSet &Allocas,
+                           bool CollectGlobalInputs = false) const;
 
     /// Check if life time marker nodes can be hoisted/sunk into the outline
     /// region.
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 83fec194d73904..6df68d36e0c20a 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -1542,7 +1542,9 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel(
   BasicBlock *CommonExit = nullptr;
   SetVector<Value *> Inputs, Outputs, SinkingCands, HoistingCands;
   Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
-  Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands);
+
+  Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands,
+                              /*CollectGlobalInputs=*/true);
 
   LLVM_DEBUG(dbgs() << "Before privatization: " << *OuterFn << "\n");
 
diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
index 94c7f161fc4c73..2c4a98134b91f8 100644
--- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -644,14 +644,17 @@ bool CodeExtractor::isEligible() const {
 }
 
 void CodeExtractor::findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs,
-                                      const ValueSet &SinkCands) const {
+                                      const ValueSet &SinkCands,
+                                      bool CollectGlobalInputs) const {
   for (BasicBlock *BB : Blocks) {
     // If a used value is defined outside the region, it's an input.  If an
     // instruction is used outside the region, it's an output.
     for (Instruction &II : *BB) {
       for (auto &OI : II.operands()) {
         Value *V = OI;
-        if (!SinkCands.count(V) && definedInCaller(Blocks, V))
+        if (!SinkCands.count(V) &&
+            (definedInCaller(Blocks, V) ||
+             (CollectGlobalInputs && llvm::isa<llvm::GlobalVariable>(V))))
           Inputs.insert(V);
       }
 
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 458d05d5059db7..75133ec82cc9f0 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -1444,12 +1444,43 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
         auto privateVars = opInst.getPrivateVars();
         auto privateSyms = opInst.getPrivateSyms();
 
+        // Try to find a privatizer that corresponds to the LLVM value being
+        // prvatized.
         for (auto [privVar, privatizerAttr] :
              llvm::zip_equal(privateVars, *privateSyms)) {
           // Find the MLIR private variable corresponding to the LLVM value
           // being privatized.
           llvm::Value *llvmPrivVar = moduleTranslation.lookupValue(privVar);
-          if (llvmPrivVar != &vPtr)
+
+          // Check if the LLVM value being privatized matches the LLVM value
+          // mapped to privVar. In some cases, this is not trivial ...
+          auto isMatch = [](llvm::Value *vPtr, llvm::Value *llvmPrivVar) {
+            // If both values are trivially equal, we found a match.
+            if (llvmPrivVar == nullptr)
+              return false;
+
+            if (llvmPrivVar == vPtr)
+              return true;
+
+            auto *vPtrLoad = llvm::dyn_cast_if_present<llvm::LoadInst>(vPtr);
+
+            if (vPtrLoad == nullptr)
+              return false;
+
+            // Otherwise, we check if both vPtr and llvmPrivVar refer to the
+            // same memory (through a load/store pair).
+            for (auto &use : llvmPrivVar->uses()) {
+              auto *llvmPrivVarStore =
+                  llvm::dyn_cast_if_present<llvm::StoreInst>(use.getUser());
+              if (llvmPrivVarStore && (vPtrLoad->getPointerOperand() ==
+                                       llvmPrivVarStore->getPointerOperand()))
+                return true;
+            }
+
+            return false;
+          };
+
+          if (!isMatch(&vPtr, llvmPrivVar))
             continue;
 
           SymbolRefAttr privSym = llvm::cast<SymbolRefAttr>(privatizerAttr);
diff --git a/mlir/test/Target/LLVMIR/openmp-firstprivate.mlir b/mlir/test/Target/LLVMIR/openmp-firstprivate.mlir
index 65ae98b2a74c6e..02ce6b5b19ceaf 100644
--- a/mlir/test/Target/LLVMIR/openmp-firstprivate.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-firstprivate.mlir
@@ -114,3 +114,91 @@ omp.private {type = firstprivate} @multi_block.privatizer : !llvm.ptr alloc {
   llvm.store %arg2, %arg3 : f32, !llvm.ptr
   omp.yield(%arg3 : !llvm.ptr)
 }
+
+// -----
+
+// Verifies fix for https://github.com/llvm/llvm-project/issues/102935.
+//
+// The issue happens since we previously failed to match MLIR values to their
+// corresponding LLVM values in some cases (e.g. char strings with non-const
+// length).
+llvm.func @non_const_len_char_test(%n: !llvm.ptr {fir.bindc_name = "n"}) {
+  %n_val = llvm.load %n : !llvm.ptr -> i64
+  %orig_alloc = llvm.alloca %n_val x i8 {bindc_name = "str"} : (i64) -> !llvm.ptr
+  %orig_val = llvm.mlir.undef : !llvm.struct<(ptr, i64)>
+  %orig_val_init = llvm.insertvalue %orig_alloc, %orig_val[0] : !llvm.struct<(ptr, i64)>
+  omp.parallel private(@non_const_len_char %orig_val_init -> %priv_arg : !llvm.struct<(ptr, i64)>) {
+    %dummy = llvm.extractvalue %priv_arg[0] : !llvm.struct<(ptr, i64)>
+    omp.terminator
+  }
+  llvm.return
+}
+
+omp.private {type = firstprivate} @non_const_len_char : !llvm.struct<(ptr, i64)> alloc {
+^bb0(%orig_val: !llvm.struct<(ptr, i64)>):
+  %str_len = llvm.extractvalue %orig_val[1] : !llvm.struct<(ptr, i64)>
+  %priv_alloc = llvm.alloca %str_len x i8 {bindc_name = "str", pinned} : (i64) -> !llvm.ptr
+  %priv_val = llvm.mlir.undef : !llvm.struct<(ptr, i64)>
+  %priv_val_init = llvm.insertvalue %priv_alloc, %priv_val[0] : !llvm.struct<(ptr, i64)>
+  omp.yield(%priv_val_init : !llvm.struct<(ptr, i64)>)
+} copy {
+^bb0(%orig_val: !llvm.struct<(ptr, i64)>, %priv_val: !llvm.struct<(ptr, i64)>):
+  llvm.call @foo() : () -> ()
+  omp.yield(%priv_val : !llvm.struct<(ptr, i64)>)
+}
+
+llvm.func @foo()
+
+// CHECK-LABEL: @non_const_len_char_test..omp_par({{.*}})
+// CHECK-NEXT:    omp.par.entry:
+// Verify that we found the privatizer by checking that we properly inlined the
+// bodies of the alloc and copy regions.
+// CHECK:         %[[STR_LEN:.*]] = extractvalue { ptr, i64 } %{{.*}}, 1
+// CHECK:         %{{.*}} = alloca i8, i64 %[[STR_LEN]], align 1
+// CHECK:         call void @foo()
+
+// -----
+
+// Verifies fix for https://github.com/llvm/llvm-project/issues/102939.
+//
+// The issues occurs because the CodeExtractor component only collect inputs
+// (to the parallel regions) that are defined in the same function in which the
+// parallel regions is present. Howerver, this is problematic because if we are
+// privatizing a global value (e.g. a `target` variable which is emitted as a
+// global), then we miss finding that input and we do not privatize the
+// variable.
+
+omp.private {type = firstprivate} @global_privatizer : !llvm.ptr alloc {
+^bb0(%arg0: !llvm.ptr):
+  %0 = llvm.mlir.constant(1 : i64) : i64
+  %1 = llvm.alloca %0 x f32 {bindc_name = "global", pinned} : (i64) -> !llvm.ptr
+  omp.yield(%1 : !llvm.ptr)
+} copy {
+^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
+  %0 = llvm.load %arg0 : !llvm.ptr -> f32
+  llvm.store %0, %arg1 : f32, !llvm.ptr
+  omp.yield(%arg1 : !llvm.ptr)
+}
+
+llvm.func @global_accessor() {
+  %global_addr = llvm.mlir.addressof @global : !llvm.ptr
+  omp.parallel private(@global_privatizer %global_addr -> %arg0 : !llvm.ptr) {
+    %1 = llvm.mlir.constant(3.140000e+00 : f32) : f32
+    llvm.store %1, %arg0 : f32, !llvm.ptr
+    omp.terminator
+  }
+  llvm.return
+}
+
+llvm.mlir.global internal @global() {addr_space = 0 : i32} : f32 {
+  %0 = llvm.mlir.zero : f32
+  llvm.return %0 : f32
+}
+
+// CHECK-LABEL: @global_accessor..omp_par({{.*}})
+// CHECK-NEXT:  omp.par.entry:
+// Verify that we found the privatizer by checking that we properly inlined the
+// bodies of the alloc and copy regions.
+// CHECK:         %[[PRIV_ALLOC:.*]] = alloca float, i64 1, align 4
+// CHECK:         %[[GLOB_VAL:.*]] = load float, ptr @global, align 4
+// CHECK:         store float %[[GLOB_VAL]], ptr %[[PRIV_ALLOC]], align 4

``````````

</details>


https://github.com/llvm/llvm-project/pull/104407


More information about the flang-commits mailing list