[Mlir-commits] [mlir] 240dd92 - [OpenMPIRBuilder] forward arguments as pointers to outlined function

Alex Zinenko llvmlistbot at llvm.org
Wed Dec 2 05:59:49 PST 2020


Author: Alex Zinenko
Date: 2020-12-02T14:59:41+01:00
New Revision: 240dd92432ebbfbf24ef85779f2cdf93e6ddf605

URL: https://github.com/llvm/llvm-project/commit/240dd92432ebbfbf24ef85779f2cdf93e6ddf605
DIFF: https://github.com/llvm/llvm-project/commit/240dd92432ebbfbf24ef85779f2cdf93e6ddf605.diff

LOG: [OpenMPIRBuilder] forward arguments as pointers to outlined function

OpenMPIRBuilder::createParallel outlines the body region of the parallel
construct into a new function that accepts any value previously defined outside
the region as a function argument. This function is called back by OpenMP
runtime function __kmpc_fork_call, which expects trailing arguments to be
pointers. If the region uses a value that is not of a pointer type, e.g. a
struct, the produced code would be invalid. In such cases, make createParallel
emit IR that stores the value on stack and pass the pointer to the outlined
function instead. The outlined function then loads the value back and uses as
normal.

Reviewed By: jdoerfert, llitchev

Differential Revision: https://reviews.llvm.org/D92189

Added: 
    

Modified: 
    clang/lib/CodeGen/CGStmtOpenMP.cpp
    clang/test/OpenMP/parallel_codegen.cpp
    llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
    llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
    llvm/lib/Transforms/IPO/OpenMPOpt.cpp
    llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
    mlir/lib/Target/LLVMIR/ModuleTranslation.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 2fb4144930df..5e8d98cfe5ef 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -1693,7 +1693,7 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
     //
     // TODO: This defaults to shared right now.
     auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
-                     llvm::Value &Val, llvm::Value *&ReplVal) {
+                     llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) {
       // The next line is appropriate only for variables (Val) with the
       // data-sharing attribute "shared".
       ReplVal = &Val;

diff  --git a/clang/test/OpenMP/parallel_codegen.cpp b/clang/test/OpenMP/parallel_codegen.cpp
index bceab0637f6a..83561ce6f6a8 100644
--- a/clang/test/OpenMP/parallel_codegen.cpp
+++ b/clang/test/OpenMP/parallel_codegen.cpp
@@ -133,22 +133,26 @@ int main (int argc, char **argv) {
 // CHECK-DEBUG-DAG:       define internal void [[OMP_OUTLINED]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i64 [[VLA_SIZE:%.+]], i32* {{.+}} [[VLA_ADDR:%[^)]+]])
 // CHECK-DEBUG-DAG:       call void [[OMP_OUTLINED_DEBUG]]
 
+// Note that OpenMPIRBuilder puts the trailing arguments in a 
diff erent order:
+// arguments that are wrapped into additional pointers precede the other
+// arguments. This is expected and not problematic because both the call and the
+// function are generated from the same place, and the function is internal.
 // ALL:       define linkonce_odr {{[a-z\_\b]*[ ]?i32}} [[TMAIN]](i8** %argc)
 // ALL:       store i8** %argc, i8*** [[ARGC_ADDR:%.+]],
 // CHECK:       call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[DEF_LOC_2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8***, i{{64|32}})* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i8*** [[ARGC_ADDR]], i{{64|32}} %{{.+}})
-// IRBUILDER:   call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[DEF_LOC_2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8***, i{{64|32}})* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i8*** [[ARGC_ADDR]], i{{64|32}} %{{.+}})
+// IRBUILDER:   call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[DEF_LOC_2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i{{64|32}}*, i8***)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i{{64|32}}* %{{.+}}, i8*** [[ARGC_ADDR]])
 // ALL:  ret i32 0
 // ALL-NEXT:  }
 // ALL-DEBUG:       define linkonce_odr i32 [[TMAIN]](i8** %argc)
 
 // CHECK-DEBUG:       store i8** %argc, i8*** [[ARGC_ADDR:%.+]],
 // CHECK-DEBUG:       call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.*}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8***, i64)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i8*** [[ARGC_ADDR]], i64 %{{.+}})
-// IRBUILDER-DEBUG:   call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.*}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8***, i64)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i8*** [[ARGC_ADDR]], i64 %{{.+}})
+// IRBUILDER-DEBUG:   call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.*}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64*, i8***)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i64* %{{.+}}, i8*** [[ARGC_ADDR]])
 // ALL-DEBUG:  ret i32 0
 // ALL-DEBUG-NEXT:  }
 
 // CHECK:       define internal {{.*}}void [[OMP_OUTLINED]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i8*** nonnull align {{[0-9]+}} dereferenceable({{4|8}}) %argc, i{{64|32}}{{.*}} %{{.+}})
-// IRBUILDER:   define internal {{.*}}void [[OMP_OUTLINED]](i32* noalias %{{.*}}, i32* noalias %{{.*}}, i8*** [[ARGC_REF:%.*]], i{{64|32}}{{.*}} %{{.+}})
+// IRBUILDER:   define internal {{.*}}void [[OMP_OUTLINED]](i32* noalias %{{.*}}, i32* noalias %{{.*}}, i{{64|32}}*{{.*}} %{{.+}}, i8*** [[ARGC_REF:%.*]])
 // CHECK:       store i8*** %argc, i8**** [[ARGC_PTR_ADDR:%.+]],
 // CHECK:       [[ARGC_REF:%.+]] = load i8***, i8**** [[ARGC_PTR_ADDR]]
 // ALL:         [[ARGC:%.+]] = load i8**, i8*** [[ARGC_REF]]
@@ -159,7 +163,7 @@ int main (int argc, char **argv) {
 // CHECK-NEXT:  unreachable
 // CHECK-NEXT:  }
 // CHECK-DEBUG:       define internal void [[OMP_OUTLINED_DEBUG:@.+]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i8*** nonnull align {{[0-9]+}} dereferenceable({{4|8}}) %argc, i64 %{{.+}})
-// IRBUILDER-DEBUG:   define internal void [[OMP_OUTLINED_DEBUG:@.+]](i32* noalias %{{.*}}, i32* noalias %{{.*}}, i8*** [[ARGC_REF:%.*]], i64 %{{.+}})
+// IRBUILDER-DEBUG:   define internal void [[OMP_OUTLINED_DEBUG:@.+]](i32* noalias %{{.*}}, i32* noalias %{{.*}}, i64* %{{.+}}, i8*** [[ARGC_REF:%.*]])
 // CHECK-DEBUG:       store i8*** %argc, i8**** [[ARGC_PTR_ADDR:%.+]],
 // CHECK-DEBUG:       [[ARGC_REF:%.+]] = load i8***, i8**** [[ARGC_PTR_ADDR]]
 // ALL-DEBUG:         [[ARGC:%.+]] = load i8**, i8*** [[ARGC_REF]]

diff  --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 87fc62165eca..a09605bb1023 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -116,15 +116,20 @@ class OpenMPIRBuilder {
   ///                 should be placed.
   /// \param CodeGenIP is the insertion point at which the privatization code
   ///                  should be placed.
-  /// \param Val The value beeing copied/created.
+  /// \param Original The value being copied/created, should not be used in the
+  ///                 generated IR.
+  /// \param Inner The equivalent of \p Original that should be used in the
+  ///              generated IR; this is equal to \p Original if the value is
+  ///              a pointer and can thus be passed directly, otherwise it is
+  ///              an equivalent but 
diff erent value.
   /// \param ReplVal The replacement value, thus a copy or new created version
-  ///                of \p Val.
+  ///                of \p Inner.
   ///
   /// \returns The new insertion point where code generation continues and
-  ///          \p ReplVal the replacement of \p Val.
+  ///          \p ReplVal the replacement value.
   using PrivatizeCallbackTy = function_ref<InsertPointTy(
-      InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &Val,
-      Value *&ReplVal)>;
+      InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &Original,
+      Value &Inner, Value *&ReplVal)>;
 
   /// Description of a LLVM-IR insertion point (IP) and a debug/source location
   /// (filename, line, column, ...).

diff  --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 1dd0683678fd..044e69da8665 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -455,6 +455,10 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel(
   BasicBlock *InsertBB = Builder.GetInsertBlock();
   Function *OuterFn = InsertBB->getParent();
 
+  // Save the outer alloca block because the insertion iterator may get
+  // invalidated and we still need this later.
+  BasicBlock *OuterAllocaBlock = OuterAllocaIP.getBlock();
+
   // Vector to remember instructions we used only during the modeling but which
   // we want to delete at the end.
   SmallVector<Instruction *, 4> ToBeDeleted;
@@ -522,7 +526,8 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel(
 
   // Add some fake uses for OpenMP provided arguments.
   ToBeDeleted.push_back(Builder.CreateLoad(TIDAddr, "tid.addr.use"));
-  ToBeDeleted.push_back(Builder.CreateLoad(ZeroAddr, "zero.addr.use"));
+  Instruction *ZeroAddrUse = Builder.CreateLoad(ZeroAddr, "zero.addr.use");
+  ToBeDeleted.push_back(ZeroAddrUse);
 
   // ThenBB
   //   |
@@ -691,11 +696,37 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel(
     if (&V == TIDAddr || &V == ZeroAddr)
       return;
 
-    SmallVector<Use *, 8> Uses;
+    SetVector<Use *> Uses;
     for (Use &U : V.uses())
       if (auto *UserI = dyn_cast<Instruction>(U.getUser()))
         if (ParallelRegionBlockSet.count(UserI->getParent()))
-          Uses.push_back(&U);
+          Uses.insert(&U);
+
+    // __kmpc_fork_call expects extra arguments as pointers. If the input
+    // already has a pointer type, everything is fine. Otherwise, store the
+    // value onto stack and load it back inside the to-be-outlined region. This
+    // will ensure only the pointer will be passed to the function.
+    // FIXME: if there are more than 15 trailing arguments, they must be
+    // additionally packed in a struct.
+    Value *Inner = &V;
+    if (!V.getType()->isPointerTy()) {
+      IRBuilder<>::InsertPointGuard Guard(Builder);
+      LLVM_DEBUG(llvm::dbgs() << "Forwarding input as pointer: " << V << "\n");
+
+      Builder.restoreIP(OuterAllocaIP);
+      Value *Ptr =
+          Builder.CreateAlloca(V.getType(), nullptr, V.getName() + ".reloaded");
+
+      // Store to stack at end of the block that currently branches to the entry
+      // block of the to-be-outlined region.
+      Builder.SetInsertPoint(InsertBB,
+                             InsertBB->getTerminator()->getIterator());
+      Builder.CreateStore(&V, Ptr);
+
+      // Load back next to allocations in the to-be-outlined region.
+      Builder.restoreIP(InnerAllocaIP);
+      Inner = Builder.CreateLoad(Ptr);
+    }
 
     Value *ReplacementValue = nullptr;
     CallInst *CI = dyn_cast<CallInst>(&V);
@@ -703,7 +734,7 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel(
       ReplacementValue = PrivTID;
     } else {
       Builder.restoreIP(
-          PrivCB(InnerAllocaIP, Builder.saveIP(), V, ReplacementValue));
+          PrivCB(InnerAllocaIP, Builder.saveIP(), V, *Inner, ReplacementValue));
       assert(ReplacementValue &&
              "Expected copy/create callback to set replacement value!");
       if (ReplacementValue == &V)
@@ -714,6 +745,20 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel(
       UPtr->set(ReplacementValue);
   };
 
+  // Reset the inner alloca insertion as it will be used for loading the values
+  // wrapped into pointers before passing them into the to-be-outlined region.
+  // Configure it to insert immediately after the fake use of zero address so
+  // that they are available in the generated body and so that the
+  // OpenMP-related values (thread ID and zero address pointers) remain leading
+  // in the argument list.
+  InnerAllocaIP = IRBuilder<>::InsertPoint(
+      ZeroAddrUse->getParent(), ZeroAddrUse->getNextNode()->getIterator());
+
+  // Reset the outer alloca insertion point to the entry of the relevant block
+  // in case it was invalidated.
+  OuterAllocaIP = IRBuilder<>::InsertPoint(
+      OuterAllocaBlock, OuterAllocaBlock->getFirstInsertionPt());
+
   for (Value *Input : Inputs) {
     LLVM_DEBUG(dbgs() << "Captured input: " << *Input << "\n");
     PrivHelper(*Input);

diff  --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index 0ef73eef429b..f1c953afca25 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -631,9 +631,9 @@ struct OpenMPOpt {
       EndBB->getTerminator()->setSuccessor(0, CGEndBB);
     };
 
-    auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
-                      Value &VPtr, Value *&ReplacementValue) -> InsertPointTy {
-      ReplacementValue = &VPtr;
+    auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &,
+                      Value &Inner, Value *&ReplacementValue) -> InsertPointTy {
+      ReplacementValue = &Inner;
       return CodeGenIP;
     };
 

diff  --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
index badaea69b5cc..2d86b924e4b0 100644
--- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -60,6 +60,25 @@ class OpenMPIRBuilderTest : public testing::Test {
   DebugLoc DL;
 };
 
+// Returns the value stored in the given allocation. Returns null if the given
+// value is not a result of an allocation, if no value is stored or if there is
+// more than one store.
+static Value *findStoredValue(Value *AllocaValue) {
+  Instruction *Alloca = dyn_cast<AllocaInst>(AllocaValue);
+  if (!Alloca)
+    return nullptr;
+  StoreInst *Store = nullptr;
+  for (Use &U : Alloca->uses()) {
+    if (auto *CandidateStore = dyn_cast<StoreInst>(U.getUser())) {
+      EXPECT_EQ(Store, nullptr);
+      Store = CandidateStore;
+    }
+  }
+  if (!Store)
+    return nullptr;
+  return Store->getValueOperand();
+};
+
 TEST_F(OpenMPIRBuilderTest, CreateBarrier) {
   OpenMPIRBuilder OMPBuilder(*M);
   OMPBuilder.initialize();
@@ -341,20 +360,25 @@ TEST_F(OpenMPIRBuilderTest, ParallelSimple) {
   };
 
   auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
-                    Value &VPtr, Value *&ReplacementValue) -> InsertPointTy {
+                    Value &Orig, Value &Inner,
+                    Value *&ReplacementValue) -> InsertPointTy {
     ++NumPrivatizedVars;
 
-    if (!isa<AllocaInst>(VPtr)) {
-      EXPECT_EQ(&VPtr, F->arg_begin());
-      ReplacementValue = &VPtr;
+    if (!isa<AllocaInst>(Orig)) {
+      EXPECT_EQ(&Orig, F->arg_begin());
+      ReplacementValue = &Inner;
       return CodeGenIP;
     }
 
+    // Since the original value is an allocation, it has a pointer type and
+    // therefore no additional wrapping should happen.
+    EXPECT_EQ(&Orig, &Inner);
+
     // Trivial copy (=firstprivate).
     Builder.restoreIP(AllocaIP);
-    Type *VTy = VPtr.getType()->getPointerElementType();
-    Value *V = Builder.CreateLoad(VTy, &VPtr, VPtr.getName() + ".reload");
-    ReplacementValue = Builder.CreateAlloca(VTy, 0, VPtr.getName() + ".copy");
+    Type *VTy = Inner.getType()->getPointerElementType();
+    Value *V = Builder.CreateLoad(VTy, &Inner, Orig.getName() + ".reload");
+    ReplacementValue = Builder.CreateAlloca(VTy, 0, Orig.getName() + ".copy");
     Builder.restoreIP(CodeGenIP);
     Builder.CreateStore(V, ReplacementValue);
     return CodeGenIP;
@@ -401,7 +425,7 @@ TEST_F(OpenMPIRBuilderTest, ParallelSimple) {
   EXPECT_EQ(ForkCI->getArgOperand(1),
             ConstantInt::get(Type::getInt32Ty(Ctx), 1U));
   EXPECT_EQ(ForkCI->getArgOperand(2), Usr);
-  EXPECT_EQ(ForkCI->getArgOperand(3), F->arg_begin());
+  EXPECT_EQ(findStoredValue(ForkCI->getArgOperand(3)), F->arg_begin());
 }
 
 TEST_F(OpenMPIRBuilderTest, ParallelNested) {
@@ -423,12 +447,13 @@ TEST_F(OpenMPIRBuilderTest, ParallelNested) {
   };
 
   auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
-                    Value &VPtr, Value *&ReplacementValue) -> InsertPointTy {
+                    Value &Orig, Value &Inner,
+                    Value *&ReplacementValue) -> InsertPointTy {
     // Trivial copy (=firstprivate).
     Builder.restoreIP(AllocaIP);
-    Type *VTy = VPtr.getType()->getPointerElementType();
-    Value *V = Builder.CreateLoad(VTy, &VPtr, VPtr.getName() + ".reload");
-    ReplacementValue = Builder.CreateAlloca(VTy, 0, VPtr.getName() + ".copy");
+    Type *VTy = Inner.getType()->getPointerElementType();
+    Value *V = Builder.CreateLoad(VTy, &Inner, Orig.getName() + ".reload");
+    ReplacementValue = Builder.CreateAlloca(VTy, 0, Orig.getName() + ".copy");
     Builder.restoreIP(CodeGenIP);
     Builder.CreateStore(V, ReplacementValue);
     return CodeGenIP;
@@ -515,12 +540,13 @@ TEST_F(OpenMPIRBuilderTest, ParallelNested2Inner) {
   };
 
   auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
-                    Value &VPtr, Value *&ReplacementValue) -> InsertPointTy {
+                    Value &Orig, Value &Inner,
+                    Value *&ReplacementValue) -> InsertPointTy {
     // Trivial copy (=firstprivate).
     Builder.restoreIP(AllocaIP);
-    Type *VTy = VPtr.getType()->getPointerElementType();
-    Value *V = Builder.CreateLoad(VTy, &VPtr, VPtr.getName() + ".reload");
-    ReplacementValue = Builder.CreateAlloca(VTy, 0, VPtr.getName() + ".copy");
+    Type *VTy = Inner.getType()->getPointerElementType();
+    Value *V = Builder.CreateLoad(VTy, &Inner, Orig.getName() + ".reload");
+    ReplacementValue = Builder.CreateAlloca(VTy, 0, Orig.getName() + ".copy");
     Builder.restoreIP(CodeGenIP);
     Builder.CreateStore(V, ReplacementValue);
     return CodeGenIP;
@@ -639,20 +665,25 @@ TEST_F(OpenMPIRBuilderTest, ParallelIfCond) {
   };
 
   auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
-                    Value &VPtr, Value *&ReplacementValue) -> InsertPointTy {
+                    Value &Orig, Value &Inner,
+                    Value *&ReplacementValue) -> InsertPointTy {
     ++NumPrivatizedVars;
 
-    if (!isa<AllocaInst>(VPtr)) {
-      EXPECT_EQ(&VPtr, F->arg_begin());
-      ReplacementValue = &VPtr;
+    if (!isa<AllocaInst>(Orig)) {
+      EXPECT_EQ(&Orig, F->arg_begin());
+      ReplacementValue = &Inner;
       return CodeGenIP;
     }
 
+    // Since the original value is an allocation, it has a pointer type and
+    // therefore no additional wrapping should happen.
+    EXPECT_EQ(&Orig, &Inner);
+
     // Trivial copy (=firstprivate).
     Builder.restoreIP(AllocaIP);
-    Type *VTy = VPtr.getType()->getPointerElementType();
-    Value *V = Builder.CreateLoad(VTy, &VPtr, VPtr.getName() + ".reload");
-    ReplacementValue = Builder.CreateAlloca(VTy, 0, VPtr.getName() + ".copy");
+    Type *VTy = Inner.getType()->getPointerElementType();
+    Value *V = Builder.CreateLoad(VTy, &Inner, Orig.getName() + ".reload");
+    ReplacementValue = Builder.CreateAlloca(VTy, 0, Orig.getName() + ".copy");
     Builder.restoreIP(CodeGenIP);
     Builder.CreateStore(V, ReplacementValue);
     return CodeGenIP;
@@ -708,13 +739,15 @@ TEST_F(OpenMPIRBuilderTest, ParallelIfCond) {
   EXPECT_TRUE(isa<GlobalVariable>(ForkCI->getArgOperand(0)));
   EXPECT_EQ(ForkCI->getArgOperand(1),
             ConstantInt::get(Type::getInt32Ty(Ctx), 1));
-  EXPECT_EQ(ForkCI->getArgOperand(3), F->arg_begin());
+  Value *StoredForkArg = findStoredValue(ForkCI->getArgOperand(3));
+  EXPECT_EQ(StoredForkArg, F->arg_begin());
 
   EXPECT_EQ(DirectCI->getCalledFunction(), OutlinedFn);
   EXPECT_EQ(DirectCI->getNumArgOperands(), 3U);
   EXPECT_TRUE(isa<AllocaInst>(DirectCI->getArgOperand(0)));
   EXPECT_TRUE(isa<AllocaInst>(DirectCI->getArgOperand(1)));
-  EXPECT_EQ(DirectCI->getArgOperand(2), F->arg_begin());
+  Value *StoredDirectArg = findStoredValue(DirectCI->getArgOperand(2));
+  EXPECT_EQ(StoredDirectArg, F->arg_begin());
 }
 
 TEST_F(OpenMPIRBuilderTest, ParallelCancelBarrier) {
@@ -772,7 +805,7 @@ TEST_F(OpenMPIRBuilderTest, ParallelCancelBarrier) {
     ASSERT_EQ(CBFn->user_back()->getNumUses(), 0U);
   };
 
-  auto PrivCB = [&](InsertPointTy, InsertPointTy, Value &V,
+  auto PrivCB = [&](InsertPointTy, InsertPointTy, Value &V, Value &,
                     Value *&) -> InsertPointTy {
     ++NumPrivatizedVars;
     llvm_unreachable("No privatization callback call expected!");
@@ -829,6 +862,85 @@ TEST_F(OpenMPIRBuilderTest, ParallelCancelBarrier) {
   }
 }
 
+TEST_F(OpenMPIRBuilderTest, ParallelForwardAsPointers) {
+  OpenMPIRBuilder OMPBuilder(*M);
+  OMPBuilder.initialize();
+  F->setName("func");
+  IRBuilder<> Builder(BB);
+  OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
+  using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
+
+  Type *I32Ty = Type::getInt32Ty(M->getContext());
+  Type *I32PtrTy = Type::getInt32PtrTy(M->getContext());
+  Type *StructTy = StructType::get(I32Ty, I32PtrTy);
+  Type *StructPtrTy = StructTy->getPointerTo();
+  Type *VoidTy = Type::getVoidTy(M->getContext());
+  FunctionCallee RetI32Func = M->getOrInsertFunction("ret_i32", I32Ty);
+  FunctionCallee TakeI32Func =
+      M->getOrInsertFunction("take_i32", VoidTy, I32Ty);
+  FunctionCallee RetI32PtrFunc = M->getOrInsertFunction("ret_i32ptr", I32PtrTy);
+  FunctionCallee TakeI32PtrFunc =
+      M->getOrInsertFunction("take_i32ptr", VoidTy, I32PtrTy);
+  FunctionCallee RetStructFunc = M->getOrInsertFunction("ret_struct", StructTy);
+  FunctionCallee TakeStructFunc =
+      M->getOrInsertFunction("take_struct", VoidTy, StructTy);
+  FunctionCallee RetStructPtrFunc =
+      M->getOrInsertFunction("ret_structptr", StructPtrTy);
+  FunctionCallee TakeStructPtrFunc =
+      M->getOrInsertFunction("take_structPtr", VoidTy, StructPtrTy);
+  Value *I32Val = Builder.CreateCall(RetI32Func);
+  Value *I32PtrVal = Builder.CreateCall(RetI32PtrFunc);
+  Value *StructVal = Builder.CreateCall(RetStructFunc);
+  Value *StructPtrVal = Builder.CreateCall(RetStructPtrFunc);
+
+  Instruction *Internal;
+  auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
+                       BasicBlock &ContinuationBB) {
+    IRBuilder<>::InsertPointGuard Guard(Builder);
+    Builder.restoreIP(CodeGenIP);
+    Internal = Builder.CreateCall(TakeI32Func, I32Val);
+    Builder.CreateCall(TakeI32PtrFunc, I32PtrVal);
+    Builder.CreateCall(TakeStructFunc, StructVal);
+    Builder.CreateCall(TakeStructPtrFunc, StructPtrVal);
+  };
+  auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &,
+                    Value &Inner, Value *&ReplacementValue) {
+    ReplacementValue = &Inner;
+    return CodeGenIP;
+  };
+  auto FiniCB = [](InsertPointTy) {};
+
+  IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
+                                    F->getEntryBlock().getFirstInsertionPt());
+  IRBuilder<>::InsertPoint AfterIP =
+      OMPBuilder.createParallel(Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB,
+                                nullptr, nullptr, OMP_PROC_BIND_default, false);
+  Builder.restoreIP(AfterIP);
+  Builder.CreateRetVoid();
+
+  OMPBuilder.finalize();
+
+  EXPECT_FALSE(verifyModule(*M, &errs()));
+  Function *OutlinedFn = Internal->getFunction();
+
+  Type *Arg2Type = OutlinedFn->getArg(2)->getType();
+  EXPECT_TRUE(Arg2Type->isPointerTy());
+  EXPECT_EQ(Arg2Type->getPointerElementType(), I32Ty);
+
+  // Arguments that need to be passed through pointers and reloaded will get
+  // used earlier in the functions and therefore will appear first in the
+  // argument list after outlining.
+  Type *Arg3Type = OutlinedFn->getArg(3)->getType();
+  EXPECT_TRUE(Arg3Type->isPointerTy());
+  EXPECT_EQ(Arg3Type->getPointerElementType(), StructTy);
+
+  Type *Arg4Type = OutlinedFn->getArg(4)->getType();
+  EXPECT_EQ(Arg4Type, I32PtrTy);
+
+  Type *Arg5Type = OutlinedFn->getArg(5)->getType();
+  EXPECT_EQ(Arg5Type, StructPtrTy);
+}
+
 TEST_F(OpenMPIRBuilderTest, CanonicalLoopSimple) {
   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
   OpenMPIRBuilder OMPBuilder(*M);

diff  --git a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
index 81dfee1d52cc..8553c5bd851f 100644
--- a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
@@ -433,7 +433,7 @@ ModuleTranslation::convertOmpParallel(Operation &opInst,
   // attribute (shared, private, firstprivate, ...) of variables.
   // Currently defaults to shared.
   auto privCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP,
-                    llvm::Value &vPtr,
+                    llvm::Value &, llvm::Value &vPtr,
                     llvm::Value *&replacementValue) -> InsertPointTy {
     replacementValue = &vPtr;
 


        


More information about the Mlir-commits mailing list