[clang] [llvm] [Clang][OpenMP] Port clang codegen code for openmp project (PR #85795)

via cfe-commits cfe-commits at lists.llvm.org
Tue Mar 19 07:24:44 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang-codegen

@llvm/pr-subscribers-flang-openmp

Author: Harrison (TSWorld1314)

<details>
<summary>Changes</summary>



---
Full diff: https://github.com/llvm/llvm-project/pull/85795.diff


5 Files Affected:

- (modified) clang/include/clang/Basic/LangOptions.def (+1) 
- (modified) clang/lib/CodeGen/CGBuilder.h (+9) 
- (modified) clang/lib/CodeGen/CGDecl.cpp (+108-110) 
- (modified) clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp (+29-16) 
- (modified) llvm/include/llvm/Frontend/OpenMP/OMPKinds.def (+9) 


``````````diff
diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def
index 8ef6700ecdc78e..64b87ecdc97524 100644
--- a/clang/include/clang/Basic/LangOptions.def
+++ b/clang/include/clang/Basic/LangOptions.def
@@ -260,6 +260,7 @@ LANGOPT(OpenMPTargetDebug , 32, 0, "Enable debugging in the OpenMP offloading de
 LANGOPT(OpenMPOptimisticCollapse  , 1, 0, "Use at most 32 bits to represent the collapsed loop nest counter.")
 LANGOPT(OpenMPThreadSubscription  , 1, 0, "Assume work-shared loops do not have more iterations than participating threads.")
 LANGOPT(OpenMPTeamSubscription  , 1, 0, "Assume distributed loops do not have more iterations than participating teams.")
+LANGOPT(OpenMPGlobalizeToGlobalSpace  , 1, 0, "Globalize to global space for the globalized variables")
 LANGOPT(OpenMPNoThreadState  , 1, 0, "Assume that no thread in a parallel region will modify an ICV.")
 LANGOPT(OpenMPNoNestedParallelism  , 1, 0, "Assume that no thread in a parallel region will encounter a parallel region")
 LANGOPT(OpenMPOffloadMandatory  , 1, 0, "Assert that offloading is mandatory and do not create a host fallback.")
diff --git a/clang/lib/CodeGen/CGBuilder.h b/clang/lib/CodeGen/CGBuilder.h
index bf5ab171d720d9..fe5beff05134ac 100644
--- a/clang/lib/CodeGen/CGBuilder.h
+++ b/clang/lib/CodeGen/CGBuilder.h
@@ -152,6 +152,15 @@ class CGBuilderTy : public CGBuilderBaseTy {
                             Addr.isKnownNonNull());
   }
 
+  /// Cast the element type of the given address to a different type,
+  /// preserving information like the alignment and address space.
+  Address CreateElementBitCast(Address Addr, llvm::Type *Ty,
+                               const llvm::Twine &Name = "") {
+    auto *PtrTy = Ty->getPointerTo(Addr.getAddressSpace());
+    return Address(CreateBitCast(Addr.getPointer(), PtrTy, Name), Ty,
+                   Addr.getAlignment(), Addr.isKnownNonNull());
+  }
+  
   using CGBuilderBaseTy::CreatePointerBitCastOrAddrSpaceCast;
   Address CreatePointerBitCastOrAddrSpaceCast(Address Addr, llvm::Type *Ty,
                                               llvm::Type *ElementTy,
diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp
index dc42faf8dbb9fd..691af33dc239d6 100644
--- a/clang/lib/CodeGen/CGDecl.cpp
+++ b/clang/lib/CodeGen/CGDecl.cpp
@@ -2531,48 +2531,7 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, ParamValue Arg,
         (IPD->getParameterKind() == ImplicitParamKind::ThreadPrivateVar);
   }
 
-  Address DeclPtr = Address::invalid();
-  Address AllocaPtr = Address::invalid();
-  bool DoStore = false;
-  bool IsScalar = hasScalarEvaluationKind(Ty);
-  bool UseIndirectDebugAddress = false;
-
-  // If we already have a pointer to the argument, reuse the input pointer.
-  if (Arg.isIndirect()) {
-    DeclPtr = Arg.getIndirectAddress();
-    DeclPtr = DeclPtr.withElementType(ConvertTypeForMem(Ty));
-    // Indirect argument is in alloca address space, which may be different
-    // from the default address space.
-    auto AllocaAS = CGM.getASTAllocaAddressSpace();
-    auto *V = DeclPtr.getPointer();
-    AllocaPtr = DeclPtr;
-
-    // For truly ABI indirect arguments -- those that are not `byval` -- store
-    // the address of the argument on the stack to preserve debug information.
-    ABIArgInfo ArgInfo = CurFnInfo->arguments()[ArgNo - 1].info;
-    if (ArgInfo.isIndirect())
-      UseIndirectDebugAddress = !ArgInfo.getIndirectByVal();
-    if (UseIndirectDebugAddress) {
-      auto PtrTy = getContext().getPointerType(Ty);
-      AllocaPtr = CreateMemTemp(PtrTy, getContext().getTypeAlignInChars(PtrTy),
-                                D.getName() + ".indirect_addr");
-      EmitStoreOfScalar(V, AllocaPtr, /* Volatile */ false, PtrTy);
-    }
-
-    auto SrcLangAS = getLangOpts().OpenCL ? LangAS::opencl_private : AllocaAS;
-    auto DestLangAS =
-        getLangOpts().OpenCL ? LangAS::opencl_private : LangAS::Default;
-    if (SrcLangAS != DestLangAS) {
-      assert(getContext().getTargetAddressSpace(SrcLangAS) ==
-             CGM.getDataLayout().getAllocaAddrSpace());
-      auto DestAS = getContext().getTargetAddressSpace(DestLangAS);
-      auto *T = llvm::PointerType::get(getLLVMContext(), DestAS);
-      DeclPtr =
-          DeclPtr.withPointer(getTargetHooks().performAddrSpaceCast(
-                                  *this, V, SrcLangAS, DestLangAS, T, true),
-                              DeclPtr.isKnownNonNull());
-    }
-
+  auto PushCleanupIfNeeded = [this, Ty, &D](Address DeclPtr) {
     // Push a destructor cleanup for this parameter if the ABI requires it.
     // Don't push a cleanup in a thunk for a method that will also emit a
     // cleanup.
@@ -2588,87 +2547,126 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, ParamValue Arg,
             EHStack.stable_begin();
       }
     }
+  };
+
+  Address DeclPtr = Address::invalid();
+  Address AllocaPtr = Address::invalid();
+  Address OpenMPLocalAddr =
+      getLangOpts().OpenMP
+          ? CGM.getOpenMPRuntime().getAddressOfLocalVariable(*this, &D)
+          : Address::invalid();
+  bool DoStore = false;
+  bool IsScalar = hasScalarEvaluationKind(Ty);
+  bool UseIndirectDebugAddress = false;
+  if (OpenMPLocalAddr.isValid()) {
+    DeclPtr = OpenMPLocalAddr;
+    AllocaPtr = DeclPtr;
+    LValue Dst = MakeAddrLValue(DeclPtr, Ty);
+    if (Arg.isIndirect()) {
+      LValue Src = MakeAddrLValue(Arg.getIndirectAddress(), Ty);
+      callCStructCopyConstructor(Dst, Src);
+      PushCleanupIfNeeded(Arg.getIndirectAddress());
+    } else {
+      EmitStoreOfScalar(Arg.getDirectValue(), Dst, /* isInitialization */ true);
+    }
   } else {
-    // Check if the parameter address is controlled by OpenMP runtime.
-    Address OpenMPLocalAddr =
-        getLangOpts().OpenMP
-            ? CGM.getOpenMPRuntime().getAddressOfLocalVariable(*this, &D)
-            : Address::invalid();
-    if (getLangOpts().OpenMP && OpenMPLocalAddr.isValid()) {
-      DeclPtr = OpenMPLocalAddr;
+    // If we already have a pointer to the argument, reuse the input pointer.
+    if (Arg.isIndirect()) {
+      // If we have a prettier pointer type at this point, bitcast to that.
+      DeclPtr = Arg.getIndirectAddress();
+      DeclPtr = Builder.CreateElementBitCast(DeclPtr, ConvertTypeForMem(Ty),
+                                             D.getName());
+      // Indirect argument is in alloca address space, which may be different
+      // from the default address space.
+      auto AllocaAS = CGM.getASTAllocaAddressSpace();
+      auto *V = DeclPtr.getPointer();
       AllocaPtr = DeclPtr;
+      auto SrcLangAS = getLangOpts().OpenCL ? LangAS::opencl_private : AllocaAS;
+      auto DestLangAS =
+          getLangOpts().OpenCL ? LangAS::opencl_private : LangAS::Default;
+      if (SrcLangAS != DestLangAS) {
+        assert(getContext().getTargetAddressSpace(SrcLangAS) ==
+               CGM.getDataLayout().getAllocaAddrSpace());
+        auto DestAS = getContext().getTargetAddressSpace(DestLangAS);
+        auto *T = DeclPtr.getElementType()->getPointerTo(DestAS);
+        DeclPtr =
+            DeclPtr.withPointer(getTargetHooks().performAddrSpaceCast(
+                                    *this, V, SrcLangAS, DestLangAS, T, true),
+                                DeclPtr.isKnownNonNull());
+      }
+      PushCleanupIfNeeded(DeclPtr);
     } else {
-      // Otherwise, create a temporary to hold the value.
+      // Create a temporary to hold the value.
       DeclPtr = CreateMemTemp(Ty, getContext().getDeclAlign(&D),
                               D.getName() + ".addr", &AllocaPtr);
+      DoStore = true;
     }
-    DoStore = true;
-  }
-
-  llvm::Value *ArgVal = (DoStore ? Arg.getDirectValue() : nullptr);
-
-  LValue lv = MakeAddrLValue(DeclPtr, Ty);
-  if (IsScalar) {
-    Qualifiers qs = Ty.getQualifiers();
-    if (Qualifiers::ObjCLifetime lt = qs.getObjCLifetime()) {
-      // We honor __attribute__((ns_consumed)) for types with lifetime.
-      // For __strong, it's handled by just skipping the initial retain;
-      // otherwise we have to balance out the initial +1 with an extra
-      // cleanup to do the release at the end of the function.
-      bool isConsumed = D.hasAttr<NSConsumedAttr>();
-
-      // If a parameter is pseudo-strong then we can omit the implicit retain.
-      if (D.isARCPseudoStrong()) {
-        assert(lt == Qualifiers::OCL_Strong &&
-               "pseudo-strong variable isn't strong?");
-        assert(qs.hasConst() && "pseudo-strong variable should be const!");
-        lt = Qualifiers::OCL_ExplicitNone;
-      }
 
-      // Load objects passed indirectly.
-      if (Arg.isIndirect() && !ArgVal)
-        ArgVal = Builder.CreateLoad(DeclPtr);
-
-      if (lt == Qualifiers::OCL_Strong) {
-        if (!isConsumed) {
-          if (CGM.getCodeGenOpts().OptimizationLevel == 0) {
-            // use objc_storeStrong(&dest, value) for retaining the
-            // object. But first, store a null into 'dest' because
-            // objc_storeStrong attempts to release its old value.
-            llvm::Value *Null = CGM.EmitNullConstant(D.getType());
-            EmitStoreOfScalar(Null, lv, /* isInitialization */ true);
-            EmitARCStoreStrongCall(lv.getAddress(*this), ArgVal, true);
-            DoStore = false;
-          }
-          else
-          // Don't use objc_retainBlock for block pointers, because we
-          // don't want to Block_copy something just because we got it
-          // as a parameter.
-            ArgVal = EmitARCRetainNonBlock(ArgVal);
-        }
-      } else {
-        // Push the cleanup for a consumed parameter.
-        if (isConsumed) {
-          ARCPreciseLifetime_t precise = (D.hasAttr<ObjCPreciseLifetimeAttr>()
-                                ? ARCPreciseLifetime : ARCImpreciseLifetime);
-          EHStack.pushCleanup<ConsumeARCParameter>(getARCCleanupKind(), ArgVal,
-                                                   precise);
+    llvm::Value *ArgVal = (DoStore ? Arg.getDirectValue() : nullptr);
+
+    LValue lv = MakeAddrLValue(DeclPtr, Ty);
+    if (IsScalar) {
+      Qualifiers qs = Ty.getQualifiers();
+      if (Qualifiers::ObjCLifetime lt = qs.getObjCLifetime()) {
+        // We honor __attribute__((ns_consumed)) for types with lifetime.
+        // For __strong, it's handled by just skipping the initial retain;
+        // otherwise we have to balance out the initial +1 with an extra
+        // cleanup to do the release at the end of the function.
+        bool isConsumed = D.hasAttr<NSConsumedAttr>();
+
+        // If a parameter is pseudo-strong then we can omit the implicit retain.
+        if (D.isARCPseudoStrong()) {
+          assert(lt == Qualifiers::OCL_Strong &&
+                 "pseudo-strong variable isn't strong?");
+          assert(qs.hasConst() && "pseudo-strong variable should be const!");
+          lt = Qualifiers::OCL_ExplicitNone;
         }
 
-        if (lt == Qualifiers::OCL_Weak) {
-          EmitARCInitWeak(DeclPtr, ArgVal);
-          DoStore = false; // The weak init is a store, no need to do two.
+        // Load objects passed indirectly.
+        if (Arg.isIndirect() && !ArgVal)
+          ArgVal = Builder.CreateLoad(DeclPtr);
+
+        if (lt == Qualifiers::OCL_Strong) {
+          if (!isConsumed) {
+            if (CGM.getCodeGenOpts().OptimizationLevel == 0) {
+              // use objc_storeStrong(&dest, value) for retaining the
+              // object. But first, store a null into 'dest' because
+              // objc_storeStrong attempts to release its old value.
+              llvm::Value *Null = CGM.EmitNullConstant(D.getType());
+              EmitStoreOfScalar(Null, lv, /* isInitialization */ true);
+              EmitARCStoreStrongCall(lv.getAddress(*this), ArgVal, true);
+              DoStore = false;
+            } else
+              // Don't use objc_retainBlock for block pointers, because we
+              // don't want to Block_copy something just because we got it
+              // as a parameter.
+              ArgVal = EmitARCRetainNonBlock(ArgVal);
+          }
+        } else {
+          // Push the cleanup for a consumed parameter.
+          if (isConsumed) {
+            ARCPreciseLifetime_t precise =
+                (D.hasAttr<ObjCPreciseLifetimeAttr>() ? ARCPreciseLifetime
+                                                      : ARCImpreciseLifetime);
+            EHStack.pushCleanup<ConsumeARCParameter>(getARCCleanupKind(),
+                                                     ArgVal, precise);
+          }
+
+          if (lt == Qualifiers::OCL_Weak) {
+            EmitARCInitWeak(DeclPtr, ArgVal);
+            DoStore = false; // The weak init is a store, no need to do two.
+          }
         }
-      }
 
-      // Enter the cleanup scope.
-      EmitAutoVarWithLifetime(*this, D, DeclPtr, lt);
+        // Enter the cleanup scope.
+        EmitAutoVarWithLifetime(*this, D, DeclPtr, lt);
+      }
     }
-  }
 
-  // Store the initial value into the alloca.
-  if (DoStore)
-    EmitStoreOfScalar(ArgVal, lv, /* isInitialization */ true);
+    // Store the initial value into the alloca.
+    if (DoStore)
+      EmitStoreOfScalar(ArgVal, lv, /* isInitialization */ true);
+  }
 
   setAddrOfLocalVar(&D, DeclPtr);
 
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
index 299ee1460b3db0..8f0c7caa2f3b4b 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -1083,10 +1083,12 @@ void CGOpenMPRuntimeGPU::emitGenericVarsProlog(CodeGenFunction &CGF,
 
     // Allocate space for the variable to be globalized
     llvm::Value *AllocArgs[] = {CGF.getTypeSize(VD->getType())};
-    llvm::CallBase *VoidPtr =
-        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
-                                CGM.getModule(), OMPRTL___kmpc_alloc_shared),
-                            AllocArgs, VD->getName());
+    llvm::CallBase *VoidPtr = CGF.EmitRuntimeCall(
+        OMPBuilder.getOrCreateRuntimeFunction(
+            CGM.getModule(), CGM.getLangOpts().OpenMPGlobalizeToGlobalSpace
+                                 ? OMPRTL_malloc
+                                 : OMPRTL___kmpc_alloc_shared),
+        AllocArgs, VD->getName());
     // FIXME: We should use the variables actual alignment as an argument.
     VoidPtr->addRetAttr(llvm::Attribute::get(
         CGM.getLLVMContext(), llvm::Attribute::Alignment,
@@ -1149,10 +1151,12 @@ CGOpenMPRuntimeGPU::getKmpcAllocShared(CodeGenFunction &CGF,
 
   // Allocate space for this VLA object to be globalized.
   llvm::Value *AllocArgs[] = {Size};
-  llvm::CallBase *VoidPtr =
-      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
-                              CGM.getModule(), OMPRTL___kmpc_alloc_shared),
-                          AllocArgs, VD->getName());
+  llvm::CallBase *VoidPtr = CGF.EmitRuntimeCall(
+        OMPBuilder.getOrCreateRuntimeFunction(
+            CGM.getModule(), CGM.getLangOpts().OpenMPGlobalizeToGlobalSpace
+                                 ? OMPRTL_malloc
+                                 : OMPRTL___kmpc_alloc_shared),
+        AllocArgs, VD->getName());
   VoidPtr->addRetAttr(llvm::Attribute::get(
       CGM.getLLVMContext(), llvm::Attribute::Alignment, Align.getQuantity()));
 
@@ -1178,20 +1182,29 @@ void CGOpenMPRuntimeGPU::emitGenericVarsEpilog(CodeGenFunction &CGF) {
     // globalized in the prolog (i.e. emitGenericVarsProlog).
     for (const auto &AddrSizePair :
          llvm::reverse(I->getSecond().EscapedVariableLengthDeclsAddrs)) {
-      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
-                              CGM.getModule(), OMPRTL___kmpc_free_shared),
-                          {AddrSizePair.first, AddrSizePair.second});
+      if (CGM.getLangOpts().OpenMPGlobalizeToGlobalSpace)
+        CGF.EmitRuntimeCall(
+            OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), OMPRTL_free),
+            {AddrSizePair.first});
+      else
+        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+                                CGM.getModule(), OMPRTL___kmpc_free_shared),
+                            {AddrSizePair.first, AddrSizePair.second});
     }
     // Deallocate the memory for each globalized value
     for (auto &Rec : llvm::reverse(I->getSecond().LocalVarData)) {
       const auto *VD = cast<VarDecl>(Rec.first);
       I->getSecond().MappedParams->restore(CGF);
 
-      llvm::Value *FreeArgs[] = {Rec.second.GlobalizedVal,
-                                 CGF.getTypeSize(VD->getType())};
-      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
-                              CGM.getModule(), OMPRTL___kmpc_free_shared),
-                          FreeArgs);
+      if (CGM.getLangOpts().OpenMPGlobalizeToGlobalSpace)
+        CGF.EmitRuntimeCall(
+            OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), OMPRTL_free),
+            {Rec.second.GlobalizedVal});
+      else
+        CGF.EmitRuntimeCall(
+            OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
+                                                  OMPRTL___kmpc_free_shared),
+            {Rec.second.GlobalizedVal, CGF.getTypeSize(VD->getType())});
     }
   }
 }
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
index d22d2a8e948b00..90d5d197396749 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
@@ -227,7 +227,9 @@ __OMP_RTL(__kmpc_get_hardware_num_threads_in_block, false, Int32, )
 __OMP_RTL(__kmpc_get_warp_size, false, Int32, )
 
 __OMP_RTL(omp_get_thread_num, false, Int32, )
+__OMP_RTL(omp_get_bulk_thread_num, false, Int32, )
 __OMP_RTL(omp_get_num_threads, false, Int32, )
+__OMP_RTL(omp_get_bulk_num_threads, false, Int32, )
 __OMP_RTL(omp_get_max_threads, false, Int32, )
 __OMP_RTL(omp_in_parallel, false, Int32, )
 __OMP_RTL(omp_get_dynamic, false, Int32, )
@@ -490,6 +492,8 @@ __OMP_RTL(__kmpc_reduction_get_fixed_buffer, false, VoidPtr, )
 
 __OMP_RTL(__kmpc_shuffle_int64, false, Int64, Int64, Int16, Int16)
 
+__OMP_RTL(malloc, false, VoidPtr, SizeTy)
+__OMP_RTL(free, false, Void, VoidPtr)
 __OMP_RTL(__kmpc_alloc_shared, false, VoidPtr, SizeTy)
 __OMP_RTL(__kmpc_free_shared, false, Void, VoidPtr, SizeTy)
 __OMP_RTL(__kmpc_begin_sharing_variables, false, Void, VoidPtrPtrPtr, SizeTy)
@@ -503,6 +507,9 @@ __OMP_RTL(__kmpc_barrier_simple_generic, false, Void, IdentPtr, Int32)
 __OMP_RTL(__kmpc_warp_active_thread_mask, false, Int64,)
 __OMP_RTL(__kmpc_syncwarp, false, Void, Int64)
 
+__OMP_RTL(__kmpc_launch_parallel_51_kernel, false, Void, Int8Ptr, Int32, Int32,
+          Int32, VoidPtrPtr, Int64)
+
 __OMP_RTL(__last, false, Void, )
 
 #undef __OMP_RTL
@@ -710,6 +717,8 @@ __OMP_RTL_ATTRS(__kmpc_get_warp_size, GetterAttrs, ZExt, ParamAttrs())
 
 __OMP_RTL_ATTRS(omp_get_thread_num, GetterAttrs, SExt, ParamAttrs())
 __OMP_RTL_ATTRS(omp_get_num_threads, GetterAttrs, SExt, ParamAttrs())
+__OMP_RTL_ATTRS(omp_get_bulk_thread_num, GetterAttrs, SExt, ParamAttrs())
+__OMP_RTL_ATTRS(omp_get_bulk_num_threads, GetterAttrs, SExt, ParamAttrs())
 __OMP_RTL_ATTRS(omp_get_max_threads, GetterAttrs, SExt, ParamAttrs())
 __OMP_RTL_ATTRS(omp_in_parallel, GetterAttrs, SExt, ParamAttrs())
 __OMP_RTL_ATTRS(omp_get_dynamic, GetterAttrs, SExt, ParamAttrs())

``````````

</details>


https://github.com/llvm/llvm-project/pull/85795


More information about the cfe-commits mailing list