[clang] 2c8b912 - Revert "[PGO][OpenMP] Instrumentation for GPU devices (#76587)"

Ethan Luis McDonough via cfe-commits cfe-commits at lists.llvm.org
Fri Jun 28 10:31:24 PDT 2024


Author: Ethan Luis McDonough
Date: 2024-06-28T12:30:45-05:00
New Revision: 2c8b912f630f9ec647a4870b9c5ee922c2ec1298

URL: https://github.com/llvm/llvm-project/commit/2c8b912f630f9ec647a4870b9c5ee922c2ec1298
DIFF: https://github.com/llvm/llvm-project/commit/2c8b912f630f9ec647a4870b9c5ee922c2ec1298.diff

LOG: Revert "[PGO][OpenMP] Instrumentation for GPU devices (#76587)"

This reverts commit 5fd2af38e461445c583d7ffc2fe23858966eee76. It caused build issues and broke the buildbot.

Added: 
    

Modified: 
    clang/lib/CodeGen/CodeGenPGO.cpp
    llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
    llvm/include/llvm/ProfileData/InstrProf.h
    llvm/lib/ProfileData/InstrProf.cpp
    llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
    llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
    offload/DeviceRTL/CMakeLists.txt
    offload/plugins-nextgen/common/include/GlobalHandler.h
    offload/plugins-nextgen/common/src/GlobalHandler.cpp
    offload/plugins-nextgen/common/src/PluginInterface.cpp
    offload/test/CMakeLists.txt
    offload/test/lit.cfg
    offload/test/lit.site.cfg.in

Removed: 
    offload/DeviceRTL/include/Profiling.h
    offload/DeviceRTL/src/Profiling.cpp
    offload/test/offloading/pgo1.c


################################################################################
diff  --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp
index a628082851938..ea726b5708a4a 100644
--- a/clang/lib/CodeGen/CodeGenPGO.cpp
+++ b/clang/lib/CodeGen/CodeGenPGO.cpp
@@ -1193,15 +1193,10 @@ void CodeGenPGO::emitCounterSetOrIncrement(CGBuilderTy &Builder, const Stmt *S,
 
   unsigned Counter = (*RegionCounterMap)[S];
 
-  // Make sure that pointer to global is passed in with zero addrspace
-  // This is relevant during GPU profiling
-  auto *NormalizedFuncNameVarPtr =
-      llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(
-          FuncNameVar, llvm::PointerType::get(CGM.getLLVMContext(), 0));
-
-  llvm::Value *Args[] = {
-      NormalizedFuncNameVarPtr, Builder.getInt64(FunctionHash),
-      Builder.getInt32(NumRegionCounters), Builder.getInt32(Counter), StepV};
+  llvm::Value *Args[] = {FuncNameVar,
+                         Builder.getInt64(FunctionHash),
+                         Builder.getInt32(NumRegionCounters),
+                         Builder.getInt32(Counter), StepV};
 
   if (llvm::EnableSingleByteCoverage)
     Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::instrprof_cover),

diff  --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
index 51e97458825ac..fe09bb8177c28 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
@@ -502,9 +502,6 @@ __OMP_RTL(__kmpc_barrier_simple_generic, false, Void, IdentPtr, Int32)
 __OMP_RTL(__kmpc_warp_active_thread_mask, false, Int64,)
 __OMP_RTL(__kmpc_syncwarp, false, Void, Int64)
 
-__OMP_RTL(__llvm_profile_register_function, false, Void, VoidPtr)
-__OMP_RTL(__llvm_profile_register_names_function, false, Void, VoidPtr, Int64)
-
 __OMP_RTL(__last, false, Void, )
 
 #undef __OMP_RTL

diff  --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h
index 818a34bfddfb9..7fa6d44990a14 100644
--- a/llvm/include/llvm/ProfileData/InstrProf.h
+++ b/llvm/include/llvm/ProfileData/InstrProf.h
@@ -177,10 +177,6 @@ inline StringRef getInstrProfCounterBiasVarName() {
 /// Return the marker used to separate PGO names during serialization.
 inline StringRef getInstrProfNameSeparator() { return "\01"; }
 
-/// Determines whether module targets a GPU eligable for PGO
-/// instrumentation
-bool isGPUProfTarget(const Module &M);
-
 /// Please use getIRPGOFuncName for LLVM IR instrumentation. This function is
 /// for front-end (Clang, etc) instrumentation.
 /// Return the modified name for function \c F suitable to be

diff  --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp
index 1b9a5249cbae5..c7749f33d9af5 100644
--- a/llvm/lib/ProfileData/InstrProf.cpp
+++ b/llvm/lib/ProfileData/InstrProf.cpp
@@ -432,31 +432,13 @@ std::string getPGOFuncNameVarName(StringRef FuncName,
   return VarName;
 }
 
-bool isGPUProfTarget(const Module &M) {
-  const auto &T = Triple(M.getTargetTriple());
-  return T.isAMDGPU() || T.isNVPTX();
-}
-
-void setPGOFuncVisibility(Module &M, GlobalVariable *FuncNameVar) {
-  // If the target is a GPU, make the symbol protected so it can
-  // be read from the host device
-  if (isGPUProfTarget(M))
-    FuncNameVar->setVisibility(GlobalValue::ProtectedVisibility);
-  // Hide the symbol so that we correctly get a copy for each executable.
-  else if (!GlobalValue::isLocalLinkage(FuncNameVar->getLinkage()))
-    FuncNameVar->setVisibility(GlobalValue::HiddenVisibility);
-}
-
 GlobalVariable *createPGOFuncNameVar(Module &M,
                                      GlobalValue::LinkageTypes Linkage,
                                      StringRef PGOFuncName) {
-  // Ensure profiling variables on GPU are visible to be read from host
-  if (isGPUProfTarget(M))
-    Linkage = GlobalValue::ExternalLinkage;
   // We generally want to match the function's linkage, but available_externally
   // and extern_weak both have the wrong semantics, and anything that doesn't
   // need to link across compilation units doesn't need to be visible at all.
-  else if (Linkage == GlobalValue::ExternalWeakLinkage)
+  if (Linkage == GlobalValue::ExternalWeakLinkage)
     Linkage = GlobalValue::LinkOnceAnyLinkage;
   else if (Linkage == GlobalValue::AvailableExternallyLinkage)
     Linkage = GlobalValue::LinkOnceODRLinkage;
@@ -470,7 +452,10 @@ GlobalVariable *createPGOFuncNameVar(Module &M,
       new GlobalVariable(M, Value->getType(), true, Linkage, Value,
                          getPGOFuncNameVarName(PGOFuncName, Linkage));
 
-  setPGOFuncVisibility(M, FuncNameVar);
+  // Hide the symbol so that we correctly get a copy for each executable.
+  if (!GlobalValue::isLocalLinkage(FuncNameVar->getLinkage()))
+    FuncNameVar->setVisibility(GlobalValue::HiddenVisibility);
+
   return FuncNameVar;
 }
 

diff  --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
index 7caf71bd11713..f994f8a62c320 100644
--- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -879,8 +879,6 @@ void InstrLowerer::lowerValueProfileInst(InstrProfValueProfileInst *Ind) {
                       llvm::InstrProfValueKind::IPVK_MemOPSize);
   CallInst *Call = nullptr;
   auto *TLI = &GetTLI(*Ind->getFunction());
-  auto *NormalizedDataVarPtr = ConstantExpr::getPointerBitCastOrAddrSpaceCast(
-      DataVar, PointerType::get(M.getContext(), 0));
 
   // To support value profiling calls within Windows exception handlers, funclet
   // information contained within operand bundles needs to be copied over to
@@ -889,13 +887,11 @@ void InstrLowerer::lowerValueProfileInst(InstrProfValueProfileInst *Ind) {
   SmallVector<OperandBundleDef, 1> OpBundles;
   Ind->getOperandBundlesAsDefs(OpBundles);
   if (!IsMemOpSize) {
-    Value *Args[3] = {Ind->getTargetValue(), NormalizedDataVarPtr,
-                      Builder.getInt32(Index)};
+    Value *Args[3] = {Ind->getTargetValue(), DataVar, Builder.getInt32(Index)};
     Call = Builder.CreateCall(getOrInsertValueProfilingCall(M, *TLI), Args,
                               OpBundles);
   } else {
-    Value *Args[3] = {Ind->getTargetValue(), NormalizedDataVarPtr,
-                      Builder.getInt32(Index)};
+    Value *Args[3] = {Ind->getTargetValue(), DataVar, Builder.getInt32(Index)};
     Call = Builder.CreateCall(
         getOrInsertValueProfilingCall(M, *TLI, ValueProfilingCallType::MemOp),
         Args, OpBundles);
@@ -1620,8 +1616,7 @@ void InstrLowerer::createDataVariable(InstrProfCntrInstBase *Inc) {
         getInstrProfSectionName(IPSK_vals, TT.getObjectFormat()));
     ValuesVar->setAlignment(Align(8));
     maybeSetComdat(ValuesVar, Fn, CntsVarName);
-    ValuesPtrExpr = ConstantExpr::getPointerBitCastOrAddrSpaceCast(
-        ValuesVar, PointerType::get(Fn->getContext(), 0));
+    ValuesPtrExpr = ValuesVar;
   }
 
   uint64_t NumCounters = Inc->getNumCounters()->getZExtValue();
@@ -1645,10 +1640,6 @@ void InstrLowerer::createDataVariable(InstrProfCntrInstBase *Inc) {
   for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
     Int16ArrayVals[Kind] = ConstantInt::get(Int16Ty, PD.NumValueSites[Kind]);
 
-  if (isGPUProfTarget(M)) {
-    Linkage = GlobalValue::ExternalLinkage;
-    Visibility = GlobalValue::ProtectedVisibility;
-  }
   // If the data variable is not referenced by code (if we don't emit
   // @llvm.instrprof.value.profile, NS will be 0), and the counter keeps the
   // data variable live under linker GC, the data variable can be private. This
@@ -1660,9 +1651,9 @@ void InstrLowerer::createDataVariable(InstrProfCntrInstBase *Inc) {
   // If profd is in a deduplicate comdat, NS==0 with a hash suffix guarantees
   // that other copies must have the same CFG and cannot have value profiling.
   // If no hash suffix, other profd copies may be referenced by code.
-  else if (NS == 0 && !(DataReferencedByCode && NeedComdat && !Renamed) &&
-           (TT.isOSBinFormatELF() ||
-            (!DataReferencedByCode && TT.isOSBinFormatCOFF()))) {
+  if (NS == 0 && !(DataReferencedByCode && NeedComdat && !Renamed) &&
+      (TT.isOSBinFormatELF() ||
+       (!DataReferencedByCode && TT.isOSBinFormatCOFF()))) {
     Linkage = GlobalValue::PrivateLinkage;
     Visibility = GlobalValue::DefaultVisibility;
   }
@@ -1785,13 +1776,6 @@ void InstrLowerer::emitNameData() {
   NamesVar = new GlobalVariable(M, NamesVal->getType(), true,
                                 GlobalValue::PrivateLinkage, NamesVal,
                                 getInstrProfNamesVarName());
-
-  // Make names variable public if current target is a GPU
-  if (isGPUProfTarget(M)) {
-    NamesVar->setLinkage(GlobalValue::ExternalLinkage);
-    NamesVar->setVisibility(GlobalValue::VisibilityTypes::ProtectedVisibility);
-  }
-
   NamesSize = CompressedNameStr.size();
   setGlobalVariableLargeSection(TT, *NamesVar);
   NamesVar->setSection(
@@ -1858,13 +1842,10 @@ void InstrLowerer::emitRegistration() {
   IRBuilder<> IRB(BasicBlock::Create(M.getContext(), "", RegisterF));
   for (Value *Data : CompilerUsedVars)
     if (!isa<Function>(Data))
-      // Check for addrspace cast when profiling GPU
-      IRB.CreateCall(RuntimeRegisterF,
-                     IRB.CreatePointerBitCastOrAddrSpaceCast(Data, VoidPtrTy));
+      IRB.CreateCall(RuntimeRegisterF, Data);
   for (Value *Data : UsedVars)
     if (Data != NamesVar && !isa<Function>(Data))
-      IRB.CreateCall(RuntimeRegisterF,
-                     IRB.CreatePointerBitCastOrAddrSpaceCast(Data, VoidPtrTy));
+      IRB.CreateCall(RuntimeRegisterF, Data);
 
   if (NamesVar) {
     Type *ParamTypes[] = {VoidPtrTy, Int64Ty};
@@ -1873,9 +1854,7 @@ void InstrLowerer::emitRegistration() {
     auto *NamesRegisterF =
         Function::Create(NamesRegisterTy, GlobalVariable::ExternalLinkage,
                          getInstrProfNamesRegFuncName(), M);
-    IRB.CreateCall(NamesRegisterF, {IRB.CreatePointerBitCastOrAddrSpaceCast(
-                                        NamesVar, VoidPtrTy),
-                                    IRB.getInt64(NamesSize)});
+    IRB.CreateCall(NamesRegisterF, {NamesVar, IRB.getInt64(NamesSize)});
   }
 
   IRB.CreateRetVoid();
@@ -1896,10 +1875,7 @@ bool InstrLowerer::emitRuntimeHook() {
   auto *Var =
       new GlobalVariable(M, Int32Ty, false, GlobalValue::ExternalLinkage,
                          nullptr, getInstrProfRuntimeHookVarName());
-  if (isGPUProfTarget(M))
-    Var->setVisibility(GlobalValue::ProtectedVisibility);
-  else
-    Var->setVisibility(GlobalValue::HiddenVisibility);
+  Var->setVisibility(GlobalValue::HiddenVisibility);
 
   if (TT.isOSBinFormatELF() && !TT.isPS()) {
     // Mark the user variable as used so that it isn't stripped out.

diff  --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index fca78d769a1eb..572d37a2b3e55 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -883,10 +883,6 @@ static void instrumentOneFunc(
   auto Name = FuncInfo.FuncNameVar;
   auto CFGHash = ConstantInt::get(Type::getInt64Ty(M->getContext()),
                                   FuncInfo.FunctionHash);
-  // Make sure that pointer to global is passed in with zero addrspace
-  // This is relevant during GPU profiling
-  auto *NormalizedNamePtr = ConstantExpr::getPointerBitCastOrAddrSpaceCast(
-      Name, PointerType::get(M->getContext(), 0));
   if (PGOFunctionEntryCoverage) {
     auto &EntryBB = F.getEntryBlock();
     IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt());
@@ -894,7 +890,7 @@ static void instrumentOneFunc(
     //                      i32 <index>)
     Builder.CreateCall(
         Intrinsic::getDeclaration(M, Intrinsic::instrprof_cover),
-        {NormalizedNamePtr, CFGHash, Builder.getInt32(1), Builder.getInt32(0)});
+        {Name, CFGHash, Builder.getInt32(1), Builder.getInt32(0)});
     return;
   }
 
@@ -949,8 +945,7 @@ static void instrumentOneFunc(
     //                          i32 <index>)
     Builder.CreateCall(
         Intrinsic::getDeclaration(M, Intrinsic::instrprof_timestamp),
-        {NormalizedNamePtr, CFGHash, Builder.getInt32(NumCounters),
-         Builder.getInt32(I)});
+        {Name, CFGHash, Builder.getInt32(NumCounters), Builder.getInt32(I)});
     I += PGOBlockCoverage ? 8 : 1;
   }
 
@@ -964,8 +959,7 @@ static void instrumentOneFunc(
         Intrinsic::getDeclaration(M, PGOBlockCoverage
                                          ? Intrinsic::instrprof_cover
                                          : Intrinsic::instrprof_increment),
-        {NormalizedNamePtr, CFGHash, Builder.getInt32(NumCounters),
-         Builder.getInt32(I++)});
+        {Name, CFGHash, Builder.getInt32(NumCounters), Builder.getInt32(I++)});
   }
 
   // Now instrument select instructions:
@@ -1008,14 +1002,11 @@ static void instrumentOneFunc(
         ToProfile = Builder.CreatePtrToInt(Cand.V, Builder.getInt64Ty());
       assert(ToProfile && "value profiling Value is of unexpected type");
 
-      auto *NormalizedNamePtr = ConstantExpr::getPointerBitCastOrAddrSpaceCast(
-          Name, PointerType::get(M->getContext(), 0));
-
       SmallVector<OperandBundleDef, 1> OpBundles;
       populateEHOperandBundle(Cand, BlockColors, OpBundles);
       Builder.CreateCall(
           Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile),
-          {NormalizedNamePtr, Builder.getInt64(FuncInfo.FunctionHash),
+          {FuncInfo.FuncNameVar, Builder.getInt64(FuncInfo.FunctionHash),
            ToProfile, Builder.getInt32(Kind), Builder.getInt32(SiteIndex++)},
           OpBundles);
     }
@@ -1690,13 +1681,10 @@ void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) {
   IRBuilder<> Builder(&SI);
   Type *Int64Ty = Builder.getInt64Ty();
   auto *Step = Builder.CreateZExt(SI.getCondition(), Int64Ty);
-  auto *NormalizedFuncNameVarPtr =
-      ConstantExpr::getPointerBitCastOrAddrSpaceCast(
-          FuncNameVar, PointerType::get(M->getContext(), 0));
   Builder.CreateCall(
       Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment_step),
-      {NormalizedFuncNameVarPtr, Builder.getInt64(FuncHash),
-       Builder.getInt32(TotalNumCtrs), Builder.getInt32(*CurCtrIdx), Step});
+      {FuncNameVar, Builder.getInt64(FuncHash), Builder.getInt32(TotalNumCtrs),
+       Builder.getInt32(*CurCtrIdx), Step});
   ++(*CurCtrIdx);
 }
 

diff  --git a/offload/DeviceRTL/CMakeLists.txt b/offload/DeviceRTL/CMakeLists.txt
index c4cfbd0827fe6..d88430a52b8b7 100644
--- a/offload/DeviceRTL/CMakeLists.txt
+++ b/offload/DeviceRTL/CMakeLists.txt
@@ -77,7 +77,6 @@ set(include_files
   ${include_directory}/Interface.h
   ${include_directory}/LibC.h
   ${include_directory}/Mapping.h
-  ${include_directory}/Profiling.h
   ${include_directory}/State.h
   ${include_directory}/Synchronization.h
   ${include_directory}/Types.h
@@ -93,7 +92,6 @@ set(src_files
   ${source_directory}/Mapping.cpp
   ${source_directory}/Misc.cpp
   ${source_directory}/Parallelism.cpp
-  ${source_directory}/Profiling.cpp
   ${source_directory}/Reduction.cpp
   ${source_directory}/State.cpp
   ${source_directory}/Synchronization.cpp

diff  --git a/offload/DeviceRTL/include/Profiling.h b/offload/DeviceRTL/include/Profiling.h
deleted file mode 100644
index d994752254121..0000000000000
--- a/offload/DeviceRTL/include/Profiling.h
+++ /dev/null
@@ -1,21 +0,0 @@
-//===-------- Profiling.h - OpenMP interface ---------------------- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef OMPTARGET_DEVICERTL_PROFILING_H
-#define OMPTARGET_DEVICERTL_PROFILING_H
-
-extern "C" {
-void __llvm_profile_register_function(void *Ptr);
-void __llvm_profile_register_names_function(void *Ptr, long int I);
-void __llvm_profile_instrument_memop(long int I, void *Ptr, int I2);
-}
-
-#endif

diff  --git a/offload/DeviceRTL/src/Profiling.cpp b/offload/DeviceRTL/src/Profiling.cpp
deleted file mode 100644
index bb3caaadcc03d..0000000000000
--- a/offload/DeviceRTL/src/Profiling.cpp
+++ /dev/null
@@ -1,22 +0,0 @@
-//===------- Profiling.cpp ---------------------------------------- C++ ---===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "Profiling.h"
-
-#pragma omp begin declare target device_type(nohost)
-
-extern "C" {
-
-// Provides empty implementations for certain functions in compiler-rt
-// that are emitted by the PGO instrumentation.
-void __llvm_profile_register_function(void *Ptr) {}
-void __llvm_profile_register_names_function(void *Ptr, long int I) {}
-void __llvm_profile_instrument_memop(long int I, void *Ptr, int I2) {}
-}
-
-#pragma omp end declare target

diff  --git a/offload/plugins-nextgen/common/include/GlobalHandler.h b/offload/plugins-nextgen/common/include/GlobalHandler.h
index d2914e7cd0eb4..829b4b7291193 100644
--- a/offload/plugins-nextgen/common/include/GlobalHandler.h
+++ b/offload/plugins-nextgen/common/include/GlobalHandler.h
@@ -13,11 +13,10 @@
 #ifndef LLVM_OPENMP_LIBOMPTARGET_PLUGINS_NEXTGEN_COMMON_GLOBALHANDLER_H
 #define LLVM_OPENMP_LIBOMPTARGET_PLUGINS_NEXTGEN_COMMON_GLOBALHANDLER_H
 
-#include <type_traits>
+#include <string>
 
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/Object/ELFObjectFile.h"
-#include "llvm/ProfileData/InstrProf.h"
 
 #include "Shared/Debug.h"
 #include "Shared/Utils.h"
@@ -56,23 +55,6 @@ class GlobalTy {
   void setPtr(void *P) { Ptr = P; }
 };
 
-using IntPtrT = void *;
-struct __llvm_profile_data {
-#define INSTR_PROF_DATA(Type, LLVMType, Name, Initializer)                     \
-  std::remove_const<Type>::type Name;
-#include "llvm/ProfileData/InstrProfData.inc"
-};
-
-/// PGO profiling data extracted from a GPU device
-struct GPUProfGlobals {
-  SmallVector<uint8_t> NamesData;
-  SmallVector<SmallVector<int64_t>> Counts;
-  SmallVector<__llvm_profile_data> Data;
-  Triple TargetTriple;
-
-  void dump() const;
-};
-
 /// Subclass of GlobalTy that holds the memory for a global of \p Ty.
 template <typename Ty> class StaticGlobalTy : public GlobalTy {
   Ty Data;
@@ -182,15 +164,6 @@ class GenericGlobalHandlerTy {
     return moveGlobalBetweenDeviceAndHost(Device, Image, HostGlobal,
                                           /*D2H=*/false);
   }
-
-  /// Checks whether a given image contains profiling globals.
-  bool hasProfilingGlobals(GenericDeviceTy &Device, DeviceImageTy &Image);
-
-  /// Reads profiling data from a GPU image to supplied profdata struct.
-  /// Iterates through the image symbol table and stores global values
-  /// with profiling prefixes.
-  Expected<GPUProfGlobals> readProfilingGlobals(GenericDeviceTy &Device,
-                                                DeviceImageTy &Image);
 };
 
 } // namespace plugin

diff  --git a/offload/plugins-nextgen/common/src/GlobalHandler.cpp b/offload/plugins-nextgen/common/src/GlobalHandler.cpp
index 7717e19a5b677..ba0aa47f8e51c 100644
--- a/offload/plugins-nextgen/common/src/GlobalHandler.cpp
+++ b/offload/plugins-nextgen/common/src/GlobalHandler.cpp
@@ -19,7 +19,6 @@
 #include "llvm/Support/Error.h"
 
 #include <cstring>
-#include <string>
 
 using namespace llvm;
 using namespace omp;
@@ -162,98 +161,3 @@ Error GenericGlobalHandlerTy::readGlobalFromImage(GenericDeviceTy &Device,
 
   return Plugin::success();
 }
-
-bool GenericGlobalHandlerTy::hasProfilingGlobals(GenericDeviceTy &Device,
-                                                 DeviceImageTy &Image) {
-  GlobalTy global(getInstrProfNamesVarName().str(), 0);
-  if (auto Err = getGlobalMetadataFromImage(Device, Image, global)) {
-    consumeError(std::move(Err));
-    return false;
-  }
-  return true;
-}
-
-Expected<GPUProfGlobals>
-GenericGlobalHandlerTy::readProfilingGlobals(GenericDeviceTy &Device,
-                                             DeviceImageTy &Image) {
-  GPUProfGlobals DeviceProfileData;
-  auto ObjFile = getELFObjectFile(Image);
-  if (!ObjFile)
-    return ObjFile.takeError();
-
-  std::unique_ptr<ELFObjectFileBase> ELFObj(
-      static_cast<ELFObjectFileBase *>(ObjFile->release()));
-  DeviceProfileData.TargetTriple = ELFObj->makeTriple();
-
-  // Iterate through elf symbols
-  for (auto &Sym : ELFObj->symbols()) {
-    auto NameOrErr = Sym.getName();
-    if (!NameOrErr)
-      return NameOrErr.takeError();
-
-    // Check if given current global is a profiling global based
-    // on name
-    if (NameOrErr->equals(getInstrProfNamesVarName())) {
-      // Read in profiled function names
-      DeviceProfileData.NamesData = SmallVector<uint8_t>(Sym.getSize(), 0);
-      GlobalTy NamesGlobal(NameOrErr->str(), Sym.getSize(),
-                           DeviceProfileData.NamesData.data());
-      if (auto Err = readGlobalFromDevice(Device, Image, NamesGlobal))
-        return Err;
-    } else if (NameOrErr->starts_with(getInstrProfCountersVarPrefix())) {
-      // Read global variable profiling counts
-      SmallVector<int64_t> Counts(Sym.getSize() / sizeof(int64_t), 0);
-      GlobalTy CountGlobal(NameOrErr->str(), Sym.getSize(), Counts.data());
-      if (auto Err = readGlobalFromDevice(Device, Image, CountGlobal))
-        return Err;
-      DeviceProfileData.Counts.push_back(std::move(Counts));
-    } else if (NameOrErr->starts_with(getInstrProfDataVarPrefix())) {
-      // Read profiling data for this global variable
-      __llvm_profile_data Data{};
-      GlobalTy DataGlobal(NameOrErr->str(), Sym.getSize(), &Data);
-      if (auto Err = readGlobalFromDevice(Device, Image, DataGlobal))
-        return Err;
-      DeviceProfileData.Data.push_back(std::move(Data));
-    }
-  }
-  return DeviceProfileData;
-}
-
-void GPUProfGlobals::dump() const {
-  outs() << "======= GPU Profile =======\nTarget: " << TargetTriple.str()
-         << "\n";
-
-  outs() << "======== Counters =========\n";
-  for (const auto &Count : Counts) {
-    outs() << "[";
-    for (size_t i = 0; i < Count.size(); i++) {
-      if (i == 0)
-        outs() << " ";
-      outs() << Count[i] << " ";
-    }
-    outs() << "]\n";
-  }
-
-  outs() << "========== Data ===========\n";
-  for (const auto &ProfData : Data) {
-    outs() << "{ ";
-#define INSTR_PROF_DATA(Type, LLVMType, Name, Initializer)                     \
-  outs() << ProfData.Name << " ";
-#include "llvm/ProfileData/InstrProfData.inc"
-    outs() << "}\n";
-  }
-
-  outs() << "======== Functions ========\n";
-  std::string s;
-  s.reserve(NamesData.size());
-  for (uint8_t Name : NamesData) {
-    s.push_back((char)Name);
-  }
-
-  InstrProfSymtab Symtab;
-  if (Error Err = Symtab.create(StringRef(s))) {
-    consumeError(std::move(Err));
-  }
-  Symtab.dumpNames(outs());
-  outs() << "===========================\n";
-}

diff  --git a/offload/plugins-nextgen/common/src/PluginInterface.cpp b/offload/plugins-nextgen/common/src/PluginInterface.cpp
index a7899bbfe8a58..118265973f327 100644
--- a/offload/plugins-nextgen/common/src/PluginInterface.cpp
+++ b/offload/plugins-nextgen/common/src/PluginInterface.cpp
@@ -831,20 +831,6 @@ Error GenericDeviceTy::deinit(GenericPluginTy &Plugin) {
            DeviceMemoryPoolTracking.AllocationMax);
   }
 
-  for (auto *Image : LoadedImages) {
-    GenericGlobalHandlerTy &Handler = Plugin.getGlobalHandler();
-    if (!Handler.hasProfilingGlobals(*this, *Image))
-      continue;
-
-    GPUProfGlobals profdata;
-    auto ProfOrErr = Handler.readProfilingGlobals(*this, *Image);
-    if (!ProfOrErr)
-      return ProfOrErr.takeError();
-
-    // TODO: write data to profiling file
-    ProfOrErr->dump();
-  }
-
   // Delete the memory manager before deinitializing the device. Otherwise,
   // we may delete device allocations after the device is deinitialized.
   if (MemoryManager)

diff  --git a/offload/test/CMakeLists.txt b/offload/test/CMakeLists.txt
index 495d1ef62226e..3ac5d7907e2cc 100644
--- a/offload/test/CMakeLists.txt
+++ b/offload/test/CMakeLists.txt
@@ -12,12 +12,6 @@ else()
   set(LIBOMPTARGET_DEBUG False)
 endif()
 
-if (NOT OPENMP_STANDALONE_BUILD AND "compiler-rt" IN_LIST LLVM_ENABLE_RUNTIMES)
-  set(LIBOMPTARGET_TEST_GPU_PGO True)
-else()
-  set(LIBOMPTARGET_TEST_GPU_PGO False)
-endif()
-
 # Replace the space from user's input with ";" in case that CMake add escape
 # char into the lit command.
 string(REPLACE " " ";" LIBOMPTARGET_LIT_ARG_LIST "${LIBOMPTARGET_LIT_ARGS}")

diff  --git a/offload/test/lit.cfg b/offload/test/lit.cfg
index 069110dc69a6e..6c590603079c4 100644
--- a/offload/test/lit.cfg
+++ b/offload/test/lit.cfg
@@ -112,9 +112,6 @@ config.available_features.add(config.libomptarget_current_target)
 if config.libomptarget_has_libc:
   config.available_features.add('libc')
 
-if config.libomptarget_test_pgo:
-  config.available_features.add('pgo')
-
 # Determine whether the test system supports unified memory.
 # For CUDA, this is the case with compute capability 70 (Volta) or higher.
 # For all other targets, we currently assume it is.

diff  --git a/offload/test/lit.site.cfg.in b/offload/test/lit.site.cfg.in
index f037f69b297fd..43751970cac27 100644
--- a/offload/test/lit.site.cfg.in
+++ b/offload/test/lit.site.cfg.in
@@ -26,6 +26,6 @@ config.libomptarget_not = "@OPENMP_NOT_EXECUTABLE@"
 config.libomptarget_debug = @LIBOMPTARGET_DEBUG@
 config.has_libomptarget_ompt = @LIBOMPTARGET_OMPT_SUPPORT@
 config.libomptarget_has_libc = @LIBOMPTARGET_GPU_LIBC_SUPPORT@
-config.libomptarget_test_pgo = @LIBOMPTARGET_TEST_GPU_PGO@
+
 # Let the main config do the real work.
 lit_config.load_config(config, "@CMAKE_CURRENT_SOURCE_DIR@/lit.cfg")

diff  --git a/offload/test/offloading/pgo1.c b/offload/test/offloading/pgo1.c
deleted file mode 100644
index d95793b508dcf..0000000000000
--- a/offload/test/offloading/pgo1.c
+++ /dev/null
@@ -1,77 +0,0 @@
-// RUN: %libomptarget-compile-generic -fprofile-instr-generate \
-// RUN:     -Xclang "-fprofile-instrument=clang"
-// RUN: %libomptarget-run-generic 2>&1 | %fcheck-generic \
-// RUN:     --check-prefix="CLANG-PGO"
-// RUN: %libomptarget-compile-generic -fprofile-generate \
-// RUN:     -Xclang "-fprofile-instrument=llvm"
-// RUN: %libomptarget-run-generic 2>&1 | %fcheck-generic \
-// RUN:     --check-prefix="LLVM-PGO"
-
-// UNSUPPORTED: x86_64-pc-linux-gnu
-// UNSUPPORTED: x86_64-pc-linux-gnu-LTO
-// UNSUPPORTED: aarch64-unknown-linux-gnu
-// UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
-// REQUIRES: pgo
-
-#ifdef _OPENMP
-#include <omp.h>
-#endif
-
-int test1(int a) { return a / 2; }
-int test2(int a) { return a * 2; }
-
-int main() {
-  int m = 2;
-#pragma omp target
-  for (int i = 0; i < 10; i++) {
-    m = test1(m);
-    for (int j = 0; j < 2; j++) {
-      m = test2(m);
-    }
-  }
-}
-
-// CLANG-PGO: ======== Counters =========
-// CLANG-PGO-NEXT: [ 0 11 20 ]
-// CLANG-PGO-NEXT: [ 10 ]
-// CLANG-PGO-NEXT: [ 20 ]
-// CLANG-PGO-NEXT: ========== Data ===========
-// CLANG-PGO-NEXT: { {{[0-9]*}} {{[0-9]*}}
-// CLANG-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
-// CLANG-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
-// CLANG-PGO-SAME: {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} }
-// CLANG-PGO-NEXT: { {{[0-9]*}} {{[0-9]*}}
-// CLANG-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
-// CLANG-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
-// CLANG-PGO-SAME: {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} }
-// CLANG-PGO-NEXT: { {{[0-9]*}} {{[0-9]*}}
-// CLANG-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
-// CLANG-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
-// CLANG-PGO-SAME: {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} }
-// CLANG-PGO-NEXT: ======== Functions ========
-// CLANG-PGO-NEXT: pgo1.c:
-// CLANG-PGO-SAME: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}
-// CLANG-PGO-NEXT: test1
-// CLANG-PGO-NEXT: test2
-
-// LLVM-PGO: ======== Counters =========
-// LLVM-PGO-NEXT: [ 20 ]
-// LLVM-PGO-NEXT: [ 10 ]
-// LLVM-PGO-NEXT: [ 20 10 1 1 ]
-// LLVM-PGO-NEXT: ========== Data ===========
-// LLVM-PGO-NEXT: { {{[0-9]*}} {{[0-9]*}}
-// LLVM-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
-// LLVM-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
-// LLVM-PGO-SAME: {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} }
-// LLVM-PGO-NEXT: { {{[0-9]*}} {{[0-9]*}}
-// LLVM-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
-// LLVM-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
-// LLVM-PGO-SAME: {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} }
-// LLVM-PGO-NEXT: { {{[0-9]*}} {{[0-9]*}}
-// LLVM-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
-// LLVM-PGO-SAME: {{0x[0-9a-fA-F]*}} {{0x[0-9a-fA-F]*}}
-// LLVM-PGO-SAME: {{[0-9]*}} {{[0-9]*}} {{[0-9]*}} }
-// LLVM-PGO-NEXT: ======== Functions ========
-// LLVM-PGO-NEXT: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}
-// LLVM-PGO-NEXT: test1
-// LLVM-PGO-NEXT: test2


        


More information about the cfe-commits mailing list