[clang] [compiler-rt] [llvm] [PGO][HIP] Fix HIP device profile collection and sections emission (PR #202095)
Larry Meadows via cfe-commits
cfe-commits at lists.llvm.org
Thu Jun 11 06:03:00 PDT 2026
================
@@ -505,23 +550,93 @@ static void **OffloadShadowVariables = nullptr;
static int NumShadowVariables = 0;
static int CapShadowVariables = 0;
+struct OffloadSectionShadow {
+ void *Data;
+ void *Counters;
+ void *Names;
+};
+
+struct OffloadSectionShadowGroup {
+ OffloadSectionShadow *Shadows;
+ int NumShadows;
+ int CapShadows;
+ int NumSections;
+};
+
+static OffloadSectionShadowGroup *OffloadSectionShadowGroups = nullptr;
+static int CapSectionShadowGroups = 0;
+
+static int ensureSectionShadowGroupCapacity(void) {
+ if (CapSectionShadowGroups >= CapShadowVariables)
+ return 0;
+ OffloadSectionShadowGroup *New = (OffloadSectionShadowGroup *)realloc(
+ OffloadSectionShadowGroups, CapShadowVariables * sizeof(*New));
+ if (!New)
+ return -1;
+ __builtin_memset(New + CapSectionShadowGroups, 0,
+ (CapShadowVariables - CapSectionShadowGroups) *
+ sizeof(*New));
+ OffloadSectionShadowGroups = New;
+ CapSectionShadowGroups = CapShadowVariables;
+ return 0;
+}
+
+static int ensureSectionShadowCapacity(OffloadSectionShadowGroup *Group,
+ int MinCapacity) {
+ if (Group->CapShadows >= MinCapacity)
+ return 0;
+ int NewCap = Group->CapShadows ? Group->CapShadows * 2 : 4;
+ while (NewCap < MinCapacity)
+ NewCap *= 2;
+ OffloadSectionShadow *New =
+ (OffloadSectionShadow *)realloc(Group->Shadows, NewCap * sizeof(*New));
+ if (!New)
+ return -1;
+ __builtin_memset(New + Group->CapShadows, 0,
+ (NewCap - Group->CapShadows) * sizeof(*New));
+ Group->Shadows = New;
+ Group->CapShadows = NewCap;
+ return 0;
+}
+
extern "C" void __llvm_profile_offload_register_shadow_variable(void *ptr) {
if (growPtrArray(&OffloadShadowVariables, &NumShadowVariables,
&CapShadowVariables, 64))
return;
- OffloadShadowVariables[NumShadowVariables++] = ptr;
+ if (ensureSectionShadowGroupCapacity())
+ return;
+ int Index = NumShadowVariables++;
+ OffloadShadowVariables[Index] = ptr;
+ __builtin_memset(&OffloadSectionShadowGroups[Index], 0,
+ sizeof(OffloadSectionShadowGroups[Index]));
}
-static void **OffloadSectionShadowVariables = nullptr;
-static int NumSectionShadowVariables = 0;
-static int CapSectionShadowVariables = 0;
-
extern "C" void
__llvm_profile_offload_register_section_shadow_variable(void *ptr) {
----------------
lfmeadow wrote:
The encoding/decoding is fragile: the 3 items [data, counters, names] must always be registered in the same order, each TU's sections must appear in order, and there can be no interleaving (this works only because global ctors are executed serially). There is no matching header shared by codegen and the runtime (see the CGCUDANV.cpp comment below). Maybe use a shared struct declaration, or at least add some comments.
https://github.com/llvm/llvm-project/pull/202095
More information about the cfe-commits
mailing list