[llvm] [PGO][Offload] Make PGO GPU tests atomic (PR #132262)

Ethan Luis McDonough via llvm-commits llvm-commits at lists.llvm.org
Wed Apr 30 10:47:03 PDT 2025


https://github.com/EthanLuisMcDonough updated https://github.com/llvm/llvm-project/pull/132262

From 10cb0f6ac5b02710fd6d78a4f4c3ef039f504e8f Mon Sep 17 00:00:00 2001
From: Ethan Luis McDonough <ethanluismcdonough at gmail.com>
Date: Thu, 20 Mar 2025 12:42:10 -0500
Subject: [PATCH 1/2] [PGO][Offload] Make PGO GPU tests atomic

---
 offload/test/offloading/gpupgo/pgo1.c | 26 ++++++++++++--------------
 offload/test/offloading/gpupgo/pgo2.c | 24 ++++++++++++++----------
 2 files changed, 26 insertions(+), 24 deletions(-)

diff --git a/offload/test/offloading/gpupgo/pgo1.c b/offload/test/offloading/gpupgo/pgo1.c
index c8011cbae83c0..4fb11bf702aba 100644
--- a/offload/test/offloading/gpupgo/pgo1.c
+++ b/offload/test/offloading/gpupgo/pgo1.c
@@ -1,5 +1,6 @@
 // RUN: %libomptarget-compile-generic -fcreate-profile \
-// RUN:     -Xarch_device -fprofile-generate
+// RUN:     -Xarch_device -fprofile-generate \
+// RUN:     -Xarch_device -fprofile-update=atomic
 // RUN: env LLVM_PROFILE_FILE=%basename_t.llvm.profraw \
 // RUN:     %libomptarget-run-generic 2>&1
 // RUN: llvm-profdata show --all-functions --counts \
@@ -7,7 +8,8 @@
 // RUN:     %fcheck-generic --check-prefix="LLVM-PGO"
 
 // RUN: %libomptarget-compile-generic -fcreate-profile \
-// RUN:     -Xarch_device -fprofile-instr-generate
+// RUN:     -Xarch_device -fprofile-instr-generate \
+// RUN:     -Xarch_device -fprofile-update=atomic
 // RUN: env LLVM_PROFILE_FILE=%basename_t.clang.profraw \
 // RUN:     %libomptarget-run-generic 2>&1
 // RUN: llvm-profdata show --all-functions --counts \
@@ -23,10 +25,12 @@ int test2(int a) { return a * 2; }
 int main() {
   int m = 2;
 #pragma omp target
-  for (int i = 0; i < 10; i++) {
-    m = test1(m);
-    for (int j = 0; j < 2; j++) {
-      m = test2(m);
+  {
+    for (int i = 0; i < 10; i++) {
+      m = test1(m);
+      for (int j = 0; j < 2; j++) {
+        m = test2(m);
+      }
     }
   }
 }
@@ -34,7 +38,7 @@ int main() {
 // LLVM-PGO-LABEL: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}:
 // LLVM-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
 // LLVM-PGO: Counters: 4
-// LLVM-PGO: Block counts: [20, 10, 2, 1]
+// LLVM-PGO: Block counts: [20, 10, {{.*}}, 1]
 
 // LLVM-PGO-LABEL: test1:
 // LLVM-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
@@ -53,14 +57,10 @@ int main() {
 // LLVM-PGO-SAME: 3
 // LLVM-PGO-LABEL: Maximum function count:
 // LLVM-PGO-SAME: 20
-// LLVM-PGO-LABEL: Maximum internal block count:
-// LLVM-PGO-SAME: 10
 
 // CLANG-PGO-LABEL: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}:
 // CLANG-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
-// CLANG-PGO: Counters: 3
-// CLANG-PGO: Function count: 0
-// CLANG-PGO: Block counts: [11, 20]
+// CLANG-PGO: Block counts: [10, 20]
 
 // CLANG-PGO-LABEL: test1:
 // CLANG-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
@@ -78,7 +78,5 @@ int main() {
 // CLANG-PGO-SAME: Front-end
 // CLANG-PGO-LABEL: Functions shown:
 // CLANG-PGO-SAME: 3
-// CLANG-PGO-LABEL: Maximum function count:
-// CLANG-PGO-SAME: 20
 // CLANG-PGO-LABEL: Maximum internal block count:
 // CLANG-PGO-SAME: 20
diff --git a/offload/test/offloading/gpupgo/pgo2.c b/offload/test/offloading/gpupgo/pgo2.c
index b75b0beaffdec..820a597bd5108 100644
--- a/offload/test/offloading/gpupgo/pgo2.c
+++ b/offload/test/offloading/gpupgo/pgo2.c
@@ -1,4 +1,5 @@
-// RUN: %libomptarget-compile-generic -fprofile-generate
+// RUN: %libomptarget-compile-generic -fprofile-generate \
+// RUN:     -fprofile-update=atomic
 // RUN: env LLVM_PROFILE_FILE=%basename_t.llvm.profraw \
 // RUN:     %libomptarget-run-generic 2>&1
 // RUN: llvm-profdata show --all-functions --counts \
@@ -8,7 +9,8 @@
 // RUN:     %target_triple.%basename_t.llvm.profraw \
 // RUN:     | %fcheck-generic --check-prefix="LLVM-DEVICE"
 
-// RUN: %libomptarget-compile-generic -fprofile-instr-generate
+// RUN: %libomptarget-compile-generic -fprofile-instr-generate \
+// RUN:     -fprofile-update=atomic
 // RUN: env LLVM_PROFILE_FILE=%basename_t.clang.profraw \
 // RUN:     %libomptarget-run-generic 2>&1
 // RUN: llvm-profdata show --all-functions --counts \
@@ -18,7 +20,8 @@
 // RUN:     %target_triple.%basename_t.clang.profraw | \
 // RUN:     %fcheck-generic --check-prefix="CLANG-DEV"
 
-// RUN: %libomptarget-compile-generic -Xarch_host -fprofile-generate
+// RUN: %libomptarget-compile-generic -Xarch_host -fprofile-generate \
+// RUN:     -fprofile-update=atomic
 // RUN: env LLVM_PROFILE_FILE=%basename_t.nogpu.profraw \
 // RUN:     %libomptarget-run-generic 2>&1
 // RUN: llvm-profdata show --all-functions --counts \
@@ -27,7 +30,7 @@
 // RUN: not test -e %target_triple.%basename_t.nogpu.profraw
 
 // RUN: %libomptarget-compile-generic -Xarch_host -fprofile-generate \
-// RUN:     -Xarch_device -fprofile-instr-generate
+// RUN:     -Xarch_device -fprofile-instr-generate -fprofile-update=atomic
 // RUN: env LLVM_PROFILE_FILE=%basename_t.hidf.profraw \
 // RUN:     %libomptarget-run-generic 2>&1
 // RUN: llvm-profdata show --all-functions --counts \
@@ -38,7 +41,7 @@
 // RUN:     | %fcheck-generic --check-prefix="CLANG-DEV"
 
 // RUN: %libomptarget-compile-generic -Xarch_device -fprofile-generate \
-// RUN:     -Xarch_host -fprofile-instr-generate
+// RUN:     -Xarch_host -fprofile-instr-generate -fprofile-update=atomic
 // RUN: env LLVM_PROFILE_FILE=%basename_t.hfdi.profraw \
 // RUN:     %libomptarget-run-generic 2>&1
 // RUN: llvm-profdata show --all-functions --counts \
@@ -59,8 +62,10 @@ int main() {
 
   int device_var = 1;
 #pragma omp target
-  for (int i = 0; i < 10; i++) {
-    device_var *= i;
+  {
+    for (int i = 0; i < 10; i++) {
+      device_var *= i;
+    }
   }
 }
 
@@ -78,7 +83,7 @@ int main() {
 // LLVM-DEVICE-LABEL: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}:
 // LLVM-DEVICE: Hash: {{0[xX][0-9a-fA-F]+}}
 // LLVM-DEVICE: Counters: 3
-// LLVM-DEVICE: Block counts: [10, 2, 1]
+// LLVM-DEVICE: Block counts: [10, {{.*}}, 1]
 // LLVM-DEVICE: Instrumentation level: IR
 
 // CLANG-HOST-LABEL: main:
@@ -97,6 +102,5 @@ int main() {
 // CLANG-DEV-LABEL: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}:
 // CLANG-DEV: Hash: {{0[xX][0-9a-fA-F]+}}
 // CLANG-DEV: Counters: 2
-// CLANG-DEV: Function count: 0
-// CLANG-DEV: Block counts: [11]
+// CLANG-DEV: Block counts: [10]
 // CLANG-DEV: Instrumentation level: Front-end

From 0c483466e0ffe15bbdb2348f20fdc46a1ce224f3 Mon Sep 17 00:00:00 2001
From: Ethan Luis McDonough <ethanluismcdonough at gmail.com>
Date: Wed, 30 Apr 2025 12:24:08 -0500
Subject: [PATCH 2/2] [PGO][Offload] Update GPU PGO tests

---
 offload/test/offloading/gpupgo/pgo1.c |   6 +-
 offload/test/offloading/gpupgo/pgo2.c |  13 ++--
 offload/test/offloading/gpupgo/pgo3.c |  84 +++++++++++++++++++++
 offload/test/offloading/gpupgo/pgo4.c | 102 ++++++++++++++++++++++++++
 4 files changed, 193 insertions(+), 12 deletions(-)
 create mode 100644 offload/test/offloading/gpupgo/pgo3.c
 create mode 100644 offload/test/offloading/gpupgo/pgo4.c

diff --git a/offload/test/offloading/gpupgo/pgo1.c b/offload/test/offloading/gpupgo/pgo1.c
index 4fb11bf702aba..fee3ab474ff89 100644
--- a/offload/test/offloading/gpupgo/pgo1.c
+++ b/offload/test/offloading/gpupgo/pgo1.c
@@ -1,6 +1,5 @@
 // RUN: %libomptarget-compile-generic -fcreate-profile \
-// RUN:     -Xarch_device -fprofile-generate \
-// RUN:     -Xarch_device -fprofile-update=atomic
+// RUN:     -Xarch_device -fprofile-generate
 // RUN: env LLVM_PROFILE_FILE=%basename_t.llvm.profraw \
 // RUN:     %libomptarget-run-generic 2>&1
 // RUN: llvm-profdata show --all-functions --counts \
@@ -8,8 +7,7 @@
 // RUN:     %fcheck-generic --check-prefix="LLVM-PGO"
 
 // RUN: %libomptarget-compile-generic -fcreate-profile \
-// RUN:     -Xarch_device -fprofile-instr-generate \
-// RUN:     -Xarch_device -fprofile-update=atomic
+// RUN:     -Xarch_device -fprofile-instr-generate
 // RUN: env LLVM_PROFILE_FILE=%basename_t.clang.profraw \
 // RUN:     %libomptarget-run-generic 2>&1
 // RUN: llvm-profdata show --all-functions --counts \
diff --git a/offload/test/offloading/gpupgo/pgo2.c b/offload/test/offloading/gpupgo/pgo2.c
index 820a597bd5108..8828f1554f8b2 100644
--- a/offload/test/offloading/gpupgo/pgo2.c
+++ b/offload/test/offloading/gpupgo/pgo2.c
@@ -1,5 +1,4 @@
-// RUN: %libomptarget-compile-generic -fprofile-generate \
-// RUN:     -fprofile-update=atomic
+// RUN: %libomptarget-compile-generic -fprofile-generate
 // RUN: env LLVM_PROFILE_FILE=%basename_t.llvm.profraw \
 // RUN:     %libomptarget-run-generic 2>&1
 // RUN: llvm-profdata show --all-functions --counts \
@@ -9,8 +8,7 @@
 // RUN:     %target_triple.%basename_t.llvm.profraw \
 // RUN:     | %fcheck-generic --check-prefix="LLVM-DEVICE"
 
-// RUN: %libomptarget-compile-generic -fprofile-instr-generate \
-// RUN:     -fprofile-update=atomic
+// RUN: %libomptarget-compile-generic -fprofile-instr-generate
 // RUN: env LLVM_PROFILE_FILE=%basename_t.clang.profraw \
 // RUN:     %libomptarget-run-generic 2>&1
 // RUN: llvm-profdata show --all-functions --counts \
@@ -20,8 +18,7 @@
 // RUN:     %target_triple.%basename_t.clang.profraw | \
 // RUN:     %fcheck-generic --check-prefix="CLANG-DEV"
 
-// RUN: %libomptarget-compile-generic -Xarch_host -fprofile-generate \
-// RUN:     -fprofile-update=atomic
+// RUN: %libomptarget-compile-generic -Xarch_host -fprofile-generate
 // RUN: env LLVM_PROFILE_FILE=%basename_t.nogpu.profraw \
 // RUN:     %libomptarget-run-generic 2>&1
 // RUN: llvm-profdata show --all-functions --counts \
@@ -30,7 +27,7 @@
 // RUN: not test -e %target_triple.%basename_t.nogpu.profraw
 
 // RUN: %libomptarget-compile-generic -Xarch_host -fprofile-generate \
-// RUN:     -Xarch_device -fprofile-instr-generate -fprofile-update=atomic
+// RUN:     -Xarch_device -fprofile-instr-generate
 // RUN: env LLVM_PROFILE_FILE=%basename_t.hidf.profraw \
 // RUN:     %libomptarget-run-generic 2>&1
 // RUN: llvm-profdata show --all-functions --counts \
@@ -41,7 +38,7 @@
 // RUN:     | %fcheck-generic --check-prefix="CLANG-DEV"
 
 // RUN: %libomptarget-compile-generic -Xarch_device -fprofile-generate \
-// RUN:     -Xarch_host -fprofile-instr-generate -fprofile-update=atomic
+// RUN:     -Xarch_host -fprofile-instr-generate
 // RUN: env LLVM_PROFILE_FILE=%basename_t.hfdi.profraw \
 // RUN:     %libomptarget-run-generic 2>&1
 // RUN: llvm-profdata show --all-functions --counts \
diff --git a/offload/test/offloading/gpupgo/pgo3.c b/offload/test/offloading/gpupgo/pgo3.c
new file mode 100644
index 0000000000000..f0e7111f7a64b
--- /dev/null
+++ b/offload/test/offloading/gpupgo/pgo3.c
@@ -0,0 +1,84 @@
+// RUN: %libomptarget-compile-generic -fcreate-profile \
+// RUN:     -Xarch_device -fprofile-generate \
+// RUN:     -Xarch_device -fprofile-update=atomic
+// RUN: env LLVM_PROFILE_FILE=%basename_t.llvm.profraw \
+// RUN:     %libomptarget-run-generic 2>&1
+// RUN: llvm-profdata show --all-functions --counts \
+// RUN:     %target_triple.%basename_t.llvm.profraw | \
+// RUN:     %fcheck-generic --check-prefix="LLVM-PGO"
+
+// RUN: %libomptarget-compile-generic -fcreate-profile \
+// RUN:     -Xarch_device -fprofile-instr-generate \
+// RUN:     -Xarch_device -fprofile-update=atomic
+// RUN: env LLVM_PROFILE_FILE=%basename_t.clang.profraw \
+// RUN:     %libomptarget-run-generic 2>&1
+// RUN: llvm-profdata show --all-functions --counts \
+// RUN:     %target_triple.%basename_t.clang.profraw | \
+// RUN:     %fcheck-generic --check-prefix="CLANG-PGO"
+
+// REQUIRES: gpu
+// REQUIRES: pgo
+
+int test1(int a) { return a / 2; }
+
+int main() {
+  int device_var = 1;
+#pragma omp target map(tofrom : device_var)
+  {
+#pragma omp parallel for
+    for (int i = 1; i <= 10; i++) {
+      device_var *= i;
+      if (i % 2 == 0) {
+        device_var += test1(device_var);
+      }
+    }
+  }
+}
+
+// clang-format off
+// LLVM-PGO-LABEL: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}:
+// LLVM-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
+// LLVM-PGO: Counters: 2
+// LLVM-PGO: Block counts: [0, {{.*}}]
+
+// LLVM-PGO-LABEL: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}_omp_outlined:
+// LLVM-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
+// LLVM-PGO: Counters: 5
+// LLVM-PGO: Block counts: [10, 5, {{.*}}, 10, {{.*}}]
+
+// LLVM-PGO-LABEL: test1:
+// LLVM-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
+// LLVM-PGO: Counters: 1
+// LLVM-PGO: Block counts: [5]
+
+// LLVM-PGO-LABEL: Instrumentation level:
+// LLVM-PGO-SAME: IR
+// LLVM-PGO-SAME: entry_first = 0
+// LLVM-PGO-LABEL: Functions shown:
+// LLVM-PGO-SAME: 3
+// LLVM-PGO-LABEL: Maximum function count:
+// LLVM-PGO-SAME: 10
+
+// CLANG-PGO-LABEL: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}:
+// CLANG-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
+// CLANG-PGO: Counters: 1
+// CLANG-PGO: Function count: {{.*}}
+// CLANG-PGO: Block counts: []
+
+// CLANG-PGO-LABEL: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}_omp_outlined:
+// CLANG-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
+// CLANG-PGO: Counters: 3
+// CLANG-PGO: Function count: {{.*}}
+// CLANG-PGO: Block counts: [{{.*}}, 5]
+
+// CLANG-PGO-LABEL: test1:
+// CLANG-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
+// CLANG-PGO: Counters: 1
+// CLANG-PGO: Function count: 5
+// CLANG-PGO: Block counts: []
+
+// CLANG-PGO-LABEL: Instrumentation level:
+// CLANG-PGO-SAME: Front-end
+// CLANG-PGO-LABEL: Functions shown:
+// CLANG-PGO-SAME: 3
+// clang-format on
diff --git a/offload/test/offloading/gpupgo/pgo4.c b/offload/test/offloading/gpupgo/pgo4.c
new file mode 100644
index 0000000000000..7bf3b1c11f28b
--- /dev/null
+++ b/offload/test/offloading/gpupgo/pgo4.c
@@ -0,0 +1,102 @@
+// RUN: %libomptarget-compile-generic -fcreate-profile \
+// RUN:     -Xarch_device -fprofile-generate \
+// RUN:     -Xarch_device -fprofile-update=atomic
+// RUN: env LLVM_PROFILE_FILE=%basename_t.llvm.profraw \
+// RUN:     %libomptarget-run-generic 2>&1
+// RUN: llvm-profdata show --all-functions --counts \
+// RUN:     %target_triple.%basename_t.llvm.profraw | \
+// RUN:     %fcheck-generic --check-prefix="LLVM-PGO"
+
+// RUN: %libomptarget-compile-generic -fcreate-profile \
+// RUN:     -Xarch_device -fprofile-instr-generate \
+// RUN:     -Xarch_device -fprofile-update=atomic
+// RUN: env LLVM_PROFILE_FILE=%basename_t.clang.profraw \
+// RUN:     %libomptarget-run-generic 2>&1
+// RUN: llvm-profdata show --all-functions --counts \
+// RUN:     %target_triple.%basename_t.clang.profraw | \
+// RUN:     %fcheck-generic --check-prefix="CLANG-PGO"
+
+// REQUIRES: gpu
+// REQUIRES: pgo
+
+int test1(int a) { return a / 2; }
+int test2(int a) { return a * 2; }
+
+int main() {
+  int device_var = 1;
+
+#pragma omp target teams distribute parallel for num_teams(3)                  \
+    map(tofrom : device_var)
+  for (int i = 1; i <= 30; i++) {
+    device_var *= i;
+    if (i % 2 == 0) {
+      device_var += test1(device_var);
+    }
+    if (i % 3 == 0) {
+      device_var += test2(device_var);
+    }
+  }
+}
+
+// clang-format off
+// LLVM-PGO-LABEL: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}:
+// LLVM-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
+// LLVM-PGO: Counters: 2
+// LLVM-PGO: Block counts: [0, {{.*}}]
+
+// LLVM-PGO-LABEL: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}_omp_outlined:
+// LLVM-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
+// LLVM-PGO: Counters: 4
+// LLVM-PGO: Block counts: [{{.*}}, 0, {{.*}}, 0]
+
+// LLVM-PGO-LABEL: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}_omp_outlined_omp_outlined:
+// LLVM-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
+// LLVM-PGO: Counters: 4
+// LLVM-PGO: Block counts: [30, 15, 10, {{.*}}]
+
+// LLVM-PGO-LABEL: test1:
+// LLVM-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
+// LLVM-PGO: Counters: 1
+// LLVM-PGO: Block counts: [15]
+
+// LLVM-PGO-LABEL: test2:
+// LLVM-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
+// LLVM-PGO: Counters: 1
+// LLVM-PGO: Block counts: [10]
+
+// LLVM-PGO-LABEL: Instrumentation level:
+// LLVM-PGO-SAME: IR
+
+// CLANG-PGO-LABEL: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}:
+// CLANG-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
+// CLANG-PGO: Counters: 1
+// CLANG-PGO: Function count: {{.*}}
+// CLANG-PGO: Block counts: []
+
+// CLANG-PGO-LABEL: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}_omp_outlined:
+// CLANG-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
+// CLANG-PGO: Counters: 1
+// CLANG-PGO: Function count: {{.*}}
+// CLANG-PGO: Block counts: []
+
+// CLANG-PGO-LABEL: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}_omp_outlined_omp_outlined:
+// CLANG-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
+// CLANG-PGO: Counters: 4
+// CLANG-PGO: Function count: 30
+// CLANG-PGO: Block counts: [{{.*}}, 15, 10]
+
+// CLANG-PGO-LABEL: test1:
+// CLANG-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
+// CLANG-PGO: Counters: 1
+// CLANG-PGO: Function count: 15
+// CLANG-PGO: Block counts: []
+
+// CLANG-PGO-LABEL: test2:
+// CLANG-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
+// CLANG-PGO: Counters: 1
+// CLANG-PGO: Function count: 10
+// CLANG-PGO: Block counts: []
+
+// CLANG-PGO-LABEL: Instrumentation level:
+// CLANG-PGO-SAME: Front-end
+// clang-format on



More information about the llvm-commits mailing list