[llvm] [KernelInfo] Implement new LLVM IR pass for GPU code analysis (PR #102944)
Joel E. Denny via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 26 11:15:26 PDT 2024
================
@@ -0,0 +1,148 @@
+//=- KernelInfo.h - Kernel Analysis -------------------------------*- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the KernelInfo, KernelInfoAnalysis, and KernelInfoPrinter
+// classes used to extract function properties from a GPU kernel.
+//
+// To analyze a C program as it appears to an LLVM GPU backend at the end of
+// LTO:
+//
+// $ clang -O2 -g -fopenmp --offload-arch=native test.c -foffload-lto \
+// -Rpass=kernel-info -mllvm -kernel-info-end-lto
+//
+// To analyze specified LLVM IR, perhaps previously generated by something like
+// 'clang -save-temps -g -fopenmp --offload-arch=native test.c':
+//
+// $ opt -disable-output test-openmp-nvptx64-nvidia-cuda-sm_70.bc \
+// -pass-remarks=kernel-info -passes=kernel-info
+//
+// kernel-info can also be inserted into a specified LLVM pass pipeline using
+// -kernel-info-end-lto, or it can be positioned explicitly in that pipeline:
+//
+// $ clang -O2 -g -fopenmp --offload-arch=native test.c -foffload-lto \
+// -Rpass=kernel-info -mllvm -kernel-info-end-lto \
+// -Xoffload-linker --lto-newpm-passes='lto<O2>'
+//
+// $ clang -O2 -g -fopenmp --offload-arch=native test.c -foffload-lto \
+// -Rpass=kernel-info \
+// -Xoffload-linker --lto-newpm-passes='lto<O2>,module(kernel-info)'
+//
+// $ opt -disable-output test-openmp-nvptx64-nvidia-cuda-sm_70.bc \
+// -pass-remarks=kernel-info -kernel-info-end-lto -passes='lto<O2>'
+//
+// $ opt -disable-output test-openmp-nvptx64-nvidia-cuda-sm_70.bc \
+// -pass-remarks=kernel-info -passes='lto<O2>,module(kernel-info)'
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_KERNELINFO_H
+#define LLVM_ANALYSIS_KERNELINFO_H
+
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+
+namespace llvm {
+class DominatorTree;
+class Function;
+
+/// Data structure holding function info for kernels.
+class KernelInfo {
+ void updateForBB(const BasicBlock &BB, int64_t Direction,
+ OptimizationRemarkEmitter &ORE);
+
+public:
+ static KernelInfo getKernelInfo(Function &F, FunctionAnalysisManager &FAM);
+
+ bool operator==(const KernelInfo &FPI) const {
+ return std::memcmp(this, &FPI, sizeof(KernelInfo)) == 0;
+ }
+
+ bool operator!=(const KernelInfo &FPI) const { return !(*this == FPI); }
+
+ /// If false, nothing was recorded here because the supplied function didn't
+ /// appear in a module compiled for a GPU.
+ bool IsValid = false;
+
+ /// Whether the function has external linkage and is not a kernel function.
+ bool ExternalNotKernel = false;
+
+ /// OpenMP Launch bounds.
+ ///@{
+ std::optional<int64_t> OmpTargetNumTeams;
+ std::optional<int64_t> OmpTargetThreadLimit;
+ ///@}
+
+ /// AMDGPU launch bounds.
+ ///@{
+ std::optional<int64_t> AmdgpuMaxNumWorkgroupsX;
+ std::optional<int64_t> AmdgpuMaxNumWorkgroupsY;
+ std::optional<int64_t> AmdgpuMaxNumWorkgroupsZ;
+ std::optional<int64_t> AmdgpuFlatWorkGroupSizeMin;
----------------
jdenny-ornl wrote:
I believe this comment was part of [the more general discussion here](https://github.com/llvm/llvm-project/pull/102944#issuecomment-2284621052). Resolving. Please reopen if necessary.
https://github.com/llvm/llvm-project/pull/102944
More information about the llvm-commits
mailing list