[llvm] [SPIRV] Add `SPV_INTEL_kernel_attributes` (PR #165891)

Fri Oct 31 10:52:44 PDT 2025

https://github.com/AlexVlx created https://github.com/llvm/llvm-project/pull/165891

This adds BE support for the [`SPV_INTEL_kernel_attributes`](https://github.khronos.org/SPIRV-Registry/extensions/INTEL/SPV_INTEL_kernel_attributes.html) extension. The extension is necessary to encode the rather useful `max_work_group_size` kernel attribute, via `OpExecutionMode MaxWorkgroupSizeINTEL`, which is the only Execution Mode added by the extension that this patch adds full processing for. Future patches will add the other Execution Modes and Capabilities. The test is adapted from the equivalent Translator test; it depends on #165815.

>From 3e0abe10c53e08cc1ebf16662d8d58cd7245f70f Mon Sep 17 00:00:00 2001
From: Alex Voicu <alexandru.voicu at amd.com>
Date: Thu, 15 May 2025 23:27:42 +0100
Subject: [PATCH 1/2] Add pass which forwards unimplemented math builtins /
 libcalls to the HIPSTDPAR runtime component.

---
 .../llvm/Transforms/HipStdPar/HipStdPar.h     |   7 +
 llvm/lib/Passes/PassRegistry.def              |   1 +
 .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp |   8 +-
 llvm/lib/Transforms/HipStdPar/HipStdPar.cpp   | 117 +++++++++
 llvm/test/Transforms/HipStdPar/math-fixup.ll  | 240 ++++++++++++++++++
 5 files changed, 371 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/Transforms/HipStdPar/math-fixup.ll

diff --git a/llvm/include/llvm/Transforms/HipStdPar/HipStdPar.h b/llvm/include/llvm/Transforms/HipStdPar/HipStdPar.h
index 5ff38bdf04812..27195051ed7eb 100644
--- a/llvm/include/llvm/Transforms/HipStdPar/HipStdPar.h
+++ b/llvm/include/llvm/Transforms/HipStdPar/HipStdPar.h
@@ -40,6 +40,13 @@ class HipStdParAllocationInterpositionPass
   static bool isRequired() { return true; }
 };
 
+class HipStdParMathFixupPass : public PassInfoMixin<HipStdParMathFixupPass> {
+public:
+  PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);
+
+  static bool isRequired() { return true; }
+};
+
 } // namespace llvm
 
 #endif // LLVM_TRANSFORMS_HIPSTDPAR_HIPSTDPAR_H
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 94dabe290213d..3acdbf4d49fde 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -80,6 +80,7 @@ MODULE_PASS("global-merge-func", GlobalMergeFuncPass())
 MODULE_PASS("globalopt", GlobalOptPass())
 MODULE_PASS("globalsplit", GlobalSplitPass())
 MODULE_PASS("hipstdpar-interpose-alloc", HipStdParAllocationInterpositionPass())
+MODULE_PASS("hipstdpar-math-fixup", HipStdParMathFixupPass())
 MODULE_PASS("hipstdpar-select-accelerator-code",
             HipStdParAcceleratorCodeSelectionPass())
 MODULE_PASS("hotcoldsplit", HotColdSplittingPass())
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index ccb251b730f16..c3f8cee1e1783 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -819,8 +819,10 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
           // When we are not using -fgpu-rdc, we can run accelerator code
           // selection relatively early, but still after linking to prevent
           // eager removal of potentially reachable symbols.
-          if (EnableHipStdPar)
+          if (EnableHipStdPar) {
+            PM.addPass(HipStdParMathFixupPass());
             PM.addPass(HipStdParAcceleratorCodeSelectionPass());
+          }
           PM.addPass(AMDGPUPrintfRuntimeBindingPass());
         }
 
@@ -899,8 +901,10 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
         // selection after linking to prevent, otherwise we end up removing
         // potentially reachable symbols that were exported as external in other
         // modules.
-        if (EnableHipStdPar)
+        if (EnableHipStdPar) {
+          PM.addPass(HipStdParMathFixupPass());
           PM.addPass(HipStdParAcceleratorCodeSelectionPass());
+        }
         // We want to support the -lto-partitions=N option as "best effort".
         // For that, we need to lower LDS earlier in the pipeline before the
         // module is partitioned for codegen.
diff --git a/llvm/lib/Transforms/HipStdPar/HipStdPar.cpp b/llvm/lib/Transforms/HipStdPar/HipStdPar.cpp
index 5a87cf8c83d79..815878089c69e 100644
--- a/llvm/lib/Transforms/HipStdPar/HipStdPar.cpp
+++ b/llvm/lib/Transforms/HipStdPar/HipStdPar.cpp
@@ -37,6 +37,16 @@
 //    memory that ends up in one of the runtime equivalents, since this can
 //    happen if e.g. a library that was compiled without interposition returns
 //    an allocation that can be validly passed to `free`.
+//
+// 3. MathFixup (required): Some accelerators might have an incomplete
+//    implementation for the intrinsics used to implement some of the math
+//    functions in <cmath> / their corresponding libcall lowerings. Since this
+//    can vary quite significantly between accelerators, we replace calls to a
+//    set of intrinsics / lib functions known to be problematic with calls to a
+//    HIPSTDPAR specific forwarding layer, which gives an uniform interface for
+//    accelerators to implement in their own runtime components. This pass
+//    should run before AcceleratorCodeSelection so as to prevent the spurious
+//    removal of the HIPSTDPAR specific forwarding functions.
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/HipStdPar/HipStdPar.h"
@@ -48,6 +58,7 @@
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/Function.h"
+#include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Transforms/Utils/ModuleUtils.h"
 
@@ -321,3 +332,109 @@ HipStdParAllocationInterpositionPass::run(Module &M, ModuleAnalysisManager&) {
 
   return PreservedAnalyses::none();
 }
+
+static constexpr std::pair<StringLiteral, StringLiteral> MathLibToHipStdPar[]{
+    {"acosh", "__hipstdpar_acosh_f64"},
+    {"acoshf", "__hipstdpar_acosh_f32"},
+    {"asinh", "__hipstdpar_asinh_f64"},
+    {"asinhf", "__hipstdpar_asinh_f32"},
+    {"atanh", "__hipstdpar_atanh_f64"},
+    {"atanhf", "__hipstdpar_atanh_f32"},
+    {"cbrt", "__hipstdpar_cbrt_f64"},
+    {"cbrtf", "__hipstdpar_cbrt_f32"},
+    {"erf", "__hipstdpar_erf_f64"},
+    {"erff", "__hipstdpar_erf_f32"},
+    {"erfc", "__hipstdpar_erfc_f64"},
+    {"erfcf", "__hipstdpar_erfc_f32"},
+    {"fdim", "__hipstdpar_fdim_f64"},
+    {"fdimf", "__hipstdpar_fdim_f32"},
+    {"expm1", "__hipstdpar_expm1_f64"},
+    {"expm1f", "__hipstdpar_expm1_f32"},
+    {"hypot", "__hipstdpar_hypot_f64"},
+    {"hypotf", "__hipstdpar_hypot_f32"},
+    {"ilogb", "__hipstdpar_ilogb_f64"},
+    {"ilogbf", "__hipstdpar_ilogb_f32"},
+    {"lgamma", "__hipstdpar_lgamma_f64"},
+    {"lgammaf", "__hipstdpar_lgamma_f32"},
+    {"log1p", "__hipstdpar_log1p_f64"},
+    {"log1pf", "__hipstdpar_log1p_f32"},
+    {"logb", "__hipstdpar_logb_f64"},
+    {"logbf", "__hipstdpar_logb_f32"},
+    {"nextafter", "__hipstdpar_nextafter_f64"},
+    {"nextafterf", "__hipstdpar_nextafter_f32"},
+    {"nexttoward", "__hipstdpar_nexttoward_f64"},
+    {"nexttowardf", "__hipstdpar_nexttoward_f32"},
+    {"remainder", "__hipstdpar_remainder_f64"},
+    {"remainderf", "__hipstdpar_remainder_f32"},
+    {"remquo", "__hipstdpar_remquo_f64"},
+    {"remquof", "__hipstdpar_remquo_f32"},
+    {"scalbln", "__hipstdpar_scalbln_f64"},
+    {"scalblnf", "__hipstdpar_scalbln_f32"},
+    {"scalbn", "__hipstdpar_scalbn_f64"},
+    {"scalbnf", "__hipstdpar_scalbn_f32"},
+    {"tgamma", "__hipstdpar_tgamma_f64"},
+    {"tgammaf", "__hipstdpar_tgamma_f32"}};
+
+PreservedAnalyses HipStdParMathFixupPass::run(Module &M,
+                                              ModuleAnalysisManager &) {
+  if (M.empty())
+    return PreservedAnalyses::all();
+
+  SmallVector<std::pair<Function *, std::string>> ToReplace;
+  for (auto &&F : M) {
+    if (!F.hasName())
+      continue;
+
+    auto N = F.getName().str();
+    auto ID = F.getIntrinsicID();
+
+    switch (ID) {
+    case Intrinsic::not_intrinsic: {
+      auto It = find_if(MathLibToHipStdPar,
+                        [&](auto &&M) { return M.first == N; });
+      if (It == std::cend(MathLibToHipStdPar))
+        continue;
+      ToReplace.emplace_back(&F, It->second);
+      break;
+    }
+    case Intrinsic::acos:
+    case Intrinsic::asin:
+    case Intrinsic::atan:
+    case Intrinsic::atan2:
+    case Intrinsic::cosh:
+    case Intrinsic::modf:
+    case Intrinsic::sinh:
+    case Intrinsic::tan:
+    case Intrinsic::tanh:
+      break;
+    default: {
+      if (F.getReturnType()->isDoubleTy()) {
+        switch (ID) {
+        case Intrinsic::cos:
+        case Intrinsic::exp:
+        case Intrinsic::exp2:
+        case Intrinsic::log:
+        case Intrinsic::log10:
+        case Intrinsic::log2:
+        case Intrinsic::pow:
+        case Intrinsic::sin:
+          break;
+        default:
+          continue;
+        }
+        break;
+      }
+      continue;
+    }
+    }
+
+    llvm::replace(N, '.', '_');
+    N.replace(0, sizeof("llvm"), "__hipstdpar_");
+    ToReplace.emplace_back(&F, std::move(N));
+  }
+  for (auto &&F : ToReplace)
+    F.first->replaceAllUsesWith(M.getOrInsertFunction(
+        F.second, F.first->getFunctionType()).getCallee());
+
+  return PreservedAnalyses::none();
+}
\ No newline at end of file
diff --git a/llvm/test/Transforms/HipStdPar/math-fixup.ll b/llvm/test/Transforms/HipStdPar/math-fixup.ll
new file mode 100644
index 0000000000000..e0e2ca79c0843
--- /dev/null
+++ b/llvm/test/Transforms/HipStdPar/math-fixup.ll
@@ -0,0 +1,240 @@
+; RUN: opt -S -passes=hipstdpar-math-fixup %s | FileCheck %s
+
+define void @foo(double noundef %dbl, float noundef %flt, i32 noundef %quo) #0 {
+; CHECK-LABEL: define void @foo(
+; CHECK-SAME: double noundef [[DBL:%.*]], float noundef [[FLT:%.*]], i32 noundef [[QUO:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[QUO_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i32 [[QUO]], ptr [[QUO_ADDR]], align 4
+entry:
+  %quo.addr = alloca i32, align 4
+  store i32 %quo, ptr %quo.addr, align 4
+  ; CHECK-NEXT:    [[TMP0:%.*]] = tail call contract double @llvm.fabs.f64(double [[DBL]])
+  %0 = tail call contract double @llvm.fabs.f64(double %dbl)
+  ; CHECK-NEXT:    [[TMP1:%.*]] = tail call contract float @llvm.fabs.f32(float [[FLT]])
+  %1 = tail call contract float @llvm.fabs.f32(float %flt)
+  ; CHECK-NEXT:    [[CALL:%.*]] = tail call contract double @__hipstdpar_remainder_f64(double noundef [[TMP0]], double noundef [[TMP0]]) #[[ATTR4:[0-9]+]]
+  %call = tail call contract double @remainder(double noundef %0, double noundef %0) #4
+  ; CHECK-NEXT:    [[CALL1:%.*]] = tail call contract float @__hipstdpar_remainder_f32(float noundef [[TMP1]], float noundef [[TMP1]]) #[[ATTR4]]
+  %call1 = tail call contract float @remainderf(float noundef %1, float noundef %1) #4
+  ; CHECK-NEXT:    [[CALL2:%.*]] = call contract double @__hipstdpar_remquo_f64(double noundef [[CALL]], double noundef [[CALL]], ptr noundef nonnull [[QUO_ADDR]]) #[[ATTR3:[0-9]+]]
+  %call2 = call contract double @remquo(double noundef %call, double noundef %call, ptr noundef nonnull %quo.addr) #5
+  ; CHECK-NEXT:    [[CALL3:%.*]] = call contract float @__hipstdpar_remquo_f32(float noundef [[CALL1]], float noundef [[CALL1]], ptr noundef nonnull [[QUO_ADDR]]) #[[ATTR3]]
+  %call3 = call contract float @remquof(float noundef %call1, float noundef %call1, ptr noundef nonnull %quo.addr) #5
+  ; CHECK-NEXT:    [[TMP2:%.*]] = call contract double @llvm.fma.f64(double [[CALL2]], double [[CALL2]], double [[CALL2]])
+  %2 = call contract double @llvm.fma.f64(double %call2, double %call2, double %call2)
+  ; CHECK-NEXT:    [[TMP3:%.*]] = call contract float @llvm.fma.f32(float [[CALL3]], float [[CALL3]], float [[CALL3]])
+  %3 = call contract float @llvm.fma.f32(float %call3, float %call3, float %call3)
+  ; CHECK-NEXT:    [[CALL4:%.*]] = call contract double @__hipstdpar_fdim_f64(double noundef [[TMP2]], double noundef [[TMP2]]) #[[ATTR4]]
+  %call4 = call contract double @fdim(double noundef %2, double noundef %2) #4
+  ; CHECK-NEXT:    [[CALL5:%.*]] = call contract float @__hipstdpar_fdim_f32(float noundef [[TMP3]], float noundef [[TMP3]]) #[[ATTR4]]
+  %call5 = call contract float @fdimf(float noundef %3, float noundef %3) #4
+  ; CHECK-NEXT:    [[TMP4:%.*]] = call contract double @__hipstdpar_exp_f64(double [[CALL4]])
+  %4 = call contract double @llvm.exp.f64(double %call4)
+  ; CHECK-NEXT:    [[TMP5:%.*]] = call contract float @llvm.exp.f32(float [[CALL5]])
+  %5 = call contract float @llvm.exp.f32(float %call5)
+  ; CHECK-NEXT:    [[TMP6:%.*]] = call contract double @__hipstdpar_exp2_f64(double [[TMP4]])
+  %6 = call contract double @llvm.exp2.f64(double %4)
+  ; CHECK-NEXT:    [[TMP7:%.*]] = call contract float @llvm.exp2.f32(float [[TMP5]])
+  %7 = call contract float @llvm.exp2.f32(float %5)
+  ; CHECK-NEXT:    [[CALL6:%.*]] = call contract double @__hipstdpar_expm1_f64(double noundef [[TMP6]]) #[[ATTR4]]
+  %call6 = call contract double @expm1(double noundef %6) #4
+  ; CHECK-NEXT:    [[CALL7:%.*]] = call contract float @__hipstdpar_expm1_f32(float noundef [[TMP7]]) #[[ATTR4]]
+  %call7 = call contract float @expm1f(float noundef %7) #4
+  ; CHECK-NEXT:    [[TMP8:%.*]] = call contract double @__hipstdpar_log_f64(double [[CALL6]])
+  %8 = call contract double @llvm.log.f64(double %call6)
+  ; CHECK-NEXT:    [[TMP9:%.*]] = call contract float @llvm.log.f32(float [[CALL7]])
+  %9 = call contract float @llvm.log.f32(float %call7)
+  ; CHECK-NEXT:    [[TMP10:%.*]] = call contract double @__hipstdpar_log10_f64(double [[TMP8]])
+  %10 = call contract double @llvm.log10.f64(double %8)
+  ; CHECK-NEXT:    [[TMP11:%.*]] = call contract float @llvm.log10.f32(float [[TMP9]])
+  %11 = call contract float @llvm.log10.f32(float %9)
+  ; CHECK-NEXT:    [[TMP12:%.*]] = call contract double @__hipstdpar_log2_f64(double [[TMP10]])
+  %12 = call contract double @llvm.log2.f64(double %10)
+  ; CHECK-NEXT:    [[TMP13:%.*]] = call contract float @llvm.log2.f32(float [[TMP11]])
+  %13 = call contract float @llvm.log2.f32(float %11)
+  ; CHECK-NEXT:    [[CALL8:%.*]] = call contract double @__hipstdpar_log1p_f64(double noundef [[TMP12]]) #[[ATTR4]]
+  %call8 = call contract double @log1p(double noundef %12) #4
+  ; CHECK-NEXT:    [[CALL9:%.*]] = call contract float @__hipstdpar_log1p_f32(float noundef [[TMP13]]) #[[ATTR4]]
+  %call9 = call contract float @log1pf(float noundef %13) #4
+  ; CHECK-NEXT:    [[TMP14:%.*]] = call contract float @llvm.pow.f32(float [[CALL9]], float [[CALL9]])
+  %14 = call contract float @llvm.pow.f32(float %call9, float %call9)
+  ; CHECK-NEXT:    [[TMP15:%.*]] = call contract double @llvm.sqrt.f64(double [[CALL8]])
+  %15 = call contract double @llvm.sqrt.f64(double %call8)
+  ; CHECK-NEXT:    [[TMP16:%.*]] = call contract float @llvm.sqrt.f32(float [[TMP14]])
+  %16 = call contract float @llvm.sqrt.f32(float %14)
+  ; CHECK-NEXT:    [[CALL10:%.*]] = call contract double @__hipstdpar_cbrt_f64(double noundef [[TMP15]]) #[[ATTR4]]
+  %call10 = call contract double @cbrt(double noundef %15) #4
+  ; CHECK-NEXT:    [[CALL11:%.*]] = call contract float @__hipstdpar_cbrt_f32(float noundef [[TMP16]]) #[[ATTR4]]
+  %call11 = call contract float @cbrtf(float noundef %16) #4
+  ; CHECK-NEXT:    [[CALL12:%.*]] = call contract double @__hipstdpar_hypot_f64(double noundef [[CALL10]], double noundef [[CALL10]]) #[[ATTR4]]
+  %call12 = call contract double @hypot(double noundef %call10, double noundef %call10) #4
+  ; CHECK-NEXT:    [[CALL13:%.*]] = call contract float @__hipstdpar_hypot_f32(float noundef [[CALL11]], float noundef [[CALL11]]) #[[ATTR4]]
+  %call13 = call contract float @hypotf(float noundef %call11, float noundef %call11) #4
+  ; CHECK-NEXT:    [[TMP17:%.*]] = call contract float @llvm.sin.f32(float [[CALL13]])
+  %17 = call contract float @llvm.sin.f32(float %call13)
+  ; CHECK-NEXT:    [[TMP18:%.*]] = call contract float @llvm.cos.f32(float [[TMP17]])
+  %18 = call contract float @llvm.cos.f32(float %17)
+  ; CHECK-NEXT:    [[TMP19:%.*]] = call contract double @__hipstdpar_tan_f64(double [[CALL12]])
+  %19 = call contract double @llvm.tan.f64(double %call12)
+  ; CHECK-NEXT:    [[TMP20:%.*]] = call contract double @__hipstdpar_asin_f64(double [[TMP19]])
+  %20 = call contract double @llvm.asin.f64(double %19)
+  ; CHECK-NEXT:    [[TMP21:%.*]] = call contract double @__hipstdpar_acos_f64(double [[TMP20]])
+  %21 = call contract double @llvm.acos.f64(double %20)
+  ; CHECK-NEXT:    [[TMP22:%.*]] = call contract double @__hipstdpar_atan_f64(double [[TMP21]])
+  %22 = call contract double @llvm.atan.f64(double %21)
+  ; CHECK-NEXT:    [[TMP23:%.*]] = call contract double @__hipstdpar_atan2_f64(double [[TMP22]], double [[TMP22]])
+  %23 = call contract double @llvm.atan2.f64(double %22, double %22)
+  ; CHECK-NEXT:    [[TMP24:%.*]] = call contract double @__hipstdpar_sinh_f64(double [[TMP23]])
+  %24 = call contract double @llvm.sinh.f64(double %23)
+  ; CHECK-NEXT:    [[TMP25:%.*]] = call contract double @__hipstdpar_cosh_f64(double [[TMP24]])
+  %25 = call contract double @llvm.cosh.f64(double %24)
+  ; CHECK-NEXT:    [[TMP26:%.*]] = call contract double @__hipstdpar_tanh_f64(double [[TMP25]])
+  %26 = call contract double @llvm.tanh.f64(double %25)
+  ; CHECK-NEXT:    [[CALL14:%.*]] = call contract double @__hipstdpar_asinh_f64(double noundef [[TMP26]]) #[[ATTR4]]
+  %call14 = call contract double @asinh(double noundef %26) #4
+  ; CHECK-NEXT:    [[CALL15:%.*]] = call contract float @__hipstdpar_asinh_f32(float noundef [[TMP18]]) #[[ATTR4]]
+  %call15 = call contract float @asinhf(float noundef %18) #4
+  ; CHECK-NEXT:    [[CALL16:%.*]] = call contract double @__hipstdpar_acosh_f64(double noundef [[CALL14]]) #[[ATTR4]]
+  %call16 = call contract double @acosh(double noundef %call14) #4
+  ; CHECK-NEXT:    [[CALL17:%.*]] = call contract float @__hipstdpar_acosh_f32(float noundef [[CALL15]]) #[[ATTR4]]
+  %call17 = call contract float @acoshf(float noundef %call15) #4
+  ; CHECK-NEXT:    [[CALL18:%.*]] = call contract double @__hipstdpar_atanh_f64(double noundef [[CALL16]]) #[[ATTR4]]
+  %call18 = call contract double @atanh(double noundef %call16) #4
+  ; CHECK-NEXT:    [[CALL19:%.*]] = call contract float @__hipstdpar_atanh_f32(float noundef [[CALL17]]) #[[ATTR4]]
+  %call19 = call contract float @atanhf(float noundef %call17) #4
+  ; CHECK-NEXT:    [[CALL20:%.*]] = call contract double @__hipstdpar_erf_f64(double noundef [[CALL18]]) #[[ATTR4]]
+  %call20 = call contract double @erf(double noundef %call18) #4
+  ; CHECK-NEXT:    [[CALL21:%.*]] = call contract float @__hipstdpar_erf_f32(float noundef [[CALL19]]) #[[ATTR4]]
+  %call21 = call contract float @erff(float noundef %call19) #4
+  ; CHECK-NEXT:    [[CALL22:%.*]] = call contract double @__hipstdpar_erfc_f64(double noundef [[CALL20]]) #[[ATTR4]]
+  %call22 = call contract double @erfc(double noundef %call20) #4
+  ; CHECK-NEXT:    [[CALL23:%.*]] = call contract float @__hipstdpar_erfc_f32(float noundef [[CALL21]]) #[[ATTR4]]
+  %call23 = call contract float @erfcf(float noundef %call21) #4
+  ; CHECK-NEXT:    [[CALL24:%.*]] = call contract double @__hipstdpar_tgamma_f64(double noundef [[CALL22]]) #[[ATTR4]]
+  %call24 = call contract double @tgamma(double noundef %call22) #4
+  ; CHECK-NEXT:    [[CALL25:%.*]] = call contract float @__hipstdpar_tgamma_f32(float noundef [[CALL23]]) #[[ATTR4]]
+  %call25 = call contract float @tgammaf(float noundef %call23) #4
+  ; CHECK-NEXT:    [[CALL26:%.*]] = call contract double @__hipstdpar_lgamma_f64(double noundef [[CALL24]]) #[[ATTR3]]
+  %call26 = call contract double @lgamma(double noundef %call24) #5
+  ; CHECK-NEXT:    [[CALL27:%.*]] = call contract float @__hipstdpar_lgamma_f32(float noundef [[CALL25]]) #[[ATTR3]]
+  %call27 = call contract float @lgammaf(float noundef %call25) #5
+  ret void
+}
+
+declare double @llvm.fabs.f64(double) #1
+
+declare float @llvm.fabs.f32(float) #1
+
+declare hidden double @remainder(double noundef, double noundef) local_unnamed_addr #2
+
+declare hidden float @remainderf(float noundef, float noundef) local_unnamed_addr #2
+
+declare hidden double @remquo(double noundef, double noundef, ptr noundef) local_unnamed_addr #3
+
+declare hidden float @remquof(float noundef, float noundef, ptr noundef) local_unnamed_addr #3
+
+declare double @llvm.fma.f64(double, double, double) #1
+
+declare float @llvm.fma.f32(float, float, float) #1
+
+declare hidden double @fdim(double noundef, double noundef) local_unnamed_addr #2
+
+declare hidden float @fdimf(float noundef, float noundef) local_unnamed_addr #2
+
+declare double @llvm.exp.f64(double) #1
+
+declare float @llvm.exp.f32(float) #1
+
+declare double @llvm.exp2.f64(double) #1
+
+declare float @llvm.exp2.f32(float) #1
+
+declare hidden double @expm1(double noundef) local_unnamed_addr #2
+
+declare hidden float @expm1f(float noundef) local_unnamed_addr #2
+
+declare double @llvm.log.f64(double) #1
+
+declare float @llvm.log.f32(float) #1
+
+declare double @llvm.log10.f64(double) #1
+
+declare float @llvm.log10.f32(float) #1
+
+declare double @llvm.log2.f64(double) #1
+
+declare float @llvm.log2.f32(float) #1
+
+declare hidden double @log1p(double noundef) local_unnamed_addr #2
+
+declare hidden float @log1pf(float noundef) local_unnamed_addr #2
+
+declare float @llvm.pow.f32(float, float) #1
+
+declare double @llvm.sqrt.f64(double) #1
+
+declare float @llvm.sqrt.f32(float) #1
+
+declare hidden double @cbrt(double noundef) local_unnamed_addr #2
+
+declare hidden float @cbrtf(float noundef) local_unnamed_addr #2
+
+declare hidden double @hypot(double noundef, double noundef) local_unnamed_addr #2
+
+declare hidden float @hypotf(float noundef, float noundef) local_unnamed_addr #2
+
+declare float @llvm.sin.f32(float) #1
+
+declare float @llvm.cos.f32(float) #1
+
+declare double @llvm.tan.f64(double) #1
+
+declare double @llvm.asin.f64(double) #1
+
+declare double @llvm.acos.f64(double) #1
+
+declare double @llvm.atan.f64(double) #1
+
+declare double @llvm.atan2.f64(double, double) #1
+
+declare double @llvm.sinh.f64(double) #1
+
+declare double @llvm.cosh.f64(double) #1
+
+declare double @llvm.tanh.f64(double) #1
+
+declare hidden double @asinh(double noundef) local_unnamed_addr #2
+
+declare hidden float @asinhf(float noundef) local_unnamed_addr #2
+
+declare hidden double @acosh(double noundef) local_unnamed_addr #2
+
+declare hidden float @acoshf(float noundef) local_unnamed_addr #2
+
+declare hidden double @atanh(double noundef) local_unnamed_addr #2
+
+declare hidden float @atanhf(float noundef) local_unnamed_addr #2
+
+declare hidden double @erf(double noundef) local_unnamed_addr #2
+
+declare hidden float @erff(float noundef) local_unnamed_addr #2
+
+declare hidden double @erfc(double noundef) local_unnamed_addr #2
+
+declare hidden float @erfcf(float noundef) local_unnamed_addr #2
+
+declare hidden double @tgamma(double noundef) local_unnamed_addr #2
+
+declare hidden float @tgammaf(float noundef) local_unnamed_addr #2
+
+declare hidden double @lgamma(double noundef) local_unnamed_addr #3
+
+declare hidden float @lgammaf(float noundef) local_unnamed_addr #3
+
+attributes #0 = { convergent mustprogress norecurse nounwind }
+attributes #1 = { mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+attributes #2 = { convergent mustprogress nofree nounwind willreturn memory(none) }
+attributes #3 = { convergent nounwind }
+attributes #4 = { convergent nounwind willreturn memory(none) }
+attributes #5 = { convergent nounwind }

>From 95f600547a974ae23fd40fdd8a88bdcfeabdee3d Mon Sep 17 00:00:00 2001
From: Alex Voicu <alexandru.voicu at amd.com>
Date: Fri, 31 Oct 2025 19:45:42 +0200
Subject: [PATCH 2/2] Add support for `SPV_INTEL_kernel_attributes`.

---
 llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp     |  8 ++++-
 llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp    |  4 ++-
 llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp |  5 +++
 .../lib/Target/SPIRV/SPIRVSymbolicOperands.td | 12 ++++++-
 .../max_work_group_size.ll                    | 32 +++++++++++++++++++
 5 files changed, 58 insertions(+), 3 deletions(-)
 create mode 100644 llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_kernel_attributes/max_work_group_size.ll

diff --git a/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp b/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp
index 640b014646f36..46d0a6724a74b 100644
--- a/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp
@@ -577,6 +577,11 @@ void SPIRVAsmPrinter::outputExecutionMode(const Module &M) {
     if (MDNode *Node = F.getMetadata("intel_reqd_sub_group_size"))
       outputExecutionModeFromMDNode(FReg, Node,
                                     SPIRV::ExecutionMode::SubgroupSize, 0, 0);
+    if (MDNode *Node = F.getMetadata("max_work_group_size")) {
+      if (ST->canUseExtension(SPIRV::Extension::SPV_INTEL_kernel_attributes))
+        outputExecutionModeFromMDNode(
+            FReg, Node, SPIRV::ExecutionMode::MaxWorkgroupSizeINTEL, 3, 1);
+    }
     if (MDNode *Node = F.getMetadata("vec_type_hint")) {
       MCInst Inst;
       Inst.setOpcode(SPIRV::OpExecutionMode);
@@ -628,7 +633,8 @@ void SPIRVAsmPrinter::outputExecutionMode(const Module &M) {
             // Check if the constant is int32, if not skip it.
             const MachineRegisterInfo &MRI = MI->getMF()->getRegInfo();
             MachineInstr *TypeMI = MRI.getVRegDef(MI->getOperand(1).getReg());
-            if (!TypeMI || TypeMI->getOperand(1).getImm() != 32)
+            if (!TypeMI || !TypeMI->getOperand(1).isImm() ||
+                TypeMI->getOperand(1).getImm() != 32)
               continue;
 
             ConstZero = MI;
diff --git a/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp b/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp
index 96f5dee21bc2a..f0558ebcb6681 100644
--- a/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp
@@ -155,7 +155,9 @@ static const std::map<std::string, SPIRV::Extension::Extension, std::less<>>
         {"SPV_INTEL_predicated_io",
          SPIRV::Extension::Extension::SPV_INTEL_predicated_io},
         {"SPV_KHR_maximal_reconvergence",
-         SPIRV::Extension::Extension::SPV_KHR_maximal_reconvergence}};
+         SPIRV::Extension::Extension::SPV_KHR_maximal_reconvergence},
+        {"SPV_INTEL_kernel_attributes",
+         SPIRV::Extension::Extension::SPV_INTEL_kernel_attributes}};
 
 bool SPIRVExtensionsParser::parse(cl::Option &O, StringRef ArgName,
                                   StringRef ArgValue,
diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
index f7cdfcb65623b..5db592b2c097e 100644
--- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
@@ -2181,6 +2181,11 @@ static void collectReqs(const Module &M, SPIRV::ModuleAnalysisInfo &MAI,
       MAI.Reqs.getAndAddRequirements(
           SPIRV::OperandCategory::ExecutionModeOperand,
           SPIRV::ExecutionMode::SubgroupSize, ST);
+    if (F.getMetadata("max_work_group_size"))
+      MAI.Reqs.getAndAddRequirements(
+          SPIRV::OperandCategory::ExecutionModeOperand,
+          SPIRV::ExecutionMode::MaxWorkgroupSizeINTEL,
+          ST);
     if (F.getMetadata("vec_type_hint"))
       MAI.Reqs.getAndAddRequirements(
           SPIRV::OperandCategory::ExecutionModeOperand,
diff --git a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td
index 7d08b29a51a6e..e4c355eaf5185 100644
--- a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td
+++ b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td
@@ -587,6 +587,9 @@ defm CooperativeMatrixBFloat16ComponentTypeINTEL : CapabilityOperand<6437, 0, 0,
 defm RoundToInfinityINTEL : CapabilityOperand<5582, 0, 0, [SPV_INTEL_float_controls2], []>;
 defm FloatingPointModeINTEL : CapabilityOperand<5583, 0, 0, [SPV_INTEL_float_controls2], []>;
 defm FunctionFloatControlINTEL : CapabilityOperand<5821, 0, 0, [SPV_INTEL_float_controls2], []>;
+defm KernelAttributesINTEL : CapabilityOperand<5892, 0, 0, [SPV_INTEL_kernel_attributes], [Kernel]>;
+defm FPGAKernelAttributesINTEL : CapabilityOperand<5897, 0, 0, [SPV_INTEL_kernel_attributes], [Kernel]>;
+defm FPGAKernelAttributesv2INTEL : CapabilityOperand<6161, 0, 0, [SPV_INTEL_kernel_attributes], [Kernel]>;
 defm LongCompositesINTEL : CapabilityOperand<6089, 0, 0, [SPV_INTEL_long_composites], []>;
 defm BindlessImagesINTEL : CapabilityOperand<6528, 0, 0, [SPV_INTEL_bindless_images], []>;
 defm MemoryAccessAliasingINTEL : CapabilityOperand<5910, 0, 0, [SPV_INTEL_memory_access_aliasing], []>;
@@ -805,6 +808,13 @@ defm RoundingModeRTPINTEL : ExecutionModeOperand<5620, [RoundToInfinityINTEL]>;
 defm RoundingModeRTNINTEL : ExecutionModeOperand<5621, [RoundToInfinityINTEL]>;
 defm FloatingPointModeALTINTEL : ExecutionModeOperand<5622, [FloatingPointModeINTEL]>;
 defm FloatingPointModeIEEEINTEL : ExecutionModeOperand<5623, [FloatingPointModeINTEL]>;
+defm MaxWorkgroupSizeINTEL : ExecutionModeOperand<5893, [KernelAttributesINTEL]>;
+defm MaxWorkDimINTEL : ExecutionModeOperand<5894, [KernelAttributesINTEL]>;
+defm NoGlobalOffsetINTEL : ExecutionModeOperand<5895, [KernelAttributesINTEL]>;
+defm NumSIMDWorkitemsINTEL : ExecutionModeOperand<5896, [FPGAKernelAttributesINTEL]>;
+defm SchedulerTargetFmaxMhzINTEL : ExecutionModeOperand<5903, [FPGAKernelAttributesINTEL]>;
+defm StreamingInterfaceINTEL : ExecutionModeOperand<6154, [FPGAKernelAttributesv2INTEL]>;
+defm RegisterMapInterfaceINTEL : ExecutionModeOperand<6160, [FPGAKernelAttributesv2INTEL]>;
 defm FPFastMathDefault : ExecutionModeOperand<6028, [FloatControls2]>;
 defm MaximallyReconvergesKHR : ExecutionModeOperand<6023, [Shader]>;
 
@@ -1919,7 +1929,7 @@ defm GenericCastToPtr :  SpecConstantOpOperandsOperand<122, [], [Kernel]>;
 defm PtrCastToGeneric :  SpecConstantOpOperandsOperand<121, [], [Kernel]>;
 defm Bitcast :  SpecConstantOpOperandsOperand<124, [], []>;
 defm QuantizeToF16 :  SpecConstantOpOperandsOperand<116, [], [Shader]>;
-// Arithmetic 
+// Arithmetic
 defm SNegate :  SpecConstantOpOperandsOperand<126, [], []>;
 defm Not :  SpecConstantOpOperandsOperand<200, [], []>;
 defm IAdd :  SpecConstantOpOperandsOperand<128, [], []>;
diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_kernel_attributes/max_work_group_size.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_kernel_attributes/max_work_group_size.ll
new file mode 100644
index 0000000000000..717771c965496
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_kernel_attributes/max_work_group_size.ll
@@ -0,0 +1,32 @@
+; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_INTEL_kernel_attributes %s -o - | FileCheck %s
+; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-amd-amdhsa %s -o - | FileCheck %s
+; %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_INTEL_kernel_attributes %s -o - -filetype=obj | spirv-val %}
+; %if spirv-tools %{ llc -O0 -mtriple=spirv64-amd-amdhsa %s -o - -filetype=obj | spirv-val %}
+
+; CHECK: OpCapability KernelAttributesINTEL
+; CHECK: OpExtension "SPV_INTEL_kernel_attributes"
+; CHECK: OpEntryPoint {{.*}} %[[DIM1:[0-9]+]] "Dim1"
+; CHECK: OpEntryPoint {{.*}} %[[DIM2:[0-9]+]] "Dim2"
+; CHECK: OpEntryPoint {{.*}} %[[DIM3:[0-9]+]] "Dim3"
+; CHECK: OpExecutionMode %[[DIM1]] MaxWorkgroupSizeINTEL 4 1 1
+; CHECK: OpExecutionMode %[[DIM2]] MaxWorkgroupSizeINTEL 8 4 1
+; CHECK: OpExecutionMode %[[DIM3]] MaxWorkgroupSizeINTEL 16 8 4
+; CHECK: %[[DIM1]] = OpFunction
+; CHECK: %[[DIM2]] = OpFunction
+; CHECK: %[[DIM3]] = OpFunction
+
+define spir_kernel void @Dim1() !max_work_group_size !0 {
+  ret void
+}
+
+define spir_kernel void @Dim2() !max_work_group_size !1 {
+  ret void
+}
+
+define spir_kernel void @Dim3() !max_work_group_size !2 {
+  ret void
+}
+
+!0 = !{i32 4}
+!1 = !{i32 8, i32 4}
+!2 = !{i32 16, i32 8, i32 4}