[clang] [llvm] [NFC][SPIRV] Re-factor feature map initialisation for AMDGCN flavoured SPIR-V (PR #192067)

Alex Voicu via cfe-commits cfe-commits at lists.llvm.org
Tue Apr 14 07:29:28 PDT 2026


https://github.com/AlexVlx created https://github.com/llvm/llvm-project/pull/192067

AMDGCN flavoured SPIR-V must support the union of all AMDGCN features, as we cannot early adjudicate on this or that feature's availability. We were hand filling in the feature map, which was error prone and led to constant grind as new features were added. This patch moves to a programmatic approach where we iterate through all AMDGCN GPUs and collect features, thus establishing the union. With this change AMDGCN flavoured SPIR-V will automatically pick up new features as they come along.

>From 3e0abe10c53e08cc1ebf16662d8d58cd7245f70f Mon Sep 17 00:00:00 2001
From: Alex Voicu <alexandru.voicu at amd.com>
Date: Thu, 15 May 2025 23:27:42 +0100
Subject: [PATCH 1/8] Add pass which forwards unimplemented math builtins /
 libcalls to the HIPSTDPAR runtime component.

---
 .../llvm/Transforms/HipStdPar/HipStdPar.h     |   7 +
 llvm/lib/Passes/PassRegistry.def              |   1 +
 .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp |   8 +-
 llvm/lib/Transforms/HipStdPar/HipStdPar.cpp   | 117 +++++++++
 llvm/test/Transforms/HipStdPar/math-fixup.ll  | 240 ++++++++++++++++++
 5 files changed, 371 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/Transforms/HipStdPar/math-fixup.ll

diff --git a/llvm/include/llvm/Transforms/HipStdPar/HipStdPar.h b/llvm/include/llvm/Transforms/HipStdPar/HipStdPar.h
index 5ff38bdf04812..27195051ed7eb 100644
--- a/llvm/include/llvm/Transforms/HipStdPar/HipStdPar.h
+++ b/llvm/include/llvm/Transforms/HipStdPar/HipStdPar.h
@@ -40,6 +40,13 @@ class HipStdParAllocationInterpositionPass
   static bool isRequired() { return true; }
 };
 
+class HipStdParMathFixupPass : public PassInfoMixin<HipStdParMathFixupPass> {
+public:
+  PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);
+
+  static bool isRequired() { return true; }
+};
+
 } // namespace llvm
 
 #endif // LLVM_TRANSFORMS_HIPSTDPAR_HIPSTDPAR_H
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 94dabe290213d..3acdbf4d49fde 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -80,6 +80,7 @@ MODULE_PASS("global-merge-func", GlobalMergeFuncPass())
 MODULE_PASS("globalopt", GlobalOptPass())
 MODULE_PASS("globalsplit", GlobalSplitPass())
 MODULE_PASS("hipstdpar-interpose-alloc", HipStdParAllocationInterpositionPass())
+MODULE_PASS("hipstdpar-math-fixup", HipStdParMathFixupPass())
 MODULE_PASS("hipstdpar-select-accelerator-code",
             HipStdParAcceleratorCodeSelectionPass())
 MODULE_PASS("hotcoldsplit", HotColdSplittingPass())
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index ccb251b730f16..c3f8cee1e1783 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -819,8 +819,10 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
           // When we are not using -fgpu-rdc, we can run accelerator code
           // selection relatively early, but still after linking to prevent
           // eager removal of potentially reachable symbols.
-          if (EnableHipStdPar)
+          if (EnableHipStdPar) {
+            PM.addPass(HipStdParMathFixupPass());
             PM.addPass(HipStdParAcceleratorCodeSelectionPass());
+          }
           PM.addPass(AMDGPUPrintfRuntimeBindingPass());
         }
 
@@ -899,8 +901,10 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
         // selection after linking to prevent, otherwise we end up removing
         // potentially reachable symbols that were exported as external in other
         // modules.
-        if (EnableHipStdPar)
+        if (EnableHipStdPar) {
+          PM.addPass(HipStdParMathFixupPass());
           PM.addPass(HipStdParAcceleratorCodeSelectionPass());
+        }
         // We want to support the -lto-partitions=N option as "best effort".
         // For that, we need to lower LDS earlier in the pipeline before the
         // module is partitioned for codegen.
diff --git a/llvm/lib/Transforms/HipStdPar/HipStdPar.cpp b/llvm/lib/Transforms/HipStdPar/HipStdPar.cpp
index 5a87cf8c83d79..815878089c69e 100644
--- a/llvm/lib/Transforms/HipStdPar/HipStdPar.cpp
+++ b/llvm/lib/Transforms/HipStdPar/HipStdPar.cpp
@@ -37,6 +37,16 @@
 //    memory that ends up in one of the runtime equivalents, since this can
 //    happen if e.g. a library that was compiled without interposition returns
 //    an allocation that can be validly passed to `free`.
+//
+// 3. MathFixup (required): Some accelerators might have an incomplete
+//    implementation for the intrinsics used to implement some of the math
+//    functions in <cmath> / their corresponding libcall lowerings. Since this
+//    can vary quite significantly between accelerators, we replace calls to a
+//    set of intrinsics / lib functions known to be problematic with calls to a
+//    HIPSTDPAR specific forwarding layer, which gives an uniform interface for
+//    accelerators to implement in their own runtime components. This pass
+//    should run before AcceleratorCodeSelection so as to prevent the spurious
+//    removal of the HIPSTDPAR specific forwarding functions.
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/HipStdPar/HipStdPar.h"
@@ -48,6 +58,7 @@
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/Function.h"
+#include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Transforms/Utils/ModuleUtils.h"
 
@@ -321,3 +332,109 @@ HipStdParAllocationInterpositionPass::run(Module &M, ModuleAnalysisManager&) {
 
   return PreservedAnalyses::none();
 }
+
+static constexpr std::pair<StringLiteral, StringLiteral> MathLibToHipStdPar[]{
+    {"acosh", "__hipstdpar_acosh_f64"},
+    {"acoshf", "__hipstdpar_acosh_f32"},
+    {"asinh", "__hipstdpar_asinh_f64"},
+    {"asinhf", "__hipstdpar_asinh_f32"},
+    {"atanh", "__hipstdpar_atanh_f64"},
+    {"atanhf", "__hipstdpar_atanh_f32"},
+    {"cbrt", "__hipstdpar_cbrt_f64"},
+    {"cbrtf", "__hipstdpar_cbrt_f32"},
+    {"erf", "__hipstdpar_erf_f64"},
+    {"erff", "__hipstdpar_erf_f32"},
+    {"erfc", "__hipstdpar_erfc_f64"},
+    {"erfcf", "__hipstdpar_erfc_f32"},
+    {"fdim", "__hipstdpar_fdim_f64"},
+    {"fdimf", "__hipstdpar_fdim_f32"},
+    {"expm1", "__hipstdpar_expm1_f64"},
+    {"expm1f", "__hipstdpar_expm1_f32"},
+    {"hypot", "__hipstdpar_hypot_f64"},
+    {"hypotf", "__hipstdpar_hypot_f32"},
+    {"ilogb", "__hipstdpar_ilogb_f64"},
+    {"ilogbf", "__hipstdpar_ilogb_f32"},
+    {"lgamma", "__hipstdpar_lgamma_f64"},
+    {"lgammaf", "__hipstdpar_lgamma_f32"},
+    {"log1p", "__hipstdpar_log1p_f64"},
+    {"log1pf", "__hipstdpar_log1p_f32"},
+    {"logb", "__hipstdpar_logb_f64"},
+    {"logbf", "__hipstdpar_logb_f32"},
+    {"nextafter", "__hipstdpar_nextafter_f64"},
+    {"nextafterf", "__hipstdpar_nextafter_f32"},
+    {"nexttoward", "__hipstdpar_nexttoward_f64"},
+    {"nexttowardf", "__hipstdpar_nexttoward_f32"},
+    {"remainder", "__hipstdpar_remainder_f64"},
+    {"remainderf", "__hipstdpar_remainder_f32"},
+    {"remquo", "__hipstdpar_remquo_f64"},
+    {"remquof", "__hipstdpar_remquo_f32"},
+    {"scalbln", "__hipstdpar_scalbln_f64"},
+    {"scalblnf", "__hipstdpar_scalbln_f32"},
+    {"scalbn", "__hipstdpar_scalbn_f64"},
+    {"scalbnf", "__hipstdpar_scalbn_f32"},
+    {"tgamma", "__hipstdpar_tgamma_f64"},
+    {"tgammaf", "__hipstdpar_tgamma_f32"}};
+
+PreservedAnalyses HipStdParMathFixupPass::run(Module &M,
+                                              ModuleAnalysisManager &) {
+  if (M.empty())
+    return PreservedAnalyses::all();
+
+  SmallVector<std::pair<Function *, std::string>> ToReplace;
+  for (auto &&F : M) {
+    if (!F.hasName())
+      continue;
+
+    auto N = F.getName().str();
+    auto ID = F.getIntrinsicID();
+
+    switch (ID) {
+    case Intrinsic::not_intrinsic: {
+      auto It = find_if(MathLibToHipStdPar,
+                        [&](auto &&M) { return M.first == N; });
+      if (It == std::cend(MathLibToHipStdPar))
+        continue;
+      ToReplace.emplace_back(&F, It->second);
+      break;
+    }
+    case Intrinsic::acos:
+    case Intrinsic::asin:
+    case Intrinsic::atan:
+    case Intrinsic::atan2:
+    case Intrinsic::cosh:
+    case Intrinsic::modf:
+    case Intrinsic::sinh:
+    case Intrinsic::tan:
+    case Intrinsic::tanh:
+      break;
+    default: {
+      if (F.getReturnType()->isDoubleTy()) {
+        switch (ID) {
+        case Intrinsic::cos:
+        case Intrinsic::exp:
+        case Intrinsic::exp2:
+        case Intrinsic::log:
+        case Intrinsic::log10:
+        case Intrinsic::log2:
+        case Intrinsic::pow:
+        case Intrinsic::sin:
+          break;
+        default:
+          continue;
+        }
+        break;
+      }
+      continue;
+    }
+    }
+
+    llvm::replace(N, '.', '_');
+    N.replace(0, sizeof("llvm"), "__hipstdpar_");
+    ToReplace.emplace_back(&F, std::move(N));
+  }
+  for (auto &&F : ToReplace)
+    F.first->replaceAllUsesWith(M.getOrInsertFunction(
+        F.second, F.first->getFunctionType()).getCallee());
+
+  return PreservedAnalyses::none();
+}
\ No newline at end of file
diff --git a/llvm/test/Transforms/HipStdPar/math-fixup.ll b/llvm/test/Transforms/HipStdPar/math-fixup.ll
new file mode 100644
index 0000000000000..e0e2ca79c0843
--- /dev/null
+++ b/llvm/test/Transforms/HipStdPar/math-fixup.ll
@@ -0,0 +1,240 @@
+; RUN: opt -S -passes=hipstdpar-math-fixup %s | FileCheck %s
+
+define void @foo(double noundef %dbl, float noundef %flt, i32 noundef %quo) #0 {
+; CHECK-LABEL: define void @foo(
+; CHECK-SAME: double noundef [[DBL:%.*]], float noundef [[FLT:%.*]], i32 noundef [[QUO:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[QUO_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i32 [[QUO]], ptr [[QUO_ADDR]], align 4
+entry:
+  %quo.addr = alloca i32, align 4
+  store i32 %quo, ptr %quo.addr, align 4
+  ; CHECK-NEXT:    [[TMP0:%.*]] = tail call contract double @llvm.fabs.f64(double [[DBL]])
+  %0 = tail call contract double @llvm.fabs.f64(double %dbl)
+  ; CHECK-NEXT:    [[TMP1:%.*]] = tail call contract float @llvm.fabs.f32(float [[FLT]])
+  %1 = tail call contract float @llvm.fabs.f32(float %flt)
+  ; CHECK-NEXT:    [[CALL:%.*]] = tail call contract double @__hipstdpar_remainder_f64(double noundef [[TMP0]], double noundef [[TMP0]]) #[[ATTR4:[0-9]+]]
+  %call = tail call contract double @remainder(double noundef %0, double noundef %0) #4
+  ; CHECK-NEXT:    [[CALL1:%.*]] = tail call contract float @__hipstdpar_remainder_f32(float noundef [[TMP1]], float noundef [[TMP1]]) #[[ATTR4]]
+  %call1 = tail call contract float @remainderf(float noundef %1, float noundef %1) #4
+  ; CHECK-NEXT:    [[CALL2:%.*]] = call contract double @__hipstdpar_remquo_f64(double noundef [[CALL]], double noundef [[CALL]], ptr noundef nonnull [[QUO_ADDR]]) #[[ATTR3:[0-9]+]]
+  %call2 = call contract double @remquo(double noundef %call, double noundef %call, ptr noundef nonnull %quo.addr) #5
+  ; CHECK-NEXT:    [[CALL3:%.*]] = call contract float @__hipstdpar_remquo_f32(float noundef [[CALL1]], float noundef [[CALL1]], ptr noundef nonnull [[QUO_ADDR]]) #[[ATTR3]]
+  %call3 = call contract float @remquof(float noundef %call1, float noundef %call1, ptr noundef nonnull %quo.addr) #5
+  ; CHECK-NEXT:    [[TMP2:%.*]] = call contract double @llvm.fma.f64(double [[CALL2]], double [[CALL2]], double [[CALL2]])
+  %2 = call contract double @llvm.fma.f64(double %call2, double %call2, double %call2)
+  ; CHECK-NEXT:    [[TMP3:%.*]] = call contract float @llvm.fma.f32(float [[CALL3]], float [[CALL3]], float [[CALL3]])
+  %3 = call contract float @llvm.fma.f32(float %call3, float %call3, float %call3)
+  ; CHECK-NEXT:    [[CALL4:%.*]] = call contract double @__hipstdpar_fdim_f64(double noundef [[TMP2]], double noundef [[TMP2]]) #[[ATTR4]]
+  %call4 = call contract double @fdim(double noundef %2, double noundef %2) #4
+  ; CHECK-NEXT:    [[CALL5:%.*]] = call contract float @__hipstdpar_fdim_f32(float noundef [[TMP3]], float noundef [[TMP3]]) #[[ATTR4]]
+  %call5 = call contract float @fdimf(float noundef %3, float noundef %3) #4
+  ; CHECK-NEXT:    [[TMP4:%.*]] = call contract double @__hipstdpar_exp_f64(double [[CALL4]])
+  %4 = call contract double @llvm.exp.f64(double %call4)
+  ; CHECK-NEXT:    [[TMP5:%.*]] = call contract float @llvm.exp.f32(float [[CALL5]])
+  %5 = call contract float @llvm.exp.f32(float %call5)
+  ; CHECK-NEXT:    [[TMP6:%.*]] = call contract double @__hipstdpar_exp2_f64(double [[TMP4]])
+  %6 = call contract double @llvm.exp2.f64(double %4)
+  ; CHECK-NEXT:    [[TMP7:%.*]] = call contract float @llvm.exp2.f32(float [[TMP5]])
+  %7 = call contract float @llvm.exp2.f32(float %5)
+  ; CHECK-NEXT:    [[CALL6:%.*]] = call contract double @__hipstdpar_expm1_f64(double noundef [[TMP6]]) #[[ATTR4]]
+  %call6 = call contract double @expm1(double noundef %6) #4
+  ; CHECK-NEXT:    [[CALL7:%.*]] = call contract float @__hipstdpar_expm1_f32(float noundef [[TMP7]]) #[[ATTR4]]
+  %call7 = call contract float @expm1f(float noundef %7) #4
+  ; CHECK-NEXT:    [[TMP8:%.*]] = call contract double @__hipstdpar_log_f64(double [[CALL6]])
+  %8 = call contract double @llvm.log.f64(double %call6)
+  ; CHECK-NEXT:    [[TMP9:%.*]] = call contract float @llvm.log.f32(float [[CALL7]])
+  %9 = call contract float @llvm.log.f32(float %call7)
+  ; CHECK-NEXT:    [[TMP10:%.*]] = call contract double @__hipstdpar_log10_f64(double [[TMP8]])
+  %10 = call contract double @llvm.log10.f64(double %8)
+  ; CHECK-NEXT:    [[TMP11:%.*]] = call contract float @llvm.log10.f32(float [[TMP9]])
+  %11 = call contract float @llvm.log10.f32(float %9)
+  ; CHECK-NEXT:    [[TMP12:%.*]] = call contract double @__hipstdpar_log2_f64(double [[TMP10]])
+  %12 = call contract double @llvm.log2.f64(double %10)
+  ; CHECK-NEXT:    [[TMP13:%.*]] = call contract float @llvm.log2.f32(float [[TMP11]])
+  %13 = call contract float @llvm.log2.f32(float %11)
+  ; CHECK-NEXT:    [[CALL8:%.*]] = call contract double @__hipstdpar_log1p_f64(double noundef [[TMP12]]) #[[ATTR4]]
+  %call8 = call contract double @log1p(double noundef %12) #4
+  ; CHECK-NEXT:    [[CALL9:%.*]] = call contract float @__hipstdpar_log1p_f32(float noundef [[TMP13]]) #[[ATTR4]]
+  %call9 = call contract float @log1pf(float noundef %13) #4
+  ; CHECK-NEXT:    [[TMP14:%.*]] = call contract float @llvm.pow.f32(float [[CALL9]], float [[CALL9]])
+  %14 = call contract float @llvm.pow.f32(float %call9, float %call9)
+  ; CHECK-NEXT:    [[TMP15:%.*]] = call contract double @llvm.sqrt.f64(double [[CALL8]])
+  %15 = call contract double @llvm.sqrt.f64(double %call8)
+  ; CHECK-NEXT:    [[TMP16:%.*]] = call contract float @llvm.sqrt.f32(float [[TMP14]])
+  %16 = call contract float @llvm.sqrt.f32(float %14)
+  ; CHECK-NEXT:    [[CALL10:%.*]] = call contract double @__hipstdpar_cbrt_f64(double noundef [[TMP15]]) #[[ATTR4]]
+  %call10 = call contract double @cbrt(double noundef %15) #4
+  ; CHECK-NEXT:    [[CALL11:%.*]] = call contract float @__hipstdpar_cbrt_f32(float noundef [[TMP16]]) #[[ATTR4]]
+  %call11 = call contract float @cbrtf(float noundef %16) #4
+  ; CHECK-NEXT:    [[CALL12:%.*]] = call contract double @__hipstdpar_hypot_f64(double noundef [[CALL10]], double noundef [[CALL10]]) #[[ATTR4]]
+  %call12 = call contract double @hypot(double noundef %call10, double noundef %call10) #4
+  ; CHECK-NEXT:    [[CALL13:%.*]] = call contract float @__hipstdpar_hypot_f32(float noundef [[CALL11]], float noundef [[CALL11]]) #[[ATTR4]]
+  %call13 = call contract float @hypotf(float noundef %call11, float noundef %call11) #4
+  ; CHECK-NEXT:    [[TMP17:%.*]] = call contract float @llvm.sin.f32(float [[CALL13]])
+  %17 = call contract float @llvm.sin.f32(float %call13)
+  ; CHECK-NEXT:    [[TMP18:%.*]] = call contract float @llvm.cos.f32(float [[TMP17]])
+  %18 = call contract float @llvm.cos.f32(float %17)
+  ; CHECK-NEXT:    [[TMP19:%.*]] = call contract double @__hipstdpar_tan_f64(double [[CALL12]])
+  %19 = call contract double @llvm.tan.f64(double %call12)
+  ; CHECK-NEXT:    [[TMP20:%.*]] = call contract double @__hipstdpar_asin_f64(double [[TMP19]])
+  %20 = call contract double @llvm.asin.f64(double %19)
+  ; CHECK-NEXT:    [[TMP21:%.*]] = call contract double @__hipstdpar_acos_f64(double [[TMP20]])
+  %21 = call contract double @llvm.acos.f64(double %20)
+  ; CHECK-NEXT:    [[TMP22:%.*]] = call contract double @__hipstdpar_atan_f64(double [[TMP21]])
+  %22 = call contract double @llvm.atan.f64(double %21)
+  ; CHECK-NEXT:    [[TMP23:%.*]] = call contract double @__hipstdpar_atan2_f64(double [[TMP22]], double [[TMP22]])
+  %23 = call contract double @llvm.atan2.f64(double %22, double %22)
+  ; CHECK-NEXT:    [[TMP24:%.*]] = call contract double @__hipstdpar_sinh_f64(double [[TMP23]])
+  %24 = call contract double @llvm.sinh.f64(double %23)
+  ; CHECK-NEXT:    [[TMP25:%.*]] = call contract double @__hipstdpar_cosh_f64(double [[TMP24]])
+  %25 = call contract double @llvm.cosh.f64(double %24)
+  ; CHECK-NEXT:    [[TMP26:%.*]] = call contract double @__hipstdpar_tanh_f64(double [[TMP25]])
+  %26 = call contract double @llvm.tanh.f64(double %25)
+  ; CHECK-NEXT:    [[CALL14:%.*]] = call contract double @__hipstdpar_asinh_f64(double noundef [[TMP26]]) #[[ATTR4]]
+  %call14 = call contract double @asinh(double noundef %26) #4
+  ; CHECK-NEXT:    [[CALL15:%.*]] = call contract float @__hipstdpar_asinh_f32(float noundef [[TMP18]]) #[[ATTR4]]
+  %call15 = call contract float @asinhf(float noundef %18) #4
+  ; CHECK-NEXT:    [[CALL16:%.*]] = call contract double @__hipstdpar_acosh_f64(double noundef [[CALL14]]) #[[ATTR4]]
+  %call16 = call contract double @acosh(double noundef %call14) #4
+  ; CHECK-NEXT:    [[CALL17:%.*]] = call contract float @__hipstdpar_acosh_f32(float noundef [[CALL15]]) #[[ATTR4]]
+  %call17 = call contract float @acoshf(float noundef %call15) #4
+  ; CHECK-NEXT:    [[CALL18:%.*]] = call contract double @__hipstdpar_atanh_f64(double noundef [[CALL16]]) #[[ATTR4]]
+  %call18 = call contract double @atanh(double noundef %call16) #4
+  ; CHECK-NEXT:    [[CALL19:%.*]] = call contract float @__hipstdpar_atanh_f32(float noundef [[CALL17]]) #[[ATTR4]]
+  %call19 = call contract float @atanhf(float noundef %call17) #4
+  ; CHECK-NEXT:    [[CALL20:%.*]] = call contract double @__hipstdpar_erf_f64(double noundef [[CALL18]]) #[[ATTR4]]
+  %call20 = call contract double @erf(double noundef %call18) #4
+  ; CHECK-NEXT:    [[CALL21:%.*]] = call contract float @__hipstdpar_erf_f32(float noundef [[CALL19]]) #[[ATTR4]]
+  %call21 = call contract float @erff(float noundef %call19) #4
+  ; CHECK-NEXT:    [[CALL22:%.*]] = call contract double @__hipstdpar_erfc_f64(double noundef [[CALL20]]) #[[ATTR4]]
+  %call22 = call contract double @erfc(double noundef %call20) #4
+  ; CHECK-NEXT:    [[CALL23:%.*]] = call contract float @__hipstdpar_erfc_f32(float noundef [[CALL21]]) #[[ATTR4]]
+  %call23 = call contract float @erfcf(float noundef %call21) #4
+  ; CHECK-NEXT:    [[CALL24:%.*]] = call contract double @__hipstdpar_tgamma_f64(double noundef [[CALL22]]) #[[ATTR4]]
+  %call24 = call contract double @tgamma(double noundef %call22) #4
+  ; CHECK-NEXT:    [[CALL25:%.*]] = call contract float @__hipstdpar_tgamma_f32(float noundef [[CALL23]]) #[[ATTR4]]
+  %call25 = call contract float @tgammaf(float noundef %call23) #4
+  ; CHECK-NEXT:    [[CALL26:%.*]] = call contract double @__hipstdpar_lgamma_f64(double noundef [[CALL24]]) #[[ATTR3]]
+  %call26 = call contract double @lgamma(double noundef %call24) #5
+  ; CHECK-NEXT:    [[CALL27:%.*]] = call contract float @__hipstdpar_lgamma_f32(float noundef [[CALL25]]) #[[ATTR3]]
+  %call27 = call contract float @lgammaf(float noundef %call25) #5
+  ret void
+}
+
+declare double @llvm.fabs.f64(double) #1
+
+declare float @llvm.fabs.f32(float) #1
+
+declare hidden double @remainder(double noundef, double noundef) local_unnamed_addr #2
+
+declare hidden float @remainderf(float noundef, float noundef) local_unnamed_addr #2
+
+declare hidden double @remquo(double noundef, double noundef, ptr noundef) local_unnamed_addr #3
+
+declare hidden float @remquof(float noundef, float noundef, ptr noundef) local_unnamed_addr #3
+
+declare double @llvm.fma.f64(double, double, double) #1
+
+declare float @llvm.fma.f32(float, float, float) #1
+
+declare hidden double @fdim(double noundef, double noundef) local_unnamed_addr #2
+
+declare hidden float @fdimf(float noundef, float noundef) local_unnamed_addr #2
+
+declare double @llvm.exp.f64(double) #1
+
+declare float @llvm.exp.f32(float) #1
+
+declare double @llvm.exp2.f64(double) #1
+
+declare float @llvm.exp2.f32(float) #1
+
+declare hidden double @expm1(double noundef) local_unnamed_addr #2
+
+declare hidden float @expm1f(float noundef) local_unnamed_addr #2
+
+declare double @llvm.log.f64(double) #1
+
+declare float @llvm.log.f32(float) #1
+
+declare double @llvm.log10.f64(double) #1
+
+declare float @llvm.log10.f32(float) #1
+
+declare double @llvm.log2.f64(double) #1
+
+declare float @llvm.log2.f32(float) #1
+
+declare hidden double @log1p(double noundef) local_unnamed_addr #2
+
+declare hidden float @log1pf(float noundef) local_unnamed_addr #2
+
+declare float @llvm.pow.f32(float, float) #1
+
+declare double @llvm.sqrt.f64(double) #1
+
+declare float @llvm.sqrt.f32(float) #1
+
+declare hidden double @cbrt(double noundef) local_unnamed_addr #2
+
+declare hidden float @cbrtf(float noundef) local_unnamed_addr #2
+
+declare hidden double @hypot(double noundef, double noundef) local_unnamed_addr #2
+
+declare hidden float @hypotf(float noundef, float noundef) local_unnamed_addr #2
+
+declare float @llvm.sin.f32(float) #1
+
+declare float @llvm.cos.f32(float) #1
+
+declare double @llvm.tan.f64(double) #1
+
+declare double @llvm.asin.f64(double) #1
+
+declare double @llvm.acos.f64(double) #1
+
+declare double @llvm.atan.f64(double) #1
+
+declare double @llvm.atan2.f64(double, double) #1
+
+declare double @llvm.sinh.f64(double) #1
+
+declare double @llvm.cosh.f64(double) #1
+
+declare double @llvm.tanh.f64(double) #1
+
+declare hidden double @asinh(double noundef) local_unnamed_addr #2
+
+declare hidden float @asinhf(float noundef) local_unnamed_addr #2
+
+declare hidden double @acosh(double noundef) local_unnamed_addr #2
+
+declare hidden float @acoshf(float noundef) local_unnamed_addr #2
+
+declare hidden double @atanh(double noundef) local_unnamed_addr #2
+
+declare hidden float @atanhf(float noundef) local_unnamed_addr #2
+
+declare hidden double @erf(double noundef) local_unnamed_addr #2
+
+declare hidden float @erff(float noundef) local_unnamed_addr #2
+
+declare hidden double @erfc(double noundef) local_unnamed_addr #2
+
+declare hidden float @erfcf(float noundef) local_unnamed_addr #2
+
+declare hidden double @tgamma(double noundef) local_unnamed_addr #2
+
+declare hidden float @tgammaf(float noundef) local_unnamed_addr #2
+
+declare hidden double @lgamma(double noundef) local_unnamed_addr #3
+
+declare hidden float @lgammaf(float noundef) local_unnamed_addr #3
+
+attributes #0 = { convergent mustprogress norecurse nounwind }
+attributes #1 = { mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+attributes #2 = { convergent mustprogress nofree nounwind willreturn memory(none) }
+attributes #3 = { convergent nounwind }
+attributes #4 = { convergent nounwind willreturn memory(none) }
+attributes #5 = { convergent nounwind }

>From 46340578c9bc04b8ee9fd3681a30355527a47f03 Mon Sep 17 00:00:00 2001
From: Alex Voicu <alexandru.voicu at amd.com>
Date: Fri, 10 Apr 2026 22:53:21 +0100
Subject: [PATCH 2/8] `gfx90a` MFMAs need `gfx90a-insts`.

---
 clang/include/clang/Basic/BuiltinsAMDGPU.td   | 14 ++++----
 .../builtins-amdgcn-mfma-gfx908-err.cl        | 33 +++++++++++++++++++
 2 files changed, 40 insertions(+), 7 deletions(-)
 create mode 100644 clang/test/CodeGenOpenCL/builtins-amdgcn-mfma-gfx908-err.cl

diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.td b/clang/include/clang/Basic/BuiltinsAMDGPU.td
index 17654daf6a469..db0fbb4b048e2 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.td
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.td
@@ -603,13 +603,13 @@ def __builtin_amdgcn_mfma_f32_4x4x2bf16 : AMDGPUBuiltin<"_ExtVector<4, float>(_E
 def __builtin_amdgcn_mfma_f32_32x32x4bf16 : AMDGPUBuiltin<"_ExtVector<16, float>(_ExtVector<2, short>, _ExtVector<2, short>, _ExtVector<16, float>, _Constant int, _Constant int, _Constant int)", [Const], "mai-insts">;
 def __builtin_amdgcn_mfma_f32_16x16x8bf16 : AMDGPUBuiltin<"_ExtVector<4, float>(_ExtVector<2, short>, _ExtVector<2, short>, _ExtVector<4, float>, _Constant int, _Constant int, _Constant int)", [Const], "mai-insts">;
 
-def __builtin_amdgcn_mfma_f32_32x32x4bf16_1k : AMDGPUBuiltin<"_ExtVector<32, float>(_ExtVector<4, short>, _ExtVector<4, short>, _ExtVector<32, float>, _Constant int, _Constant int, _Constant int)", [Const], "mai-insts">;
-def __builtin_amdgcn_mfma_f32_16x16x4bf16_1k : AMDGPUBuiltin<"_ExtVector<16, float>(_ExtVector<4, short>, _ExtVector<4, short>, _ExtVector<16, float>, _Constant int, _Constant int, _Constant int)", [Const], "mai-insts">;
-def __builtin_amdgcn_mfma_f32_4x4x4bf16_1k : AMDGPUBuiltin<"_ExtVector<4, float>(_ExtVector<4, short>, _ExtVector<4, short>, _ExtVector<4, float>, _Constant int, _Constant int, _Constant int)", [Const], "mai-insts">;
-def __builtin_amdgcn_mfma_f32_32x32x8bf16_1k : AMDGPUBuiltin<"_ExtVector<16, float>(_ExtVector<4, short>, _ExtVector<4, short>, _ExtVector<16, float>, _Constant int, _Constant int, _Constant int)", [Const], "mai-insts">;
-def __builtin_amdgcn_mfma_f32_16x16x16bf16_1k : AMDGPUBuiltin<"_ExtVector<4, float>(_ExtVector<4, short>, _ExtVector<4, short>, _ExtVector<4, float>, _Constant int, _Constant int, _Constant int)", [Const], "mai-insts">;
-def __builtin_amdgcn_mfma_f64_16x16x4f64 : AMDGPUBuiltin<"_ExtVector<4, double>(double, double, _ExtVector<4, double>, _Constant int, _Constant int, _Constant int)", [Const], "mai-insts">;
-def __builtin_amdgcn_mfma_f64_4x4x4f64 : AMDGPUBuiltin<"double(double, double, double, _Constant int, _Constant int, _Constant int)", [Const], "mai-insts">;
+def __builtin_amdgcn_mfma_f32_32x32x4bf16_1k : AMDGPUBuiltin<"_ExtVector<32, float>(_ExtVector<4, short>, _ExtVector<4, short>, _ExtVector<32, float>, _Constant int, _Constant int, _Constant int)", [Const], "gfx90a-insts">;
+def __builtin_amdgcn_mfma_f32_16x16x4bf16_1k : AMDGPUBuiltin<"_ExtVector<16, float>(_ExtVector<4, short>, _ExtVector<4, short>, _ExtVector<16, float>, _Constant int, _Constant int, _Constant int)", [Const], "gfx90a-insts">;
+def __builtin_amdgcn_mfma_f32_4x4x4bf16_1k : AMDGPUBuiltin<"_ExtVector<4, float>(_ExtVector<4, short>, _ExtVector<4, short>, _ExtVector<4, float>, _Constant int, _Constant int, _Constant int)", [Const], "gfx90a-insts">;
+def __builtin_amdgcn_mfma_f32_32x32x8bf16_1k : AMDGPUBuiltin<"_ExtVector<16, float>(_ExtVector<4, short>, _ExtVector<4, short>, _ExtVector<16, float>, _Constant int, _Constant int, _Constant int)", [Const], "gfx90a-insts">;
+def __builtin_amdgcn_mfma_f32_16x16x16bf16_1k : AMDGPUBuiltin<"_ExtVector<4, float>(_ExtVector<4, short>, _ExtVector<4, short>, _ExtVector<4, float>, _Constant int, _Constant int, _Constant int)", [Const], "gfx90a-insts">;
+def __builtin_amdgcn_mfma_f64_16x16x4f64 : AMDGPUBuiltin<"_ExtVector<4, double>(double, double, _ExtVector<4, double>, _Constant int, _Constant int, _Constant int)", [Const], "gfx90a-insts">;
+def __builtin_amdgcn_mfma_f64_4x4x4f64 : AMDGPUBuiltin<"double(double, double, double, _Constant int, _Constant int, _Constant int)", [Const], "gfx90a-insts">;
 
 def __builtin_amdgcn_mfma_i32_16x16x32_i8 : AMDGPUBuiltin<"_ExtVector<4, int>(int64_t, int64_t, _ExtVector<4, int>, _Constant int, _Constant int, _Constant int)", [Const], "mai-insts">;
 def __builtin_amdgcn_mfma_i32_32x32x16_i8 : AMDGPUBuiltin<"_ExtVector<16, int>(int64_t, int64_t, _ExtVector<16, int>, _Constant int, _Constant int, _Constant int)", [Const], "mai-insts">;
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma-gfx908-err.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma-gfx908-err.cl
new file mode 100644
index 0000000000000..4e5baa51b08b8
--- /dev/null
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma-gfx908-err.cl
@@ -0,0 +1,33 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu gfx908 \
+// RUN:   -verify -S -o - %s
+
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
+
+typedef float  v4f   __attribute__((ext_vector_type(4)));
+typedef float  v16f  __attribute__((ext_vector_type(16)));
+typedef float  v32f  __attribute__((ext_vector_type(32)));
+typedef half   v4h   __attribute__((ext_vector_type(4)));
+typedef half   v16h  __attribute__((ext_vector_type(16)));
+typedef half   v32h  __attribute__((ext_vector_type(32)));
+typedef int    v4i   __attribute__((ext_vector_type(4)));
+typedef int    v16i  __attribute__((ext_vector_type(16)));
+typedef int    v32i  __attribute__((ext_vector_type(32)));
+typedef short  v2s   __attribute__((ext_vector_type(2)));
+typedef short  v4s   __attribute__((ext_vector_type(4)));
+typedef short  v16s  __attribute__((ext_vector_type(16)));
+typedef short  v32s  __attribute__((ext_vector_type(32)));
+typedef double v4d   __attribute__((ext_vector_type(4)));
+
+void test_mfma_f32_16x16x4bf16_1k(global v16f* out, global v4f* out1,
+                                  global v4d* out2, global double* out3, v4s a,
+                                  v4s b, v16f c, v4f e, double f, double g,
+                                  v4d h)
+{
+  *out = __builtin_amdgcn_mfma_f32_16x16x4bf16_1k(a, b, c, 0, 0, 0);   // expected-error{{'__builtin_amdgcn_mfma_f32_16x16x4bf16_1k' needs target feature gfx90a-insts}}
+  *out1 = __builtin_amdgcn_mfma_f32_4x4x4bf16_1k(a, b, e, 0, 0, 0);    // expected-error{{'__builtin_amdgcn_mfma_f32_4x4x4bf16_1k' needs target feature gfx90a-insts}}
+  *out = __builtin_amdgcn_mfma_f32_32x32x8bf16_1k(a, b, c, 0, 0, 0);   // expected-error{{'__builtin_amdgcn_mfma_f32_32x32x8bf16_1k' needs target feature gfx90a-insts}}
+  *out1 = __builtin_amdgcn_mfma_f32_16x16x16bf16_1k(a, b, e, 0, 0, 0); // expected-error{{'__builtin_amdgcn_mfma_f32_16x16x16bf16_1k' needs target feature gfx90a-insts}}
+  *out2 = __builtin_amdgcn_mfma_f64_16x16x4f64(f, g, h, 0, 0, 0);      // expected-error{{'__builtin_amdgcn_mfma_f64_16x16x4f64' needs target feature gfx90a-insts}}
+  *out3 = __builtin_amdgcn_mfma_f64_4x4x4f64(f, g, g, 0, 0, 0);        // expected-error{{'__builtin_amdgcn_mfma_f64_4x4x4f64' needs target feature gfx90a-insts}}
+}

>From c43b39883ae4f1a9eecc30af47cdb66ee1e63648 Mon Sep 17 00:00:00 2001
From: Alex Voicu <alexandru.voicu at amd.com>
Date: Fri, 10 Apr 2026 23:44:48 +0100
Subject: [PATCH 3/8] Update test.

---
 .../CodeGenOpenCL/builtins-amdgcn-gfx12-err.cl     | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-err.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-err.cl
index 0c5a39c2c8520..c972b1611b6f7 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-err.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-err.cl
@@ -61,13 +61,13 @@ void builtin_test_unsupported(double a_double, float a_float,
   a_v4f = __builtin_amdgcn_mfma_f32_4x4x2bf16(a_v2s, a_v2s, a_v4f, 0, 0, 0); // expected-error {{'__builtin_amdgcn_mfma_f32_4x4x2bf16' needs target feature mai-insts}}
   a_v16f = __builtin_amdgcn_mfma_f32_32x32x4bf16(a_v2s, a_v2s, a_v16f, 0, 0, 0); // expected-error {{'__builtin_amdgcn_mfma_f32_32x32x4bf16' needs target feature mai-insts}}
   a_v4f = __builtin_amdgcn_mfma_f32_16x16x8bf16(a_v2s, a_v2s, a_v4f, 0, 0, 0); // expected-error {{'__builtin_amdgcn_mfma_f32_16x16x8bf16' needs target feature mai-insts}}
-  a_v32f = __builtin_amdgcn_mfma_f32_32x32x4bf16_1k(a_v4s, a_v4s, a_v32f, 0, 0, 0); // expected-error {{'__builtin_amdgcn_mfma_f32_32x32x4bf16_1k' needs target feature mai-insts}}
-  a_v16f = __builtin_amdgcn_mfma_f32_16x16x4bf16_1k(a_v4s, a_v4s, a_v16f, 0, 0, 0); // expected-error {{'__builtin_amdgcn_mfma_f32_16x16x4bf16_1k' needs target feature mai-insts}}
-  a_v4f = __builtin_amdgcn_mfma_f32_4x4x4bf16_1k(a_v4s, a_v4s, a_v4f, 0, 0, 0); // expected-error {{'__builtin_amdgcn_mfma_f32_4x4x4bf16_1k' needs target feature mai-insts}}
-  a_v16f = __builtin_amdgcn_mfma_f32_32x32x8bf16_1k(a_v4s, a_v4s, a_v16f, 0, 0, 0); // expected-error {{'__builtin_amdgcn_mfma_f32_32x32x8bf16_1k' needs target feature mai-insts}}
-  a_v4f = __builtin_amdgcn_mfma_f32_16x16x16bf16_1k(a_v4s, a_v4s, a_v4f, 0, 0, 0); // expected-error {{'__builtin_amdgcn_mfma_f32_16x16x16bf16_1k' needs target feature mai-insts}}
-  a_v4d = __builtin_amdgcn_mfma_f64_16x16x4f64(a_double, a_double, a_v4d, 0, 0, 0); // expected-error {{'__builtin_amdgcn_mfma_f64_16x16x4f64' needs target feature mai-insts}}
-  a_double = __builtin_amdgcn_mfma_f64_4x4x4f64(a_double, a_double, a_double, 0, 0, 0); // expected-error {{'__builtin_amdgcn_mfma_f64_4x4x4f64' needs target feature mai-insts}}
+  a_v32f = __builtin_amdgcn_mfma_f32_32x32x4bf16_1k(a_v4s, a_v4s, a_v32f, 0, 0, 0); // expected-error {{'__builtin_amdgcn_mfma_f32_32x32x4bf16_1k' needs target feature gfx90a-insts}}
+  a_v16f = __builtin_amdgcn_mfma_f32_16x16x4bf16_1k(a_v4s, a_v4s, a_v16f, 0, 0, 0); // expected-error {{'__builtin_amdgcn_mfma_f32_16x16x4bf16_1k' needs target feature gfx90a-insts}}
+  a_v4f = __builtin_amdgcn_mfma_f32_4x4x4bf16_1k(a_v4s, a_v4s, a_v4f, 0, 0, 0); // expected-error {{'__builtin_amdgcn_mfma_f32_4x4x4bf16_1k' needs target feature gfx90a-insts}}
+  a_v16f = __builtin_amdgcn_mfma_f32_32x32x8bf16_1k(a_v4s, a_v4s, a_v16f, 0, 0, 0); // expected-error {{'__builtin_amdgcn_mfma_f32_32x32x8bf16_1k' needs target feature gfx90a-insts}}
+  a_v4f = __builtin_amdgcn_mfma_f32_16x16x16bf16_1k(a_v4s, a_v4s, a_v4f, 0, 0, 0); // expected-error {{'__builtin_amdgcn_mfma_f32_16x16x16bf16_1k' needs target feature gfx90a-insts}}
+  a_v4d = __builtin_amdgcn_mfma_f64_16x16x4f64(a_double, a_double, a_v4d, 0, 0, 0); // expected-error {{'__builtin_amdgcn_mfma_f64_16x16x4f64' needs target feature gfx90a-insts}}
+  a_double = __builtin_amdgcn_mfma_f64_4x4x4f64(a_double, a_double, a_double, 0, 0, 0); // expected-error {{'__builtin_amdgcn_mfma_f64_4x4x4f64' needs target feature gfx90a-insts}}
   a_v4i = __builtin_amdgcn_mfma_i32_16x16x32_i8(a_long, a_long, a_v4i, 0, 0, 0); // expected-error {{'__builtin_amdgcn_mfma_i32_16x16x32_i8' needs target feature mai-insts}}
   a_v16i = __builtin_amdgcn_mfma_i32_32x32x16_i8(a_long, a_long, a_v16i, 0, 0, 0); // expected-error {{'__builtin_amdgcn_mfma_i32_32x32x16_i8' needs target feature mai-insts}}
   a_v4f = __builtin_amdgcn_mfma_f32_16x16x8_xf32(a_v2f, a_v2f, a_v4f, 0, 0, 0); // expected-error {{'__builtin_amdgcn_mfma_f32_16x16x8_xf32' needs target feature mai-insts}}

>From b3daf21e4b652c6a3a1b93b6cfca7f3fe74b9f6a Mon Sep 17 00:00:00 2001
From: Alex Voicu <alexandru.voicu at amd.com>
Date: Sat, 11 Apr 2026 15:41:59 +0100
Subject: [PATCH 4/8] SPIR-V Cannot handle visibility yet.

---
 clang/lib/Driver/ToolChains/AMDGPU.cpp | 3 ++-
 clang/lib/Driver/ToolChains/HIPAMD.cpp | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp
index 03bd88f0d4f47..cd5d8937bbc07 100644
--- a/clang/lib/Driver/ToolChains/AMDGPU.cpp
+++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp
@@ -844,7 +844,8 @@ void AMDGPUToolChain::addClangTargetOptions(
   // Default to "hidden" visibility, as object level linking will not be
   // supported for the foreseeable future.
   if (!DriverArgs.hasArg(options::OPT_fvisibility_EQ,
-                         options::OPT_fvisibility_ms_compat)) {
+                         options::OPT_fvisibility_ms_compat) &&
+      !getEffectiveTriple().isSPIRV()) {
     CC1Args.push_back("-fvisibility=hidden");
     CC1Args.push_back("-fapply-global-visibility-to-externs");
   }
diff --git a/clang/lib/Driver/ToolChains/HIPAMD.cpp b/clang/lib/Driver/ToolChains/HIPAMD.cpp
index b08f610b21a16..765f4163033e4 100644
--- a/clang/lib/Driver/ToolChains/HIPAMD.cpp
+++ b/clang/lib/Driver/ToolChains/HIPAMD.cpp
@@ -261,7 +261,8 @@ void HIPAMDToolChain::addClangTargetOptions(
   // Default to "hidden" visibility, as object level linking will not be
   // supported for the foreseeable future.
   if (!DriverArgs.hasArg(options::OPT_fvisibility_EQ,
-                         options::OPT_fvisibility_ms_compat)) {
+                         options::OPT_fvisibility_ms_compat) &&
+      !getEffectiveTriple().isSPIRV()) {
     CC1Args.append({"-fvisibility=hidden"});
     CC1Args.push_back("-fapply-global-visibility-to-externs");
   }

>From e3732c452f9a8ebb2ad8f0897598fda978be4b6a Mon Sep 17 00:00:00 2001
From: Alex Voicu <alexandru.voicu at amd.com>
Date: Mon, 13 Apr 2026 16:16:50 +0100
Subject: [PATCH 5/8] Add TODO.

---
 clang/lib/Driver/ToolChains/HIPAMD.cpp          | 1 +
 llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp | 4 ++++
 2 files changed, 5 insertions(+)

diff --git a/clang/lib/Driver/ToolChains/HIPAMD.cpp b/clang/lib/Driver/ToolChains/HIPAMD.cpp
index 765f4163033e4..b4ff90c1d61f0 100644
--- a/clang/lib/Driver/ToolChains/HIPAMD.cpp
+++ b/clang/lib/Driver/ToolChains/HIPAMD.cpp
@@ -260,6 +260,7 @@ void HIPAMDToolChain::addClangTargetOptions(
 
   // Default to "hidden" visibility, as object level linking will not be
   // supported for the foreseeable future.
+  // TODO: remove the SPIR-V bypass once it can encode (hidden) visibility.
   if (!DriverArgs.hasArg(options::OPT_fvisibility_EQ,
                          options::OPT_fvisibility_ms_compat) &&
       !getEffectiveTriple().isSPIRV()) {
diff --git a/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp b/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp
index a3b44ad6d31d5..aeffb4b7f63b6 100644
--- a/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp
@@ -683,6 +683,10 @@ bool SPIRVPrepareFunctions::runOnModule(Module &M) {
   }
 
   for (Function &F : M) {
+    if (F.hasAvailableExternallyLinkage()) {
+      F.setLinkage(GlobalValue::LinkageTypes::ExternalLinkage);
+      Changed = true;
+    }
     Changed |= substituteIntrinsicCalls(&F);
     Changed |= sortBlocks(F);
     Changed |= removeAggregateTypesFromCalls(&F);

>From a35e1f024972f41198a8f5b6fbb4c0b58787c94d Mon Sep 17 00:00:00 2001
From: Alex Voicu <alexandru.voicu at amd.com>
Date: Mon, 13 Apr 2026 17:37:22 +0100
Subject: [PATCH 6/8] Add missing TODO.

---
 clang/lib/Driver/ToolChains/AMDGPU.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp
index cd5d8937bbc07..b72d0e68f63f0 100644
--- a/clang/lib/Driver/ToolChains/AMDGPU.cpp
+++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp
@@ -843,6 +843,7 @@ void AMDGPUToolChain::addClangTargetOptions(
     Action::OffloadKind DeviceOffloadingKind) const {
   // Default to "hidden" visibility, as object level linking will not be
   // supported for the foreseeable future.
+  // TODO: remove the SPIR-V bypass once it can encode (hidden) visibility.
   if (!DriverArgs.hasArg(options::OPT_fvisibility_EQ,
                          options::OPT_fvisibility_ms_compat) &&
       !getEffectiveTriple().isSPIRV()) {

>From 37b60d640a6efc5fc9babec1f4bc822875e60ec0 Mon Sep 17 00:00:00 2001
From: Alex Voicu <alexandru.voicu at amd.com>
Date: Mon, 13 Apr 2026 17:38:50 +0100
Subject: [PATCH 7/8] Remove unrelated change.

---
 llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp b/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp
index aeffb4b7f63b6..a3b44ad6d31d5 100644
--- a/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp
@@ -683,10 +683,6 @@ bool SPIRVPrepareFunctions::runOnModule(Module &M) {
   }
 
   for (Function &F : M) {
-    if (F.hasAvailableExternallyLinkage()) {
-      F.setLinkage(GlobalValue::LinkageTypes::ExternalLinkage);
-      Changed = true;
-    }
     Changed |= substituteIntrinsicCalls(&F);
     Changed |= sortBlocks(F);
     Changed |= removeAggregateTypesFromCalls(&F);

>From fa7b2d91ff1152bb599b8ee0b9bc69d1daa55596 Mon Sep 17 00:00:00 2001
From: Alex Voicu <alexandru.voicu at amd.com>
Date: Tue, 14 Apr 2026 15:24:59 +0100
Subject: [PATCH 8/8] Programmatically fill in AMDGCN flavoured SPIR-V feature
 map.

---
 .../CodeGen/amdgpu-builtin-is-invocable.c     |  2 +-
 .../CodeGen/amdgpu-builtin-processor-is.c     |  2 +-
 .../CodeGenCXX/dynamic-cast-address-space.cpp |  4 +-
 llvm/lib/TargetParser/TargetParser.cpp        | 74 +++----------------
 4 files changed, 15 insertions(+), 67 deletions(-)

diff --git a/clang/test/CodeGen/amdgpu-builtin-is-invocable.c b/clang/test/CodeGen/amdgpu-builtin-is-invocable.c
index 08bcc390fe2ba..5c6d7b25f3bf1 100644
--- a/clang/test/CodeGen/amdgpu-builtin-is-invocable.c
+++ b/clang/test/CodeGen/amdgpu-builtin-is-invocable.c
@@ -54,7 +54,7 @@ void foo() {
 // AMDGCN-GFX1010: attributes #[[ATTR0]] = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1010" }
 // AMDGCN-GFX1010: attributes #[[ATTR1:[0-9]+]] = { cold noreturn nounwind memory(inaccessiblemem: write) }
 //.
-// AMDGCNSPIRV: attributes #[[ATTR0]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-trans-insts,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+gws,+image-insts,+mai-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+s-memrealtime,+s-memtime-inst,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+vmem-to-lds-load-insts,+wavefrontsize32,+wavefrontsize64" }
+// AMDGCNSPIRV: attributes #[[ATTR0]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+16-bit-insts,+add-min-max-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+clusters,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+gws,+image-insts,+lerp-inst,+mai-insts,+mcast-load-insts,+permlane16-swap,+permlane32-swap,+pk-add-min-max-insts,+prng-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+s-wakeup-barrier-inst,+sad-insts,+setprio-inc-wg-inst,+swmmac-gfx1200-insts,+swmmac-gfx1250-insts,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+vmem-to-lds-load-insts,+wavefrontsize32,+wavefrontsize64,+wmma-128b-insts,+wmma-256b-insts,+xf32-insts" }
 // AMDGCNSPIRV: attributes #[[ATTR1:[0-9]+]] = { nounwind }
 // AMDGCNSPIRV: attributes #[[ATTR2:[0-9]+]] = { cold noreturn nounwind memory(inaccessiblemem: write) }
 //.
diff --git a/clang/test/CodeGen/amdgpu-builtin-processor-is.c b/clang/test/CodeGen/amdgpu-builtin-processor-is.c
index 5d5e4a9df3995..b2f04f11ecb54 100644
--- a/clang/test/CodeGen/amdgpu-builtin-processor-is.c
+++ b/clang/test/CodeGen/amdgpu-builtin-processor-is.c
@@ -63,7 +63,7 @@ void foo() {
 //.
 // AMDGCN-GFX1010: attributes #[[ATTR0]] = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1010" }
 //.
-// AMDGCNSPIRV: attributes #[[ATTR0]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-trans-insts,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+gws,+image-insts,+mai-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+s-memrealtime,+s-memtime-inst,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+vmem-to-lds-load-insts,+wavefrontsize32,+wavefrontsize64" }
+// AMDGCNSPIRV: attributes #[[ATTR0]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+16-bit-insts,+add-min-max-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+clusters,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+gws,+image-insts,+lerp-inst,+mai-insts,+mcast-load-insts,+permlane16-swap,+permlane32-swap,+pk-add-min-max-insts,+prng-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+s-wakeup-barrier-inst,+sad-insts,+setprio-inc-wg-inst,+swmmac-gfx1200-insts,+swmmac-gfx1250-insts,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+vmem-to-lds-load-insts,+wavefrontsize32,+wavefrontsize64,+wmma-128b-insts,+wmma-256b-insts,+xf32-insts" }
 // AMDGCNSPIRV: attributes #[[ATTR1:[0-9]+]] = { nounwind }
 // AMDGCNSPIRV: attributes #[[ATTR2:[0-9]+]] = { cold noreturn nounwind memory(inaccessiblemem: write) }
 //.
diff --git a/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp b/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp
index 405d233571b3c..355dde6f0cfda 100644
--- a/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp
+++ b/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp
@@ -107,9 +107,9 @@ const B& f(A *a) {
 // CHECK: attributes #[[ATTR3]] = { nounwind }
 // CHECK: attributes #[[ATTR4]] = { noreturn }
 //.
-// WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR0]] = { mustprogress noinline optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-trans-insts,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+gws,+image-insts,+mai-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+s-memrealtime,+s-memtime-inst,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+vmem-to-lds-load-insts,+wavefrontsize32,+wavefrontsize64" }
+// WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR0]] = { mustprogress noinline optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+16-bit-insts,+add-min-max-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+clusters,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+gws,+image-insts,+lerp-inst,+mai-insts,+mcast-load-insts,+permlane16-swap,+permlane32-swap,+pk-add-min-max-insts,+prng-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+s-wakeup-barrier-inst,+sad-insts,+setprio-inc-wg-inst,+swmmac-gfx1200-insts,+swmmac-gfx1250-insts,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+vmem-to-lds-load-insts,+wavefrontsize32,+wavefrontsize64,+wmma-128b-insts,+wmma-256b-insts,+xf32-insts" }
 // WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR1:[0-9]+]] = { nounwind willreturn memory(read) }
-// WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR2:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-trans-insts,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+gws,+image-insts,+mai-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+s-memrealtime,+s-memtime-inst,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+vmem-to-lds-load-insts,+wavefrontsize32,+wavefrontsize64" }
+// WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR2:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+16-bit-insts,+add-min-max-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+clusters,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+gws,+image-insts,+lerp-inst,+mai-insts,+mcast-load-insts,+permlane16-swap,+permlane32-swap,+pk-add-min-max-insts,+prng-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+s-wakeup-barrier-inst,+sad-insts,+setprio-inc-wg-inst,+swmmac-gfx1200-insts,+swmmac-gfx1250-insts,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+vmem-to-lds-load-insts,+wavefrontsize32,+wavefrontsize64,+wmma-128b-insts,+wmma-256b-insts,+xf32-insts" }
 // WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR3]] = { nounwind }
 // WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR4]] = { noreturn }
 //.
diff --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp
index b697f6afa5408..be829006d051a 100644
--- a/llvm/lib/TargetParser/TargetParser.cpp
+++ b/llvm/lib/TargetParser/TargetParser.cpp
@@ -627,69 +627,17 @@ AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
                              StringMap<bool> &Features) {
   // XXX - What does the member GPU mean if device name string passed here?
   if (T.isSPIRV() && T.getOS() == Triple::OSType::AMDHSA) {
-    // AMDGCN SPIRV must support the union of all AMDGCN features. This list
-    // should be kept in sorted order and updated whenever new features are
-    // added.
-    Features["16-bit-insts"] = true;
-    Features["ashr-pk-insts"] = true;
-    Features["atomic-buffer-pk-add-bf16-inst"] = true;
-    Features["atomic-buffer-global-pk-add-f16-insts"] = true;
-    Features["atomic-ds-pk-add-16-insts"] = true;
-    Features["atomic-fadd-rtn-insts"] = true;
-    Features["atomic-flat-pk-add-16-insts"] = true;
-    Features["atomic-global-pk-add-bf16-inst"] = true;
-    Features["bf16-trans-insts"] = true;
-    Features["bf16-cvt-insts"] = true;
-    Features["bf8-cvt-scale-insts"] = true;
-    Features["bitop3-insts"] = true;
-    Features["ci-insts"] = true;
-    Features["dl-insts"] = true;
-    Features["dot1-insts"] = true;
-    Features["dot2-insts"] = true;
-    Features["dot3-insts"] = true;
-    Features["dot4-insts"] = true;
-    Features["dot5-insts"] = true;
-    Features["dot6-insts"] = true;
-    Features["dot7-insts"] = true;
-    Features["dot8-insts"] = true;
-    Features["dot9-insts"] = true;
-    Features["dot10-insts"] = true;
-    Features["dot11-insts"] = true;
-    Features["dot12-insts"] = true;
-    Features["dot13-insts"] = true;
-    Features["dpp"] = true;
-    Features["f16bf16-to-fp6bf6-cvt-scale-insts"] = true;
-    Features["f32-to-f16bf16-cvt-sr-insts"] = true;
-    Features["fp4-cvt-scale-insts"] = true;
-    Features["fp6bf6-cvt-scale-insts"] = true;
-    Features["fp8e5m3-insts"] = true;
-    Features["fp8-conversion-insts"] = true;
-    Features["fp8-cvt-scale-insts"] = true;
-    Features["fp8-insts"] = true;
-    Features["gfx8-insts"] = true;
-    Features["gfx9-insts"] = true;
-    Features["gfx90a-insts"] = true;
-    Features["gfx940-insts"] = true;
-    Features["gfx950-insts"] = true;
-    Features["gfx10-insts"] = true;
-    Features["gfx10-3-insts"] = true;
-    Features["gfx11-insts"] = true;
-    Features["gfx12-insts"] = true;
-    Features["gfx1250-insts"] = true;
-    Features["gws"] = true;
-    Features["image-insts"] = true;
-    Features["mai-insts"] = true;
-    Features["permlane16-swap"] = true;
-    Features["permlane32-swap"] = true;
-    Features["prng-inst"] = true;
-    Features["setprio-inc-wg-inst"] = true;
-    Features["s-memrealtime"] = true;
-    Features["s-memtime-inst"] = true;
-    Features["tanh-insts"] = true;
-    Features["tensor-cvt-lut-insts"] = true;
-    Features["transpose-load-f4f6-insts"] = true;
-    Features["vmem-pref-insts"] = true;
-    Features["vmem-to-lds-load-insts"] = true;
+    // AMDGCN SPIRV must support the union of all AMDGCN features.
+    SmallVector<StringRef> GPUs;
+    fillValidArchListAMDGCN(GPUs);
+
+    static const Triple AMDGCN("amdgcn-amd-amdhsa");
+    StringMap<bool> Tmp;
+    for (auto &&GPU : GPUs) {
+      fillAMDGCNFeatureMap(GPU, AMDGCN, Tmp);
+      for (auto &&[F, B] : Tmp)
+        Features[F] = B;
+    }
     Features["wavefrontsize32"] = true;
     Features["wavefrontsize64"] = true;
   } else if (T.isAMDGCN()) {



More information about the cfe-commits mailing list