[clang] [Clang][SYCL] Add AOT compilation support for Intel GPUs in clang-sycl-linker (PR #133194)
Justin Cai via cfe-commits
cfe-commits at lists.llvm.org
Thu Apr 3 11:37:01 PDT 2025
https://github.com/jzc updated https://github.com/llvm/llvm-project/pull/133194
>From 8f22fbe1f6272beec61e62bfae72832d75b4f25b Mon Sep 17 00:00:00 2001
From: "Cai, Justin" <justin.cai at intel.com>
Date: Fri, 14 Feb 2025 21:16:27 +0000
Subject: [PATCH 1/5] [SYCL] Add AOT compilation support for Intel GPUs
in clang-sycl-linker
---
clang/include/clang/Basic/SYCL.h | 131 ++++++++++
clang/lib/Basic/CMakeLists.txt | 1 +
clang/lib/Basic/SYCL.cpp | 226 ++++++++++++++++++
clang/test/Driver/clang-sycl-linker-test.cpp | 36 +++
.../clang-sycl-linker/ClangSYCLLinker.cpp | 112 ++++++++-
clang/tools/clang-sycl-linker/SYCLLinkOpts.td | 8 +
6 files changed, 506 insertions(+), 8 deletions(-)
create mode 100644 clang/include/clang/Basic/SYCL.h
create mode 100644 clang/lib/Basic/SYCL.cpp
diff --git a/clang/include/clang/Basic/SYCL.h b/clang/include/clang/Basic/SYCL.h
new file mode 100644
index 0000000000000..c7cad37639b91
--- /dev/null
+++ b/clang/include/clang/Basic/SYCL.h
@@ -0,0 +1,131 @@
+//===--- SYCL.h -------------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_BASIC_SYCL_H
+#define LLVM_CLANG_BASIC_SYCL_H
+
+#include "clang/Basic/Cuda.h"
+
+namespace llvm {
+class StringRef;
+template <unsigned InternalLen> class SmallString;
+} // namespace llvm
+
+namespace clang {
+// List of architectures (Intel CPUs and Intel GPUs)
+// that support SYCL offloading.
+//
+// Declaration order matters: IsSYCLSupportedIntelCPUArch and
+// IsSYCLSupportedIntelGPUArch classify a value with range comparisons
+// (SKYLAKEAVX512..GRANITERAPIDS and BDW..LNL_M respectively), so any new
+// entry must keep those bounds valid.
+enum class SYCLSupportedIntelArchs {
+ // Not a supported architecture; StringToOffloadArchSYCL returns this for
+ // names it does not recognize. Lies outside both ranges.
+ UNKNOWN,
+ // Intel CPUs
+ SKYLAKEAVX512,
+ COREAVX2,
+ COREI7AVX,
+ COREI7,
+ WESTMERE,
+ SANDYBRIDGE,
+ IVYBRIDGE,
+ BROADWELL,
+ COFFEELAKE,
+ ALDERLAKE,
+ SKYLAKE,
+ SKX,
+ CASCADELAKE,
+ ICELAKECLIENT,
+ ICELAKESERVER,
+ SAPPHIRERAPIDS,
+ GRANITERAPIDS,
+ // Intel GPUs
+ BDW,
+ SKL,
+ KBL,
+ CFL,
+ APL,
+ BXT,
+ GLK,
+ WHL,
+ AML,
+ CML,
+ ICLLP,
+ ICL,
+ EHL,
+ JSL,
+ TGLLP,
+ TGL,
+ RKL,
+ ADL_S,
+ RPL_S,
+ ADL_P,
+ ADL_N,
+ DG1,
+ ACM_G10,
+ DG2_G10,
+ ACM_G11,
+ DG2_G11,
+ ACM_G12,
+ DG2_G12,
+ PVC,
+ PVC_VG,
+ MTL_U,
+ MTL_S,
+ ARL_U,
+ ARL_S,
+ MTL_H,
+ ARL_H,
+ BMG_G21,
+ LNL_M,
+};
+
+// Returns true when Arch is one of the GFX*_GENERIC AMD GPU targets.
+// Those targets currently do not support SYCL offloading, so
+// `IsSYCLSupportedAMDGPUArch` uses this predicate to exclude them.
+static inline bool IsAMDGenericGPUArch(OffloadArch Arch) {
+  switch (Arch) {
+  case OffloadArch::GFX9_GENERIC:
+  case OffloadArch::GFX10_1_GENERIC:
+  case OffloadArch::GFX10_3_GENERIC:
+  case OffloadArch::GFX11_GENERIC:
+  case OffloadArch::GFX12_GENERIC:
+    return true;
+  default:
+    return false;
+  }
+}
+
+// Returns true when Arch is an AMD GPU that SYCL offloading supports: it lies
+// in the range [GFX700, AMDGCNSPIRV) and is not a GFX*_GENERIC target.
+static inline bool IsSYCLSupportedAMDGPUArch(OffloadArch Arch) {
+  const bool InAMDRange =
+      Arch >= OffloadArch::GFX700 && Arch < OffloadArch::AMDGCNSPIRV;
+  if (!InAMDRange)
+    return false;
+  return !IsAMDGenericGPUArch(Arch);
+}
+
+// Returns true when Arch is an NVidia GPU that SYCL offloading supports,
+// i.e. it lies in the inclusive range [SM_50, SM_90a].
+static inline bool IsSYCLSupportedNVidiaGPUArch(OffloadArch Arch) {
+  if (Arch < OffloadArch::SM_50)
+    return false;
+  return !(Arch > OffloadArch::SM_90a);
+}
+
+// Returns true when Arch is one of the Intel CPU enumerators, i.e. it lies in
+// the inclusive range [SKYLAKEAVX512, GRANITERAPIDS].
+static inline bool IsSYCLSupportedIntelCPUArch(SYCLSupportedIntelArchs Arch) {
+  if (Arch < SYCLSupportedIntelArchs::SKYLAKEAVX512)
+    return false;
+  return !(Arch > SYCLSupportedIntelArchs::GRANITERAPIDS);
+}
+
+// Returns true when Arch is one of the Intel GPU enumerators, i.e. it lies in
+// the inclusive range [BDW, LNL_M].
+static inline bool IsSYCLSupportedIntelGPUArch(SYCLSupportedIntelArchs Arch) {
+  if (Arch < SYCLSupportedIntelArchs::BDW)
+    return false;
+  return !(Arch > SYCLSupportedIntelArchs::LNL_M);
+}
+
+// Convert the user provided value for --offload-arch into the matching
+// SYCLSupportedIntelArchs enumerator. Returns
+// SYCLSupportedIntelArchs::UNKNOWN when the name is not a SYCL supported
+// Intel AOT target.
+SYCLSupportedIntelArchs
+StringToOffloadArchSYCL(llvm::StringRef ArchNameAsString);
+
+// This is a mapping between the user provided --offload-arch value for Intel
+// GPU targets and the spir64_gen device name accepted by OCLOC (the Intel GPU
+// AOT compiler). Returns an empty string for an unrecognized ArchName.
+llvm::StringRef mapIntelGPUArchName(llvm::StringRef ArchName);
+// Build the "__SYCL_TARGET_<DEVICE>__" macro name for DeviceName. Returns an
+// empty string when DeviceName is not recognized.
+llvm::SmallString<64> getGenDeviceMacro(llvm::StringRef DeviceName);
+
+} // namespace clang
+
+#endif // LLVM_CLANG_BASIC_SYCL_H
diff --git a/clang/lib/Basic/CMakeLists.txt b/clang/lib/Basic/CMakeLists.txt
index 331dfbb3f4b67..be6d915e01b0a 100644
--- a/clang/lib/Basic/CMakeLists.txt
+++ b/clang/lib/Basic/CMakeLists.txt
@@ -90,6 +90,7 @@ add_clang_library(clangBasic
SourceMgrAdapter.cpp
Stack.cpp
StackExhaustionHandler.cpp
+ SYCL.cpp
TargetID.cpp
TargetInfo.cpp
Targets.cpp
diff --git a/clang/lib/Basic/SYCL.cpp b/clang/lib/Basic/SYCL.cpp
new file mode 100644
index 0000000000000..9ac5470cdbe5a
--- /dev/null
+++ b/clang/lib/Basic/SYCL.cpp
@@ -0,0 +1,226 @@
+#include "clang/Basic/SYCL.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
+
+using namespace llvm;
+
+namespace clang {
+
+// Struct that relates an AOT target value with
+// Intel CPUs and Intel GPUs.
+struct StringToOffloadArchSYCLMap {
+ // Architecture name as a string (e.g. "skylake-avx512", "pvc").
+ const char *ArchName;
+ // Enumerator that ArchName maps to.
+ SYCLSupportedIntelArchs IntelArch;
+};
+
+// Mapping of supported SYCL offloading architectures: each entry pairs the
+// architecture name string with its SYCLSupportedIntelArchs enumerator.
+// Every name appears exactly once; StringToOffloadArchSYCL scans this table
+// linearly and returns the first match.
+static const StringToOffloadArchSYCLMap StringToArchNamesMap[] = {
+    // Intel CPU mapping.
+    {"skylake-avx512", SYCLSupportedIntelArchs::SKYLAKEAVX512},
+    {"core-avx2", SYCLSupportedIntelArchs::COREAVX2},
+    {"corei7-avx", SYCLSupportedIntelArchs::COREI7AVX},
+    {"corei7", SYCLSupportedIntelArchs::COREI7},
+    {"westmere", SYCLSupportedIntelArchs::WESTMERE},
+    {"sandybridge", SYCLSupportedIntelArchs::SANDYBRIDGE},
+    {"ivybridge", SYCLSupportedIntelArchs::IVYBRIDGE},
+    {"broadwell", SYCLSupportedIntelArchs::BROADWELL},
+    {"coffeelake", SYCLSupportedIntelArchs::COFFEELAKE},
+    {"alderlake", SYCLSupportedIntelArchs::ALDERLAKE},
+    {"skylake", SYCLSupportedIntelArchs::SKYLAKE},
+    {"skx", SYCLSupportedIntelArchs::SKX},
+    {"cascadelake", SYCLSupportedIntelArchs::CASCADELAKE},
+    {"icelake-client", SYCLSupportedIntelArchs::ICELAKECLIENT},
+    {"icelake-server", SYCLSupportedIntelArchs::ICELAKESERVER},
+    {"sapphirerapids", SYCLSupportedIntelArchs::SAPPHIRERAPIDS},
+    {"graniterapids", SYCLSupportedIntelArchs::GRANITERAPIDS},
+    // Intel GPU mapping.
+    {"bdw", SYCLSupportedIntelArchs::BDW},
+    {"skl", SYCLSupportedIntelArchs::SKL},
+    {"kbl", SYCLSupportedIntelArchs::KBL},
+    {"cfl", SYCLSupportedIntelArchs::CFL},
+    {"apl", SYCLSupportedIntelArchs::APL},
+    {"bxt", SYCLSupportedIntelArchs::BXT},
+    {"glk", SYCLSupportedIntelArchs::GLK},
+    {"whl", SYCLSupportedIntelArchs::WHL},
+    {"aml", SYCLSupportedIntelArchs::AML},
+    {"cml", SYCLSupportedIntelArchs::CML},
+    {"icllp", SYCLSupportedIntelArchs::ICLLP},
+    {"icl", SYCLSupportedIntelArchs::ICL},
+    {"ehl", SYCLSupportedIntelArchs::EHL},
+    {"jsl", SYCLSupportedIntelArchs::JSL},
+    {"tgllp", SYCLSupportedIntelArchs::TGLLP},
+    {"tgl", SYCLSupportedIntelArchs::TGL},
+    {"rkl", SYCLSupportedIntelArchs::RKL},
+    {"adl_s", SYCLSupportedIntelArchs::ADL_S},
+    {"rpl_s", SYCLSupportedIntelArchs::RPL_S},
+    {"adl_p", SYCLSupportedIntelArchs::ADL_P},
+    {"adl_n", SYCLSupportedIntelArchs::ADL_N},
+    {"dg1", SYCLSupportedIntelArchs::DG1},
+    {"acm_g10", SYCLSupportedIntelArchs::ACM_G10},
+    {"dg2_g10", SYCLSupportedIntelArchs::DG2_G10},
+    {"acm_g11", SYCLSupportedIntelArchs::ACM_G11},
+    {"dg2_g11", SYCLSupportedIntelArchs::DG2_G11},
+    {"acm_g12", SYCLSupportedIntelArchs::ACM_G12},
+    {"dg2_g12", SYCLSupportedIntelArchs::DG2_G12},
+    {"pvc", SYCLSupportedIntelArchs::PVC},
+    {"pvc_vg", SYCLSupportedIntelArchs::PVC_VG},
+    {"mtl_u", SYCLSupportedIntelArchs::MTL_U},
+    {"mtl_s", SYCLSupportedIntelArchs::MTL_S},
+    {"arl_u", SYCLSupportedIntelArchs::ARL_U},
+    {"arl_s", SYCLSupportedIntelArchs::ARL_S},
+    {"mtl_h", SYCLSupportedIntelArchs::MTL_H},
+    {"arl_h", SYCLSupportedIntelArchs::ARL_H},
+    {"bmg_g21", SYCLSupportedIntelArchs::BMG_G21},
+    {"lnl_m", SYCLSupportedIntelArchs::LNL_M}};
+
+// Translate the user provided value for --offload-arch into the corresponding
+// SYCLSupportedIntelArchs enumerator. Names that are not SYCL supported Intel
+// AOT targets yield SYCLSupportedIntelArchs::UNKNOWN.
+SYCLSupportedIntelArchs StringToOffloadArchSYCL(StringRef ArchNameAsString) {
+  // Linear scan; the first entry whose name matches wins.
+  for (const StringToOffloadArchSYCLMap &Entry : StringToArchNamesMap)
+    if (ArchNameAsString == Entry.ArchName)
+      return Entry.IntelArch;
+  return SYCLSupportedIntelArchs::UNKNOWN;
+}
+
+// Translate a user provided --offload-arch value for an Intel GPU target into
+// the spir64_gen device name accepted by OCLOC (the Intel GPU AOT compiler).
+// Several aliases collapse onto one device name (e.g. "bxt" -> "apl"); an
+// unrecognized name yields an empty string.
+StringRef mapIntelGPUArchName(StringRef ArchName) {
+  return llvm::StringSwitch<StringRef>(ArchName)
+      .Case("bdw", "bdw")
+      .Case("skl", "skl")
+      .Case("kbl", "kbl")
+      .Case("cfl", "cfl")
+      .Cases("apl", "bxt", "apl")
+      .Case("glk", "glk")
+      .Case("whl", "whl")
+      .Case("aml", "aml")
+      .Case("cml", "cml")
+      .Cases("icllp", "icl", "icllp")
+      .Cases("ehl", "jsl", "ehl")
+      .Cases("tgllp", "tgl", "tgllp")
+      .Case("rkl", "rkl")
+      .Cases("adl_s", "rpl_s", "adl_s")
+      .Case("adl_p", "adl_p")
+      .Case("adl_n", "adl_n")
+      .Case("dg1", "dg1")
+      .Cases("acm_g10", "dg2_g10", "acm_g10")
+      .Cases("acm_g11", "dg2_g11", "acm_g11")
+      .Cases("acm_g12", "dg2_g12", "acm_g12")
+      .Case("pvc", "pvc")
+      .Case("pvc_vg", "pvc_vg")
+      .Cases("mtl_u", "mtl_s", "arl_u", "arl_s", "mtl_u")
+      .Case("mtl_h", "mtl_h")
+      .Case("arl_h", "arl_h")
+      .Case("bmg_g21", "bmg_g21")
+      .Case("lnl_m", "lnl_m")
+      .Default("");
+}
+
+// Build the predefined macro name "__SYCL_TARGET_<DEVICE>__" for DeviceName.
+// The table below covers Intel GPU, NVIDIA GPU and AMD GPU device names; a
+// name that is not listed produces an empty string.
+SmallString<64> getGenDeviceMacro(StringRef DeviceName) {
+  const StringRef Suffix = llvm::StringSwitch<StringRef>(DeviceName)
+                               .Case("bdw", "INTEL_GPU_BDW")
+                               .Case("skl", "INTEL_GPU_SKL")
+                               .Case("kbl", "INTEL_GPU_KBL")
+                               .Case("cfl", "INTEL_GPU_CFL")
+                               .Case("apl", "INTEL_GPU_APL")
+                               .Case("glk", "INTEL_GPU_GLK")
+                               .Case("whl", "INTEL_GPU_WHL")
+                               .Case("aml", "INTEL_GPU_AML")
+                               .Case("cml", "INTEL_GPU_CML")
+                               .Case("icllp", "INTEL_GPU_ICLLP")
+                               .Case("ehl", "INTEL_GPU_EHL")
+                               .Case("tgllp", "INTEL_GPU_TGLLP")
+                               .Case("rkl", "INTEL_GPU_RKL")
+                               .Case("adl_s", "INTEL_GPU_ADL_S")
+                               .Case("adl_p", "INTEL_GPU_ADL_P")
+                               .Case("adl_n", "INTEL_GPU_ADL_N")
+                               .Case("dg1", "INTEL_GPU_DG1")
+                               .Case("acm_g10", "INTEL_GPU_ACM_G10")
+                               .Case("acm_g11", "INTEL_GPU_ACM_G11")
+                               .Case("acm_g12", "INTEL_GPU_ACM_G12")
+                               .Case("pvc", "INTEL_GPU_PVC")
+                               .Case("pvc_vg", "INTEL_GPU_PVC_VG")
+                               .Case("mtl_u", "INTEL_GPU_MTL_U")
+                               .Case("mtl_h", "INTEL_GPU_MTL_H")
+                               .Case("arl_h", "INTEL_GPU_ARL_H")
+                               .Case("bmg_g21", "INTEL_GPU_BMG_G21")
+                               .Case("lnl_m", "INTEL_GPU_LNL_M")
+                               .Case("ptl_h", "INTEL_GPU_PTL_H")
+                               .Case("ptl_u", "INTEL_GPU_PTL_U")
+                               .Case("sm_50", "NVIDIA_GPU_SM_50")
+                               .Case("sm_52", "NVIDIA_GPU_SM_52")
+                               .Case("sm_53", "NVIDIA_GPU_SM_53")
+                               .Case("sm_60", "NVIDIA_GPU_SM_60")
+                               .Case("sm_61", "NVIDIA_GPU_SM_61")
+                               .Case("sm_62", "NVIDIA_GPU_SM_62")
+                               .Case("sm_70", "NVIDIA_GPU_SM_70")
+                               .Case("sm_72", "NVIDIA_GPU_SM_72")
+                               .Case("sm_75", "NVIDIA_GPU_SM_75")
+                               .Case("sm_80", "NVIDIA_GPU_SM_80")
+                               .Case("sm_86", "NVIDIA_GPU_SM_86")
+                               .Case("sm_87", "NVIDIA_GPU_SM_87")
+                               .Case("sm_89", "NVIDIA_GPU_SM_89")
+                               .Case("sm_90", "NVIDIA_GPU_SM_90")
+                               .Case("sm_90a", "NVIDIA_GPU_SM_90A")
+                               .Case("gfx700", "AMD_GPU_GFX700")
+                               .Case("gfx701", "AMD_GPU_GFX701")
+                               .Case("gfx702", "AMD_GPU_GFX702")
+                               .Case("gfx703", "AMD_GPU_GFX703")
+                               .Case("gfx704", "AMD_GPU_GFX704")
+                               .Case("gfx705", "AMD_GPU_GFX705")
+                               .Case("gfx801", "AMD_GPU_GFX801")
+                               .Case("gfx802", "AMD_GPU_GFX802")
+                               .Case("gfx803", "AMD_GPU_GFX803")
+                               .Case("gfx805", "AMD_GPU_GFX805")
+                               .Case("gfx810", "AMD_GPU_GFX810")
+                               .Case("gfx900", "AMD_GPU_GFX900")
+                               .Case("gfx902", "AMD_GPU_GFX902")
+                               .Case("gfx904", "AMD_GPU_GFX904")
+                               .Case("gfx906", "AMD_GPU_GFX906")
+                               .Case("gfx908", "AMD_GPU_GFX908")
+                               .Case("gfx909", "AMD_GPU_GFX909")
+                               .Case("gfx90a", "AMD_GPU_GFX90A")
+                               .Case("gfx90c", "AMD_GPU_GFX90C")
+                               .Case("gfx940", "AMD_GPU_GFX940")
+                               .Case("gfx941", "AMD_GPU_GFX941")
+                               .Case("gfx942", "AMD_GPU_GFX942")
+                               .Case("gfx1010", "AMD_GPU_GFX1010")
+                               .Case("gfx1011", "AMD_GPU_GFX1011")
+                               .Case("gfx1012", "AMD_GPU_GFX1012")
+                               .Case("gfx1013", "AMD_GPU_GFX1013")
+                               .Case("gfx1030", "AMD_GPU_GFX1030")
+                               .Case("gfx1031", "AMD_GPU_GFX1031")
+                               .Case("gfx1032", "AMD_GPU_GFX1032")
+                               .Case("gfx1033", "AMD_GPU_GFX1033")
+                               .Case("gfx1034", "AMD_GPU_GFX1034")
+                               .Case("gfx1035", "AMD_GPU_GFX1035")
+                               .Case("gfx1036", "AMD_GPU_GFX1036")
+                               .Case("gfx1100", "AMD_GPU_GFX1100")
+                               .Case("gfx1101", "AMD_GPU_GFX1101")
+                               .Case("gfx1102", "AMD_GPU_GFX1102")
+                               .Case("gfx1103", "AMD_GPU_GFX1103")
+                               .Case("gfx1150", "AMD_GPU_GFX1150")
+                               .Case("gfx1151", "AMD_GPU_GFX1151")
+                               .Case("gfx1200", "AMD_GPU_GFX1200")
+                               .Case("gfx1201", "AMD_GPU_GFX1201")
+                               .Default("");
+
+  SmallString<64> Result;
+  // Unknown device: hand back the empty string untouched.
+  if (Suffix.empty())
+    return Result;
+
+  Result += "__SYCL_TARGET_";
+  Result += Suffix;
+  Result += "__";
+  return Result;
+}
+
+} // namespace clang
diff --git a/clang/test/Driver/clang-sycl-linker-test.cpp b/clang/test/Driver/clang-sycl-linker-test.cpp
index f358900b4fbd8..07850dc41b4fc 100644
--- a/clang/test/Driver/clang-sycl-linker-test.cpp
+++ b/clang/test/Driver/clang-sycl-linker-test.cpp
@@ -46,3 +46,39 @@
// RUN: clang-sycl-linker --dry-run -triple spirv64 %t_1.bc %t_2.bc -o a.spv 2>&1 \
// RUN: | FileCheck %s --check-prefix=LLVMOPTSLIN
// LLVMOPTSLIN: -spirv-debug-info-version=nonsemantic-shader-200 -spirv-allow-unknown-intrinsics=llvm.genx. -spirv-ext=
+//
+// Test AOT compilation for an Intel GPU.
+// RUN: clang-sycl-linker --dry-run -arch pvc %t_1.bc %t_2.bc -o a.out 2>&1 \
+// RUN: | FileCheck %s --check-prefix=AOT-INTEL-GPU
+// AOT-INTEL-GPU: "{{.*}}llvm-link{{.*}}" {{.*}}.bc {{.*}}.bc -o [[LLVMLINKOUT:.*]].bc --suppress-warnings
+// AOT-INTEL-GPU-NEXT: "{{.*}}llvm-spirv{{.*}}" {{.*}}-o [[SPIRVTRANSLATIONOUT:.*]] [[LLVMLINKOUT]].bc
+// AOT-INTEL-GPU-NEXT: "{{.*}}ocloc{{.*}}" {{.*}}-device pvc {{.*}}-output a.out -file [[SPIRVTRANSLATIONOUT]]
+//
+// Test AOT compilation for an Intel GPU with additional options.
+// RUN: clang-sycl-linker --dry-run -arch pvc %t_1.bc %t_2.bc -o a.out 2>&1 \
+// RUN: --ocloc-options="-a -b" \
+// RUN: | FileCheck %s --check-prefix=AOT-INTEL-GPU-2
+// AOT-INTEL-GPU-2: "{{.*}}llvm-link{{.*}}" {{.*}}.bc {{.*}}.bc -o [[LLVMLINKOUT:.*]].bc --suppress-warnings
+// AOT-INTEL-GPU-2-NEXT: "{{.*}}llvm-spirv{{.*}}" {{.*}}-o [[SPIRVTRANSLATIONOUT:.*]] [[LLVMLINKOUT]].bc
+// AOT-INTEL-GPU-2-NEXT: "{{.*}}ocloc{{.*}}" {{.*}}-device pvc -a -b {{.*}}-output a.out -file [[SPIRVTRANSLATIONOUT]]
+//
+// Test AOT compilation for an Intel CPU.
+// RUN: clang-sycl-linker --dry-run -arch corei7 %t_1.bc %t_2.bc -o a.out 2>&1 \
+// RUN: | FileCheck %s --check-prefix=AOT-INTEL-CPU
+// AOT-INTEL-CPU: "{{.*}}llvm-link{{.*}}" {{.*}}.bc {{.*}}.bc -o [[LLVMLINKOUT:.*]].bc --suppress-warnings
+// AOT-INTEL-CPU-NEXT: "{{.*}}llvm-spirv{{.*}}" {{.*}}-o [[SPIRVTRANSLATIONOUT:.*]] [[LLVMLINKOUT]].bc
+// AOT-INTEL-CPU-NEXT: "{{.*}}opencl-aot{{.*}}" {{.*}}--device=cpu {{.*}}-o a.out [[SPIRVTRANSLATIONOUT]]
+//
+// Test AOT compilation for an Intel CPU with additional options.
+// RUN: clang-sycl-linker --dry-run -arch corei7 %t_1.bc %t_2.bc -o a.out 2>&1 \
+// RUN: --opencl-aot-options="-a -b" \
+// RUN: | FileCheck %s --check-prefix=AOT-INTEL-CPU-2
+// AOT-INTEL-CPU-2: "{{.*}}llvm-link{{.*}}" {{.*}}.bc {{.*}}.bc -o [[LLVMLINKOUT:.*]].bc --suppress-warnings
+// AOT-INTEL-CPU-2-NEXT: "{{.*}}llvm-spirv{{.*}}" {{.*}}-o [[SPIRVTRANSLATIONOUT:.*]] [[LLVMLINKOUT]].bc
+// AOT-INTEL-CPU-2-NEXT: "{{.*}}opencl-aot{{.*}}" {{.*}}--device=cpu -a -b {{.*}}-o a.out [[SPIRVTRANSLATIONOUT]]
+//
+// Check that the output file must be specified.
+// RUN: not clang-sycl-linker --dry-run %t_1.bc %t_2.bc 2>& 1 \
+// RUN: | FileCheck %s --check-prefix=NOOUTPUT
+// NOOUTPUT: Output file is not specified
+//
diff --git a/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp b/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp
index 2bcb3757d49d0..1798907c1f3e0 100644
--- a/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp
+++ b/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp
@@ -14,6 +14,7 @@
// target-specific device code.
//===---------------------------------------------------------------------===//
+#include "clang/Basic/SYCL.h"
#include "clang/Basic/Version.h"
#include "llvm/ADT/StringExtras.h"
@@ -50,6 +51,7 @@
using namespace llvm;
using namespace llvm::opt;
using namespace llvm::object;
+using namespace clang;
/// Save intermediary results.
static bool SaveTemps = false;
@@ -66,6 +68,8 @@ static StringRef OutputFile;
/// Directory to dump SPIR-V IR if requested by user.
static SmallString<128> SPIRVDumpDir;
+static bool IsAOTCompileNeeded = false;
+
static void printVersion(raw_ostream &OS) {
OS << clang::getClangToolFullVersion("clang-sycl-linker") << '\n';
}
@@ -392,7 +396,15 @@ static Expected<StringRef> runLLVMToSPIRVTranslation(StringRef File,
LLVMToSPIRVOptions = A->getValue();
LLVMToSPIRVOptions.split(CmdArgs, " ", /* MaxSplit = */ -1,
/* KeepEmpty = */ false);
- CmdArgs.append({"-o", OutputFile});
+
+ Expected<StringRef> OutFileOrErr =
+ IsAOTCompileNeeded
+ ? createTempFile(Args, sys::path::filename(OutputFile), "spv")
+ : OutputFile;
+ if (!OutFileOrErr)
+ return OutFileOrErr.takeError();
+
+ CmdArgs.append({"-o", *OutFileOrErr});
CmdArgs.push_back(File);
if (Error Err = executeCommands(*LLVMToSPIRVProg, CmdArgs))
return std::move(Err);
@@ -406,7 +418,7 @@ static Expected<StringRef> runLLVMToSPIRVTranslation(StringRef File,
formatv("failed to create dump directory. path: {0}, error_code: {1}",
SPIRVDumpDir, EC.value()));
- StringRef Path = OutputFile;
+ StringRef Path = *OutFileOrErr;
StringRef Filename = llvm::sys::path::filename(Path);
SmallString<128> CopyPath = SPIRVDumpDir;
CopyPath.append(Filename);
@@ -419,7 +431,83 @@ static Expected<StringRef> runLLVMToSPIRVTranslation(StringRef File,
Path, CopyPath, EC.value()));
}
- return OutputFile;
+ return *OutFileOrErr;
+}
+
+/// Ahead-of-time compile a SPIR-V module for the Intel CPU backend.
+/// Looks up the opencl-aot tool and runs it as
+///   opencl-aot --device=cpu [extra options] -o <OutputFile> <InputFile>
+/// 'InputFile' is the input SPIR-V file.
+/// 'Args' holds the linker arguments; the value of the opencl-aot-options
+/// option is split on spaces and forwarded to the tool.
+/// Propagates any error produced by findProgram or executeCommands.
+static Error runAOTCompileIntelCPU(StringRef InputFile, const ArgList &Args) {
+  Expected<std::string> ToolPath =
+      findProgram(Args, "opencl-aot", {getMainExecutable("opencl-aot")});
+  if (!ToolPath)
+    return ToolPath.takeError();
+
+  SmallVector<StringRef, 8> CmdArgs;
+  CmdArgs.push_back(*ToolPath);
+  CmdArgs.push_back("--device=cpu");
+
+  // User supplied extra options go right after the device selection.
+  const StringRef UserOptions = Args.getLastArgValue(OPT_opencl_aot_options_EQ);
+  UserOptions.split(CmdArgs, " ", /*MaxSplit=*/-1, /*KeepEmpty=*/false);
+
+  CmdArgs.push_back("-o");
+  CmdArgs.push_back(OutputFile);
+  CmdArgs.push_back(InputFile);
+  return executeCommands(*ToolPath, CmdArgs);
+}
+
+/// Ahead-of-time compile a SPIR-V module for the Intel GPU backend.
+/// Looks up the ocloc tool and runs it as
+///   ocloc -output_no_suffix -spirv_input -device <arch> [extra options]
+///         -output <OutputFile> -file <InputFile>
+/// 'InputFile' is the input SPIR-V file.
+/// 'Args' holds the linker arguments; the arch option supplies the device
+/// name, and the value of the ocloc-options option is split on spaces and
+/// forwarded to the tool.
+/// Propagates any error produced by findProgram or executeCommands.
+static Error runAOTCompileIntelGPU(StringRef InputFile, const ArgList &Args) {
+  Expected<std::string> ToolPath =
+      findProgram(Args, "ocloc", {getMainExecutable("ocloc")});
+  if (!ToolPath)
+    return ToolPath.takeError();
+
+  SmallVector<StringRef, 8> CmdArgs;
+  CmdArgs.push_back(*ToolPath);
+  // Keep ocloc from modifying the image name.
+  CmdArgs.push_back("-output_no_suffix");
+  CmdArgs.push_back("-spirv_input");
+
+  const StringRef Arch = Args.getLastArgValue(OPT_arch);
+  assert(!Arch.empty() && "Arch must be specified for AOT compilation");
+  CmdArgs.push_back("-device");
+  CmdArgs.push_back(Arch);
+
+  // User supplied extra options go right after the device selection.
+  const StringRef UserOptions = Args.getLastArgValue(OPT_ocloc_options_EQ);
+  UserOptions.split(CmdArgs, " ", /*MaxSplit=*/-1, /*KeepEmpty=*/false);
+
+  CmdArgs.push_back("-output");
+  CmdArgs.push_back(OutputFile);
+  CmdArgs.push_back("-file");
+  CmdArgs.push_back(InputFile);
+  return executeCommands(*ToolPath, CmdArgs);
+}
+
+/// Dispatch AOT compilation to the Intel GPU or Intel CPU backend, depending
+/// on the architecture named by the arch option.
+/// 'InputFile' is the input SPIR-V file.
+/// 'Args' holds the linker arguments and is handed on to the backend specific
+/// helper.
+/// Returns an "Unsupported arch" error when the architecture is neither a
+/// supported Intel GPU nor a supported Intel CPU.
+static Error runAOTCompile(StringRef InputFile, const ArgList &Args) {
+  const SYCLSupportedIntelArchs IntelArch =
+      StringToOffloadArchSYCL(Args.getLastArgValue(OPT_arch));
+  const bool IsGPU = IsSYCLSupportedIntelGPUArch(IntelArch);
+  const bool IsCPU = IsSYCLSupportedIntelCPUArch(IntelArch);
+
+  if (!IsGPU && !IsCPU)
+    return createStringError(inconvertibleErrorCode(), "Unsupported arch");
+
+  // The GPU check is consulted first, then the CPU one.
+  return IsGPU ? runAOTCompileIntelGPU(InputFile, Args)
+               : runAOTCompileIntelCPU(InputFile, Args);
}
Error runSYCLLink(ArrayRef<std::string> Files, const ArgList &Args) {
@@ -427,17 +515,23 @@ Error runSYCLLink(ArrayRef<std::string> Files, const ArgList &Args) {
// First llvm-link step
auto LinkedFile = linkDeviceInputFiles(Files, Args);
if (!LinkedFile)
- reportError(LinkedFile.takeError());
+ return LinkedFile.takeError();
// second llvm-link step
auto DeviceLinkedFile = linkDeviceLibFiles(*LinkedFile, Args);
if (!DeviceLinkedFile)
- reportError(DeviceLinkedFile.takeError());
+ return DeviceLinkedFile.takeError();
// LLVM to SPIR-V translation step
auto SPVFile = runLLVMToSPIRVTranslation(*DeviceLinkedFile, Args);
if (!SPVFile)
return SPVFile.takeError();
+
+ if (IsAOTCompileNeeded) {
+ if (Error Err = runAOTCompile(*SPVFile, Args))
+ return Err;
+ }
+
return Error::success();
}
@@ -474,9 +568,11 @@ int main(int argc, char **argv) {
DryRun = Args.hasArg(OPT_dry_run);
SaveTemps = Args.hasArg(OPT_save_temps);
- OutputFile = "a.spv";
- if (Args.hasArg(OPT_o))
- OutputFile = Args.getLastArgValue(OPT_o);
+ IsAOTCompileNeeded = Args.hasArg(OPT_arch);
+
+ if (!Args.hasArg(OPT_o))
+ reportError(createStringError("Output file is not specified"));
+ OutputFile = Args.getLastArgValue(OPT_o);
if (Args.hasArg(OPT_spirv_dump_device_code_EQ)) {
Arg *A = Args.getLastArg(OPT_spirv_dump_device_code_EQ);
diff --git a/clang/tools/clang-sycl-linker/SYCLLinkOpts.td b/clang/tools/clang-sycl-linker/SYCLLinkOpts.td
index 959fd6c3e867c..abcacc9daed6e 100644
--- a/clang/tools/clang-sycl-linker/SYCLLinkOpts.td
+++ b/clang/tools/clang-sycl-linker/SYCLLinkOpts.td
@@ -50,3 +50,11 @@ def llvm_spirv_path_EQ : Joined<["--"], "llvm-spirv-path=">,
def llvm_spirv_options_EQ : Joined<["--", "-"], "llvm-spirv-options=">,
Flags<[LinkerOnlyOption]>,
HelpText<"Options that will control llvm-spirv step">;
+
+// Extra command-line options for the ocloc (Intel GPU AOT) step.
+// clang-sycl-linker splits the value on spaces and appends each piece to the
+// ocloc invocation.
+def ocloc_options_EQ : Joined<["--", "-"], "ocloc-options=">,
+ Flags<[LinkerOnlyOption]>,
+ HelpText<"Options passed to ocloc for Intel GPU AOT compilation">;
+
+// Extra command-line options for the opencl-aot (Intel CPU AOT) step.
+// clang-sycl-linker splits the value on spaces and appends each piece to the
+// opencl-aot invocation.
+def opencl_aot_options_EQ : Joined<["--", "-"], "opencl-aot-options=">,
+ Flags<[LinkerOnlyOption]>,
+ HelpText<"Options passed to opencl-aot for CPU AOT compilation">;
>From abf2b4be9d2f093f5c2e23f2b359836cf6824a64 Mon Sep 17 00:00:00 2001
From: "Cai, Justin" <justin.cai at intel.com>
Date: Wed, 2 Apr 2025 20:23:24 +0000
Subject: [PATCH 2/5] Return error instead of assert
---
clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp b/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp
index be855c124e466..23c14c8f07200 100644
--- a/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp
+++ b/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp
@@ -430,7 +430,9 @@ static Error runAOTCompileIntelGPU(StringRef InputFile, const ArgList &Args) {
CmdArgs.push_back("-spirv_input");
StringRef Arch(Args.getLastArgValue(OPT_arch_EQ));
- assert(!Arch.empty() && "Arch must be specified for AOT compilation");
+ if (Arch.empty())
+ return createStringError(inconvertibleErrorCode(),
+ "Arch must be specified for AOT compilation");
CmdArgs.push_back("-device");
CmdArgs.push_back(Arch);
>From ff997fab0f621d1bf195d6a854c9e2c59be5fd5a Mon Sep 17 00:00:00 2001
From: "Cai, Justin" <justin.cai at intel.com>
Date: Thu, 3 Apr 2025 07:34:28 +0000
Subject: [PATCH 3/5] Add Intel arches to OffloadArch enum
---
clang/include/clang/Basic/Cuda.h | 66 +++++
clang/include/clang/Basic/SYCL.h | 131 ----------
clang/lib/Basic/CMakeLists.txt | 1 -
clang/lib/Basic/Cuda.cpp | 59 +++++
clang/lib/Basic/SYCL.cpp | 226 ------------------
clang/lib/Basic/Targets/NVPTX.cpp | 55 +++++
clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp | 55 +++++
.../clang-sycl-linker/ClangSYCLLinker.cpp | 8 +-
8 files changed, 239 insertions(+), 362 deletions(-)
delete mode 100644 clang/include/clang/Basic/SYCL.h
delete mode 100644 clang/lib/Basic/SYCL.cpp
diff --git a/clang/include/clang/Basic/Cuda.h b/clang/include/clang/Basic/Cuda.h
index c4eb7b7cac1d6..be8922be5167f 100644
--- a/clang/include/clang/Basic/Cuda.h
+++ b/clang/include/clang/Basic/Cuda.h
@@ -140,6 +140,63 @@ enum class OffloadArch {
AMDGCNSPIRV,
Generic, // A processor model named 'generic' if the target backend defines a
// public one.
+ // Intel CPUs
+ SKYLAKEAVX512,
+ COREAVX2,
+ COREI7AVX,
+ COREI7,
+ WESTMERE,
+ SANDYBRIDGE,
+ IVYBRIDGE,
+ BROADWELL,
+ COFFEELAKE,
+ ALDERLAKE,
+ SKYLAKE,
+ SKX,
+ CASCADELAKE,
+ ICELAKECLIENT,
+ ICELAKESERVER,
+ SAPPHIRERAPIDS,
+ GRANITERAPIDS,
+ // Intel GPUs
+ BDW,
+ SKL,
+ KBL,
+ CFL,
+ APL,
+ BXT,
+ GLK,
+ WHL,
+ AML,
+ CML,
+ ICLLP,
+ ICL,
+ EHL,
+ JSL,
+ TGLLP,
+ TGL,
+ RKL,
+ ADL_S,
+ RPL_S,
+ ADL_P,
+ ADL_N,
+ DG1,
+ ACM_G10,
+ DG2_G10,
+ ACM_G11,
+ DG2_G11,
+ ACM_G12,
+ DG2_G12,
+ PVC,
+ PVC_VG,
+ MTL_U,
+ MTL_S,
+ ARL_U,
+ ARL_S,
+ MTL_H,
+ ARL_H,
+ BMG_G21,
+ LNL_M,
LAST,
CudaDefault = OffloadArch::SM_52,
@@ -163,6 +220,15 @@ static inline bool IsAMDOffloadArch(OffloadArch A) {
return A >= OffloadArch::GFX600 && A < OffloadArch::Generic;
}
+// Returns true when Arch is one of the Intel CPU enumerators, i.e. it lies in
+// the inclusive range [SKYLAKEAVX512, GRANITERAPIDS].
+static inline bool IsIntelCPUArch(OffloadArch Arch) {
+  if (Arch < OffloadArch::SKYLAKEAVX512)
+    return false;
+  return !(Arch > OffloadArch::GRANITERAPIDS);
+}
+
+// Returns true when Arch is one of the Intel GPU enumerators, i.e. it lies in
+// the inclusive range [BDW, LNL_M].
+static inline bool IsIntelGPUArch(OffloadArch Arch) {
+  if (Arch < OffloadArch::BDW)
+    return false;
+  return !(Arch > OffloadArch::LNL_M);
+}
+
const char *OffloadArchToString(OffloadArch A);
const char *OffloadArchToVirtualArchString(OffloadArch A);
diff --git a/clang/include/clang/Basic/SYCL.h b/clang/include/clang/Basic/SYCL.h
deleted file mode 100644
index c7cad37639b91..0000000000000
--- a/clang/include/clang/Basic/SYCL.h
+++ /dev/null
@@ -1,131 +0,0 @@
-//===--- SYCL.h -------------------------------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CLANG_BASIC_SYCL_H
-#define LLVM_CLANG_BASIC_SYCL_H
-
-#include "clang/Basic/Cuda.h"
-
-namespace llvm {
-class StringRef;
-template <unsigned InternalLen> class SmallString;
-} // namespace llvm
-
-namespace clang {
-// List of architectures (Intel CPUs and Intel GPUs)
-// that support SYCL offloading.
-enum class SYCLSupportedIntelArchs {
- // Intel CPUs
- UNKNOWN,
- SKYLAKEAVX512,
- COREAVX2,
- COREI7AVX,
- COREI7,
- WESTMERE,
- SANDYBRIDGE,
- IVYBRIDGE,
- BROADWELL,
- COFFEELAKE,
- ALDERLAKE,
- SKYLAKE,
- SKX,
- CASCADELAKE,
- ICELAKECLIENT,
- ICELAKESERVER,
- SAPPHIRERAPIDS,
- GRANITERAPIDS,
- // Intel GPUs
- BDW,
- SKL,
- KBL,
- CFL,
- APL,
- BXT,
- GLK,
- WHL,
- AML,
- CML,
- ICLLP,
- ICL,
- EHL,
- JSL,
- TGLLP,
- TGL,
- RKL,
- ADL_S,
- RPL_S,
- ADL_P,
- ADL_N,
- DG1,
- ACM_G10,
- DG2_G10,
- ACM_G11,
- DG2_G11,
- ACM_G12,
- DG2_G12,
- PVC,
- PVC_VG,
- MTL_U,
- MTL_S,
- ARL_U,
- ARL_S,
- MTL_H,
- ARL_H,
- BMG_G21,
- LNL_M,
-};
-
-// Check if the given Arch value is a Generic AMD GPU.
-// Currently GFX*_GENERIC AMD GPUs do not support SYCL offloading.
-// This list is used to filter out GFX*_GENERIC AMD GPUs in
-// `IsSYCLSupportedAMDGPUArch`.
-static inline bool IsAMDGenericGPUArch(OffloadArch Arch) {
- return Arch == OffloadArch::GFX9_GENERIC ||
- Arch == OffloadArch::GFX10_1_GENERIC ||
- Arch == OffloadArch::GFX10_3_GENERIC ||
- Arch == OffloadArch::GFX11_GENERIC ||
- Arch == OffloadArch::GFX12_GENERIC;
-}
-
-// Check if the given Arch value is a valid SYCL supported AMD GPU.
-static inline bool IsSYCLSupportedAMDGPUArch(OffloadArch Arch) {
- return Arch >= OffloadArch::GFX700 && Arch < OffloadArch::AMDGCNSPIRV &&
- !IsAMDGenericGPUArch(Arch);
-}
-
-// Check if the given Arch value is a valid SYCL supported NVidia GPU.
-static inline bool IsSYCLSupportedNVidiaGPUArch(OffloadArch Arch) {
- return Arch >= OffloadArch::SM_50 && Arch <= OffloadArch::SM_90a;
-}
-
-// Check if the given Arch value is a valid SYCL supported Intel CPU.
-static inline bool IsSYCLSupportedIntelCPUArch(SYCLSupportedIntelArchs Arch) {
- return Arch >= SYCLSupportedIntelArchs::SKYLAKEAVX512 &&
- Arch <= SYCLSupportedIntelArchs::GRANITERAPIDS;
-}
-
-// Check if the given Arch value is a valid SYCL supported Intel GPU.
-static inline bool IsSYCLSupportedIntelGPUArch(SYCLSupportedIntelArchs Arch) {
- return Arch >= SYCLSupportedIntelArchs::BDW &&
- Arch <= SYCLSupportedIntelArchs::LNL_M;
-}
-
-// Check if the user provided value for --offload-arch is a valid
-// SYCL supported Intel AOT target.
-SYCLSupportedIntelArchs
-StringToOffloadArchSYCL(llvm::StringRef ArchNameAsString);
-
-// This is a mapping between the user provided --offload-arch value for Intel
-// GPU targets and the spir64_gen device name accepted by OCLOC (the Intel GPU
-// AOT compiler).
-llvm::StringRef mapIntelGPUArchName(llvm::StringRef ArchName);
-llvm::SmallString<64> getGenDeviceMacro(llvm::StringRef DeviceName);
-
-} // namespace clang
-
-#endif // LLVM_CLANG_BASIC_SYCL_H
diff --git a/clang/lib/Basic/CMakeLists.txt b/clang/lib/Basic/CMakeLists.txt
index be6d915e01b0a..331dfbb3f4b67 100644
--- a/clang/lib/Basic/CMakeLists.txt
+++ b/clang/lib/Basic/CMakeLists.txt
@@ -90,7 +90,6 @@ add_clang_library(clangBasic
SourceMgrAdapter.cpp
Stack.cpp
StackExhaustionHandler.cpp
- SYCL.cpp
TargetID.cpp
TargetInfo.cpp
Targets.cpp
diff --git a/clang/lib/Basic/Cuda.cpp b/clang/lib/Basic/Cuda.cpp
index 68d042eca2492..f1015c47f314f 100644
--- a/clang/lib/Basic/Cuda.cpp
+++ b/clang/lib/Basic/Cuda.cpp
@@ -84,6 +84,7 @@ struct OffloadArchToStringMap {
#define SM2(sm, ca) {OffloadArch::SM_##sm, "sm_" #sm, ca}
#define SM(sm) SM2(sm, "compute_" #sm)
#define GFX(gpu) {OffloadArch::GFX##gpu, "gfx" #gpu, "compute_amdgcn"}
+// Table entry for an Intel architecture: `name` is stringized exactly as
+// written (so hyphenated names such as skylake-avx512 are allowed) and the
+// third field, which SM/GFX fill with a "compute_*" string, is left empty.
+#define INTEL(name, value) {OffloadArch::value, #name, ""}
static const OffloadArchToStringMap arch_names[] = {
// clang-format off
{OffloadArch::UNUSED, "", ""},
@@ -156,12 +157,70 @@ static const OffloadArchToStringMap arch_names[] = {
GFX(1200), // gfx1200
GFX(1201), // gfx1201
{OffloadArch::AMDGCNSPIRV, "amdgcnspirv", "compute_amdgcn"},
+ // Intel CPUs
+ INTEL(skylake-avx512, SKYLAKEAVX512),
+ INTEL(core-avx2, COREAVX2),
+ INTEL(corei7-avx, COREI7AVX),
+ INTEL(corei7, COREI7),
+ INTEL(westmere, WESTMERE),
+ INTEL(sandybridge, SANDYBRIDGE),
+ INTEL(ivybridge, IVYBRIDGE),
+ INTEL(broadwell, BROADWELL),
+ INTEL(coffeelake, COFFEELAKE),
+ INTEL(alderlake, ALDERLAKE),
+ INTEL(skylake, SKYLAKE),
+ INTEL(skx, SKX),
+ INTEL(cascadelake, CASCADELAKE),
+ INTEL(icelake-client, ICELAKECLIENT),
+ INTEL(icelake-server, ICELAKESERVER),
+ INTEL(sapphirerapids, SAPPHIRERAPIDS),
+ INTEL(graniterapids, GRANITERAPIDS),
+ // Intel GPUs
+ INTEL(bdw, BDW),
+ INTEL(skl, SKL),
+ INTEL(kbl, KBL),
+ INTEL(cfl, CFL),
+ INTEL(apl, APL),
+ INTEL(bxt, BXT),
+ INTEL(glk, GLK),
+ INTEL(whl, WHL),
+ INTEL(aml, AML),
+ INTEL(cml, CML),
+ INTEL(icllp, ICLLP),
+ INTEL(icl, ICL),
+ INTEL(ehl, EHL),
+ INTEL(jsl, JSL),
+ INTEL(tgllp, TGLLP),
+ INTEL(tgl, TGL),
+ INTEL(rkl, RKL),
+ INTEL(adl_s, ADL_S),
+ INTEL(rpl_s, RPL_S),
+ INTEL(adl_p, ADL_P),
+ INTEL(adl_n, ADL_N),
+ INTEL(dg1, DG1),
+ INTEL(acm_g10, ACM_G10),
+ INTEL(dg2_g10, DG2_G10),
+ INTEL(acm_g11, ACM_G11),
+ INTEL(dg2_g11, DG2_G11),
+ INTEL(acm_g12, ACM_G12),
+ INTEL(dg2_g12, DG2_G12),
+ INTEL(pvc, PVC),
+ INTEL(pvc_vg, PVC_VG),
+ INTEL(mtl_u, MTL_U),
+ INTEL(mtl_s, MTL_S),
+ INTEL(arl_u, ARL_U),
+ INTEL(arl_s, ARL_S),
+ INTEL(mtl_h, MTL_H),
+ INTEL(arl_h, ARL_H),
+ INTEL(bmg_g21, BMG_G21),
+ INTEL(lnl_m, LNL_M),
{OffloadArch::Generic, "generic", ""},
// clang-format on
};
#undef SM
#undef SM2
#undef GFX
+#undef INTEL
const char *OffloadArchToString(OffloadArch A) {
auto result = std::find_if(
diff --git a/clang/lib/Basic/SYCL.cpp b/clang/lib/Basic/SYCL.cpp
deleted file mode 100644
index 9ac5470cdbe5a..0000000000000
--- a/clang/lib/Basic/SYCL.cpp
+++ /dev/null
@@ -1,226 +0,0 @@
-#include "clang/Basic/SYCL.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/StringSwitch.h"
-
-using namespace llvm;
-
-namespace clang {
-
-// Struct that relates an AOT target value with
-// Intel CPUs and Intel GPUs.
-struct StringToOffloadArchSYCLMap {
- const char *ArchName;
- SYCLSupportedIntelArchs IntelArch;
-};
-
-// Mapping of supported SYCL offloading architectures.
-static const StringToOffloadArchSYCLMap StringToArchNamesMap[] = {
- // Intel CPU mapping.
- {"skylake-avx512", SYCLSupportedIntelArchs::SKYLAKEAVX512},
- {"core-avx2", SYCLSupportedIntelArchs::COREAVX2},
- {"corei7-avx", SYCLSupportedIntelArchs::COREI7AVX},
- {"corei7", SYCLSupportedIntelArchs::COREI7},
- {"westmere", SYCLSupportedIntelArchs::WESTMERE},
- {"sandybridge", SYCLSupportedIntelArchs::SANDYBRIDGE},
- {"ivybridge", SYCLSupportedIntelArchs::IVYBRIDGE},
- {"broadwell", SYCLSupportedIntelArchs::BROADWELL},
- {"coffeelake", SYCLSupportedIntelArchs::COFFEELAKE},
- {"alderlake", SYCLSupportedIntelArchs::ALDERLAKE},
- {"skylake", SYCLSupportedIntelArchs::SKYLAKE},
- {"skx", SYCLSupportedIntelArchs::SKX},
- {"cascadelake", SYCLSupportedIntelArchs::CASCADELAKE},
- {"icelake-client", SYCLSupportedIntelArchs::ICELAKECLIENT},
- {"icelake-server", SYCLSupportedIntelArchs::ICELAKESERVER},
- {"sapphirerapids", SYCLSupportedIntelArchs::SAPPHIRERAPIDS},
- {"graniterapids", SYCLSupportedIntelArchs::GRANITERAPIDS},
- // Intel GPU mapping.
- {"bdw", SYCLSupportedIntelArchs::BDW},
- {"skl", SYCLSupportedIntelArchs::SKL},
- {"kbl", SYCLSupportedIntelArchs::KBL},
- {"cfl", SYCLSupportedIntelArchs::CFL},
- {"apl", SYCLSupportedIntelArchs::APL},
- {"bxt", SYCLSupportedIntelArchs::BXT},
- {"glk", SYCLSupportedIntelArchs::GLK},
- {"whl", SYCLSupportedIntelArchs::WHL},
- {"aml", SYCLSupportedIntelArchs::AML},
- {"cml", SYCLSupportedIntelArchs::CML},
- {"icllp", SYCLSupportedIntelArchs::ICLLP},
- {"icl", SYCLSupportedIntelArchs::ICL},
- {"ehl", SYCLSupportedIntelArchs::EHL},
- {"jsl", SYCLSupportedIntelArchs::JSL},
- {"tgllp", SYCLSupportedIntelArchs::TGLLP},
- {"tgl", SYCLSupportedIntelArchs::TGL},
- {"rkl", SYCLSupportedIntelArchs::RKL},
- {"adl_s", SYCLSupportedIntelArchs::ADL_S},
- {"rpl_s", SYCLSupportedIntelArchs::RPL_S},
- {"adl_p", SYCLSupportedIntelArchs::ADL_P},
- {"adl_n", SYCLSupportedIntelArchs::ADL_N},
- {"dg1", SYCLSupportedIntelArchs::DG1},
- {"acm_g10", SYCLSupportedIntelArchs::ACM_G10},
- {"dg2_g10", SYCLSupportedIntelArchs::DG2_G10},
- {"acm_g11", SYCLSupportedIntelArchs::ACM_G11},
- {"dg2_g10", SYCLSupportedIntelArchs::DG2_G10},
- {"dg2_g11", SYCLSupportedIntelArchs::DG2_G11},
- {"acm_g12", SYCLSupportedIntelArchs::ACM_G12},
- {"dg2_g12", SYCLSupportedIntelArchs::DG2_G12},
- {"pvc", SYCLSupportedIntelArchs::PVC},
- {"pvc_vg", SYCLSupportedIntelArchs::PVC_VG},
- {"mtl_u", SYCLSupportedIntelArchs::MTL_U},
- {"mtl_s", SYCLSupportedIntelArchs::MTL_S},
- {"arl_u", SYCLSupportedIntelArchs::ARL_U},
- {"arl_s", SYCLSupportedIntelArchs::ARL_S},
- {"mtl_h", SYCLSupportedIntelArchs::MTL_H},
- {"arl_h", SYCLSupportedIntelArchs::ARL_H},
- {"bmg_g21", SYCLSupportedIntelArchs::BMG_G21},
- {"lnl_m", SYCLSupportedIntelArchs::LNL_M}};
-
-// Check if the user provided value for --offload-arch is a valid
-// SYCL supported Intel AOT target.
-SYCLSupportedIntelArchs StringToOffloadArchSYCL(StringRef ArchNameAsString) {
- auto result =
- llvm::find_if(StringToArchNamesMap,
- [ArchNameAsString](const StringToOffloadArchSYCLMap &map) {
- return ArchNameAsString == map.ArchName;
- });
- if (result == std::end(StringToArchNamesMap))
- return SYCLSupportedIntelArchs::UNKNOWN;
- return result->IntelArch;
-}
-
-// This is a mapping between the user provided --offload-arch value for Intel
-// GPU targets and the spir64_gen device name accepted by OCLOC (the Intel GPU
-// AOT compiler).
-StringRef mapIntelGPUArchName(StringRef ArchName) {
- StringRef Arch;
- Arch = llvm::StringSwitch<StringRef>(ArchName)
- .Case("bdw", "bdw")
- .Case("skl", "skl")
- .Case("kbl", "kbl")
- .Case("cfl", "cfl")
- .Cases("apl", "bxt", "apl")
- .Case("glk", "glk")
- .Case("whl", "whl")
- .Case("aml", "aml")
- .Case("cml", "cml")
- .Cases("icllp", "icl", "icllp")
- .Cases("ehl", "jsl", "ehl")
- .Cases("tgllp", "tgl", "tgllp")
- .Case("rkl", "rkl")
- .Cases("adl_s", "rpl_s", "adl_s")
- .Case("adl_p", "adl_p")
- .Case("adl_n", "adl_n")
- .Case("dg1", "dg1")
- .Cases("acm_g10", "dg2_g10", "acm_g10")
- .Cases("acm_g11", "dg2_g11", "acm_g11")
- .Cases("acm_g12", "dg2_g12", "acm_g12")
- .Case("pvc", "pvc")
- .Case("pvc_vg", "pvc_vg")
- .Cases("mtl_u", "mtl_s", "arl_u", "arl_s", "mtl_u")
- .Case("mtl_h", "mtl_h")
- .Case("arl_h", "arl_h")
- .Case("bmg_g21", "bmg_g21")
- .Case("lnl_m", "lnl_m")
- .Default("");
- return Arch;
-}
-
-SmallString<64> getGenDeviceMacro(StringRef DeviceName) {
- SmallString<64> Macro;
- StringRef Ext = llvm::StringSwitch<StringRef>(DeviceName)
- .Case("bdw", "INTEL_GPU_BDW")
- .Case("skl", "INTEL_GPU_SKL")
- .Case("kbl", "INTEL_GPU_KBL")
- .Case("cfl", "INTEL_GPU_CFL")
- .Case("apl", "INTEL_GPU_APL")
- .Case("glk", "INTEL_GPU_GLK")
- .Case("whl", "INTEL_GPU_WHL")
- .Case("aml", "INTEL_GPU_AML")
- .Case("cml", "INTEL_GPU_CML")
- .Case("icllp", "INTEL_GPU_ICLLP")
- .Case("ehl", "INTEL_GPU_EHL")
- .Case("tgllp", "INTEL_GPU_TGLLP")
- .Case("rkl", "INTEL_GPU_RKL")
- .Case("adl_s", "INTEL_GPU_ADL_S")
- .Case("adl_p", "INTEL_GPU_ADL_P")
- .Case("adl_n", "INTEL_GPU_ADL_N")
- .Case("dg1", "INTEL_GPU_DG1")
- .Case("acm_g10", "INTEL_GPU_ACM_G10")
- .Case("acm_g11", "INTEL_GPU_ACM_G11")
- .Case("acm_g12", "INTEL_GPU_ACM_G12")
- .Case("pvc", "INTEL_GPU_PVC")
- .Case("pvc_vg", "INTEL_GPU_PVC_VG")
- .Case("mtl_u", "INTEL_GPU_MTL_U")
- .Case("mtl_h", "INTEL_GPU_MTL_H")
- .Case("arl_h", "INTEL_GPU_ARL_H")
- .Case("bmg_g21", "INTEL_GPU_BMG_G21")
- .Case("lnl_m", "INTEL_GPU_LNL_M")
- .Case("ptl_h", "INTEL_GPU_PTL_H")
- .Case("ptl_u", "INTEL_GPU_PTL_U")
- .Case("sm_50", "NVIDIA_GPU_SM_50")
- .Case("sm_52", "NVIDIA_GPU_SM_52")
- .Case("sm_53", "NVIDIA_GPU_SM_53")
- .Case("sm_60", "NVIDIA_GPU_SM_60")
- .Case("sm_61", "NVIDIA_GPU_SM_61")
- .Case("sm_62", "NVIDIA_GPU_SM_62")
- .Case("sm_70", "NVIDIA_GPU_SM_70")
- .Case("sm_72", "NVIDIA_GPU_SM_72")
- .Case("sm_75", "NVIDIA_GPU_SM_75")
- .Case("sm_80", "NVIDIA_GPU_SM_80")
- .Case("sm_86", "NVIDIA_GPU_SM_86")
- .Case("sm_87", "NVIDIA_GPU_SM_87")
- .Case("sm_89", "NVIDIA_GPU_SM_89")
- .Case("sm_90", "NVIDIA_GPU_SM_90")
- .Case("sm_90a", "NVIDIA_GPU_SM_90A")
- .Case("gfx700", "AMD_GPU_GFX700")
- .Case("gfx701", "AMD_GPU_GFX701")
- .Case("gfx702", "AMD_GPU_GFX702")
- .Case("gfx703", "AMD_GPU_GFX703")
- .Case("gfx704", "AMD_GPU_GFX704")
- .Case("gfx705", "AMD_GPU_GFX705")
- .Case("gfx801", "AMD_GPU_GFX801")
- .Case("gfx802", "AMD_GPU_GFX802")
- .Case("gfx803", "AMD_GPU_GFX803")
- .Case("gfx805", "AMD_GPU_GFX805")
- .Case("gfx810", "AMD_GPU_GFX810")
- .Case("gfx900", "AMD_GPU_GFX900")
- .Case("gfx902", "AMD_GPU_GFX902")
- .Case("gfx904", "AMD_GPU_GFX904")
- .Case("gfx906", "AMD_GPU_GFX906")
- .Case("gfx908", "AMD_GPU_GFX908")
- .Case("gfx909", "AMD_GPU_GFX909")
- .Case("gfx90a", "AMD_GPU_GFX90A")
- .Case("gfx90c", "AMD_GPU_GFX90C")
- .Case("gfx940", "AMD_GPU_GFX940")
- .Case("gfx941", "AMD_GPU_GFX941")
- .Case("gfx942", "AMD_GPU_GFX942")
- .Case("gfx1010", "AMD_GPU_GFX1010")
- .Case("gfx1011", "AMD_GPU_GFX1011")
- .Case("gfx1012", "AMD_GPU_GFX1012")
- .Case("gfx1013", "AMD_GPU_GFX1013")
- .Case("gfx1030", "AMD_GPU_GFX1030")
- .Case("gfx1031", "AMD_GPU_GFX1031")
- .Case("gfx1032", "AMD_GPU_GFX1032")
- .Case("gfx1033", "AMD_GPU_GFX1033")
- .Case("gfx1034", "AMD_GPU_GFX1034")
- .Case("gfx1035", "AMD_GPU_GFX1035")
- .Case("gfx1036", "AMD_GPU_GFX1036")
- .Case("gfx1100", "AMD_GPU_GFX1100")
- .Case("gfx1101", "AMD_GPU_GFX1101")
- .Case("gfx1102", "AMD_GPU_GFX1102")
- .Case("gfx1103", "AMD_GPU_GFX1103")
- .Case("gfx1150", "AMD_GPU_GFX1150")
- .Case("gfx1151", "AMD_GPU_GFX1151")
- .Case("gfx1200", "AMD_GPU_GFX1200")
- .Case("gfx1201", "AMD_GPU_GFX1201")
- .Default("");
- if (!Ext.empty()) {
- Macro = "__SYCL_TARGET_";
- Macro += Ext;
- Macro += "__";
- }
- return Macro;
-}
-
-} // namespace clang
diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp
index 5931a77a85fec..4f04d83c9c068 100644
--- a/clang/lib/Basic/Targets/NVPTX.cpp
+++ b/clang/lib/Basic/Targets/NVPTX.cpp
@@ -240,6 +240,61 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
case OffloadArch::GFX1201:
case OffloadArch::AMDGCNSPIRV:
case OffloadArch::Generic:
+ case OffloadArch::SKYLAKEAVX512:
+ case OffloadArch::COREAVX2:
+ case OffloadArch::COREI7AVX:
+ case OffloadArch::COREI7:
+ case OffloadArch::WESTMERE:
+ case OffloadArch::SANDYBRIDGE:
+ case OffloadArch::IVYBRIDGE:
+ case OffloadArch::BROADWELL:
+ case OffloadArch::COFFEELAKE:
+ case OffloadArch::ALDERLAKE:
+ case OffloadArch::SKYLAKE:
+ case OffloadArch::SKX:
+ case OffloadArch::CASCADELAKE:
+ case OffloadArch::ICELAKECLIENT:
+ case OffloadArch::ICELAKESERVER:
+ case OffloadArch::SAPPHIRERAPIDS:
+ case OffloadArch::GRANITERAPIDS:
+ case OffloadArch::BDW:
+ case OffloadArch::SKL:
+ case OffloadArch::KBL:
+ case OffloadArch::CFL:
+ case OffloadArch::APL:
+ case OffloadArch::BXT:
+ case OffloadArch::GLK:
+ case OffloadArch::WHL:
+ case OffloadArch::AML:
+ case OffloadArch::CML:
+ case OffloadArch::ICLLP:
+ case OffloadArch::ICL:
+ case OffloadArch::EHL:
+ case OffloadArch::JSL:
+ case OffloadArch::TGLLP:
+ case OffloadArch::TGL:
+ case OffloadArch::RKL:
+ case OffloadArch::ADL_S:
+ case OffloadArch::RPL_S:
+ case OffloadArch::ADL_P:
+ case OffloadArch::ADL_N:
+ case OffloadArch::DG1:
+ case OffloadArch::ACM_G10:
+ case OffloadArch::DG2_G10:
+ case OffloadArch::ACM_G11:
+ case OffloadArch::DG2_G11:
+ case OffloadArch::ACM_G12:
+ case OffloadArch::DG2_G12:
+ case OffloadArch::PVC:
+ case OffloadArch::PVC_VG:
+ case OffloadArch::MTL_U:
+ case OffloadArch::MTL_S:
+ case OffloadArch::ARL_U:
+ case OffloadArch::ARL_S:
+ case OffloadArch::MTL_H:
+ case OffloadArch::ARL_H:
+ case OffloadArch::BMG_G21:
+ case OffloadArch::LNL_M:
case OffloadArch::LAST:
break;
case OffloadArch::UNKNOWN:
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
index feb2448297542..80990eeed7511 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -2335,6 +2335,61 @@ void CGOpenMPRuntimeGPU::processRequiresDirective(const OMPRequiresDecl *D) {
case OffloadArch::GFX1201:
case OffloadArch::AMDGCNSPIRV:
case OffloadArch::Generic:
+ case OffloadArch::SKYLAKEAVX512:
+ case OffloadArch::COREAVX2:
+ case OffloadArch::COREI7AVX:
+ case OffloadArch::COREI7:
+ case OffloadArch::WESTMERE:
+ case OffloadArch::SANDYBRIDGE:
+ case OffloadArch::IVYBRIDGE:
+ case OffloadArch::BROADWELL:
+ case OffloadArch::COFFEELAKE:
+ case OffloadArch::ALDERLAKE:
+ case OffloadArch::SKYLAKE:
+ case OffloadArch::SKX:
+ case OffloadArch::CASCADELAKE:
+ case OffloadArch::ICELAKECLIENT:
+ case OffloadArch::ICELAKESERVER:
+ case OffloadArch::SAPPHIRERAPIDS:
+ case OffloadArch::GRANITERAPIDS:
+ case OffloadArch::BDW:
+ case OffloadArch::SKL:
+ case OffloadArch::KBL:
+ case OffloadArch::CFL:
+ case OffloadArch::APL:
+ case OffloadArch::BXT:
+ case OffloadArch::GLK:
+ case OffloadArch::WHL:
+ case OffloadArch::AML:
+ case OffloadArch::CML:
+ case OffloadArch::ICLLP:
+ case OffloadArch::ICL:
+ case OffloadArch::EHL:
+ case OffloadArch::JSL:
+ case OffloadArch::TGLLP:
+ case OffloadArch::TGL:
+ case OffloadArch::RKL:
+ case OffloadArch::ADL_S:
+ case OffloadArch::RPL_S:
+ case OffloadArch::ADL_P:
+ case OffloadArch::ADL_N:
+ case OffloadArch::DG1:
+ case OffloadArch::ACM_G10:
+ case OffloadArch::DG2_G10:
+ case OffloadArch::ACM_G11:
+ case OffloadArch::DG2_G11:
+ case OffloadArch::ACM_G12:
+ case OffloadArch::DG2_G12:
+ case OffloadArch::PVC:
+ case OffloadArch::PVC_VG:
+ case OffloadArch::MTL_U:
+ case OffloadArch::MTL_S:
+ case OffloadArch::ARL_U:
+ case OffloadArch::ARL_S:
+ case OffloadArch::MTL_H:
+ case OffloadArch::ARL_H:
+ case OffloadArch::BMG_G21:
+ case OffloadArch::LNL_M:
case OffloadArch::UNUSED:
case OffloadArch::UNKNOWN:
break;
diff --git a/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp b/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp
index 23c14c8f07200..20e677e6f84f1 100644
--- a/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp
+++ b/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp
@@ -14,7 +14,7 @@
// target-specific device code.
//===---------------------------------------------------------------------===//
-#include "clang/Basic/SYCL.h"
+#include "clang/Basic/Cuda.h"
#include "clang/Basic/Version.h"
#include "llvm/ADT/StringExtras.h"
@@ -455,10 +455,10 @@ static Error runAOTCompileIntelGPU(StringRef InputFile, const ArgList &Args) {
/// SYCL AOT compilation step.
static Error runAOTCompile(StringRef InputFile, const ArgList &Args) {
StringRef Arch = Args.getLastArgValue(OPT_arch_EQ);
- SYCLSupportedIntelArchs OffloadArch = StringToOffloadArchSYCL(Arch);
- if (IsSYCLSupportedIntelGPUArch(OffloadArch))
+ OffloadArch OffloadArch = StringToOffloadArch(Arch);
+ if (IsIntelGPUArch(OffloadArch))
return runAOTCompileIntelGPU(InputFile, Args);
- if (IsSYCLSupportedIntelCPUArch(OffloadArch))
+ if (IsIntelCPUArch(OffloadArch))
return runAOTCompileIntelCPU(InputFile, Args);
return createStringError(inconvertibleErrorCode(), "Unsupported arch");
>From e64a41738269886bc2b8e77a266516c897ef33cc Mon Sep 17 00:00:00 2001
From: "Cai, Justin" <justin.cai at intel.com>
Date: Thu, 3 Apr 2025 18:17:17 +0000
Subject: [PATCH 4/5] Fix test failures
---
clang/test/Driver/clang-sycl-linker-test.cpp | 32 +++++++++++--------
.../clang-sycl-linker/ClangSYCLLinker.cpp | 9 ++++--
2 files changed, 24 insertions(+), 17 deletions(-)
diff --git a/clang/test/Driver/clang-sycl-linker-test.cpp b/clang/test/Driver/clang-sycl-linker-test.cpp
index 4b566eb5b4a2f..2ef7afaa69aac 100644
--- a/clang/test/Driver/clang-sycl-linker-test.cpp
+++ b/clang/test/Driver/clang-sycl-linker-test.cpp
@@ -20,7 +20,7 @@
//
// Test a simple case with a random file (not bitcode) as input.
// RUN: touch %t.o
-// RUN: not clang-sycl-linker -triple spirv64 %t.o -o a.spv 2>&1 \
+// RUN: not clang-sycl-linker -triple=spirv64 %t.o -o a.spv 2>&1 \
// RUN: | FileCheck %s --check-prefix=FILETYPEERROR
// FILETYPEERROR: Unsupported file type
//
@@ -33,37 +33,41 @@
// DEVLIBSERR2: '{{.*}}lib3.bc' SYCL device library file is not found
//
// Test AOT compilation for an Intel GPU.
-// RUN: clang-sycl-linker --dry-run -arch pvc %t_1.bc %t_2.bc -o a.out 2>&1 \
+// RUN: clang-sycl-linker --dry-run -v -triple=spirv64 -arch=pvc %t_1.bc %t_2.bc -o a.out 2>&1 \
// RUN: | FileCheck %s --check-prefix=AOT-INTEL-GPU
-// AOT-INTEL-GPU: "{{.*}}llvm-link{{.*}}" {{.*}}.bc {{.*}}.bc -o [[LLVMLINKOUT:.*]].bc --suppress-warnings
-// AOT-INTEL-GPU-NEXT: "{{.*}}llvm-spirv{{.*}}" {{.*}}-o [[SPIRVTRANSLATIONOUT:.*]] [[LLVMLINKOUT]].bc
+// AOT-INTEL-GPU: sycl-device-link: inputs: {{.*}}.bc, {{.*}}.bc libfiles: output: [[LLVMLINKOUT:.*]].bc
+// AOT-INTEL-GPU-NEXT: SPIR-V Backend: input: [[LLVMLINKOUT]].bc, output: [[SPIRVTRANSLATIONOUT:.*]].spv
// AOT-INTEL-GPU-NEXT: "{{.*}}ocloc{{.*}}" {{.*}}-device pvc {{.*}}-output a.out -file [[SPIRVTRANSLATIONOUT]]
//
// Test AOT compilation for an Intel GPU with additional options.
-// RUN: clang-sycl-linker --dry-run -arch pvc %t_1.bc %t_2.bc -o a.out 2>&1 \
+// RUN: clang-sycl-linker --dry-run -v -triple=spirv64 -arch=pvc %t_1.bc %t_2.bc -o a.out 2>&1 \
// RUN: --ocloc-options="-a -b" \
// RUN: | FileCheck %s --check-prefix=AOT-INTEL-GPU-2
-// AOT-INTEL-GPU-2: "{{.*}}llvm-link{{.*}}" {{.*}}.bc {{.*}}.bc -o [[LLVMLINKOUT:.*]].bc --suppress-warnings
-// AOT-INTEL-GPU-2-NEXT: "{{.*}}llvm-spirv{{.*}}" {{.*}}-o [[SPIRVTRANSLATIONOUT:.*]] [[LLVMLINKOUT]].bc
+// AOT-INTEL-GPU-2: sycl-device-link: inputs: {{.*}}.bc, {{.*}}.bc libfiles: output: [[LLVMLINKOUT:.*]].bc
+// AOT-INTEL-GPU-2-NEXT: SPIR-V Backend: input: [[LLVMLINKOUT]].bc, output: [[SPIRVTRANSLATIONOUT:.*]].spv
// AOT-INTEL-GPU-2-NEXT: "{{.*}}ocloc{{.*}}" {{.*}}-device pvc -a -b {{.*}}-output a.out -file [[SPIRVTRANSLATIONOUT]]
//
// Test AOT compilation for an Intel CPU.
-// RUN: clang-sycl-linker --dry-run -arch corei7 %t_1.bc %t_2.bc -o a.out 2>&1 \
+// RUN: clang-sycl-linker --dry-run -v -triple=spirv64 -arch=corei7 %t_1.bc %t_2.bc -o a.out 2>&1 \
// RUN: | FileCheck %s --check-prefix=AOT-INTEL-CPU
-// AOT-INTEL-CPU: "{{.*}}llvm-link{{.*}}" {{.*}}.bc {{.*}}.bc -o [[LLVMLINKOUT:.*]].bc --suppress-warnings
-// AOT-INTEL-CPU-NEXT: "{{.*}}llvm-spirv{{.*}}" {{.*}}-o [[SPIRVTRANSLATIONOUT:.*]] [[LLVMLINKOUT]].bc
+// AOT-INTEL-CPU: sycl-device-link: inputs: {{.*}}.bc, {{.*}}.bc libfiles: output: [[LLVMLINKOUT:.*]].bc
+// AOT-INTEL-CPU-NEXT: SPIR-V Backend: input: [[LLVMLINKOUT]].bc, output: [[SPIRVTRANSLATIONOUT:.*]].spv
// AOT-INTEL-CPU-NEXT: "{{.*}}opencl-aot{{.*}}" {{.*}}--device=cpu {{.*}}-o a.out [[SPIRVTRANSLATIONOUT]]
//
// Test AOT compilation for an Intel CPU with additional options.
-// RUN: clang-sycl-linker --dry-run -arch corei7 %t_1.bc %t_2.bc -o a.out 2>&1 \
+// RUN: clang-sycl-linker --dry-run -v -triple=spirv64 -arch=corei7 %t_1.bc %t_2.bc -o a.out 2>&1 \
// RUN: --opencl-aot-options="-a -b" \
// RUN: | FileCheck %s --check-prefix=AOT-INTEL-CPU-2
-// AOT-INTEL-CPU-2: "{{.*}}llvm-link{{.*}}" {{.*}}.bc {{.*}}.bc -o [[LLVMLINKOUT:.*]].bc --suppress-warnings
-// AOT-INTEL-CPU-2-NEXT: "{{.*}}llvm-spirv{{.*}}" {{.*}}-o [[SPIRVTRANSLATIONOUT:.*]] [[LLVMLINKOUT]].bc
+// AOT-INTEL-CPU-2: sycl-device-link: inputs: {{.*}}.bc, {{.*}}.bc libfiles: output: [[LLVMLINKOUT:.*]].bc
+// AOT-INTEL-CPU-2-NEXT: SPIR-V Backend: input: [[LLVMLINKOUT]].bc, output: [[SPIRVTRANSLATIONOUT:.*]].spv
// AOT-INTEL-CPU-2-NEXT: "{{.*}}opencl-aot{{.*}}" {{.*}}--device=cpu -a -b {{.*}}-o a.out [[SPIRVTRANSLATIONOUT]]
//
// Check that the output file must be specified.
// RUN: not clang-sycl-linker --dry-run %t_1.bc %t_2.bc 2>& 1 \
// RUN: | FileCheck %s --check-prefix=NOOUTPUT
-// NOOUTPUT: Output file is not specified
+// NOOUTPUT: Output file must be specified
//
+// Check that the target triple must be specified.
+// RUN: not clang-sycl-linker --dry-run %t_1.bc %t_2.bc -o a.out 2>& 1 \
+// RUN: | FileCheck %s --check-prefix=NOTARGET
+// NOTARGET: Target triple must be specified
diff --git a/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp b/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp
index 20e677e6f84f1..6a9ef4c0fc42c 100644
--- a/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp
+++ b/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp
@@ -381,9 +381,9 @@ static Expected<StringRef> runSPIRVCodeGen(StringRef File, const ArgList &Args,
if (Verbose)
errs() << formatv("SPIR-V Backend: input: {0}, output: {1}\n", File,
- OutputFile);
+ *OutFileOrErr);
- return OutputFile;
+ return *OutFileOrErr;
}
/// Run AOT compilation for Intel CPU.
@@ -531,9 +531,12 @@ int main(int argc, char **argv) {
IsAOTCompileNeeded = Args.hasArg(OPT_arch_EQ);
if (!Args.hasArg(OPT_o))
- reportError(createStringError("Output file is not specified"));
+ reportError(createStringError("Output file must be specified"));
OutputFile = Args.getLastArgValue(OPT_o);
+ if (!Args.hasArg(OPT_triple_EQ))
+ reportError(createStringError("Target triple must be specified"));
+
if (Args.hasArg(OPT_spirv_dump_device_code_EQ)) {
Arg *A = Args.getLastArg(OPT_spirv_dump_device_code_EQ);
SmallString<128> Dir(A->getValue());
>From 9002f9a743b83077eeeb0fe316eadd33f4880e96 Mon Sep 17 00:00:00 2001
From: "Cai, Justin" <justin.cai at intel.com>
Date: Thu, 3 Apr 2025 18:36:49 +0000
Subject: [PATCH 5/5] Add Offloading.cpp/h
---
clang/include/clang/Basic/Cuda.h | 175 +---------------------
clang/include/clang/Basic/Offloading.h | 193 +++++++++++++++++++++++++
clang/lib/Basic/CMakeLists.txt | 1 +
clang/lib/Basic/Cuda.cpp | 176 ----------------------
clang/lib/Basic/Offloading.cpp | 185 ++++++++++++++++++++++++
5 files changed, 381 insertions(+), 349 deletions(-)
create mode 100644 clang/include/clang/Basic/Offloading.h
create mode 100644 clang/lib/Basic/Offloading.cpp
diff --git a/clang/include/clang/Basic/Cuda.h b/clang/include/clang/Basic/Cuda.h
index be8922be5167f..007cf80223dec 100644
--- a/clang/include/clang/Basic/Cuda.h
+++ b/clang/include/clang/Basic/Cuda.h
@@ -9,6 +9,8 @@
#ifndef LLVM_CLANG_BASIC_CUDA_H
#define LLVM_CLANG_BASIC_CUDA_H
+#include "clang/Basic/Offloading.h"
+
namespace llvm {
class StringRef;
class Twine;
@@ -54,155 +56,6 @@ const char *CudaVersionToString(CudaVersion V);
// Input is "Major.Minor"
CudaVersion CudaStringToVersion(const llvm::Twine &S);
-enum class OffloadArch {
- UNUSED,
- UNKNOWN,
- // TODO: Deprecate and remove GPU architectures older than sm_52.
- SM_20,
- SM_21,
- SM_30,
- // This has a name conflict with sys/mac.h on AIX, rename it as a workaround.
- SM_32_,
- SM_35,
- SM_37,
- SM_50,
- SM_52,
- SM_53,
- SM_60,
- SM_61,
- SM_62,
- SM_70,
- SM_72,
- SM_75,
- SM_80,
- SM_86,
- SM_87,
- SM_89,
- SM_90,
- SM_90a,
- SM_100,
- SM_100a,
- SM_101,
- SM_101a,
- SM_120,
- SM_120a,
- GFX600,
- GFX601,
- GFX602,
- GFX700,
- GFX701,
- GFX702,
- GFX703,
- GFX704,
- GFX705,
- GFX801,
- GFX802,
- GFX803,
- GFX805,
- GFX810,
- GFX9_GENERIC,
- GFX900,
- GFX902,
- GFX904,
- GFX906,
- GFX908,
- GFX909,
- GFX90a,
- GFX90c,
- GFX9_4_GENERIC,
- GFX942,
- GFX950,
- GFX10_1_GENERIC,
- GFX1010,
- GFX1011,
- GFX1012,
- GFX1013,
- GFX10_3_GENERIC,
- GFX1030,
- GFX1031,
- GFX1032,
- GFX1033,
- GFX1034,
- GFX1035,
- GFX1036,
- GFX11_GENERIC,
- GFX1100,
- GFX1101,
- GFX1102,
- GFX1103,
- GFX1150,
- GFX1151,
- GFX1152,
- GFX1153,
- GFX12_GENERIC,
- GFX1200,
- GFX1201,
- AMDGCNSPIRV,
- Generic, // A processor model named 'generic' if the target backend defines a
- // public one.
- // Intel CPUs
- SKYLAKEAVX512,
- COREAVX2,
- COREI7AVX,
- COREI7,
- WESTMERE,
- SANDYBRIDGE,
- IVYBRIDGE,
- BROADWELL,
- COFFEELAKE,
- ALDERLAKE,
- SKYLAKE,
- SKX,
- CASCADELAKE,
- ICELAKECLIENT,
- ICELAKESERVER,
- SAPPHIRERAPIDS,
- GRANITERAPIDS,
- // Intel GPUs
- BDW,
- SKL,
- KBL,
- CFL,
- APL,
- BXT,
- GLK,
- WHL,
- AML,
- CML,
- ICLLP,
- ICL,
- EHL,
- JSL,
- TGLLP,
- TGL,
- RKL,
- ADL_S,
- RPL_S,
- ADL_P,
- ADL_N,
- DG1,
- ACM_G10,
- DG2_G10,
- ACM_G11,
- DG2_G11,
- ACM_G12,
- DG2_G12,
- PVC,
- PVC_VG,
- MTL_U,
- MTL_S,
- ARL_U,
- ARL_S,
- MTL_H,
- ARL_H,
- BMG_G21,
- LNL_M,
- LAST,
-
- CudaDefault = OffloadArch::SM_52,
- HIPDefault = OffloadArch::GFX906,
-};
-
enum class CUDAFunctionTarget {
Device,
Global,
@@ -211,30 +64,6 @@ enum class CUDAFunctionTarget {
InvalidTarget
};
-static inline bool IsNVIDIAOffloadArch(OffloadArch A) {
- return A >= OffloadArch::SM_20 && A < OffloadArch::GFX600;
-}
-
-static inline bool IsAMDOffloadArch(OffloadArch A) {
- // Generic processor model is for testing only.
- return A >= OffloadArch::GFX600 && A < OffloadArch::Generic;
-}
-
-static inline bool IsIntelCPUArch(OffloadArch Arch) {
- return Arch >= OffloadArch::SKYLAKEAVX512 &&
- Arch <= OffloadArch::GRANITERAPIDS;
-}
-
-static inline bool IsIntelGPUArch(OffloadArch Arch) {
- return Arch >= OffloadArch::BDW && Arch <= OffloadArch::LNL_M;
-}
-
-const char *OffloadArchToString(OffloadArch A);
-const char *OffloadArchToVirtualArchString(OffloadArch A);
-
-// The input should have the form "sm_20".
-OffloadArch StringToOffloadArch(llvm::StringRef S);
-
/// Get the earliest CudaVersion that supports the given OffloadArch.
CudaVersion MinVersionForOffloadArch(OffloadArch A);
diff --git a/clang/include/clang/Basic/Offloading.h b/clang/include/clang/Basic/Offloading.h
new file mode 100644
index 0000000000000..2b49973775991
--- /dev/null
+++ b/clang/include/clang/Basic/Offloading.h
@@ -0,0 +1,193 @@
+//===--- Offloading.h - Utilities for offloading ----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_BASIC_OFFLOADING_H
+#define LLVM_CLANG_BASIC_OFFLOADING_H
+
+namespace llvm {
+class StringRef;
+} // namespace llvm
+
+namespace clang {
+
+enum class OffloadArch {
+ UNUSED,
+ UNKNOWN,
+ // TODO: Deprecate and remove GPU architectures older than sm_52.
+ SM_20,
+ SM_21,
+ SM_30,
+ // This has a name conflict with sys/mac.h on AIX, rename it as a workaround.
+ SM_32_,
+ SM_35,
+ SM_37,
+ SM_50,
+ SM_52,
+ SM_53,
+ SM_60,
+ SM_61,
+ SM_62,
+ SM_70,
+ SM_72,
+ SM_75,
+ SM_80,
+ SM_86,
+ SM_87,
+ SM_89,
+ SM_90,
+ SM_90a,
+ SM_100,
+ SM_100a,
+ SM_101,
+ SM_101a,
+ SM_120,
+ SM_120a,
+ GFX600,
+ GFX601,
+ GFX602,
+ GFX700,
+ GFX701,
+ GFX702,
+ GFX703,
+ GFX704,
+ GFX705,
+ GFX801,
+ GFX802,
+ GFX803,
+ GFX805,
+ GFX810,
+ GFX9_GENERIC,
+ GFX900,
+ GFX902,
+ GFX904,
+ GFX906,
+ GFX908,
+ GFX909,
+ GFX90a,
+ GFX90c,
+ GFX9_4_GENERIC,
+ GFX942,
+ GFX950,
+ GFX10_1_GENERIC,
+ GFX1010,
+ GFX1011,
+ GFX1012,
+ GFX1013,
+ GFX10_3_GENERIC,
+ GFX1030,
+ GFX1031,
+ GFX1032,
+ GFX1033,
+ GFX1034,
+ GFX1035,
+ GFX1036,
+ GFX11_GENERIC,
+ GFX1100,
+ GFX1101,
+ GFX1102,
+ GFX1103,
+ GFX1150,
+ GFX1151,
+ GFX1152,
+ GFX1153,
+ GFX12_GENERIC,
+ GFX1200,
+ GFX1201,
+ AMDGCNSPIRV,
+ Generic, // A processor model named 'generic' if the target backend defines a
+ // public one.
+ // Intel CPUs
+ SKYLAKEAVX512,
+ COREAVX2,
+ COREI7AVX,
+ COREI7,
+ WESTMERE,
+ SANDYBRIDGE,
+ IVYBRIDGE,
+ BROADWELL,
+ COFFEELAKE,
+ ALDERLAKE,
+ SKYLAKE,
+ SKX,
+ CASCADELAKE,
+ ICELAKECLIENT,
+ ICELAKESERVER,
+ SAPPHIRERAPIDS,
+ GRANITERAPIDS,
+ // Intel GPUs
+ BDW,
+ SKL,
+ KBL,
+ CFL,
+ APL,
+ BXT,
+ GLK,
+ WHL,
+ AML,
+ CML,
+ ICLLP,
+ ICL,
+ EHL,
+ JSL,
+ TGLLP,
+ TGL,
+ RKL,
+ ADL_S,
+ RPL_S,
+ ADL_P,
+ ADL_N,
+ DG1,
+ ACM_G10,
+ DG2_G10,
+ ACM_G11,
+ DG2_G11,
+ ACM_G12,
+ DG2_G12,
+ PVC,
+ PVC_VG,
+ MTL_U,
+ MTL_S,
+ ARL_U,
+ ARL_S,
+ MTL_H,
+ ARL_H,
+ BMG_G21,
+ LNL_M,
+ LAST,
+
+ CudaDefault = OffloadArch::SM_52,
+ HIPDefault = OffloadArch::GFX906,
+};
+
+static inline bool IsNVIDIAOffloadArch(OffloadArch A) {
+ return A >= OffloadArch::SM_20 && A < OffloadArch::GFX600;
+}
+
+static inline bool IsAMDOffloadArch(OffloadArch A) {
+ // Generic processor model is for testing only.
+ return A >= OffloadArch::GFX600 && A < OffloadArch::Generic;
+}
+
+static inline bool IsIntelCPUArch(OffloadArch Arch) {
+ return Arch >= OffloadArch::SKYLAKEAVX512 &&
+ Arch <= OffloadArch::GRANITERAPIDS;
+}
+
+static inline bool IsIntelGPUArch(OffloadArch Arch) {
+ return Arch >= OffloadArch::BDW && Arch <= OffloadArch::LNL_M;
+}
+
+const char *OffloadArchToString(OffloadArch A);
+const char *OffloadArchToVirtualArchString(OffloadArch A);
+
+// The input should have the form "sm_20".
+OffloadArch StringToOffloadArch(llvm::StringRef S);
+
+} // namespace clang
+
+#endif // LLVM_CLANG_BASIC_OFFLOADING_H
diff --git a/clang/lib/Basic/CMakeLists.txt b/clang/lib/Basic/CMakeLists.txt
index 331dfbb3f4b67..5c91dc43df9b6 100644
--- a/clang/lib/Basic/CMakeLists.txt
+++ b/clang/lib/Basic/CMakeLists.txt
@@ -76,6 +76,7 @@ add_clang_library(clangBasic
MakeSupport.cpp
Module.cpp
ObjCRuntime.cpp
+ Offloading.cpp
OpenCLOptions.cpp
OpenMPKinds.cpp
OperatorPrecedence.cpp
diff --git a/clang/lib/Basic/Cuda.cpp b/clang/lib/Basic/Cuda.cpp
index f1015c47f314f..8ea242911a2ba 100644
--- a/clang/lib/Basic/Cuda.cpp
+++ b/clang/lib/Basic/Cuda.cpp
@@ -73,182 +73,6 @@ CudaVersion ToCudaVersion(llvm::VersionTuple Version) {
return CudaVersion::UNKNOWN;
}
-namespace {
-struct OffloadArchToStringMap {
- OffloadArch arch;
- const char *arch_name;
- const char *virtual_arch_name;
-};
-} // namespace
-
-#define SM2(sm, ca) {OffloadArch::SM_##sm, "sm_" #sm, ca}
-#define SM(sm) SM2(sm, "compute_" #sm)
-#define GFX(gpu) {OffloadArch::GFX##gpu, "gfx" #gpu, "compute_amdgcn"}
-#define INTEL(name, value) {OffloadArch::value, #name, ""}
-static const OffloadArchToStringMap arch_names[] = {
- // clang-format off
- {OffloadArch::UNUSED, "", ""},
- SM2(20, "compute_20"), SM2(21, "compute_20"), // Fermi
- SM(30), {OffloadArch::SM_32_, "sm_32", "compute_32"}, SM(35), SM(37), // Kepler
- SM(50), SM(52), SM(53), // Maxwell
- SM(60), SM(61), SM(62), // Pascal
- SM(70), SM(72), // Volta
- SM(75), // Turing
- SM(80), SM(86), // Ampere
- SM(87), // Jetson/Drive AGX Orin
- SM(89), // Ada Lovelace
- SM(90), // Hopper
- SM(90a), // Hopper
- SM(100), // Blackwell
- SM(100a), // Blackwell
- SM(101), // Blackwell
- SM(101a), // Blackwell
- SM(120), // Blackwell
- SM(120a), // Blackwell
- GFX(600), // gfx600
- GFX(601), // gfx601
- GFX(602), // gfx602
- GFX(700), // gfx700
- GFX(701), // gfx701
- GFX(702), // gfx702
- GFX(703), // gfx703
- GFX(704), // gfx704
- GFX(705), // gfx705
- GFX(801), // gfx801
- GFX(802), // gfx802
- GFX(803), // gfx803
- GFX(805), // gfx805
- GFX(810), // gfx810
- {OffloadArch::GFX9_GENERIC, "gfx9-generic", "compute_amdgcn"},
- GFX(900), // gfx900
- GFX(902), // gfx902
- GFX(904), // gfx903
- GFX(906), // gfx906
- GFX(908), // gfx908
- GFX(909), // gfx909
- GFX(90a), // gfx90a
- GFX(90c), // gfx90c
- {OffloadArch::GFX9_4_GENERIC, "gfx9-4-generic", "compute_amdgcn"},
- GFX(942), // gfx942
- GFX(950), // gfx950
- {OffloadArch::GFX10_1_GENERIC, "gfx10-1-generic", "compute_amdgcn"},
- GFX(1010), // gfx1010
- GFX(1011), // gfx1011
- GFX(1012), // gfx1012
- GFX(1013), // gfx1013
- {OffloadArch::GFX10_3_GENERIC, "gfx10-3-generic", "compute_amdgcn"},
- GFX(1030), // gfx1030
- GFX(1031), // gfx1031
- GFX(1032), // gfx1032
- GFX(1033), // gfx1033
- GFX(1034), // gfx1034
- GFX(1035), // gfx1035
- GFX(1036), // gfx1036
- {OffloadArch::GFX11_GENERIC, "gfx11-generic", "compute_amdgcn"},
- GFX(1100), // gfx1100
- GFX(1101), // gfx1101
- GFX(1102), // gfx1102
- GFX(1103), // gfx1103
- GFX(1150), // gfx1150
- GFX(1151), // gfx1151
- GFX(1152), // gfx1152
- GFX(1153), // gfx1153
- {OffloadArch::GFX12_GENERIC, "gfx12-generic", "compute_amdgcn"},
- GFX(1200), // gfx1200
- GFX(1201), // gfx1201
- {OffloadArch::AMDGCNSPIRV, "amdgcnspirv", "compute_amdgcn"},
- // Intel CPUs
- INTEL(skylake-avx512, SKYLAKEAVX512),
- INTEL(core-avx2, COREAVX2),
- INTEL(corei7-avx, COREI7AVX),
- INTEL(corei7, COREI7),
- INTEL(westmere, WESTMERE),
- INTEL(sandybridge, SANDYBRIDGE),
- INTEL(ivybridge, IVYBRIDGE),
- INTEL(broadwell, BROADWELL),
- INTEL(coffeelake, COFFEELAKE),
- INTEL(alderlake, ALDERLAKE),
- INTEL(skylake, SKYLAKE),
- INTEL(skx, SKX),
- INTEL(cascadelake, CASCADELAKE),
- INTEL(icelake-client, ICELAKECLIENT),
- INTEL(icelakeserver, ICELAKESERVER),
- INTEL(sapphirerapids, SAPPHIRERAPIDS),
- INTEL(graniterapids, GRANITERAPIDS),
- // Intel GPUs
- INTEL(bdw, BDW),
- INTEL(skl, SKL),
- INTEL(kbl, KBL),
- INTEL(cfl, CFL),
- INTEL(apl, APL),
- INTEL(bxt, BXT),
- INTEL(glk, GLK),
- INTEL(whl, WHL),
- INTEL(aml, AML),
- INTEL(cml, CML),
- INTEL(icllp, ICLLP),
- INTEL(icl, ICL),
- INTEL(ehl, EHL),
- INTEL(jsl, JSL),
- INTEL(tgllp, TGLLP),
- INTEL(tgl, TGL),
- INTEL(rkl, RKL),
- INTEL(adl_s, ADL_S),
- INTEL(rpl_s, RPL_S),
- INTEL(adl_p, ADL_P),
- INTEL(adl_n, ADL_N),
- INTEL(dg1, DG1),
- INTEL(acm_g10, ACM_G10),
- INTEL(dg2_g10, DG2_G10),
- INTEL(acm_g11, ACM_G11),
- INTEL(dg2_g11, DG2_G11),
- INTEL(acm_g12, ACM_G12),
- INTEL(dg2_g12, DG2_G12),
- INTEL(pvc, PVC),
- INTEL(pvc_vg, PVC_VG),
- INTEL(mtl_u, MTL_U),
- INTEL(mtl_s, MTL_S),
- INTEL(arl_u, ARL_U),
- INTEL(arl_s, ARL_S),
- INTEL(mtl_h, MTL_H),
- INTEL(arl_h, ARL_H),
- INTEL(bmg_g21, BMG_G21),
- INTEL(lnl_m, LNL_M),
- {OffloadArch::Generic, "generic", ""},
- // clang-format on
-};
-#undef SM
-#undef SM2
-#undef GFX
-#undef INTEL
-
-const char *OffloadArchToString(OffloadArch A) {
- auto result = std::find_if(
- std::begin(arch_names), std::end(arch_names),
- [A](const OffloadArchToStringMap &map) { return A == map.arch; });
- if (result == std::end(arch_names))
- return "unknown";
- return result->arch_name;
-}
-
-const char *OffloadArchToVirtualArchString(OffloadArch A) {
- auto result = std::find_if(
- std::begin(arch_names), std::end(arch_names),
- [A](const OffloadArchToStringMap &map) { return A == map.arch; });
- if (result == std::end(arch_names))
- return "unknown";
- return result->virtual_arch_name;
-}
-
-OffloadArch StringToOffloadArch(llvm::StringRef S) {
- auto result = std::find_if(
- std::begin(arch_names), std::end(arch_names),
- [S](const OffloadArchToStringMap &map) { return S == map.arch_name; });
- if (result == std::end(arch_names))
- return OffloadArch::UNKNOWN;
- return result->arch;
-}
-
CudaVersion MinVersionForOffloadArch(OffloadArch A) {
if (A == OffloadArch::UNKNOWN)
return CudaVersion::UNKNOWN;
diff --git a/clang/lib/Basic/Offloading.cpp b/clang/lib/Basic/Offloading.cpp
new file mode 100644
index 0000000000000..63313a6777dd8
--- /dev/null
+++ b/clang/lib/Basic/Offloading.cpp
@@ -0,0 +1,185 @@
+#include "clang/Basic/Offloading.h"
+
+#include "llvm/ADT/StringRef.h"
+
+#include <algorithm>
+
+namespace clang {
+
+namespace {
+struct OffloadArchToStringMap {
+ OffloadArch arch;
+ const char *arch_name;
+ const char *virtual_arch_name;
+};
+} // namespace
+
+#define SM2(sm, ca) {OffloadArch::SM_##sm, "sm_" #sm, ca}
+#define SM(sm) SM2(sm, "compute_" #sm)
+#define GFX(gpu) {OffloadArch::GFX##gpu, "gfx" #gpu, "compute_amdgcn"}
+#define INTEL(name, value) {OffloadArch::value, #name, ""}
+static const OffloadArchToStringMap arch_names[] = {
+ // clang-format off
+ {OffloadArch::UNUSED, "", ""},
+ SM2(20, "compute_20"), SM2(21, "compute_20"), // Fermi
+ SM(30), {OffloadArch::SM_32_, "sm_32", "compute_32"}, SM(35), SM(37), // Kepler
+ SM(50), SM(52), SM(53), // Maxwell
+ SM(60), SM(61), SM(62), // Pascal
+ SM(70), SM(72), // Volta
+ SM(75), // Turing
+ SM(80), SM(86), // Ampere
+ SM(87), // Jetson/Drive AGX Orin
+ SM(89), // Ada Lovelace
+ SM(90), // Hopper
+ SM(90a), // Hopper
+ SM(100), // Blackwell
+ SM(100a), // Blackwell
+ SM(101), // Blackwell
+ SM(101a), // Blackwell
+ SM(120), // Blackwell
+ SM(120a), // Blackwell
+ GFX(600), // gfx600
+ GFX(601), // gfx601
+ GFX(602), // gfx602
+ GFX(700), // gfx700
+ GFX(701), // gfx701
+ GFX(702), // gfx702
+ GFX(703), // gfx703
+ GFX(704), // gfx704
+ GFX(705), // gfx705
+ GFX(801), // gfx801
+ GFX(802), // gfx802
+ GFX(803), // gfx803
+ GFX(805), // gfx805
+ GFX(810), // gfx810
+ {OffloadArch::GFX9_GENERIC, "gfx9-generic", "compute_amdgcn"},
+ GFX(900), // gfx900
+ GFX(902), // gfx902
+ GFX(904), // gfx904
+ GFX(906), // gfx906
+ GFX(908), // gfx908
+ GFX(909), // gfx909
+ GFX(90a), // gfx90a
+ GFX(90c), // gfx90c
+ {OffloadArch::GFX9_4_GENERIC, "gfx9-4-generic", "compute_amdgcn"},
+ GFX(942), // gfx942
+ GFX(950), // gfx950
+ {OffloadArch::GFX10_1_GENERIC, "gfx10-1-generic", "compute_amdgcn"},
+ GFX(1010), // gfx1010
+ GFX(1011), // gfx1011
+ GFX(1012), // gfx1012
+ GFX(1013), // gfx1013
+ {OffloadArch::GFX10_3_GENERIC, "gfx10-3-generic", "compute_amdgcn"},
+ GFX(1030), // gfx1030
+ GFX(1031), // gfx1031
+ GFX(1032), // gfx1032
+ GFX(1033), // gfx1033
+ GFX(1034), // gfx1034
+ GFX(1035), // gfx1035
+ GFX(1036), // gfx1036
+ {OffloadArch::GFX11_GENERIC, "gfx11-generic", "compute_amdgcn"},
+ GFX(1100), // gfx1100
+ GFX(1101), // gfx1101
+ GFX(1102), // gfx1102
+ GFX(1103), // gfx1103
+ GFX(1150), // gfx1150
+ GFX(1151), // gfx1151
+ GFX(1152), // gfx1152
+ GFX(1153), // gfx1153
+ {OffloadArch::GFX12_GENERIC, "gfx12-generic", "compute_amdgcn"},
+ GFX(1200), // gfx1200
+ GFX(1201), // gfx1201
+ {OffloadArch::AMDGCNSPIRV, "amdgcnspirv", "compute_amdgcn"},
+ // Intel CPUs
+ INTEL(skylake-avx512, SKYLAKEAVX512),
+ INTEL(core-avx2, COREAVX2),
+ INTEL(corei7-avx, COREI7AVX),
+ INTEL(corei7, COREI7),
+ INTEL(westmere, WESTMERE),
+ INTEL(sandybridge, SANDYBRIDGE),
+ INTEL(ivybridge, IVYBRIDGE),
+ INTEL(broadwell, BROADWELL),
+ INTEL(coffeelake, COFFEELAKE),
+ INTEL(alderlake, ALDERLAKE),
+ INTEL(skylake, SKYLAKE),
+ INTEL(skx, SKX),
+ INTEL(cascadelake, CASCADELAKE),
+ INTEL(icelake-client, ICELAKECLIENT),
+ INTEL(icelakeserver, ICELAKESERVER),
+ INTEL(sapphirerapids, SAPPHIRERAPIDS),
+ INTEL(graniterapids, GRANITERAPIDS),
+ // Intel GPUs
+ INTEL(bdw, BDW),
+ INTEL(skl, SKL),
+ INTEL(kbl, KBL),
+ INTEL(cfl, CFL),
+ INTEL(apl, APL),
+ INTEL(bxt, BXT),
+ INTEL(glk, GLK),
+ INTEL(whl, WHL),
+ INTEL(aml, AML),
+ INTEL(cml, CML),
+ INTEL(icllp, ICLLP),
+ INTEL(icl, ICL),
+ INTEL(ehl, EHL),
+ INTEL(jsl, JSL),
+ INTEL(tgllp, TGLLP),
+ INTEL(tgl, TGL),
+ INTEL(rkl, RKL),
+ INTEL(adl_s, ADL_S),
+ INTEL(rpl_s, RPL_S),
+ INTEL(adl_p, ADL_P),
+ INTEL(adl_n, ADL_N),
+ INTEL(dg1, DG1),
+ INTEL(acm_g10, ACM_G10),
+ INTEL(dg2_g10, DG2_G10),
+ INTEL(acm_g11, ACM_G11),
+ INTEL(dg2_g11, DG2_G11),
+ INTEL(acm_g12, ACM_G12),
+ INTEL(dg2_g12, DG2_G12),
+ INTEL(pvc, PVC),
+ INTEL(pvc_vg, PVC_VG),
+ INTEL(mtl_u, MTL_U),
+ INTEL(mtl_s, MTL_S),
+ INTEL(arl_u, ARL_U),
+ INTEL(arl_s, ARL_S),
+ INTEL(mtl_h, MTL_H),
+ INTEL(arl_h, ARL_H),
+ INTEL(bmg_g21, BMG_G21),
+ INTEL(lnl_m, LNL_M),
+ {OffloadArch::Generic, "generic", ""},
+ // clang-format on
+};
+#undef SM
+#undef SM2
+#undef GFX
+#undef INTEL
+
+const char *OffloadArchToString(OffloadArch A) {
+ auto result = std::find_if(
+ std::begin(arch_names), std::end(arch_names),
+ [A](const OffloadArchToStringMap &map) { return A == map.arch; });
+ if (result == std::end(arch_names))
+ return "unknown";
+ return result->arch_name;
+}
+
+const char *OffloadArchToVirtualArchString(OffloadArch A) {
+ auto result = std::find_if(
+ std::begin(arch_names), std::end(arch_names),
+ [A](const OffloadArchToStringMap &map) { return A == map.arch; });
+ if (result == std::end(arch_names))
+ return "unknown";
+ return result->virtual_arch_name;
+}
+
+OffloadArch StringToOffloadArch(llvm::StringRef S) {
+ auto result = std::find_if(
+ std::begin(arch_names), std::end(arch_names),
+ [S](const OffloadArchToStringMap &map) { return S == map.arch_name; });
+ if (result == std::end(arch_names))
+ return OffloadArch::UNKNOWN;
+ return result->arch;
+}
+
+} // namespace clang
More information about the cfe-commits
mailing list