[clang] [Clang][SYCL] Add AOT compilation support for Intel GPUs in clang-sycl-linker (PR #133194)
via cfe-commits
cfe-commits at lists.llvm.org
Wed Mar 26 19:14:58 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang-driver
Author: Justin Cai (jzc)
<details>
<summary>Changes</summary>
This PR adds support for AOT compilation for Intel CPUs and GPUs in clang-sycl-linker. When no `-arch` is passed to `clang-sycl-linker`, the output of the tool is the linked SPIR-V bytecode. If `-arch` is passed to `clang-sycl-linker` and its value is a supported Intel CPU or GPU, the SPIR-V bytecode is further passed to the respective tool (`opencl-aot` for CPUs, `ocloc` for GPUs) for AOT compilation.
---
Patch is 24.93 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/133194.diff
6 Files Affected:
- (added) clang/include/clang/Basic/SYCL.h (+131)
- (modified) clang/lib/Basic/CMakeLists.txt (+1)
- (added) clang/lib/Basic/SYCL.cpp (+226)
- (modified) clang/test/Driver/clang-sycl-linker-test.cpp (+36)
- (modified) clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp (+104-8)
- (modified) clang/tools/clang-sycl-linker/SYCLLinkOpts.td (+8)
``````````diff
diff --git a/clang/include/clang/Basic/SYCL.h b/clang/include/clang/Basic/SYCL.h
new file mode 100644
index 0000000000000..c7cad37639b91
--- /dev/null
+++ b/clang/include/clang/Basic/SYCL.h
@@ -0,0 +1,131 @@
+//===--- SYCL.h -------------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_BASIC_SYCL_H
+#define LLVM_CLANG_BASIC_SYCL_H
+
+#include "clang/Basic/Cuda.h"
+
+namespace llvm {
+class StringRef;
+template <unsigned InternalLen> class SmallString;
+} // namespace llvm
+
+namespace clang {
+// List of architectures (Intel CPUs and Intel GPUs)
+// that support SYCL offloading.
+enum class SYCLSupportedIntelArchs {
+ // Intel CPUs
+ UNKNOWN,
+ SKYLAKEAVX512,
+ COREAVX2,
+ COREI7AVX,
+ COREI7,
+ WESTMERE,
+ SANDYBRIDGE,
+ IVYBRIDGE,
+ BROADWELL,
+ COFFEELAKE,
+ ALDERLAKE,
+ SKYLAKE,
+ SKX,
+ CASCADELAKE,
+ ICELAKECLIENT,
+ ICELAKESERVER,
+ SAPPHIRERAPIDS,
+ GRANITERAPIDS,
+ // Intel GPUs
+ BDW,
+ SKL,
+ KBL,
+ CFL,
+ APL,
+ BXT,
+ GLK,
+ WHL,
+ AML,
+ CML,
+ ICLLP,
+ ICL,
+ EHL,
+ JSL,
+ TGLLP,
+ TGL,
+ RKL,
+ ADL_S,
+ RPL_S,
+ ADL_P,
+ ADL_N,
+ DG1,
+ ACM_G10,
+ DG2_G10,
+ ACM_G11,
+ DG2_G11,
+ ACM_G12,
+ DG2_G12,
+ PVC,
+ PVC_VG,
+ MTL_U,
+ MTL_S,
+ ARL_U,
+ ARL_S,
+ MTL_H,
+ ARL_H,
+ BMG_G21,
+ LNL_M,
+};
+
+// Check if the given Arch value is a Generic AMD GPU.
+// Currently GFX*_GENERIC AMD GPUs do not support SYCL offloading.
+// This list is used to filter out GFX*_GENERIC AMD GPUs in
+// `IsSYCLSupportedAMDGPUArch`.
+static inline bool IsAMDGenericGPUArch(OffloadArch Arch) {
+ return Arch == OffloadArch::GFX9_GENERIC ||
+ Arch == OffloadArch::GFX10_1_GENERIC ||
+ Arch == OffloadArch::GFX10_3_GENERIC ||
+ Arch == OffloadArch::GFX11_GENERIC ||
+ Arch == OffloadArch::GFX12_GENERIC;
+}
+
+// Check if the given Arch value is a valid SYCL supported AMD GPU.
+static inline bool IsSYCLSupportedAMDGPUArch(OffloadArch Arch) {
+ return Arch >= OffloadArch::GFX700 && Arch < OffloadArch::AMDGCNSPIRV &&
+ !IsAMDGenericGPUArch(Arch);
+}
+
+// Check if the given Arch value is a valid SYCL supported NVidia GPU.
+static inline bool IsSYCLSupportedNVidiaGPUArch(OffloadArch Arch) {
+ return Arch >= OffloadArch::SM_50 && Arch <= OffloadArch::SM_90a;
+}
+
+// Check if the given Arch value is a valid SYCL supported Intel CPU.
+static inline bool IsSYCLSupportedIntelCPUArch(SYCLSupportedIntelArchs Arch) {
+ return Arch >= SYCLSupportedIntelArchs::SKYLAKEAVX512 &&
+ Arch <= SYCLSupportedIntelArchs::GRANITERAPIDS;
+}
+
+// Check if the given Arch value is a valid SYCL supported Intel GPU.
+static inline bool IsSYCLSupportedIntelGPUArch(SYCLSupportedIntelArchs Arch) {
+ return Arch >= SYCLSupportedIntelArchs::BDW &&
+ Arch <= SYCLSupportedIntelArchs::LNL_M;
+}
+
+// Check if the user provided value for --offload-arch is a valid
+// SYCL supported Intel AOT target.
+SYCLSupportedIntelArchs
+StringToOffloadArchSYCL(llvm::StringRef ArchNameAsString);
+
+// This is a mapping between the user provided --offload-arch value for Intel
+// GPU targets and the spir64_gen device name accepted by OCLOC (the Intel GPU
+// AOT compiler).
+llvm::StringRef mapIntelGPUArchName(llvm::StringRef ArchName);
+llvm::SmallString<64> getGenDeviceMacro(llvm::StringRef DeviceName);
+
+} // namespace clang
+
+#endif // LLVM_CLANG_BASIC_SYCL_H
diff --git a/clang/lib/Basic/CMakeLists.txt b/clang/lib/Basic/CMakeLists.txt
index 331dfbb3f4b67..be6d915e01b0a 100644
--- a/clang/lib/Basic/CMakeLists.txt
+++ b/clang/lib/Basic/CMakeLists.txt
@@ -90,6 +90,7 @@ add_clang_library(clangBasic
SourceMgrAdapter.cpp
Stack.cpp
StackExhaustionHandler.cpp
+ SYCL.cpp
TargetID.cpp
TargetInfo.cpp
Targets.cpp
diff --git a/clang/lib/Basic/SYCL.cpp b/clang/lib/Basic/SYCL.cpp
new file mode 100644
index 0000000000000..9ac5470cdbe5a
--- /dev/null
+++ b/clang/lib/Basic/SYCL.cpp
@@ -0,0 +1,226 @@
+#include "clang/Basic/SYCL.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
+
+using namespace llvm;
+
+namespace clang {
+
+// Struct that relates an AOT target value with
+// Intel CPUs and Intel GPUs.
+struct StringToOffloadArchSYCLMap {
+ const char *ArchName;
+ SYCLSupportedIntelArchs IntelArch;
+};
+
+// Mapping of supported SYCL offloading architectures.
+static const StringToOffloadArchSYCLMap StringToArchNamesMap[] = {
+ // Intel CPU mapping.
+ {"skylake-avx512", SYCLSupportedIntelArchs::SKYLAKEAVX512},
+ {"core-avx2", SYCLSupportedIntelArchs::COREAVX2},
+ {"corei7-avx", SYCLSupportedIntelArchs::COREI7AVX},
+ {"corei7", SYCLSupportedIntelArchs::COREI7},
+ {"westmere", SYCLSupportedIntelArchs::WESTMERE},
+ {"sandybridge", SYCLSupportedIntelArchs::SANDYBRIDGE},
+ {"ivybridge", SYCLSupportedIntelArchs::IVYBRIDGE},
+ {"broadwell", SYCLSupportedIntelArchs::BROADWELL},
+ {"coffeelake", SYCLSupportedIntelArchs::COFFEELAKE},
+ {"alderlake", SYCLSupportedIntelArchs::ALDERLAKE},
+ {"skylake", SYCLSupportedIntelArchs::SKYLAKE},
+ {"skx", SYCLSupportedIntelArchs::SKX},
+ {"cascadelake", SYCLSupportedIntelArchs::CASCADELAKE},
+ {"icelake-client", SYCLSupportedIntelArchs::ICELAKECLIENT},
+ {"icelake-server", SYCLSupportedIntelArchs::ICELAKESERVER},
+ {"sapphirerapids", SYCLSupportedIntelArchs::SAPPHIRERAPIDS},
+ {"graniterapids", SYCLSupportedIntelArchs::GRANITERAPIDS},
+ // Intel GPU mapping.
+ {"bdw", SYCLSupportedIntelArchs::BDW},
+ {"skl", SYCLSupportedIntelArchs::SKL},
+ {"kbl", SYCLSupportedIntelArchs::KBL},
+ {"cfl", SYCLSupportedIntelArchs::CFL},
+ {"apl", SYCLSupportedIntelArchs::APL},
+ {"bxt", SYCLSupportedIntelArchs::BXT},
+ {"glk", SYCLSupportedIntelArchs::GLK},
+ {"whl", SYCLSupportedIntelArchs::WHL},
+ {"aml", SYCLSupportedIntelArchs::AML},
+ {"cml", SYCLSupportedIntelArchs::CML},
+ {"icllp", SYCLSupportedIntelArchs::ICLLP},
+ {"icl", SYCLSupportedIntelArchs::ICL},
+ {"ehl", SYCLSupportedIntelArchs::EHL},
+ {"jsl", SYCLSupportedIntelArchs::JSL},
+ {"tgllp", SYCLSupportedIntelArchs::TGLLP},
+ {"tgl", SYCLSupportedIntelArchs::TGL},
+ {"rkl", SYCLSupportedIntelArchs::RKL},
+ {"adl_s", SYCLSupportedIntelArchs::ADL_S},
+ {"rpl_s", SYCLSupportedIntelArchs::RPL_S},
+ {"adl_p", SYCLSupportedIntelArchs::ADL_P},
+ {"adl_n", SYCLSupportedIntelArchs::ADL_N},
+ {"dg1", SYCLSupportedIntelArchs::DG1},
+ {"acm_g10", SYCLSupportedIntelArchs::ACM_G10},
+ {"dg2_g10", SYCLSupportedIntelArchs::DG2_G10},
+ {"acm_g11", SYCLSupportedIntelArchs::ACM_G11},
+ {"dg2_g10", SYCLSupportedIntelArchs::DG2_G10},
+ {"dg2_g11", SYCLSupportedIntelArchs::DG2_G11},
+ {"acm_g12", SYCLSupportedIntelArchs::ACM_G12},
+ {"dg2_g12", SYCLSupportedIntelArchs::DG2_G12},
+ {"pvc", SYCLSupportedIntelArchs::PVC},
+ {"pvc_vg", SYCLSupportedIntelArchs::PVC_VG},
+ {"mtl_u", SYCLSupportedIntelArchs::MTL_U},
+ {"mtl_s", SYCLSupportedIntelArchs::MTL_S},
+ {"arl_u", SYCLSupportedIntelArchs::ARL_U},
+ {"arl_s", SYCLSupportedIntelArchs::ARL_S},
+ {"mtl_h", SYCLSupportedIntelArchs::MTL_H},
+ {"arl_h", SYCLSupportedIntelArchs::ARL_H},
+ {"bmg_g21", SYCLSupportedIntelArchs::BMG_G21},
+ {"lnl_m", SYCLSupportedIntelArchs::LNL_M}};
+
+// Check if the user provided value for --offload-arch is a valid
+// SYCL supported Intel AOT target.
+SYCLSupportedIntelArchs StringToOffloadArchSYCL(StringRef ArchNameAsString) {
+ auto result =
+ llvm::find_if(StringToArchNamesMap,
+ [ArchNameAsString](const StringToOffloadArchSYCLMap &map) {
+ return ArchNameAsString == map.ArchName;
+ });
+ if (result == std::end(StringToArchNamesMap))
+ return SYCLSupportedIntelArchs::UNKNOWN;
+ return result->IntelArch;
+}
+
+// This is a mapping between the user provided --offload-arch value for Intel
+// GPU targets and the spir64_gen device name accepted by OCLOC (the Intel GPU
+// AOT compiler).
+StringRef mapIntelGPUArchName(StringRef ArchName) {
+ StringRef Arch;
+ Arch = llvm::StringSwitch<StringRef>(ArchName)
+ .Case("bdw", "bdw")
+ .Case("skl", "skl")
+ .Case("kbl", "kbl")
+ .Case("cfl", "cfl")
+ .Cases("apl", "bxt", "apl")
+ .Case("glk", "glk")
+ .Case("whl", "whl")
+ .Case("aml", "aml")
+ .Case("cml", "cml")
+ .Cases("icllp", "icl", "icllp")
+ .Cases("ehl", "jsl", "ehl")
+ .Cases("tgllp", "tgl", "tgllp")
+ .Case("rkl", "rkl")
+ .Cases("adl_s", "rpl_s", "adl_s")
+ .Case("adl_p", "adl_p")
+ .Case("adl_n", "adl_n")
+ .Case("dg1", "dg1")
+ .Cases("acm_g10", "dg2_g10", "acm_g10")
+ .Cases("acm_g11", "dg2_g11", "acm_g11")
+ .Cases("acm_g12", "dg2_g12", "acm_g12")
+ .Case("pvc", "pvc")
+ .Case("pvc_vg", "pvc_vg")
+ .Cases("mtl_u", "mtl_s", "arl_u", "arl_s", "mtl_u")
+ .Case("mtl_h", "mtl_h")
+ .Case("arl_h", "arl_h")
+ .Case("bmg_g21", "bmg_g21")
+ .Case("lnl_m", "lnl_m")
+ .Default("");
+ return Arch;
+}
+
+SmallString<64> getGenDeviceMacro(StringRef DeviceName) {
+ SmallString<64> Macro;
+ StringRef Ext = llvm::StringSwitch<StringRef>(DeviceName)
+ .Case("bdw", "INTEL_GPU_BDW")
+ .Case("skl", "INTEL_GPU_SKL")
+ .Case("kbl", "INTEL_GPU_KBL")
+ .Case("cfl", "INTEL_GPU_CFL")
+ .Case("apl", "INTEL_GPU_APL")
+ .Case("glk", "INTEL_GPU_GLK")
+ .Case("whl", "INTEL_GPU_WHL")
+ .Case("aml", "INTEL_GPU_AML")
+ .Case("cml", "INTEL_GPU_CML")
+ .Case("icllp", "INTEL_GPU_ICLLP")
+ .Case("ehl", "INTEL_GPU_EHL")
+ .Case("tgllp", "INTEL_GPU_TGLLP")
+ .Case("rkl", "INTEL_GPU_RKL")
+ .Case("adl_s", "INTEL_GPU_ADL_S")
+ .Case("adl_p", "INTEL_GPU_ADL_P")
+ .Case("adl_n", "INTEL_GPU_ADL_N")
+ .Case("dg1", "INTEL_GPU_DG1")
+ .Case("acm_g10", "INTEL_GPU_ACM_G10")
+ .Case("acm_g11", "INTEL_GPU_ACM_G11")
+ .Case("acm_g12", "INTEL_GPU_ACM_G12")
+ .Case("pvc", "INTEL_GPU_PVC")
+ .Case("pvc_vg", "INTEL_GPU_PVC_VG")
+ .Case("mtl_u", "INTEL_GPU_MTL_U")
+ .Case("mtl_h", "INTEL_GPU_MTL_H")
+ .Case("arl_h", "INTEL_GPU_ARL_H")
+ .Case("bmg_g21", "INTEL_GPU_BMG_G21")
+ .Case("lnl_m", "INTEL_GPU_LNL_M")
+ .Case("ptl_h", "INTEL_GPU_PTL_H")
+ .Case("ptl_u", "INTEL_GPU_PTL_U")
+ .Case("sm_50", "NVIDIA_GPU_SM_50")
+ .Case("sm_52", "NVIDIA_GPU_SM_52")
+ .Case("sm_53", "NVIDIA_GPU_SM_53")
+ .Case("sm_60", "NVIDIA_GPU_SM_60")
+ .Case("sm_61", "NVIDIA_GPU_SM_61")
+ .Case("sm_62", "NVIDIA_GPU_SM_62")
+ .Case("sm_70", "NVIDIA_GPU_SM_70")
+ .Case("sm_72", "NVIDIA_GPU_SM_72")
+ .Case("sm_75", "NVIDIA_GPU_SM_75")
+ .Case("sm_80", "NVIDIA_GPU_SM_80")
+ .Case("sm_86", "NVIDIA_GPU_SM_86")
+ .Case("sm_87", "NVIDIA_GPU_SM_87")
+ .Case("sm_89", "NVIDIA_GPU_SM_89")
+ .Case("sm_90", "NVIDIA_GPU_SM_90")
+ .Case("sm_90a", "NVIDIA_GPU_SM_90A")
+ .Case("gfx700", "AMD_GPU_GFX700")
+ .Case("gfx701", "AMD_GPU_GFX701")
+ .Case("gfx702", "AMD_GPU_GFX702")
+ .Case("gfx703", "AMD_GPU_GFX703")
+ .Case("gfx704", "AMD_GPU_GFX704")
+ .Case("gfx705", "AMD_GPU_GFX705")
+ .Case("gfx801", "AMD_GPU_GFX801")
+ .Case("gfx802", "AMD_GPU_GFX802")
+ .Case("gfx803", "AMD_GPU_GFX803")
+ .Case("gfx805", "AMD_GPU_GFX805")
+ .Case("gfx810", "AMD_GPU_GFX810")
+ .Case("gfx900", "AMD_GPU_GFX900")
+ .Case("gfx902", "AMD_GPU_GFX902")
+ .Case("gfx904", "AMD_GPU_GFX904")
+ .Case("gfx906", "AMD_GPU_GFX906")
+ .Case("gfx908", "AMD_GPU_GFX908")
+ .Case("gfx909", "AMD_GPU_GFX909")
+ .Case("gfx90a", "AMD_GPU_GFX90A")
+ .Case("gfx90c", "AMD_GPU_GFX90C")
+ .Case("gfx940", "AMD_GPU_GFX940")
+ .Case("gfx941", "AMD_GPU_GFX941")
+ .Case("gfx942", "AMD_GPU_GFX942")
+ .Case("gfx1010", "AMD_GPU_GFX1010")
+ .Case("gfx1011", "AMD_GPU_GFX1011")
+ .Case("gfx1012", "AMD_GPU_GFX1012")
+ .Case("gfx1013", "AMD_GPU_GFX1013")
+ .Case("gfx1030", "AMD_GPU_GFX1030")
+ .Case("gfx1031", "AMD_GPU_GFX1031")
+ .Case("gfx1032", "AMD_GPU_GFX1032")
+ .Case("gfx1033", "AMD_GPU_GFX1033")
+ .Case("gfx1034", "AMD_GPU_GFX1034")
+ .Case("gfx1035", "AMD_GPU_GFX1035")
+ .Case("gfx1036", "AMD_GPU_GFX1036")
+ .Case("gfx1100", "AMD_GPU_GFX1100")
+ .Case("gfx1101", "AMD_GPU_GFX1101")
+ .Case("gfx1102", "AMD_GPU_GFX1102")
+ .Case("gfx1103", "AMD_GPU_GFX1103")
+ .Case("gfx1150", "AMD_GPU_GFX1150")
+ .Case("gfx1151", "AMD_GPU_GFX1151")
+ .Case("gfx1200", "AMD_GPU_GFX1200")
+ .Case("gfx1201", "AMD_GPU_GFX1201")
+ .Default("");
+ if (!Ext.empty()) {
+ Macro = "__SYCL_TARGET_";
+ Macro += Ext;
+ Macro += "__";
+ }
+ return Macro;
+}
+
+} // namespace clang
diff --git a/clang/test/Driver/clang-sycl-linker-test.cpp b/clang/test/Driver/clang-sycl-linker-test.cpp
index f358900b4fbd8..07850dc41b4fc 100644
--- a/clang/test/Driver/clang-sycl-linker-test.cpp
+++ b/clang/test/Driver/clang-sycl-linker-test.cpp
@@ -46,3 +46,39 @@
// RUN: clang-sycl-linker --dry-run -triple spirv64 %t_1.bc %t_2.bc -o a.spv 2>&1 \
// RUN: | FileCheck %s --check-prefix=LLVMOPTSLIN
// LLVMOPTSLIN: -spirv-debug-info-version=nonsemantic-shader-200 -spirv-allow-unknown-intrinsics=llvm.genx. -spirv-ext=
+//
+// Test AOT compilation for an Intel GPU.
+// RUN: clang-sycl-linker --dry-run -arch pvc %t_1.bc %t_2.bc -o a.out 2>&1 \
+// RUN: | FileCheck %s --check-prefix=AOT-INTEL-GPU
+// AOT-INTEL-GPU: "{{.*}}llvm-link{{.*}}" {{.*}}.bc {{.*}}.bc -o [[LLVMLINKOUT:.*]].bc --suppress-warnings
+// AOT-INTEL-GPU-NEXT: "{{.*}}llvm-spirv{{.*}}" {{.*}}-o [[SPIRVTRANSLATIONOUT:.*]] [[LLVMLINKOUT]].bc
+// AOT-INTEL-GPU-NEXT: "{{.*}}ocloc{{.*}}" {{.*}}-device pvc {{.*}}-output a.out -file [[SPIRVTRANSLATIONOUT]]
+//
+// Test AOT compilation for an Intel GPU with additional options.
+// RUN: clang-sycl-linker --dry-run -arch pvc %t_1.bc %t_2.bc -o a.out 2>&1 \
+// RUN: --ocloc-options="-a -b" \
+// RUN: | FileCheck %s --check-prefix=AOT-INTEL-GPU-2
+// AOT-INTEL-GPU-2: "{{.*}}llvm-link{{.*}}" {{.*}}.bc {{.*}}.bc -o [[LLVMLINKOUT:.*]].bc --suppress-warnings
+// AOT-INTEL-GPU-2-NEXT: "{{.*}}llvm-spirv{{.*}}" {{.*}}-o [[SPIRVTRANSLATIONOUT:.*]] [[LLVMLINKOUT]].bc
+// AOT-INTEL-GPU-2-NEXT: "{{.*}}ocloc{{.*}}" {{.*}}-device pvc -a -b {{.*}}-output a.out -file [[SPIRVTRANSLATIONOUT]]
+//
+// Test AOT compilation for an Intel CPU.
+// RUN: clang-sycl-linker --dry-run -arch corei7 %t_1.bc %t_2.bc -o a.out 2>&1 \
+// RUN: | FileCheck %s --check-prefix=AOT-INTEL-CPU
+// AOT-INTEL-CPU: "{{.*}}llvm-link{{.*}}" {{.*}}.bc {{.*}}.bc -o [[LLVMLINKOUT:.*]].bc --suppress-warnings
+// AOT-INTEL-CPU-NEXT: "{{.*}}llvm-spirv{{.*}}" {{.*}}-o [[SPIRVTRANSLATIONOUT:.*]] [[LLVMLINKOUT]].bc
+// AOT-INTEL-CPU-NEXT: "{{.*}}opencl-aot{{.*}}" {{.*}}--device=cpu {{.*}}-o a.out [[SPIRVTRANSLATIONOUT]]
+//
+// Test AOT compilation for an Intel CPU with additional options.
+// RUN: clang-sycl-linker --dry-run -arch corei7 %t_1.bc %t_2.bc -o a.out 2>&1 \
+// RUN: --opencl-aot-options="-a -b" \
+// RUN: | FileCheck %s --check-prefix=AOT-INTEL-CPU-2
+// AOT-INTEL-CPU-2: "{{.*}}llvm-link{{.*}}" {{.*}}.bc {{.*}}.bc -o [[LLVMLINKOUT:.*]].bc --suppress-warnings
+// AOT-INTEL-CPU-2-NEXT: "{{.*}}llvm-spirv{{.*}}" {{.*}}-o [[SPIRVTRANSLATIONOUT:.*]] [[LLVMLINKOUT]].bc
+// AOT-INTEL-CPU-2-NEXT: "{{.*}}opencl-aot{{.*}}" {{.*}}--device=cpu -a -b {{.*}}-o a.out [[SPIRVTRANSLATIONOUT]]
+//
+// Check that the output file must be specified.
+// RUN: not clang-sycl-linker --dry-run %t_1.bc %t_2.bc 2>& 1 \
+// RUN: | FileCheck %s --check-prefix=NOOUTPUT
+// NOOUTPUT: Output file is not specified
+//
diff --git a/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp b/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp
index 2bcb3757d49d0..1798907c1f3e0 100644
--- a/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp
+++ b/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp
@@ -14,6 +14,7 @@
// target-specific device code.
//===---------------------------------------------------------------------===//
+#include "clang/Basic/SYCL.h"
#include "clang/Basic/Version.h"
#include "llvm/ADT/StringExtras.h"
@@ -50,6 +51,7 @@
using namespace llvm;
using namespace llvm::opt;
using namespace llvm::object;
+using namespace clang;
/// Save intermediary results.
static bool SaveTemps = false;
@@ -66,6 +68,8 @@ static StringRef OutputFile;
/// Directory to dump SPIR-V IR if requested by user.
static SmallString<128> SPIRVDumpDir;
+static bool IsAOTCompileNeeded = false;
+
static void printVersion(raw_ostream &OS) {
OS << clang::getClangToolFullVersion("clang-sycl-linker") << '\n';
}
@@ -392,7 +396,15 @@ static Expected<StringRef> runLLVMToSPIRVTranslation(StringRef File,
LLVMToSPIRVOptions = A->getValue();
LLVMToSPIRVOptions.split(CmdArgs, " ", /* MaxSplit = */ -1,
/* KeepEmpty = */ false);
- CmdArgs.append({"-o", OutputFile});
+
+ Expected<StringRef> OutFileOrErr =
+ IsAOTCompileNeeded
+ ? createTempFile(Args, sys::path::filename(OutputFile), "spv")
+ : OutputFile;
+ if (!OutFileOrErr)
+ return OutFileOrErr.takeError();
+
+ CmdArgs.append({"-o", *OutFileOrErr});
CmdArgs.push_back(File);
if (Error Err = executeCommands(*LLVMToSPIRVProg, CmdArgs))
return std::move(Err);
@@ -406,7 +418,7 @@ static Expected<StringRef> runLLVMToSPIRVTranslation(StringRef File,
formatv("failed to create dump directory. path: {0}, error_code: {1}",
SPIRVDumpDir, EC.value()));
- StringRef Path = OutputFile;
+ StringRef Path = *OutFileOrErr;
StringRef Filename = llvm::sys::path::filename(Path);
SmallString<128> CopyPath = SPIRVDumpDir;
CopyPath.append(Filename);
@@ -419,7 +431,83 @@ static Expected<StringRef> runLLVMToSPIRVTranslation(StringRef File,
Path, CopyPath, EC.value()));
}
- return OutputFile;
+ return *OutFileOrErr;
+}
+
+/// Run AOT compilation for Intel CPU.
+/// Calls opencl-aot tool to generate device code for Intel CPU backend.
+/// 'InputFile' is the input SPIR-V file.
+/// 'Args' encompasses all arguments required for linking and wrapping device
+/// code and will be parsed to generate options required to be passed into the
+/// SYCL AOT compilation step.
+static Error runAOTCompileIntelCPU(StringRef InputFile, const ArgL...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/133194
More information about the cfe-commits
mailing list