[clang] [llvm] [SYCL] Add offload wrapping for SYCL kind. (PR #147508)

Yury Plyakhin via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 17 14:01:31 PDT 2025


================
@@ -620,6 +635,422 @@ void createRegisterFatbinFunction(Module &M, GlobalVariable *FatbinDesc,
   // Add this function to constructors.
   appendToGlobalCtors(M, CtorFunc, /*Priority=*/101);
 }
+
+/// SYCLWrapper helper class that creates all LLVM IRs wrapping given images.
+struct SYCLWrapper {
+  Module &M;
+  LLVMContext &C;
+  SYCLWrappingOptions Options;
+
+  StructType *EntryTy = nullptr;
+  StructType *SyclDeviceImageTy = nullptr;
+  StructType *SyclBinDescTy = nullptr;
+
+  SYCLWrapper(Module &M, const SYCLWrappingOptions &Options)
+      : M(M), C(M.getContext()), Options(Options) {
+    EntryTy = offloading::getEntryTy(M);
+    SyclDeviceImageTy = getSyclDeviceImageTy();
+    SyclBinDescTy = getSyclBinDescTy();
+  }
+
+  IntegerType *getSizeTTy() {
+    switch (M.getDataLayout().getPointerSize()) {
+    case 4:
+      return Type::getInt32Ty(C);
+    case 8:
+      return Type::getInt64Ty(C);
+    }
+    llvm_unreachable("unsupported pointer type size");
+  }
+
+  SmallVector<Constant *, 2> getSizetConstPair(size_t First, size_t Second) {
+    IntegerType *SizeTTy = getSizeTTy();
+    return SmallVector<Constant *, 2>{ConstantInt::get(SizeTTy, First),
+                                      ConstantInt::get(SizeTTy, Second)};
+  }
+
+  /// Note: Properties aren't supported and the support is going
+  /// to be added later.
+  /// Creates a structure corresponding to:
+  /// SYCL specific image descriptor type.
+  /// \code
+  /// struct __sycl.tgt_device_image {
+  ///   // version of this structure - for backward compatibility;
+  ///   // all modifications which change order/type/offsets of existing fields
+  ///   // should increment the version.
+  ///   uint16_t Version;
+  ///   // the kind of offload model the image employs.
+  ///   uint8_t OffloadKind;
+  ///   // format of the image data - SPIRV, LLVMIR bitcode, etc
+  ///   uint8_t Format;
+  ///   // null-terminated string representation of the device's target
+  ///   // architecture
+  ///   const char *Arch;
+  ///   // a null-terminated string; target- and compiler-specific options
+  ///   // which are suggested to use to "compile" program at runtime
+  ///   const char *CompileOptions;
+  ///   // a null-terminated string; target- and compiler-specific options
+  ///   // which are suggested to use to "link" program at runtime
+  ///   const char *LinkOptions;
+  ///   // Pointer to the device binary image start
+  ///   void *ImageStart;
+  ///   // Pointer to the device binary image end
+  ///   void *ImageEnd;
+  ///   // the entry table
+  ///   __tgt_offload_entry *EntriesBegin;
+  ///   __tgt_offload_entry *EntriesEnd;
+  ///   const char *PropertiesBegin;
+  ///   const char *PropertiesEnd;
+  /// };
+  /// \endcode
+  StructType *getSyclDeviceImageTy() {
+    return StructType::create(
+        {
+            Type::getInt16Ty(C),       // Version
+            Type::getInt8Ty(C),        // OffloadKind
+            Type::getInt8Ty(C),        // Format
+            PointerType::getUnqual(C), // Arch
+            PointerType::getUnqual(C), // CompileOptions
+            PointerType::getUnqual(C), // LinkOptions
+            PointerType::getUnqual(C), // ImageStart
+            PointerType::getUnqual(C), // ImageEnd
+            PointerType::getUnqual(C), // EntriesBegin
+            PointerType::getUnqual(C), // EntriesEnd
+            PointerType::getUnqual(C), // PropertiesBegin
+            PointerType::getUnqual(C)  // PropertiesEnd
+        },
+        "__sycl.tgt_device_image");
+  }
+
+  /// Creates a structure for SYCL specific binary descriptor type. Corresponds
+  /// to:
+  ///
+  /// \code
+  ///  struct __sycl.tgt_bin_desc {
+  ///    // version of this structure - for backward compatibility;
+  ///    // all modifications which change order/type/offsets of existing fields
+  ///    // should increment the version.
+  ///    uint16_t Version;
+  ///    uint16_t NumDeviceImages;
+  ///    __sycl.tgt_device_image *DeviceImages;
+  ///    // the offload entry table
+  ///    __tgt_offload_entry *HostEntriesBegin;
+  ///    __tgt_offload_entry *HostEntriesEnd;
+  ///  };
+  /// \endcode
+  StructType *getSyclBinDescTy() {
+    return StructType::create(
+        {Type::getInt16Ty(C), Type::getInt16Ty(C), PointerType::getUnqual(C),
+         PointerType::getUnqual(C), PointerType::getUnqual(C)},
+        "__sycl.tgt_bin_desc");
+  }
+
+  /// Adds a global readonly variable that is initialized by given
+  /// \p Initializer to the module.
+  GlobalVariable *addGlobalArrayVariable(const Twine &Name,
+                                         ArrayRef<char> Initializer,
+                                         const Twine &Section = "") {
+    auto *Arr = ConstantDataArray::get(M.getContext(), Initializer);
+    auto *Var = new GlobalVariable(M, Arr->getType(), /*isConstant*/ true,
+                                   GlobalVariable::InternalLinkage, Arr, Name);
+    Var->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
+
+    SmallVector<char, 32> NameBuf;
+    auto SectionName = Section.toStringRef(NameBuf);
+    if (!SectionName.empty())
+      Var->setSection(SectionName);
+    return Var;
+  }
+
+  /// Adds given \p Buf as a global variable into the module.
+  /// \returns Pair of pointers that point at the beginning and the end of the
+  /// variable.
+  std::pair<Constant *, Constant *>
+  addArrayToModule(ArrayRef<char> Buf, const Twine &Name,
+                   const Twine &Section = "") {
+    auto *Var = addGlobalArrayVariable(Name, Buf, Section);
+    auto *ImageB = ConstantExpr::getGetElementPtr(Var->getValueType(), Var,
+                                                  getSizetConstPair(0, 0));
+    auto *ImageE = ConstantExpr::getGetElementPtr(
+        Var->getValueType(), Var, getSizetConstPair(0, Buf.size()));
+    return std::make_pair(ImageB, ImageE);
+  }
+
+  /// Adds given \p Data as constant byte array in the module.
+  /// \returns Constant pointer to the added data. The pointer type does not
+  /// carry size information.
+  Constant *addRawDataToModule(ArrayRef<char> Data, const Twine &Name) {
+    auto *Var = addGlobalArrayVariable(Name, Data);
+    auto *DataPtr = ConstantExpr::getGetElementPtr(Var->getValueType(), Var,
+                                                   getSizetConstPair(0, 0));
+    return DataPtr;
+  }
+
+  /// Creates a global variable of const char* type and creates an
+  /// initializer that initializes it with \p Str.
+  ///
+  /// \returns Link-time constant pointer (constant expr) to that
+  /// variable.
+  Constant *addStringToModule(StringRef Str, const Twine &Name) {
+    auto *Arr = ConstantDataArray::getString(C, Str);
+    auto *Var = new GlobalVariable(M, Arr->getType(), /*isConstant*/ true,
+                                   GlobalVariable::InternalLinkage, Arr, Name);
+    Var->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
+    auto *Zero = ConstantInt::get(getSizeTTy(), 0);
+    Constant *ZeroZero[] = {Zero, Zero};
+    return ConstantExpr::getGetElementPtr(Var->getValueType(), Var, ZeroZero);
+  }
+
+  /// Creates a global variable that is initiazed with the given \p Entries.
+  ///
+  /// \returns Pair of Constants that point at entries content.
+  std::pair<Constant *, Constant *>
+  addOffloadEntriesToModule(StringRef Entries) {
----------------
YuriPlyakhin wrote:

@jhuber6, we replaced duplicated code with usage of `emitOffloadingEntry` and updated name and description of `addOffloadEntriesToModule` (now it is `initOffloadEntriesPerImage`) to avoid confusion with `getOffloadEntryArray`, because initially similar names created perception that the functions are doing the same, but in reality, they are different.

We need to initialize offload entries per image, because, unlike other targets, each SYCL device image contains its own set of symbols, which may contain different symbols than other images. The reason is per-kernel device code split, which solves the following problems:
1. SYCL 2020 spec requirement: “An implementation may not raise a compile time diagnostic or a run time exception merely due to speculative compilation of a kernel for a device when the application does not actually submit the kernel to that device.” To satisfy this requirement, we split kernels into different images based on the features required.
2. Compilation overhead. There are use cases, when module contains 1000s of kernels. Providing only specific symbols in each of the images allows to compile/run 1 specific kernel/TU, so no need to JIT compile all 1000s.

Does that address all your concerns for this PR?

https://github.com/llvm/llvm-project/pull/147508


More information about the llvm-commits mailing list