[clang] b8d7413 - Revert "[OffloadBundler] Bound compressed bundles by header size, not magic scan (#205587)" (#206678)
via cfe-commits
cfe-commits at lists.llvm.org
Tue Jun 30 01:22:25 PDT 2026
Author: Nikita Popov
Date: 2026-06-30T10:22:20+02:00
New Revision: b8d7413fccb6574d721bbebcfd3b8d5de8a4570d
URL: https://github.com/llvm/llvm-project/commit/b8d7413fccb6574d721bbebcfd3b8d5de8a4570d
DIFF: https://github.com/llvm/llvm-project/commit/b8d7413fccb6574d721bbebcfd3b8d5de8a4570d.diff
LOG: Revert "[OffloadBundler] Bound compressed bundles by header size, not magic scan (#205587)" (#206678)
Causes test failures on big-endian.
This reverts commit d2d80787af6e1d671563654171f34e8eec35fd1b.
This reverts commit bac7ca5f170f25517870e0876d710cc4226c3602.
Added:
Modified:
clang/lib/Driver/OffloadBundler.cpp
llvm/lib/Object/OffloadBundle.cpp
Removed:
clang/test/Driver/Inputs/clang-offload-bundler-magic-collision.co
clang/test/Driver/Inputs/clang-offload-bundler-magic-collision.py
clang/test/Driver/clang-offload-bundler-magic-collision.c
llvm/test/tools/llvm-objdump/Offloading/fatbin-magic-collision.test
################################################################################
diff --git a/clang/lib/Driver/OffloadBundler.cpp b/clang/lib/Driver/OffloadBundler.cpp
index 70b5668c35a30..9d6f32c4a4e8f 100644
--- a/clang/lib/Driver/OffloadBundler.cpp
+++ b/clang/lib/Driver/OffloadBundler.cpp
@@ -1343,18 +1343,6 @@ CompressedOffloadBundle::decompress(const llvm::MemoryBuffer &Input,
llvm::toStringRef(DecompressedData));
}
-// Returns the on-disk size recorded in the compressed offload bundle header at
-// the start of \p Blob, or std::nullopt if the header carries no size field.
-static std::optional<size_t> getCompressedBundleSize(StringRef Blob) {
- Expected<CompressedOffloadBundle::CompressedBundleHeader> HeaderOrErr =
- CompressedOffloadBundle::CompressedBundleHeader::tryParse(Blob);
- if (!HeaderOrErr) {
- consumeError(HeaderOrErr.takeError());
- return std::nullopt;
- }
- return HeaderOrErr->FileSize;
-}
-
// List bundle IDs. Return true if an error was found.
Error OffloadBundler::ListBundleIDsInFile(
StringRef InputFileName, const OffloadBundlerConfig &BundlerConfig) {
@@ -1376,25 +1364,15 @@ Error OffloadBundler::ListBundleIDsInFile(
(**Contents).getBuffer().drop_front(Offset), "",
/*RequiresNullTerminator=*/false);
- size_t CurBundleEnd = StringRef::npos;
if (identify_magic((*Buffer).getBuffer()) ==
file_magic::offload_bundle_compressed) {
- // Locate this bundle's end and the next bundle from the header size.
- if (std::optional<size_t> Size =
- getCompressedBundleSize((*Buffer).getBuffer())) {
- CurBundleEnd = *Size;
- NextBundleStart = (*Buffer).getBuffer().find("CCOB", *Size);
- } else {
- // Legacy bundle without a recorded size: fall back to magic scanning.
- NextBundleStart = (*Buffer).getBuffer().find("CCOB", 4);
- CurBundleEnd = NextBundleStart;
- }
+ NextBundleStart = (*Buffer).getBuffer().find("CCOB", 4);
} else
NextBundleStart = StringRef::npos;
ErrorOr<std::unique_ptr<MemoryBuffer>> CodeOrErr =
MemoryBuffer::getMemBuffer(
- (*Buffer).getBuffer().take_front(CurBundleEnd),
+ (*Buffer).getBuffer().take_front(NextBundleStart),
InputFileName, // FileName,
false);
if (std::error_code EC = CodeOrErr.getError())
@@ -1635,30 +1613,19 @@ Error OffloadBundler::UnbundleFiles() {
(**CodeOrErr).getBuffer().drop_front(Offset), "",
/*RequiresNullTerminator=*/false);
- size_t CurBundleEnd = StringRef::npos;
if (identify_magic((*Buffer).getBuffer()) ==
file_magic::offload_bundle_compressed) {
- // Locate this bundle's end and the next bundle from the header size.
- if (std::optional<size_t> Size =
- getCompressedBundleSize((*Buffer).getBuffer())) {
- CurBundleEnd = *Size;
- NextBundleStart = (*Buffer).getBuffer().find("CCOB", *Size);
- } else {
- // Legacy bundle without a recorded size: fall back to magic scanning.
- NextBundleStart = (*Buffer).getBuffer().find("CCOB", 4);
- CurBundleEnd = NextBundleStart;
- }
+ NextBundleStart = (*Buffer).getBuffer().find("CCOB", 4);
} else if (identify_magic((*Buffer).getBuffer()) ==
file_magic::offload_bundle) {
NextBundleStart = (*Buffer).getBuffer().find(
OFFLOAD_BUNDLER_MAGIC_STR, sizeof(OFFLOAD_BUNDLER_MAGIC_STR));
- CurBundleEnd = NextBundleStart;
} else
NextBundleStart = StringRef::npos;
ErrorOr<std::unique_ptr<MemoryBuffer>> BlobOrErr =
MemoryBuffer::getMemBuffer(
- (*Buffer).getBuffer().take_front(CurBundleEnd),
+ (*Buffer).getBuffer().take_front(NextBundleStart),
BundlerConfig.InputFileNames.front(),
/*RequiresNullTerminator=*/false);
if (std::error_code EC = BlobOrErr.getError())
diff --git a/clang/test/Driver/Inputs/clang-offload-bundler-magic-collision.co b/clang/test/Driver/Inputs/clang-offload-bundler-magic-collision.co
deleted file mode 100644
index 4a7c7e90ab084..0000000000000
Binary files a/clang/test/Driver/Inputs/clang-offload-bundler-magic-collision.co and /dev/null
diff er
diff --git a/clang/test/Driver/Inputs/clang-offload-bundler-magic-collision.py b/clang/test/Driver/Inputs/clang-offload-bundler-magic-collision.py
deleted file mode 100644
index 309a9af25a33b..0000000000000
--- a/clang/test/Driver/Inputs/clang-offload-bundler-magic-collision.py
+++ /dev/null
@@ -1,104 +0,0 @@
-#!/usr/bin/env python3
-"""Regenerate clang-offload-bundler-magic-collision.co.
-
-This input reproduces a magic-byte collision in the compressed offload bundle
-(CCOB) reader. The reader used to find the boundary between concatenated
-bundles by scanning for the literal 4-byte magic "CCOB"; because that magic can
-appear by chance inside a compressed payload, the reader could truncate a valid
-single bundle and then fail to decompress it.
-
-To make the collision deterministic we take a real compressed bundle produced
-by clang-offload-bundler and splice a zstd *skippable frame* whose body is the
-bytes "CCOB" into the compressed region. A zstd skippable frame is ignored by
-the decompressor, so the bundle still decompresses correctly, but a naive
-find("CCOB", 4) scan stops at the planted bytes.
-
-The bundle's header FileSize is updated so it remains authoritative. A reader
-that advances by FileSize handles this file correctly; a reader that scans for
-"CCOB" truncates and fails.
-
-Usage:
- clang-offload-bundler-magic-collision.py <clang-offload-bundler> [output.co]
-"""
-
-import os
-import struct
-import subprocess
-import sys
-import tempfile
-
-# zstd skippable frame magic numbers are 0x184D2A50..0x184D2A5F.
-ZSTD_SKIPPABLE_MAGIC = 0x184D2A50
-PLANTED_MAGIC = b"CCOB"
-
-
-def main():
- if len(sys.argv) < 2:
- sys.exit("usage: %s <clang-offload-bundler> [output.co]" % sys.argv[0])
- bundler = sys.argv[1]
- out = (
- sys.argv[2]
- if len(sys.argv) > 2
- else os.path.join(
- os.path.dirname(os.path.abspath(__file__)),
- "clang-offload-bundler-magic-collision.co",
- )
- )
-
- with tempfile.TemporaryDirectory() as d:
- dev1 = os.path.join(d, "dev1")
- dev2 = os.path.join(d, "dev2")
- with open(dev1, "wb") as f:
- f.write(b"Content of device file 1\n")
- with open(dev2, "wb") as f:
- f.write(b"Content of device file 2\n")
- base = os.path.join(d, "base.co")
- subprocess.run(
- [
- bundler,
- "-compress",
- "-type=bc",
- "-targets=hip-amdgcn-amd-amdhsa--gfx906,"
- "hip-amdgcn-amd-amdhsa--gfx908",
- "-input=" + dev1,
- "-input=" + dev2,
- "-output=" + base,
- ],
- check=True,
- )
- data = bytearray(open(base, "rb").read())
-
- if data[:4] != b"CCOB":
- sys.exit("unexpected magic in compressed bundle")
- version = struct.unpack_from("<H", data, 4)[0]
- if version == 2:
- header_size, fs_off, fs_fmt = 24, 8, "<I"
- elif version == 3:
- header_size, fs_off, fs_fmt = 32, 8, "<Q"
- else:
- sys.exit("unsupported compressed bundle version %d" % version)
-
- file_size = struct.unpack_from(fs_fmt, data, fs_off)[0]
- if file_size != len(data):
- sys.exit("FileSize %d != file length %d" % (file_size, len(data)))
-
- # Splice a zstd skippable frame carrying "CCOB" right after the header, so
- # the planted magic lands inside the compressed region.
- skip = struct.pack("<II", ZSTD_SKIPPABLE_MAGIC, len(PLANTED_MAGIC)) + PLANTED_MAGIC
- data = data[:header_size] + skip + data[header_size:]
- struct.pack_into(fs_fmt, data, fs_off, file_size + len(skip))
-
- planted = bytes(data).find(PLANTED_MAGIC, 4)
- if planted != header_size + 8:
- sys.exit("planted magic at unexpected offset %d" % planted)
-
- with open(out, "wb") as f:
- f.write(data)
- print(
- "wrote %s: version=%d header=%d FileSize=%d planted 'CCOB' at %d"
- % (out, version, header_size, file_size + len(skip), planted)
- )
-
-
-if __name__ == "__main__":
- main()
diff --git a/clang/test/Driver/clang-offload-bundler-magic-collision.c b/clang/test/Driver/clang-offload-bundler-magic-collision.c
deleted file mode 100644
index 97863162647a4..0000000000000
--- a/clang/test/Driver/clang-offload-bundler-magic-collision.c
+++ /dev/null
@@ -1,27 +0,0 @@
-// REQUIRES: zstd
-
-// Regression test for a magic-byte collision in the compressed offload bundle
-// (CCOB) reader. To find the boundary between concatenated bundles the reader
-// used to scan for the literal 4-byte magic "CCOB"; those bytes can appear by
-// chance inside a compressed payload, which truncated a valid single bundle and
-// made decompression fail with "Src size is incorrect".
-//
-// Inputs/clang-offload-bundler-magic-collision.co is a single compressed bundle
-// whose compressed region embeds the bytes "CCOB" inside a zstd skippable frame
-// (regenerate with Inputs/clang-offload-bundler-magic-collision.py). The frame
-// is ignored by the decompressor, so the bundle is valid, but a naive
-// find("CCOB", 4) scan stops at the planted bytes. A reader that advances by
-// the header's FileSize handles this file correctly.
-
-// RUN: clang-offload-bundler -type=bc -list \
-// RUN: -input=%S/Inputs/clang-offload-bundler-magic-collision.co \
-// RUN: | FileCheck %s --check-prefix=LIST
-// LIST-DAG: hip-amdgcn-amd-amdhsa--gfx906
-// LIST-DAG: hip-amdgcn-amd-amdhsa--gfx908
-
-// RUN: clang-offload-bundler -type=bc -unbundle \
-// RUN: -targets=hip-amdgcn-amd-amdhsa--gfx906 \
-// RUN: -input=%S/Inputs/clang-offload-bundler-magic-collision.co \
-// RUN: -output=%t.gfx906
-// RUN: FileCheck %s --check-prefix=EXTRACT --input-file=%t.gfx906
-// EXTRACT: Content of device file 1
diff --git a/llvm/lib/Object/OffloadBundle.cpp b/llvm/lib/Object/OffloadBundle.cpp
index cec97a330927a..93fb2ab1affc7 100644
--- a/llvm/lib/Object/OffloadBundle.cpp
+++ b/llvm/lib/Object/OffloadBundle.cpp
@@ -28,18 +28,6 @@ using namespace llvm::object;
static TimerGroup OffloadBundlerTimerGroup("Offload Bundler Timer Group",
"Timer group for offload bundler");
-// Returns the on-disk size recorded in the compressed offload bundle header at
-// the start of \p Blob, or std::nullopt if the header carries no size field.
-static std::optional<size_t> getCompressedBundleSize(StringRef Blob) {
- Expected<CompressedOffloadBundle::CompressedBundleHeader> HeaderOrErr =
- CompressedOffloadBundle::CompressedBundleHeader::tryParse(Blob);
- if (!HeaderOrErr) {
- consumeError(HeaderOrErr.takeError());
- return std::nullopt;
- }
- return HeaderOrErr->FileSize;
-}
-
// Extract an Offload bundle (usually a Offload Bundle) from a fat_bin
// section.
Error extractOffloadBundle(MemoryBufferRef Contents, uint64_t SectionOffset,
@@ -61,26 +49,15 @@ Error extractOffloadBundle(MemoryBufferRef Contents, uint64_t SectionOffset,
if (identify_magic((*Buffer).getBuffer()) ==
file_magic::offload_bundle_compressed) {
Magic = "CCOB";
- // Locate this bundle's end and the next bundle from the header size.
- size_t CurBundleEnd;
- if (std::optional<size_t> Size =
- getCompressedBundleSize((*Buffer).getBuffer())) {
- CurBundleEnd = *Size;
- NextbundleStart = (*Buffer).getBuffer().find(Magic, *Size);
- } else {
- // Legacy bundle without a recorded size: fall back to magic scanning.
- NextbundleStart = (*Buffer).getBuffer().find(Magic, Magic.size());
- CurBundleEnd = NextbundleStart;
- }
- if (NextbundleStart == StringRef::npos) {
+ // Decompress this bundle first.
+ NextbundleStart = (*Buffer).getBuffer().find(Magic, Magic.size());
+ if (NextbundleStart == StringRef::npos)
NextbundleStart = (*Buffer).getBuffer().size();
- if (CurBundleEnd == StringRef::npos)
- CurBundleEnd = (*Buffer).getBuffer().size();
- }
ErrorOr<std::unique_ptr<MemoryBuffer>> CodeOrErr =
MemoryBuffer::getMemBuffer(
- (*Buffer).getBuffer().take_front(CurBundleEnd), FileName, false);
+ (*Buffer).getBuffer().take_front(NextbundleStart), FileName,
+ false);
if (std::error_code EC = CodeOrErr.getError())
return createFileError(FileName, EC);
diff --git a/llvm/test/tools/llvm-objdump/Offloading/fatbin-magic-collision.test b/llvm/test/tools/llvm-objdump/Offloading/fatbin-magic-collision.test
deleted file mode 100644
index 155282e8a160e..0000000000000
--- a/llvm/test/tools/llvm-objdump/Offloading/fatbin-magic-collision.test
+++ /dev/null
@@ -1,36 +0,0 @@
-## Regression test for a magic-byte collision in the compressed offload bundle
-## (CCOB) reader used by --offloading (llvm::object::extractOffloadBundle).
-##
-## The reader used to find the boundary between concatenated bundles by scanning
-## for the literal 4-byte magic "CCOB". Those bytes can appear by chance inside a
-## compressed payload, which truncated a valid single bundle so it failed to
-## decompress. The .hip_fatbin below is a single compressed bundle whose
-## compressed region embeds the bytes "CCOB" inside a zstd skippable frame
-## (ignored by the decompressor); a reader that advances by the header's FileSize
-## handles it correctly. Regenerate the bundle with
-## clang/test/Driver/Inputs/clang-offload-bundler-magic-collision.py.
-
-# Compressed payload is little-endian.
-# REQUIRES: zstd, host-byteorder-little-endian
-# RUN: yaml2obj %s -o %t.elf
-# RUN: llvm-objdump --offloading %t.elf | FileCheck %s
-# RUN: FileCheck %s --check-prefix=DEV1 \
-# RUN: --input-file=%t.elf.0.hip-amdgcn-amd-amdhsa--gfx906
-
-# CHECK: Extracting offload bundle: {{.*}}.elf.0.hip-amdgcn-amd-amdhsa--gfx906
-# CHECK: Extracting offload bundle: {{.*}}.elf.0.hip-amdgcn-amd-amdhsa--gfx908
-# DEV1: Content of device file 1
-
---- !ELF
-FileHeader:
- Class: ELFCLASS64
- Data: ELFDATA2LSB
- Type: ET_EXEC
- Machine: EM_X86_64
-Sections:
- - Name: .hip_fatbin
- Type: SHT_PROGBITS
- Flags: [ SHF_ALLOC ]
- AddressAlign: 0x1000
- Content: 43434F42030001009D00000000000000BC00000000000000C4C20F3D5905B03E502A4D180400000043434F4228B52FFD20BC45030024055F5F434C414E475F4F46464C4F41445F42554E444C455F5F02008A0019001D006869702D616D6467636E6873612D2D676678393036A338436F6E74656E74206F66206465766963652066696C6520310A320A0800F2AC07B03ADAED52591D600C0606033B8031
-...
More information about the cfe-commits
mailing list