[clang] [LinkerWrapper] Remove special handling for archives (PR #114843)

Mon Nov 4 10:00:19 PST 2024

llvmbot wrote:



@llvm/pr-subscribers-clang

@llvm/pr-subscribers-clang-driver

Author: Joseph Huber (jhuber6)

<details>
<summary>Changes</summary>

Summary:
Previously we extracted archives and did special symbol resolution on
them, this was mostly done because the nvlink executable couldn't handle
archives natively. Since I have added a wrapper around it that handles
that, we can now remove a lot of this complexity and just create a new
archive and pass it to the device linker.

The one issue here is that for offloading code, we often have references
to kernels that do not come from the device link job, but from the host
that wishes to call them. Under normal circumstances we would ignore
them because they are not referenced, but we need them. For now I am
just passing `--whole-archive`. In a future patch I will likely add some
logic to create a linker script that will tell the linker which symbols
we want to be extracted. That won't work for NVPTX but it's the most
canonical solution.

Also some ugliness in this patch where I need to juggle whether or not
the input is an archive and throw pairs around, I may merge that into
the `OffloadFile` struct in a later patch.


---

Patch is 34.16 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/114843.diff


3 Files Affected:

- (removed) clang/test/Driver/linker-wrapper-libs.c (-191) 
- (modified) clang/test/Driver/linker-wrapper.c (+8-8) 
- (modified) clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp (+76-240) 


``````````diff

diff --git a/clang/test/Driver/linker-wrapper-libs.c b/clang/test/Driver/linker-wrapper-libs.c
deleted file mode 100644
index cb5c7c137a0bab..00000000000000
--- a/clang/test/Driver/linker-wrapper-libs.c
+++ /dev/null
@@ -1,191 +0,0 @@
-// REQUIRES: x86-registered-target
-// REQUIRES: nvptx-registered-target
-// REQUIRES: amdgpu-registered-target
-
-// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.elf.o
-
-#if defined(RESOLVES)
-int __attribute__((visibility("hidden"))) sym;
-#elif defined(GLOBAL)
-int __attribute__((visibility("protected"))) global;
-#elif defined(WEAK)
-int __attribute__((visibility("hidden"))) weak;
-#elif defined(HIDDEN)
-int __attribute__((visibility("hidden"))) hidden;
-#elif defined(UNDEFINED)
-extern int sym;
-int baz() { return sym; }
-#else
-extern int sym;
-
-extern int __attribute__((weak)) weak;
-
-int foo() { return sym; }
-int bar() { return weak; }
-#endif
-
-//
-// Check that we extract a static library defining an undefined symbol.
-//
-// RUN: %clang -cc1 %s -triple nvptx64-nvidia-cuda -emit-llvm-bc -DRESOLVES -o %t.nvptx.resolves.bc
-// RUN: %clang -cc1 %s -triple amdgcn-amd-amdhsa -emit-llvm-bc -DRESOLVES -o %t.amdgpu.resolves.bc
-// RUN: %clang -cc1 %s -triple nvptx64-nvidia-cuda -emit-llvm-bc -DUNDEFINED -o %t.nvptx.undefined.bc
-// RUN: %clang -cc1 %s -triple amdgcn-amd-amdhsa -emit-llvm-bc -DUNDEFINED -o %t.amdgpu.undefined.bc
-// RUN: clang-offload-packager -o %t-lib.out \
-// RUN:   --image=file=%t.nvptx.undefined.bc,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \
-// RUN:   --image=file=%t.amdgpu.undefined.bc,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx1030 \
-// RUN:   --image=file=%t.nvptx.resolves.bc,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \
-// RUN:   --image=file=%t.amdgpu.resolves.bc,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx1030
-// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t-lib.out
-// RUN: llvm-ar rcs %t.a %t.o
-// RUN: clang-offload-packager -o %t.out \
-// RUN:   --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \
-// RUN:   --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx1030
-// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out
-// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \
-// RUN:   --linker-path=/usr/bin/ld %t.o %t.a -o a.out 2>&1 \
-// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \
-// RUN:   --linker-path=/usr/bin/ld %t.a %t.o -o a.out 2>&1 \
-// RUN: | FileCheck %s --check-prefix=LIBRARY-RESOLVES
-
-// LIBRARY-RESOLVES: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 {{.*}}.o {{.*}}.o
-// LIBRARY-RESOLVES: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 {{.*}}.o {{.*}}.o
-
-//
-// Check that we extract a static library that defines a global visibile to the
-// host.
-//
-// RUN: %clang -cc1 %s -triple nvptx64-nvidia-cuda -emit-llvm-bc -DGLOBAL -o %t.nvptx.global.bc
-// RUN: %clang -cc1 %s -triple amdgcn-amd-amdhsa -emit-llvm-bc -DGLOBAL -o %t.amdgpu.global.bc
-// RUN: clang-offload-packager -o %t-lib.out \
-// RUN:   --image=file=%t.nvptx.global.bc,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \
-// RUN:   --image=file=%t.amdgpu.global.bc,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx1030
-// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t-lib.out
-// RUN: llvm-ar rcs %t.a %t.o
-// RUN: clang-offload-packager -o %t.out \
-// RUN:   --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \
-// RUN:   --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx1030
-// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out
-// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \
-// RUN:   --linker-path=/usr/bin/ld %t.o %t.a -o a.out 2>&1 \
-// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \
-// RUN:   --linker-path=/usr/bin/ld %t.a %t.o -o a.out 2>&1 \
-// RUN: | FileCheck %s --check-prefix=LIBRARY-GLOBAL
-
-// LIBRARY-GLOBAL: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 {{.*}}.o {{.*}}.o
-// LIBRARY-GLOBAL: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 {{.*}}.o {{.*}}.o
-
-//
-// Check that we do not extract a global symbol if the source file was not
-// created by an offloading language that expects there to be a host version of
-// the symbol.
-//
-// RUN: %clang -cc1 %s -triple nvptx64-nvidia-cuda -emit-llvm-bc -DGLOBAL -o %t.nvptx.global.bc
-// RUN: %clang -cc1 %s -triple amdgcn-amd-amdhsa -emit-llvm-bc -DGLOBAL -o %t.amdgpu.global.bc
-// RUN: clang-offload-packager -o %t-lib.out \
-// RUN:   --image=file=%t.nvptx.global.bc,triple=nvptx64-nvidia-cuda,arch=sm_70 \
-// RUN:   --image=file=%t.amdgpu.global.bc,triple=amdgcn-amd-amdhsa,arch=gfx1030
-// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t-lib.out
-// RUN: llvm-ar rcs %t.a %t.o
-// RUN: clang-offload-packager -o %t.out \
-// RUN:   --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \
-// RUN:   --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx1030
-// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out
-// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \
-// RUN:   --linker-path=/usr/bin/ld %t.o %t.a -o a.out 2>&1 \
-// RUN: | FileCheck %s --check-prefix=LIBRARY-GLOBAL-NONE
-
-// LIBRARY-GLOBAL-NONE-NOT: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 {{.*}}.o {{.*}}.o
-// LIBRARY-GLOBAL-NONE-NOT: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 {{.*}}.o {{.*}}.o
-
-//
-// Check that we do not extract an external weak symbol.
-//
-// RUN: %clang -cc1 %s -triple nvptx64-nvidia-cuda -emit-llvm-bc -DWEAK -o %t.nvptx.weak.bc
-// RUN: %clang -cc1 %s -triple amdgcn-amd-amdhsa -emit-llvm-bc -DWEAK -o %t.amdgpu.weak.bc
-// RUN: clang-offload-packager -o %t-lib.out \
-// RUN:   --image=file=%t.nvptx.weak.bc,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \
-// RUN:   --image=file=%t.amdgpu.weak.bc,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx1030
-// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t-lib.out
-// RUN: llvm-ar rcs %t.a %t.o
-// RUN: clang-offload-packager -o %t.out \
-// RUN:   --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \
-// RUN:   --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx1030
-// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out
-// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \
-// RUN:   --linker-path=/usr/bin/ld %t.o %t.a -o a.out 2>&1 \
-// RUN: | FileCheck %s --check-prefix=LIBRARY-WEAK
-
-// LIBRARY-WEAK: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70
-// LIBRARY-WEAK-NOT: {{.*}}.o {{.*}}.o
-// LIBRARY-WEAK: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030
-
-//
-// Check that we do not extract an unneeded hidden symbol.
-//
-// RUN: %clang -cc1 %s -triple nvptx64-nvidia-cuda -emit-llvm-bc -DHIDDEN -o %t.nvptx.hidden.bc
-// RUN: %clang -cc1 %s -triple amdgcn-amd-amdhsa -emit-llvm-bc -DHIDDEN -o %t.amdgpu.hidden.bc
-// RUN: clang-offload-packager -o %t-lib.out \
-// RUN:   --image=file=%t.nvptx.hidden.bc,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \
-// RUN:   --image=file=%t.amdgpu.hidden.bc,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx1030
-// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t-lib.out
-// RUN: llvm-ar rcs %t.a %t.o
-// RUN: clang-offload-packager -o %t.out \
-// RUN:   --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \
-// RUN:   --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx1030
-// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out
-// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \
-// RUN:   --linker-path=/usr/bin/ld %t.o %t.a -o a.out 2>&1 \
-// RUN: | FileCheck %s --check-prefix=LIBRARY-HIDDEN
-
-// LIBRARY-HIDDEN: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70
-// LIBRARY-HIDDEN-NOT: {{.*}}.o {{.*}}.o
-// LIBRARY-HIDDEN: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030
-
-//
-// Check that we do not extract a static library that defines a global visibile
-// to the host that is already defined.
-//
-// RUN: %clang -cc1 %s -triple nvptx64-nvidia-cuda -emit-llvm-bc -DGLOBAL -o %t.nvptx.global.bc
-// RUN: %clang -cc1 %s -triple amdgcn-amd-amdhsa -emit-llvm-bc -DGLOBAL -o %t.amdgpu.global.bc
-// RUN: clang-offload-packager -o %t-lib.out \
-// RUN:   --image=file=%t.nvptx.global.bc,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \
-// RUN:   --image=file=%t.amdgpu.global.bc,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx1030
-// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t-lib.out
-// RUN: llvm-ar rcs %t.a %t.o
-// RUN: clang-offload-packager -o %t.out \
-// RUN:   --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \
-// RUN:   --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx1030
-// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out
-// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \
-// RUN:   --linker-path=/usr/bin/ld %t.o %t.a %t.a -o a.out 2>&1 \
-// RUN: | FileCheck %s --check-prefix=LIBRARY-GLOBAL-DEFINED
-
-// LIBRARY-GLOBAL-DEFINED: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 {{.*}}.o {{.*}}.o
-// LIBRARY-GLOBAL-DEFINED-NOT: {{.*}}gfx1030{{.*}}.o
-// LIBRARY-GLOBAL-DEFINED: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 {{.*}}.o {{.*}}.o
-
-//
-// Check that we can use --[no-]whole-archive to control extraction.
-//
-// RUN: %clang -cc1 %s -triple nvptx64-nvidia-cuda -emit-llvm-bc -DGLOBAL -o %t.nvptx.global.bc
-// RUN: %clang -cc1 %s -triple amdgcn-amd-amdhsa -emit-llvm-bc -DGLOBAL -o %t.amdgpu.global.bc
-// RUN: clang-offload-packager -o %t-lib.out \
-// RUN:   --image=file=%t.nvptx.global.bc,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \
-// RUN:   --image=file=%t.nvptx.global.bc,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_52 \
-// RUN:   --image=file=%t.amdgpu.global.bc,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx1030 \
-// RUN:   --image=file=%t.amdgpu.global.bc,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx90a
-// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t-lib.out
-// RUN: llvm-ar rcs %t.a %t.o
-// RUN: clang-offload-packager -o %t.out \
-// RUN:   --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \
-// RUN:   --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx1030
-// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out
-// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \
-// RUN:   --linker-path=/usr/bin/ld %t.o --whole-archive %t.a -o a.out 2>&1 \
-// RUN: | FileCheck %s --check-prefix=LIBRARY-WHOLE-ARCHIVE
-
-// LIBRARY-WHOLE-ARCHIVE: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 {{.*}}.o {{.*}}.o
-// LIBRARY-WHOLE-ARCHIVE: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 {{.*}}.o {{.*}}.o
-// LIBRARY-WHOLE-ARCHIVE: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_52 {{.*}}.o
-// LIBRARY-WHOLE-ARCHIVE: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx90a {{.*}}.o
diff --git a/clang/test/Driver/linker-wrapper.c b/clang/test/Driver/linker-wrapper.c
index 470af4d5d70cac..fad188be0dd861 100644
--- a/clang/test/Driver/linker-wrapper.c
+++ b/clang/test/Driver/linker-wrapper.c
@@ -30,7 +30,7 @@ __attribute__((visibility("protected"), used)) int x;
 // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run --device-debug -O0 \
 // RUN:   --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=NVPTX-LINK-DEBUG
 
-// NVPTX-LINK-DEBUG: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 -O2 -flto {{.*}}.o {{.*}}.o -g 
+// NVPTX-LINK-DEBUG: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 -O2 -flto -Wl,--whole-archive {{.*}}.o {{.*}}.o -Wl,--no-whole-archive -g 
 
 // RUN: clang-offload-packager -o %t.out \
 // RUN:   --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908 \
@@ -39,7 +39,7 @@ __attribute__((visibility("protected"), used)) int x;
 // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \
 // RUN:   --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=AMDGPU-LINK
 
-// AMDGPU-LINK: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx908 -O2 -flto -Wl,--no-undefined {{.*}}.o {{.*}}.o
+// AMDGPU-LINK: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx908 -O2 -flto -Wl,--no-undefined -Wl,--whole-archive {{.*}}.o {{.*}}.o
 
 // RUN: clang-offload-packager -o %t.out \
 // RUN:   --image=file=%t.amdgpu.bc,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx1030 \
@@ -48,7 +48,7 @@ __attribute__((visibility("protected"), used)) int x;
 // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run --save-temps -O2 \
 // RUN:   --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=AMDGPU-LTO-TEMPS
 
-// AMDGPU-LTO-TEMPS: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 -O2 -flto -Wl,--no-undefined {{.*}}.o -save-temps
+// AMDGPU-LTO-TEMPS: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 -O2 -flto -Wl,--no-undefined -Wl,--whole-archive {{.*}}.o -Wl,--no-whole-archive -save-temps
 
 // RUN: clang-offload-packager -o %t.out \
 // RUN:   --image=file=%t.elf.o,kind=openmp,triple=x86_64-unknown-linux-gnu \
@@ -59,7 +59,7 @@ __attribute__((visibility("protected"), used)) int x;
 // RUN:   --linker-path=/usr/bin/ld.lld --whole-archive %t.a --no-whole-archive \
 // RUN:   %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=CPU-LINK
 
-// CPU-LINK: clang{{.*}} -o {{.*}}.img --target=x86_64-unknown-linux-gnu -march=native -O2 -flto -Wl,--no-undefined {{.*}}.o {{.*}}.o -Wl,-Bsymbolic -shared -Wl,--whole-archive {{.*}}.a -Wl,--no-whole-archive
+// CPU-LINK: clang{{.*}} -o {{.*}}.img --target=x86_64-unknown-linux-gnu -march=native -O2 -flto -Wl,--no-undefined -Wl,--whole-archive {{.*}}.o {{.*}}.o {{.*}}.o {{.*}}.o -Wl,--no-whole-archive -Wl,-Bsymbolic -shared -Wl,--whole-archive {{.*}}.a -Wl,--no-whole-archive
 
 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o
 // RUN: clang-linker-wrapper --dry-run --host-triple=x86_64-unknown-linux-gnu -mllvm -openmp-opt-disable \
@@ -171,8 +171,8 @@ __attribute__((visibility("protected"), used)) int x;
 // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \
 // RUN:   --linker-path=/usr/bin/ld %t-on.o %t-off.o %t.a -o a.out 2>&1 | FileCheck %s --check-prefix=AMD-TARGET-ID
 
-// AMD-TARGET-ID: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx90a:xnack+ -O2 -flto -Wl,--no-undefined {{.*}}.o {{.*}}.o
-// AMD-TARGET-ID: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx90a:xnack- -O2 -flto -Wl,--no-undefined {{.*}}.o {{.*}}.o
+// AMD-TARGET-ID: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx90a:xnack+ -O2 -flto -Wl,--no-undefined -Wl,--whole-archive {{.*}}.o {{.*}}.a
+// AMD-TARGET-ID: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx90a:xnack- -O2 -flto -Wl,--no-undefined -Wl,--whole-archive {{.*}}.o {{.*}}.a
 
 // RUN: clang-offload-packager -o %t-lib.out \
 // RUN:   --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=generic
@@ -187,8 +187,8 @@ __attribute__((visibility("protected"), used)) int x;
 // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \
 // RUN:   --linker-path=/usr/bin/ld %t1.o %t2.o %t.a -o a.out 2>&1 | FileCheck %s --check-prefix=ARCH-ALL
 
-// ARCH-ALL: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx90a -O2 -flto -Wl,--no-undefined {{.*}}.o {{.*}}.o
-// ARCH-ALL: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx908 -O2 -flto -Wl,--no-undefined {{.*}}.o {{.*}}.o
+// ARCH-ALL: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx90a -O2 -flto -Wl,--no-undefined -Wl,--whole-archive {{.*}}.o {{.*}}.a
+// ARCH-ALL: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx908 -O2 -flto -Wl,--no-undefined -Wl,--whole-archive {{.*}}.o {{.*}}.a
 
 // RUN: clang-offload-packager -o %t.out \
 // RUN:   --image=file=%t.elf.o,kind=openmp,triple=x86_64-unknown-linux-gnu \
diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
index ebafd7eb7774ec..ef3dea10300898 100644
--- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
+++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
@@ -505,8 +505,13 @@ Expected<StringRef> clang(ArrayRef<StringRef> InputFiles, const ArgList &Args) {
   if (!Triple.isNVPTX())
     CmdArgs.push_back("-Wl,--no-undefined");
 
+  // FIXME: Any input archives need to be extracted fully, otherwise device
+  // kernels will not always be extracted. This should be handled with a linker
+  // script on targets that support it.
+  CmdArgs.push_back("-Wl,--whole-archive");
   for (StringRef InputFile : InputFiles)
     CmdArgs.push_back(InputFile);
+  CmdArgs.push_back("-Wl,--no-whole-archive");
 
   // If this is CPU offloading we copy the input libraries.
   if (!Triple.isAMDGPU() && !Triple.isNVPTX()) {
@@ -808,7 +813,7 @@ bundleLinkedOutput(ArrayRef<OffloadingImage> Images, const ArgList &Args,
 }
 
 /// Returns a new ArgList containg arguments used for the device linking phase.
-DerivedArgList getLinkerArgs(ArrayRef<OffloadFile> Input,
+DerivedArgList getLinkerArgs(ArrayRef<std::pair<bool, OffloadFile>> Input,
                              const InputArgList &Args) {
   DerivedArgList DAL = DerivedArgList(DerivedArgList(Args));
   for (Arg *A : Args)
@@ -816,15 +821,18 @@ DerivedArgList getLinkerArgs(ArrayRef<OffloadFile> Input,
 
   // Set the subarchitecture and target triple for this compilation.
   const OptTable &Tbl = getOptTable();
-  DAL.AddJoinedArg(nullptr, Tbl.getOption(OPT_arch_EQ),
-                   Args.MakeArgString(Input.front().getBinary()->getArch()));
-  DAL.AddJoinedArg(nullptr, Tbl.getOption(OPT_triple_EQ),
-                   Args.MakeArgString(Input.front().getBinary()->getTriple()));
+  DAL.AddJoinedArg(
+      nullptr, Tbl.getOption(OPT_arch_EQ),
+      Args.MakeArgString(Input.front().second.getBinary()->getArch()));
+  DAL.AddJoinedArg(
+      nullptr, Tbl.getOption(OPT_triple_EQ),
+      Args.MakeArgString(Input.front().second.getBinary()->getTriple()));
 
   // If every input file is bitcode we have whole program visibility as we
   // do only support static linking with bitcode.
-  auto ContainsBitcode = [](const OffloadFile &F) {
-    return identify_magic(F.getBinary()->getImage()) == file_magic::bitcode;
+  auto ContainsBitcode = [](const std::pair<bool, OffloadFile> &F) {
+    return identify_magic(F.second.getBinary()->getImage()) ==
+           file_magic::bitcode;
   };
   if (llvm::all_of(Input, ContainsBitcode))
     DAL.AddFlagArg(nullptr, Tbl.getOption(OPT_whole_program));
@@ -900,7 +908,8 @@ Error handleOverrideImages(
 /// Transforms all the extracted offloading input files into an image that can
 /// be registered by the runtime.
 Expected<SmallVector<StringRef>> linkAndWrapDeviceFiles(
-    SmallVectorImpl<SmallVector<OffloadFile>> &LinkerInputFiles,
+    SmallVectorImpl<SmallVector<std::pair<bool, OffloadFile>>>
+        &LinkerInputFiles,
     const InputArgList &Args, char **Argv, int Argc) {
   llvm::TimeTraceScope TimeScope("Handle all device input");
 
@@ -927,17 +936,44 @@ Expected<SmallVector<StringRef>> linkAndWrapDeviceFiles(
     auto LinkerArgs = getLinkerArgs(Input, BaseArgs);
 
     DenseSet<OffloadKind> ActiveOffloadKinds;
-    for (const auto &File : ...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/114843