[clang] d60c474 - [Clang] Propagate target-features if compatible when using mlink-builtin-bitcode
Juan Manuel MARTINEZ CAAMAÑO via cfe-commits
cfe-commits at lists.llvm.org
Fri Sep 8 02:20:29 PDT 2023
Author: Juan Manuel MARTINEZ CAAMAÑO
Date: 2023-09-08T11:20:16+02:00
New Revision: d60c47476dde601f1e48d44717ef23b7f1006fe6
URL: https://github.com/llvm/llvm-project/commit/d60c47476dde601f1e48d44717ef23b7f1006fe6
DIFF: https://github.com/llvm/llvm-project/commit/d60c47476dde601f1e48d44717ef23b7f1006fe6.diff
LOG: [Clang] Propagate target-features if compatible when using mlink-builtin-bitcode
Buitlins from AMD's device-libs are compiled without specifying a
target-cpu, which results in builtins without the target-features
attribute set.
Before this patch, when linking this builtins with -mlink-builtin-bitcode
the target-features were not propagated in the incoming builtins.
With this patch, the default target features are propagated
if they are compatible with the target-features in the incoming builtin.
Reviewed By: arsenm
Differential Revision: https://reviews.llvm.org/D159206
Added:
Modified:
clang/lib/CodeGen/CGCall.cpp
clang/test/CodeGen/link-builtin-bitcode.c
clang/test/CodeGenCUDA/link-builtin-bitcode-gpu-attrs-preserved.cu
Removed:
################################################################################
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 177059191bf4948..37de2800fd69c02 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -2001,6 +2001,54 @@ static void getTrivialDefaultFunctionAttributes(
}
}
+static void
+overrideFunctionFeaturesWithTargetFeatures(llvm::AttrBuilder &FuncAttr,
+ const llvm::Function &F,
+ const TargetOptions &TargetOpts) {
+ auto FFeatures = F.getFnAttribute("target-features");
+
+ llvm::StringSet<> IncompatibleFeatureNames;
+ SmallVector<StringRef> MergedFeatures;
+ MergedFeatures.reserve(TargetOpts.Features.size());
+
+ if (FFeatures.isValid()) {
+ const auto &TFeatures = TargetOpts.FeatureMap;
+ for (StringRef Feature : llvm::split(FFeatures.getValueAsString(), ',')) {
+ if (Feature.empty())
+ continue;
+
+ bool EnabledForFunc = Feature.starts_with("+");
+ assert(EnabledForFunc || Feature.starts_with("-"));
+
+ StringRef Name = Feature.drop_front(1);
+ auto TEntry = TFeatures.find(Name);
+
+ // Preserves features that are incompatible (either set to something
+ //
diff erent or missing) from the target features
+ bool MissingFromTarget = TEntry == TFeatures.end();
+ bool EnabledForTarget = !MissingFromTarget && TEntry->second;
+ bool Incompatible = EnabledForTarget != EnabledForFunc;
+ if (MissingFromTarget || Incompatible) {
+ MergedFeatures.push_back(Feature);
+ if (Incompatible)
+ IncompatibleFeatureNames.insert(Name);
+ }
+ }
+ }
+
+ for (StringRef Feature : TargetOpts.Features) {
+ if (Feature.empty())
+ continue;
+ StringRef Name = Feature.drop_front(1);
+ if (IncompatibleFeatureNames.contains(Name))
+ continue;
+ MergedFeatures.push_back(Feature);
+ }
+
+ if (!MergedFeatures.empty())
+ FuncAttr.addAttribute("target-features", llvm::join(MergedFeatures, ","));
+}
+
void CodeGen::mergeDefaultFunctionDefinitionAttributes(
llvm::Function &F, const CodeGenOptions &CodeGenOpts,
const LangOptions &LangOpts, const TargetOptions &TargetOpts,
@@ -2058,6 +2106,9 @@ void CodeGen::mergeDefaultFunctionDefinitionAttributes(
F.removeFnAttrs(AttrsToRemove);
addDenormalModeAttrs(Merged, MergedF32, FuncAttrs);
+
+ overrideFunctionFeaturesWithTargetFeatures(FuncAttrs, F, TargetOpts);
+
F.addFnAttrs(FuncAttrs);
}
diff --git a/clang/test/CodeGen/link-builtin-bitcode.c b/clang/test/CodeGen/link-builtin-bitcode.c
index b3b54badf3f827e..fe60a9746f1c85f 100644
--- a/clang/test/CodeGen/link-builtin-bitcode.c
+++ b/clang/test/CodeGen/link-builtin-bitcode.c
@@ -1,42 +1,49 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-attributes --check-globals --include-generated-funcs --version 2
+// Build two version of the bitcode library, one with a target-cpu set and one without
// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx803 -DBITCODE -emit-llvm-bc -o %t-lib.bc %s
+// RUN: %clang_cc1 -triple amdgcn-- -DBITCODE -emit-llvm-bc -o %t-lib.no-cpu.bc %s
+
// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx90a -emit-llvm-bc -o %t.bc %s
// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx90a -emit-llvm \
// RUN: -mlink-builtin-bitcode %t-lib.bc -o - %t.bc | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx90a -emit-llvm-bc -o %t.bc %s
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx90a -emit-llvm \
+// RUN: -mlink-builtin-bitcode %t-lib.no-cpu.bc -o - %t.bc | FileCheck %s
+
#ifdef BITCODE
-int foo(void) { return 42; }
+int no_attr(void) { return 42; }
+int __attribute__((target("gfx8-insts"))) attr_in_target(void) { return 42; }
+int __attribute__((target("extended-image-insts"))) attr_not_in_target(void) { return 42; }
+int __attribute__((target("no-gfx9-insts"))) attr_incompatible(void) { return 42; }
int x = 12;
#endif
-extern int foo(void);
+extern int no_attr(void);
+extern int attr_in_target(void);
+extern int attr_not_in_target(void);
+extern int attr_incompatible(void);
extern int x;
-int bar() { return foo() + x; }
-//.
+int bar() { return no_attr() + attr_in_target() + attr_not_in_target() + attr_incompatible() + x; }
+
// CHECK: @x = internal addrspace(1) global i32 12, align 4
-//.
-// CHECK: Function Attrs: noinline nounwind optnone
+
// CHECK-LABEL: define dso_local i32 @bar
-// CHECK-SAME: () #[[ATTR0:[0-9]+]] {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// CHECK-NEXT: [[CALL:%.*]] = call i32 @foo()
-// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspacecast (ptr addrspace(1) @x to ptr), align 4
-// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[CALL]], [[TMP0]]
-// CHECK-NEXT: ret i32 [[ADD]]
+// CHECK-SAME: () #[[ATTR_BAR:[0-9]+]] {
//
-//
-// CHECK: Function Attrs: convergent noinline nounwind optnone
-// CHECK-LABEL: define internal i32 @foo
-// CHECK-SAME: () #[[ATTR1:[0-9]+]] {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// CHECK-NEXT: ret i32 42
-//
-//.
-// CHECK: attributes #0 = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" }
-// CHECK: attributes #1 = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" }
-//.
+// CHECK-LABEL: define internal i32 @no_attr
+// CHECK-SAME: () #[[ATTR_COMPATIBLE:[0-9]+]] {
+
+// CHECK-LABEL: define internal i32 @attr_in_target
+// CHECK-SAME: () #[[ATTR_COMPATIBLE:[0-9]+]] {
+
+// CHECK-LABEL: define internal i32 @attr_not_in_target
+// CHECK-SAME: () #[[ATTR_EXTEND:[0-9]+]] {
+
+// CHECK-LABEL: @attr_incompatible
+// CHECK-SAME: () #[[ATTR_INCOMPATIBLE:[0-9]+]] {
+
+// CHECK: attributes #[[ATTR_BAR]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" }
+// CHECK: attributes #[[ATTR_COMPATIBLE]] = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gws,+image-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" }
+// CHECK: attributes #[[ATTR_EXTEND]] = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx90a" "target-features"="+extended-image-insts,+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gws,+image-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" }
+// CHECK: attributes #[[ATTR_INCOMPATIBLE]] = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx90a" "target-features"="-gfx9-insts,+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx90a-insts,+gws,+image-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" }
diff --git a/clang/test/CodeGenCUDA/link-builtin-bitcode-gpu-attrs-preserved.cu b/clang/test/CodeGenCUDA/link-builtin-bitcode-gpu-attrs-preserved.cu
index a2c8faec4758648..07e8fc71de31f00 100644
--- a/clang/test/CodeGenCUDA/link-builtin-bitcode-gpu-attrs-preserved.cu
+++ b/clang/test/CodeGenCUDA/link-builtin-bitcode-gpu-attrs-preserved.cu
@@ -31,7 +31,7 @@
// CHECK: define {{.*}} i64 @do_intrin_stuff() #[[ATTR:[0-9]+]]
-// INTERNALIZE: attributes #[[ATTR]] = {{.*}} "target-cpu"="gfx{{.*}}" "target-features"="+gfx11-insts"
+// INTERNALIZE: attributes #[[ATTR]] = {{.*}} "target-cpu"="gfx{{.*}}" "target-features"="{{.*}}+gfx11-insts{{.*}}"
// NOINTERNALIZE: attributes #[[ATTR]] = {{.*}} "target-features"="+gfx11-insts"
#define __device__ __attribute__((device))
More information about the cfe-commits
mailing list