[clang] [RFC][Clang][AMDGPU] Emit only delta target-features to reduce IR bloat (PR #176533)
Shilei Tian via cfe-commits
cfe-commits at lists.llvm.org
Tue Jan 20 07:02:07 PST 2026
https://github.com/shiltian updated https://github.com/llvm/llvm-project/pull/176533
>From 173ec06dd54cc03f399f5110e58dccf22291e047 Mon Sep 17 00:00:00 2001
From: Shilei Tian <i at tianshilei.me>
Date: Fri, 16 Jan 2026 22:39:13 -0500
Subject: [PATCH 1/3] [RFC][Clang][AMDGPU] Emit only delta target-features to
reduce IR bloat
Currently, AMDGPU functions have `target-features` attribute populated with all default features for the target GPU. This is redundant because the backend can derive these defaults from the `target-cpu` attribute via `AMDGPUTargetMachine::getFeatureString()`.
In this PR, for AMDGPU targets only:
- Functions without explicit target attributes no longer emit `target-features`
- Functions with `__attribute__((target(...)))` or `-target-feature` emit only features that differ from the target's defaults (delta)
The backend already handles missing `target-features` correctly by falling back to the TargetMachine's defaults.
A new cc1 flag `-famdgpu-emit-full-target-features` is added to emit full features when needed.
Example:
Before:
```llvm
attributes #0 = { "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,..." }
```
After (default):
```llvm
attributes #0 = { "target-cpu"="gfx90a" }
```
After (with explicit `+wavefrontsize32` override):
```llvm
attributes #0 = { "target-cpu"="gfx90a" "target-features"="+wavefrontsize32" }
```
---
clang/include/clang/Basic/CodeGenOptions.def | 4 +
clang/include/clang/Options/Options.td | 6 +
clang/lib/CodeGen/CodeGenModule.cpp | 48 +++++++-
clang/test/CodeGen/link-builtin-bitcode.c | 2 +-
.../test/CodeGenOpenCL/amdgpu-cluster-dims.cl | 4 +-
.../CodeGenOpenCL/amdgpu-enqueue-kernel.cl | 8 +-
.../amdgpu-features-default-delta.cl | 70 ++++++++++++
clang/test/CodeGenOpenCL/amdgpu-features.cl | 106 +++++++++---------
...eadonly-features-written-with-no-target.cl | 10 +-
clang/test/OpenMP/amdgcn-attributes.cpp | 4 +-
10 files changed, 189 insertions(+), 73 deletions(-)
create mode 100644 clang/test/CodeGenOpenCL/amdgpu-features-default-delta.cl
diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def
index baf8b093c10e6..ec3cf0b432143 100644
--- a/clang/include/clang/Basic/CodeGenOptions.def
+++ b/clang/include/clang/Basic/CodeGenOptions.def
@@ -470,6 +470,10 @@ CODEGENOPT(EmitIEEENaNCompliantInsts, 1, 1, Benign)
/// Expands s_waitcnt instructions to help PC-sampling profilers identify stalls.
CODEGENOPT(AMDGPUExpandWaitcntProfiling, 1, 0, Benign)
+/// Emit full target-features attribute for AMDGPU (for testing). (AMDGPU Only)
+/// By default, only features that differ from the target CPU's defaults are emitted.
+CODEGENOPT(AMDGPUEmitFullTargetFeatures, 1, 0, Benign)
+
// Whether to emit Swift Async function extended frame information: auto,
// never, always.
ENUM_CODEGENOPT(SwiftAsyncFramePointer, SwiftAsyncFramePointerKind, 2,
diff --git a/clang/include/clang/Options/Options.td b/clang/include/clang/Options/Options.td
index 188739e72434a..daecec88adcf2 100644
--- a/clang/include/clang/Options/Options.td
+++ b/clang/include/clang/Options/Options.td
@@ -5606,6 +5606,12 @@ defm amdgpu_expand_waitcnt_profiling : BoolMOption<"amdgpu-expand-waitcnt-profil
"emits waitcnt(N-1), waitcnt(N-2), ..., waitcnt(target). (AMDGPU only)">,
NegFlag<SetFalse, [], [ClangOption]>>;
+def famdgpu_emit_full_target_features : Flag<["-"], "famdgpu-emit-full-target-features">,
+ Visibility<[CC1Option]>,
+ HelpText<"Emit full target-features attribute for AMDGPU functions instead of "
+ "only the delta from the target CPU's defaults. (AMDGPU only)">,
+ MarshallingInfoFlag<CodeGenOpts<"AMDGPUEmitFullTargetFeatures">>;
+
def mcode_object_version_EQ : Joined<["-"], "mcode-object-version=">, Group<m_Group>,
HelpText<"Specify code object ABI version. Defaults to 6. (AMDGPU only)">,
Visibility<[ClangOption, FlangOption, CC1Option, FC1Option]>,
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index dc8a31b7f7f0d..3532306554938 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -2950,14 +2950,26 @@ bool CodeGenModule::GetCPUAndFeaturesAttributes(GlobalDecl GD,
const auto *SD = FD ? FD->getAttr<CPUSpecificAttr>() : nullptr;
const auto *TC = FD ? FD->getAttr<TargetClonesAttr>() : nullptr;
bool AddedAttr = false;
+ auto HandleAMDGPUFeatureDelta = [&](llvm::StringMap<bool> &FeatureMap) {
+ // Get the default feature map for the (possibly overridden) target CPU.
+ llvm::StringMap<bool> DefaultFeatureMap;
+ getTarget().initFeatureMap(DefaultFeatureMap, getContext().getDiagnostics(),
+ TargetCPU, {});
+
+ // Only emit features that differ from the defaults.
+ for (const auto &Entry : FeatureMap) {
+ auto DefaultIt = DefaultFeatureMap.find(Entry.getKey());
+ // Emit if the feature is not in defaults or has a different value.
+ if (DefaultIt == DefaultFeatureMap.end() ||
+ DefaultIt->getValue() != Entry.getValue())
+ Features.push_back((Entry.getValue() ? "+" : "-") +
+ Entry.getKey().str());
+ }
+ };
if (TD || TV || SD || TC) {
llvm::StringMap<bool> FeatureMap;
getContext().getFunctionFeatureMap(FeatureMap, GD);
- // Produce the canonical string for this set of features.
- for (const llvm::StringMap<bool>::value_type &Entry : FeatureMap)
- Features.push_back((Entry.getValue() ? "+" : "-") + Entry.getKey().str());
-
// Now add the target-cpu and target-features to the function.
// While we populated the feature map above, we still need to
// get and parse the target attribute so we can get the cpu for
@@ -2980,10 +2992,36 @@ bool CodeGenModule::GetCPUAndFeaturesAttributes(GlobalDecl GD,
// favor this processor.
TuneCPU = SD->getCPUName(GD.getMultiVersionIndex())->getName();
}
+
+ // For AMDGPU, by default only emit delta features (features that differ
+ // from the target CPU's defaults). Use -famdgpu-emit-full-target-features
+ // to emit all features.
+ if (getTarget().getTriple().isAMDGPU() &&
+ !CodeGenOpts.AMDGPUEmitFullTargetFeatures) {
+ HandleAMDGPUFeatureDelta(FeatureMap);
+ } else {
+ // Produce the canonical string for this set of features.
+ for (const llvm::StringMap<bool>::value_type &Entry : FeatureMap)
+ Features.push_back((Entry.getValue() ? "+" : "-") +
+ Entry.getKey().str());
+ }
} else {
// Otherwise just add the existing target cpu and target features to the
// function.
- Features = getTarget().getTargetOpts().Features;
+ if (SetTargetFeatures && getTarget().getTriple().isAMDGPU() &&
+ !CodeGenOpts.AMDGPUEmitFullTargetFeatures) {
+ llvm::StringMap<bool> FeatureMap;
+ if (FD) {
+ getContext().getFunctionFeatureMap(FeatureMap, GD);
+ } else {
+ getTarget().initFeatureMap(FeatureMap, getContext().getDiagnostics(),
+ TargetCPU,
+ getTarget().getTargetOpts().Features);
+ }
+ HandleAMDGPUFeatureDelta(FeatureMap);
+ } else {
+ Features = getTarget().getTargetOpts().Features;
+ }
}
if (!TargetCPU.empty()) {
diff --git a/clang/test/CodeGen/link-builtin-bitcode.c b/clang/test/CodeGen/link-builtin-bitcode.c
index f6e45bf573705..d03fd6fc66d03 100644
--- a/clang/test/CodeGen/link-builtin-bitcode.c
+++ b/clang/test/CodeGen/link-builtin-bitcode.c
@@ -43,7 +43,7 @@ int bar() { return no_attr() + attr_in_target() + attr_not_in_target() + attr_in
// CHECK-LABEL: @attr_incompatible
// CHECK-SAME: () #[[ATTR_INCOMPATIBLE:[0-9]+]] {
-// CHECK: attributes #[[ATTR_BAR]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+lerp-inst,+mai-insts,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64" }
+// CHECK: attributes #[[ATTR_BAR]] = { {{.*}} "target-cpu"="gfx90a" }
// CHECK: attributes #[[ATTR_COMPATIBLE]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gws,+image-insts,+lerp-inst,+mai-insts,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+vmem-to-lds-load-insts,+wavefrontsize64" }
// CHECK: attributes #[[ATTR_EXTEND]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+extended-image-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gws,+image-insts,+lerp-inst,+mai-insts,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+vmem-to-lds-load-insts,+wavefrontsize64" }
// CHECK: attributes #[[ATTR_INCOMPATIBLE]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx90a-insts,+gws,+image-insts,+lerp-inst,+mai-insts,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+vmem-to-lds-load-insts,+wavefrontsize64,-gfx9-insts" }
diff --git a/clang/test/CodeGenOpenCL/amdgpu-cluster-dims.cl b/clang/test/CodeGenOpenCL/amdgpu-cluster-dims.cl
index 38b5ed8de34cc..ece84d5b75ca7 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-cluster-dims.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-cluster-dims.cl
@@ -26,8 +26,8 @@ kernel void foo(global int *p) { *p = 1; }
// CHECK-NEXT: ret void
//
//.
-// CHECK: attributes #[[ATTR0]] = { convergent norecurse nounwind "amdgpu-cluster-dims"="0,0,0" "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1250" "target-features"="+16-bit-insts,+add-min-max-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bitop3-insts,+ci-insts,+clusters,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+mcast-load-insts,+permlane16-swap,+pk-add-min-max-insts,+prng-inst,+qsad-insts,+s-wakeup-barrier-inst,+sad-insts,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+wavefrontsize32" "uniform-work-group-size"="false" }
-// CHECK: attributes #[[ATTR1]] = { alwaysinline convergent norecurse nounwind "amdgpu-cluster-dims"="0,0,0" "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1250" "target-features"="+16-bit-insts,+add-min-max-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bitop3-insts,+ci-insts,+clusters,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+mcast-load-insts,+permlane16-swap,+pk-add-min-max-insts,+prng-inst,+qsad-insts,+s-wakeup-barrier-inst,+sad-insts,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+wavefrontsize32" }
+// CHECK: attributes #[[ATTR0]] = { convergent norecurse nounwind "amdgpu-cluster-dims"="0,0,0" "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1250" "uniform-work-group-size"="false" }
+// CHECK: attributes #[[ATTR1]] = { alwaysinline convergent norecurse nounwind "amdgpu-cluster-dims"="0,0,0" "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1250" }
// CHECK: attributes #[[ATTR2]] = { convergent nounwind }
//.
// CHECK: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600}
diff --git a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
index 2cbc9787a04b0..c2d7616a33754 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
@@ -816,12 +816,12 @@ kernel void test_target_features_kernel(global int *i) {
// NOCPU: attributes #[[ATTR10]] = { convergent nounwind }
//.
// GFX900: attributes #[[ATTR0:[0-9]+]] = { "objc_arc_inert" }
-// GFX900: attributes #[[ATTR1]] = { convergent norecurse nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dpp,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64,-sram-ecc" }
-// GFX900: attributes #[[ATTR2]] = { convergent norecurse nounwind "amdgpu-flat-work-group-size"="1,256" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dpp,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64,-sram-ecc" "uniform-work-group-size"="false" }
-// GFX900: attributes #[[ATTR3]] = { alwaysinline convergent norecurse nounwind "amdgpu-flat-work-group-size"="1,256" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dpp,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64,-sram-ecc" }
+// GFX900: attributes #[[ATTR1]] = { convergent norecurse nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="-sram-ecc" }
+// GFX900: attributes #[[ATTR2]] = { convergent norecurse nounwind "amdgpu-flat-work-group-size"="1,256" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="-sram-ecc" "uniform-work-group-size"="false" }
+// GFX900: attributes #[[ATTR3]] = { alwaysinline convergent norecurse nounwind "amdgpu-flat-work-group-size"="1,256" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="-sram-ecc" }
// GFX900: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
// GFX900: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
-// GFX900: attributes #[[ATTR6]] = { convergent nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dpp,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64,-sram-ecc" }
+// GFX900: attributes #[[ATTR6]] = { convergent nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="-sram-ecc" }
// GFX900: attributes #[[ATTR7:[0-9]+]] = { nocallback nofree nosync nounwind willreturn }
// GFX900: attributes #[[ATTR8]] = { convergent nounwind }
// GFX900: attributes #[[ATTR9]] = { nounwind }
diff --git a/clang/test/CodeGenOpenCL/amdgpu-features-default-delta.cl b/clang/test/CodeGenOpenCL/amdgpu-features-default-delta.cl
new file mode 100644
index 0000000000000..d41a029c084b1
--- /dev/null
+++ b/clang/test/CodeGenOpenCL/amdgpu-features-default-delta.cl
@@ -0,0 +1,70 @@
+// REQUIRES: amdgpu-registered-target
+
+// Test that by default, AMDGPU functions only emit delta target-features
+// (features that differ from the target CPU's defaults). This reduces IR bloat.
+
+// Default behavior for gfx90a: test_default has no target-features,
+// test_explicit_attr has only the delta (+gfx11-insts).
+// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx90a -emit-llvm -o - %s | FileCheck --check-prefix=GFX90A %s
+
+// With -famdgpu-emit-full-target-features, all features are emitted for both functions.
+// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx90a -famdgpu-emit-full-target-features -emit-llvm -o - %s | FileCheck --check-prefix=FULL %s
+
+// With -target-feature, both functions get the delta feature.
+// gfx1030 defaults to wavefrontsize32, so +wavefrontsize64 is a delta.
+// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1030 -target-feature +wavefrontsize64 -emit-llvm -o - %s | FileCheck --check-prefix=CMDLINE %s
+
+// GFX90A-LABEL: define {{.*}} @test_default()
+// GFX90A-SAME: #[[ATTR_DEFAULT:[0-9]+]]
+// GFX90A-LABEL: define {{.*}} @test_explicit_attr()
+// GFX90A-SAME: #[[ATTR_EXPLICIT:[0-9]+]]
+//
+// test_default should have target-cpu but NO target-features
+// GFX90A: attributes #[[ATTR_DEFAULT]] = {
+// GFX90A-SAME: "target-cpu"="gfx90a"
+// GFX90A-NOT: "target-features"
+// GFX90A-SAME: }
+//
+// test_explicit_attr should have target-cpu and ONLY the delta target-features
+// GFX90A: attributes #[[ATTR_EXPLICIT]] = {
+// GFX90A-SAME: "target-cpu"="gfx90a"
+// GFX90A-SAME: "target-features"="+gfx11-insts"
+// GFX90A-SAME: }
+
+// With -famdgpu-emit-full-target-features, both functions get full features.
+// FULL-LABEL: define {{.*}} @test_default()
+// FULL-SAME: #[[ATTR_DEFAULT:[0-9]+]]
+// FULL-LABEL: define {{.*}} @test_explicit_attr()
+// FULL-SAME: #[[ATTR_EXPLICIT:[0-9]+]]
+//
+// FULL: attributes #[[ATTR_DEFAULT]] = {
+// FULL-SAME: "target-cpu"="gfx90a"
+// FULL-SAME: "target-features"="{{[^"]+}}"
+// FULL-SAME: }
+//
+// FULL: attributes #[[ATTR_EXPLICIT]] = {
+// FULL-SAME: "target-cpu"="gfx90a"
+// FULL-SAME: "target-features"="{{[^"]+}}"
+// FULL-SAME: }
+
+// With -target-feature +wavefrontsize64, test_default gets just that delta,
+// test_explicit_attr gets both +gfx11-insts and +wavefrontsize64.
+// CMDLINE-LABEL: define {{.*}} @test_default()
+// CMDLINE-SAME: #[[ATTR_DEFAULT:[0-9]+]]
+// CMDLINE-LABEL: define {{.*}} @test_explicit_attr()
+// CMDLINE-SAME: #[[ATTR_EXPLICIT:[0-9]+]]
+//
+// CMDLINE: attributes #[[ATTR_DEFAULT]] = {
+// CMDLINE-SAME: "target-cpu"="gfx1030"
+// CMDLINE-SAME: "target-features"="+wavefrontsize64"
+// CMDLINE-SAME: }
+//
+// CMDLINE: attributes #[[ATTR_EXPLICIT]] = {
+// CMDLINE-SAME: "target-cpu"="gfx1030"
+// CMDLINE-SAME: "target-features"="{{[^"]*}}+gfx11-insts{{[^"]*}}+wavefrontsize64{{[^"]*}}"
+// CMDLINE-SAME: }
+
+kernel void test_default() {}
+
+__attribute__((target("gfx11-insts")))
+void test_explicit_attr() {}
diff --git a/clang/test/CodeGenOpenCL/amdgpu-features.cl b/clang/test/CodeGenOpenCL/amdgpu-features.cl
index df5b56890dd5c..b3f844739e5c5 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-features.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-features.cl
@@ -1,63 +1,63 @@
// REQUIRES: amdgpu-registered-target
// Check that appropriate features are defined for every supported AMDGPU
-// "-target" and "-mcpu" options.
+// "-target" and "-mcpu" options when -famdgpu-emit-full-target-features is used.
-// RUN: %clang_cc1 -triple amdgcn -emit-llvm -o - %s | FileCheck --check-prefix=NOCPU %s
-// RUN: %clang_cc1 -triple amdgcn -target-feature +wavefrontsize32 -emit-llvm -o - %s | FileCheck --check-prefix=NOCPU-WAVE32 %s
-// RUN: %clang_cc1 -triple amdgcn -target-feature +wavefrontsize64 -emit-llvm -o - %s | FileCheck --check-prefix=NOCPU-WAVE64 %s
+// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -emit-llvm -o - %s | FileCheck --check-prefix=NOCPU %s
+// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-feature +wavefrontsize32 -emit-llvm -o - %s | FileCheck --check-prefix=NOCPU-WAVE32 %s
+// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-feature +wavefrontsize64 -emit-llvm -o - %s | FileCheck --check-prefix=NOCPU-WAVE64 %s
-// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx600 -emit-llvm -o - %s | FileCheck --check-prefix=GFX600 %s
-// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx601 -emit-llvm -o - %s | FileCheck --check-prefix=GFX601 %s
-// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx602 -emit-llvm -o - %s | FileCheck --check-prefix=GFX602 %s
-// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx700 -emit-llvm -o - %s | FileCheck --check-prefix=GFX700 %s
-// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx701 -emit-llvm -o - %s | FileCheck --check-prefix=GFX701 %s
-// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx702 -emit-llvm -o - %s | FileCheck --check-prefix=GFX702 %s
-// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx703 -emit-llvm -o - %s | FileCheck --check-prefix=GFX703 %s
-// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx704 -emit-llvm -o - %s | FileCheck --check-prefix=GFX704 %s
-// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx705 -emit-llvm -o - %s | FileCheck --check-prefix=GFX705 %s
-// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx801 -emit-llvm -o - %s | FileCheck --check-prefix=GFX801 %s
-// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx802 -emit-llvm -o - %s | FileCheck --check-prefix=GFX802 %s
-// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx803 -emit-llvm -o - %s | FileCheck --check-prefix=GFX803 %s
-// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx805 -emit-llvm -o - %s | FileCheck --check-prefix=GFX805 %s
-// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx810 -emit-llvm -o - %s | FileCheck --check-prefix=GFX810 %s
-// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx900 -emit-llvm -o - %s | FileCheck --check-prefix=GFX900 %s
-// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx902 -emit-llvm -o - %s | FileCheck --check-prefix=GFX902 %s
-// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx904 -emit-llvm -o - %s | FileCheck --check-prefix=GFX904 %s
-// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx906 -emit-llvm -o - %s | FileCheck --check-prefix=GFX906 %s
-// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx908 -emit-llvm -o - %s | FileCheck --check-prefix=GFX908 %s
-// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx909 -emit-llvm -o - %s | FileCheck --check-prefix=GFX909 %s
-// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx90a -emit-llvm -o - %s | FileCheck --check-prefix=GFX90A %s
-// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx90c -emit-llvm -o - %s | FileCheck --check-prefix=GFX90C %s
-// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx942 -emit-llvm -o - %s | FileCheck --check-prefix=GFX942 %s
-// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx950 -emit-llvm -o - %s | FileCheck --check-prefix=GFX950 %s
-// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1010 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1010 %s
-// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1011 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1011 %s
-// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1012 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1012 %s
-// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1013 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1013 %s
-// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1030 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1030 %s
-// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1031 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1031 %s
-// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1032 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1032 %s
-// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1033 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1033 %s
-// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1034 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1034 %s
-// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1035 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1035 %s
-// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1036 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1036 %s
-// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1100 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1100 %s
-// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1101 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1101 %s
-// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1102 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1102 %s
-// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1103 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1103 %s
-// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1150 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1150 %s
-// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1151 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1151 %s
-// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1152 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1152 %s
-// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1153 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1153 %s
-// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1200 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1200 %s
-// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1201 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1201 %s
-// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1250 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1250 %s
-// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1251 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1251 %s
+// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx600 -emit-llvm -o - %s | FileCheck --check-prefix=GFX600 %s
+// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx601 -emit-llvm -o - %s | FileCheck --check-prefix=GFX601 %s
+// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx602 -emit-llvm -o - %s | FileCheck --check-prefix=GFX602 %s
+// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx700 -emit-llvm -o - %s | FileCheck --check-prefix=GFX700 %s
+// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx701 -emit-llvm -o - %s | FileCheck --check-prefix=GFX701 %s
+// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx702 -emit-llvm -o - %s | FileCheck --check-prefix=GFX702 %s
+// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx703 -emit-llvm -o - %s | FileCheck --check-prefix=GFX703 %s
+// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx704 -emit-llvm -o - %s | FileCheck --check-prefix=GFX704 %s
+// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx705 -emit-llvm -o - %s | FileCheck --check-prefix=GFX705 %s
+// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx801 -emit-llvm -o - %s | FileCheck --check-prefix=GFX801 %s
+// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx802 -emit-llvm -o - %s | FileCheck --check-prefix=GFX802 %s
+// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx803 -emit-llvm -o - %s | FileCheck --check-prefix=GFX803 %s
+// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx805 -emit-llvm -o - %s | FileCheck --check-prefix=GFX805 %s
+// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx810 -emit-llvm -o - %s | FileCheck --check-prefix=GFX810 %s
+// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx900 -emit-llvm -o - %s | FileCheck --check-prefix=GFX900 %s
+// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx902 -emit-llvm -o - %s | FileCheck --check-prefix=GFX902 %s
+// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx904 -emit-llvm -o - %s | FileCheck --check-prefix=GFX904 %s
+// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx906 -emit-llvm -o - %s | FileCheck --check-prefix=GFX906 %s
+// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx908 -emit-llvm -o - %s | FileCheck --check-prefix=GFX908 %s
+// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx909 -emit-llvm -o - %s | FileCheck --check-prefix=GFX909 %s
+// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx90a -emit-llvm -o - %s | FileCheck --check-prefix=GFX90A %s
+// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx90c -emit-llvm -o - %s | FileCheck --check-prefix=GFX90C %s
+// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx942 -emit-llvm -o - %s | FileCheck --check-prefix=GFX942 %s
+// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx950 -emit-llvm -o - %s | FileCheck --check-prefix=GFX950 %s
+// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx1010 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1010 %s
+// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx1011 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1011 %s
+// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx1012 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1012 %s
+// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx1013 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1013 %s
+// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx1030 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1030 %s
+// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx1031 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1031 %s
+// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx1032 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1032 %s
+// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx1033 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1033 %s
+// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx1034 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1034 %s
+// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx1035 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1035 %s
+// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx1036 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1036 %s
+// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx1100 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1100 %s
+// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx1101 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1101 %s
+// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx1102 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1102 %s
+// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx1103 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1103 %s
+// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx1150 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1150 %s
+// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx1151 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1151 %s
+// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx1152 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1152 %s
+// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx1153 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1153 %s
+// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx1200 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1200 %s
+// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx1201 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1201 %s
+// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx1250 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1250 %s
+// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx1251 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1251 %s
-// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1103 -target-feature +wavefrontsize64 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1103-W64 %s
+// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx1103 -target-feature +wavefrontsize64 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1103-W64 %s
-// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx9-4-generic -emit-llvm -o - %s | FileCheck --check-prefix=GFX9_4_Generic %s
+// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx9-4-generic -emit-llvm -o - %s | FileCheck --check-prefix=GFX9_4_Generic %s
// NOCPU-NOT: "target-features"
// NOCPU-WAVE32: "target-features"="+wavefrontsize32"
diff --git a/clang/test/CodeGenOpenCL/amdgpu-readonly-features-written-with-no-target.cl b/clang/test/CodeGenOpenCL/amdgpu-readonly-features-written-with-no-target.cl
index 2d50ce7cab2e0..1239df2a96d2e 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-readonly-features-written-with-no-target.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-readonly-features-written-with-no-target.cl
@@ -4,13 +4,11 @@
// if there is no target specified.
// RUN: %clang_cc1 -triple amdgcn -emit-llvm -o - %s | FileCheck --check-prefix=NOCPU %s
-// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx942 -emit-llvm -o - %s | FileCheck --check-prefix=GFX942 %s
-// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1100 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1100 %s
-// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1200 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1200 %s
+// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx942 -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1100 -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1200 -emit-llvm -o - %s | FileCheck %s
__attribute__((target("gws,image-insts,vmem-to-lds-load-insts"))) void test() {}
// NOCPU: "target-features"="+gws,+image-insts,+vmem-to-lds-load-insts"
-// GFX942: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+lerp-inst,+mai-insts,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64,+xf32-insts"
-// GFX1100: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+sad-insts,+wavefrontsize32"
-// GFX1200: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-global-pk-add-bf16-inst,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+sad-insts,+wavefrontsize32"
+// CHECK-NOT: "target-features"={{.*}}
diff --git a/clang/test/OpenMP/amdgcn-attributes.cpp b/clang/test/OpenMP/amdgcn-attributes.cpp
index 03f5c31e3157c..ad7d19cbbd55c 100644
--- a/clang/test/OpenMP/amdgcn-attributes.cpp
+++ b/clang/test/OpenMP/amdgcn-attributes.cpp
@@ -32,9 +32,9 @@ int callable(int x) {
}
// DEFAULT: attributes #0 = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,42" "kernel" "no-trapping-math"="true" "omp_target_thread_limit"="42" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }
-// CPU: attributes #0 = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,42" "kernel" "no-trapping-math"="true" "omp_target_thread_limit"="42" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dpp,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64" "uniform-work-group-size"="true" }
+// CPU: attributes #0 = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,42" "kernel" "no-trapping-math"="true" "omp_target_thread_limit"="42" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "uniform-work-group-size"="true" }
// NOIEEE: attributes #0 = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,42" "amdgpu-ieee"="false" "kernel" "no-nans-fp-math"="true" "no-trapping-math"="true" "omp_target_thread_limit"="42" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }
// DEFAULT: attributes #2 = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
-// CPU: attributes #2 = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dpp,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64" }
+// CPU: attributes #2 = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" }
// NOIEEE: attributes #2 = { convergent mustprogress noinline nounwind optnone "amdgpu-ieee"="false" "no-nans-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
>From cb56f78fbd29692ca34944e4458e4593c86c6eb4 Mon Sep 17 00:00:00 2001
From: Shilei Tian <i at tianshilei.me>
Date: Mon, 19 Jan 2026 17:41:19 -0500
Subject: [PATCH 2/3] resolve comments
---
clang/include/clang/Basic/CodeGenOptions.def | 4 -
clang/include/clang/Options/Options.td | 6 -
clang/lib/CodeGen/CodeGenModule.cpp | 30 ++---
.../amdgpu-features-default-delta.cl | 19 ---
clang/test/CodeGenOpenCL/amdgpu-features.cl | 117 ------------------
5 files changed, 12 insertions(+), 164 deletions(-)
delete mode 100644 clang/test/CodeGenOpenCL/amdgpu-features.cl
diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def
index ec3cf0b432143..baf8b093c10e6 100644
--- a/clang/include/clang/Basic/CodeGenOptions.def
+++ b/clang/include/clang/Basic/CodeGenOptions.def
@@ -470,10 +470,6 @@ CODEGENOPT(EmitIEEENaNCompliantInsts, 1, 1, Benign)
/// Expands s_waitcnt instructions to help PC-sampling profilers identify stalls.
CODEGENOPT(AMDGPUExpandWaitcntProfiling, 1, 0, Benign)
-/// Emit full target-features attribute for AMDGPU (for testing). (AMDGPU Only)
-/// By default, only features that differ from the target CPU's defaults are emitted.
-CODEGENOPT(AMDGPUEmitFullTargetFeatures, 1, 0, Benign)
-
// Whether to emit Swift Async function extended frame information: auto,
// never, always.
ENUM_CODEGENOPT(SwiftAsyncFramePointer, SwiftAsyncFramePointerKind, 2,
diff --git a/clang/include/clang/Options/Options.td b/clang/include/clang/Options/Options.td
index daecec88adcf2..188739e72434a 100644
--- a/clang/include/clang/Options/Options.td
+++ b/clang/include/clang/Options/Options.td
@@ -5606,12 +5606,6 @@ defm amdgpu_expand_waitcnt_profiling : BoolMOption<"amdgpu-expand-waitcnt-profil
"emits waitcnt(N-1), waitcnt(N-2), ..., waitcnt(target). (AMDGPU only)">,
NegFlag<SetFalse, [], [ClangOption]>>;
-def famdgpu_emit_full_target_features : Flag<["-"], "famdgpu-emit-full-target-features">,
- Visibility<[CC1Option]>,
- HelpText<"Emit full target-features attribute for AMDGPU functions instead of "
- "only the delta from the target CPU's defaults. (AMDGPU only)">,
- MarshallingInfoFlag<CodeGenOpts<"AMDGPUEmitFullTargetFeatures">>;
-
def mcode_object_version_EQ : Joined<["-"], "mcode-object-version=">, Group<m_Group>,
HelpText<"Specify code object ABI version. Defaults to 6. (AMDGPU only)">,
Visibility<[ClangOption, FlangOption, CC1Option, FC1Option]>,
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 3532306554938..4e046d011a825 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -2950,20 +2950,16 @@ bool CodeGenModule::GetCPUAndFeaturesAttributes(GlobalDecl GD,
const auto *SD = FD ? FD->getAttr<CPUSpecificAttr>() : nullptr;
const auto *TC = FD ? FD->getAttr<TargetClonesAttr>() : nullptr;
bool AddedAttr = false;
- auto HandleAMDGPUFeatureDelta = [&](llvm::StringMap<bool> &FeatureMap) {
+ auto HandleFeatureDelta = [&](llvm::StringMap<bool> &FeatureMap) {
// Get the default feature map for the (possibly overridden) target CPU.
llvm::StringMap<bool> DefaultFeatureMap;
getTarget().initFeatureMap(DefaultFeatureMap, getContext().getDiagnostics(),
TargetCPU, {});
-
- // Only emit features that differ from the defaults.
- for (const auto &Entry : FeatureMap) {
- auto DefaultIt = DefaultFeatureMap.find(Entry.getKey());
+ for (const auto &[K, V] : FeatureMap) {
+ auto DefaultIt = DefaultFeatureMap.find(K);
// Emit if the feature is not in defaults or has a different value.
- if (DefaultIt == DefaultFeatureMap.end() ||
- DefaultIt->getValue() != Entry.getValue())
- Features.push_back((Entry.getValue() ? "+" : "-") +
- Entry.getKey().str());
+ if (DefaultIt == DefaultFeatureMap.end() || DefaultIt->getValue() != V)
+ Features.push_back((V ? "+" : "-") + K.str());
}
};
if (TD || TV || SD || TC) {
@@ -2993,12 +2989,11 @@ bool CodeGenModule::GetCPUAndFeaturesAttributes(GlobalDecl GD,
TuneCPU = SD->getCPUName(GD.getMultiVersionIndex())->getName();
}
- // For AMDGPU, by default only emit delta features (features that differ
- // from the target CPU's defaults). Use -famdgpu-emit-full-target-features
- // to emit all features.
- if (getTarget().getTriple().isAMDGPU() &&
- !CodeGenOpts.AMDGPUEmitFullTargetFeatures) {
- HandleAMDGPUFeatureDelta(FeatureMap);
+ // For AMDGPU, only emit delta features (features that differ from the
+ // target CPU's defaults). Other targets might want to follow a similar
+ // pattern.
+ if (getTarget().getTriple().isAMDGPU()) {
+ HandleFeatureDelta(FeatureMap);
} else {
// Produce the canonical string for this set of features.
for (const llvm::StringMap<bool>::value_type &Entry : FeatureMap)
@@ -3008,8 +3003,7 @@ bool CodeGenModule::GetCPUAndFeaturesAttributes(GlobalDecl GD,
} else {
// Otherwise just add the existing target cpu and target features to the
// function.
- if (SetTargetFeatures && getTarget().getTriple().isAMDGPU() &&
- !CodeGenOpts.AMDGPUEmitFullTargetFeatures) {
+ if (SetTargetFeatures && getTarget().getTriple().isAMDGPU()) {
llvm::StringMap<bool> FeatureMap;
if (FD) {
getContext().getFunctionFeatureMap(FeatureMap, GD);
@@ -3018,7 +3012,7 @@ bool CodeGenModule::GetCPUAndFeaturesAttributes(GlobalDecl GD,
TargetCPU,
getTarget().getTargetOpts().Features);
}
- HandleAMDGPUFeatureDelta(FeatureMap);
+ HandleFeatureDelta(FeatureMap);
} else {
Features = getTarget().getTargetOpts().Features;
}
diff --git a/clang/test/CodeGenOpenCL/amdgpu-features-default-delta.cl b/clang/test/CodeGenOpenCL/amdgpu-features-default-delta.cl
index d41a029c084b1..56f8ed2c0e979 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-features-default-delta.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-features-default-delta.cl
@@ -7,9 +7,6 @@
// test_explicit_attr has only the delta (+gfx11-insts).
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx90a -emit-llvm -o - %s | FileCheck --check-prefix=GFX90A %s
-// With -famdgpu-emit-full-target-features, all features are emitted for both functions.
-// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx90a -famdgpu-emit-full-target-features -emit-llvm -o - %s | FileCheck --check-prefix=FULL %s
-
// With -target-feature, both functions get the delta feature.
// gfx1030 defaults to wavefrontsize32, so +wavefrontsize64 is a delta.
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1030 -target-feature +wavefrontsize64 -emit-llvm -o - %s | FileCheck --check-prefix=CMDLINE %s
@@ -31,22 +28,6 @@
// GFX90A-SAME: "target-features"="+gfx11-insts"
// GFX90A-SAME: }
-// With -famdgpu-emit-full-target-features, both functions get full features.
-// FULL-LABEL: define {{.*}} @test_default()
-// FULL-SAME: #[[ATTR_DEFAULT:[0-9]+]]
-// FULL-LABEL: define {{.*}} @test_explicit_attr()
-// FULL-SAME: #[[ATTR_EXPLICIT:[0-9]+]]
-//
-// FULL: attributes #[[ATTR_DEFAULT]] = {
-// FULL-SAME: "target-cpu"="gfx90a"
-// FULL-SAME: "target-features"="{{[^"]+}}"
-// FULL-SAME: }
-//
-// FULL: attributes #[[ATTR_EXPLICIT]] = {
-// FULL-SAME: "target-cpu"="gfx90a"
-// FULL-SAME: "target-features"="{{[^"]+}}"
-// FULL-SAME: }
-
// With -target-feature +wavefrontsize64, test_default gets just that delta,
// test_explicit_attr gets both +gfx11-insts and +wavefrontsize64.
// CMDLINE-LABEL: define {{.*}} @test_default()
diff --git a/clang/test/CodeGenOpenCL/amdgpu-features.cl b/clang/test/CodeGenOpenCL/amdgpu-features.cl
deleted file mode 100644
index b3f844739e5c5..0000000000000
--- a/clang/test/CodeGenOpenCL/amdgpu-features.cl
+++ /dev/null
@@ -1,117 +0,0 @@
-// REQUIRES: amdgpu-registered-target
-
-// Check that appropriate features are defined for every supported AMDGPU
-// "-target" and "-mcpu" options when -famdgpu-emit-full-target-features is used.
-
-// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -emit-llvm -o - %s | FileCheck --check-prefix=NOCPU %s
-// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-feature +wavefrontsize32 -emit-llvm -o - %s | FileCheck --check-prefix=NOCPU-WAVE32 %s
-// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-feature +wavefrontsize64 -emit-llvm -o - %s | FileCheck --check-prefix=NOCPU-WAVE64 %s
-
-// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx600 -emit-llvm -o - %s | FileCheck --check-prefix=GFX600 %s
-// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx601 -emit-llvm -o - %s | FileCheck --check-prefix=GFX601 %s
-// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx602 -emit-llvm -o - %s | FileCheck --check-prefix=GFX602 %s
-// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx700 -emit-llvm -o - %s | FileCheck --check-prefix=GFX700 %s
-// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx701 -emit-llvm -o - %s | FileCheck --check-prefix=GFX701 %s
-// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx702 -emit-llvm -o - %s | FileCheck --check-prefix=GFX702 %s
-// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx703 -emit-llvm -o - %s | FileCheck --check-prefix=GFX703 %s
-// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx704 -emit-llvm -o - %s | FileCheck --check-prefix=GFX704 %s
-// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx705 -emit-llvm -o - %s | FileCheck --check-prefix=GFX705 %s
-// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx801 -emit-llvm -o - %s | FileCheck --check-prefix=GFX801 %s
-// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx802 -emit-llvm -o - %s | FileCheck --check-prefix=GFX802 %s
-// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx803 -emit-llvm -o - %s | FileCheck --check-prefix=GFX803 %s
-// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx805 -emit-llvm -o - %s | FileCheck --check-prefix=GFX805 %s
-// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx810 -emit-llvm -o - %s | FileCheck --check-prefix=GFX810 %s
-// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx900 -emit-llvm -o - %s | FileCheck --check-prefix=GFX900 %s
-// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx902 -emit-llvm -o - %s | FileCheck --check-prefix=GFX902 %s
-// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx904 -emit-llvm -o - %s | FileCheck --check-prefix=GFX904 %s
-// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx906 -emit-llvm -o - %s | FileCheck --check-prefix=GFX906 %s
-// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx908 -emit-llvm -o - %s | FileCheck --check-prefix=GFX908 %s
-// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx909 -emit-llvm -o - %s | FileCheck --check-prefix=GFX909 %s
-// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx90a -emit-llvm -o - %s | FileCheck --check-prefix=GFX90A %s
-// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx90c -emit-llvm -o - %s | FileCheck --check-prefix=GFX90C %s
-// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx942 -emit-llvm -o - %s | FileCheck --check-prefix=GFX942 %s
-// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx950 -emit-llvm -o - %s | FileCheck --check-prefix=GFX950 %s
-// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx1010 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1010 %s
-// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx1011 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1011 %s
-// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx1012 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1012 %s
-// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx1013 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1013 %s
-// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx1030 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1030 %s
-// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx1031 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1031 %s
-// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx1032 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1032 %s
-// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx1033 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1033 %s
-// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx1034 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1034 %s
-// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx1035 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1035 %s
-// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx1036 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1036 %s
-// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx1100 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1100 %s
-// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx1101 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1101 %s
-// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx1102 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1102 %s
-// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx1103 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1103 %s
-// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx1150 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1150 %s
-// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx1151 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1151 %s
-// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx1152 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1152 %s
-// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx1153 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1153 %s
-// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx1200 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1200 %s
-// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx1201 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1201 %s
-// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx1250 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1250 %s
-// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx1251 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1251 %s
-
-// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx1103 -target-feature +wavefrontsize64 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1103-W64 %s
-
-// RUN: %clang_cc1 -triple amdgcn -famdgpu-emit-full-target-features -target-cpu gfx9-4-generic -emit-llvm -o - %s | FileCheck --check-prefix=GFX9_4_Generic %s
-
-// NOCPU-NOT: "target-features"
-// NOCPU-WAVE32: "target-features"="+wavefrontsize32"
-// NOCPU-WAVE64: "target-features"="+wavefrontsize64"
-
-// GFX600: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+cube-insts,+cvt-pknorm-vop2-insts,+lerp-inst,+s-memtime-inst,+sad-insts,+wavefrontsize64"
-// GFX601: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+cube-insts,+cvt-pknorm-vop2-insts,+lerp-inst,+s-memtime-inst,+sad-insts,+wavefrontsize64"
-// GFX602: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+cube-insts,+cvt-pknorm-vop2-insts,+lerp-inst,+s-memtime-inst,+sad-insts,+wavefrontsize64"
-// GFX700: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+lerp-inst,+qsad-insts,+s-memtime-inst,+sad-insts,+wavefrontsize64"
-// GFX701: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+lerp-inst,+qsad-insts,+s-memtime-inst,+sad-insts,+wavefrontsize64"
-// GFX702: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+lerp-inst,+qsad-insts,+s-memtime-inst,+sad-insts,+wavefrontsize64"
-// GFX703: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+lerp-inst,+qsad-insts,+s-memtime-inst,+sad-insts,+wavefrontsize64"
-// GFX704: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+lerp-inst,+qsad-insts,+s-memtime-inst,+sad-insts,+wavefrontsize64"
-// GFX705: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+lerp-inst,+qsad-insts,+s-memtime-inst,+sad-insts,+wavefrontsize64"
-// GFX801: "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dpp,+gfx8-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64"
-// GFX802: "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dpp,+gfx8-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64"
-// GFX803: "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dpp,+gfx8-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64"
-// GFX805: "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dpp,+gfx8-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64"
-// GFX810: "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dpp,+gfx8-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64"
-// GFX900: "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dpp,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64"
-// GFX902: "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dpp,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64"
-// GFX904: "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dpp,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64"
-// GFX906: "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64"
-// GFX908: "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+lerp-inst,+mai-insts,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64"
-// GFX909: "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dpp,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64"
-// GFX90A: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+lerp-inst,+mai-insts,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64"
-// GFX90C: "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dpp,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64"
-// GFX942: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+lerp-inst,+mai-insts,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64,+xf32-insts"
-// GFX9_4_Generic: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+lerp-inst,+mai-insts,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64"
-// GFX950: "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+lerp-inst,+mai-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64"
-// GFX1010: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize32"
-// GFX1011: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize32"
-// GFX1012: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize32"
-// GFX1013: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize32"
-// GFX1030: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize32"
-// GFX1031: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize32"
-// GFX1032: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize32"
-// GFX1033: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize32"
-// GFX1034: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize32"
-// GFX1035: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize32"
-// GFX1036: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize32"
-// GFX1100: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+sad-insts,+wavefrontsize32"
-// GFX1101: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+sad-insts,+wavefrontsize32"
-// GFX1102: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+sad-insts,+wavefrontsize32"
-// GFX1103: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+sad-insts,+wavefrontsize32"
-// GFX1150: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+sad-insts,+wavefrontsize32"
-// GFX1151: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+sad-insts,+wavefrontsize32"
-// GFX1152: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+sad-insts,+wavefrontsize32"
-// GFX1153: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+sad-insts,+wavefrontsize32"
-// GFX1200: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-global-pk-add-bf16-inst,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+sad-insts,+wavefrontsize32"
-// GFX1201: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-global-pk-add-bf16-inst,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+sad-insts,+wavefrontsize32"
-// GFX1250: "target-features"="+16-bit-insts,+add-min-max-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bitop3-insts,+ci-insts,+clusters,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+mcast-load-insts,+permlane16-swap,+pk-add-min-max-insts,+prng-inst,+qsad-insts,+s-wakeup-barrier-inst,+sad-insts,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+wavefrontsize32"
-// GFX1251: "target-features"="+16-bit-insts,+add-min-max-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bitop3-insts,+ci-insts,+clusters,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+mcast-load-insts,+permlane16-swap,+pk-add-min-max-insts,+prng-inst,+qsad-insts,+s-wakeup-barrier-inst,+sad-insts,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+wavefrontsize32"
-
-// GFX1103-W64: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+sad-insts,+wavefrontsize64"
-
-kernel void test() {}
>From 6bce53eef1c15d7c54d53abb19e360187ffff53d Mon Sep 17 00:00:00 2001
From: Shilei Tian <i at tianshilei.me>
Date: Tue, 20 Jan 2026 10:01:39 -0500
Subject: [PATCH 3/3] resolve review comments
---
clang/lib/CodeGen/CodeGenModule.cpp | 34 +++++++++++++++++------------
1 file changed, 20 insertions(+), 14 deletions(-)
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 4e046d011a825..c7f0997f6e972 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -2933,6 +2933,24 @@ void CodeGenModule::SetCommonAttributes(GlobalDecl GD, llvm::GlobalValue *GV) {
addUsedOrCompilerUsedGlobal(GV);
}
+/// Get the feature delta from the default feature map for the given target CPU.
+static std::vector<std::string>
+getFeatureDeltaFromDefault(const CodeGenModule &CGM, StringRef TargetCPU,
+ llvm::StringMap<bool> &FeatureMap) {
+ llvm::StringMap<bool> DefaultFeatureMap;
+ CGM.getTarget().initFeatureMap(
+ DefaultFeatureMap, CGM.getContext().getDiagnostics(), TargetCPU, {});
+
+ std::vector<std::string> Delta;
+ for (const auto &[K, V] : FeatureMap) {
+ auto DefaultIt = DefaultFeatureMap.find(K);
+ if (DefaultIt == DefaultFeatureMap.end() || DefaultIt->getValue() != V)
+ Delta.push_back((V ? "+" : "-") + K.str());
+ }
+
+ return Delta;
+}
+
bool CodeGenModule::GetCPUAndFeaturesAttributes(GlobalDecl GD,
llvm::AttrBuilder &Attrs,
bool SetTargetFeatures) {
@@ -2950,18 +2968,6 @@ bool CodeGenModule::GetCPUAndFeaturesAttributes(GlobalDecl GD,
const auto *SD = FD ? FD->getAttr<CPUSpecificAttr>() : nullptr;
const auto *TC = FD ? FD->getAttr<TargetClonesAttr>() : nullptr;
bool AddedAttr = false;
- auto HandleFeatureDelta = [&](llvm::StringMap<bool> &FeatureMap) {
- // Get the default feature map for the (possibly overridden) target CPU.
- llvm::StringMap<bool> DefaultFeatureMap;
- getTarget().initFeatureMap(DefaultFeatureMap, getContext().getDiagnostics(),
- TargetCPU, {});
- for (const auto &[K, V] : FeatureMap) {
- auto DefaultIt = DefaultFeatureMap.find(K);
- // Emit if the feature is not in defaults or has a different value.
- if (DefaultIt == DefaultFeatureMap.end() || DefaultIt->getValue() != V)
- Features.push_back((V ? "+" : "-") + K.str());
- }
- };
if (TD || TV || SD || TC) {
llvm::StringMap<bool> FeatureMap;
getContext().getFunctionFeatureMap(FeatureMap, GD);
@@ -2993,7 +2999,7 @@ bool CodeGenModule::GetCPUAndFeaturesAttributes(GlobalDecl GD,
// target CPU's defaults). Other targets might want to follow a similar
// pattern.
if (getTarget().getTriple().isAMDGPU()) {
- HandleFeatureDelta(FeatureMap);
+ Features = getFeatureDeltaFromDefault(*this, TargetCPU, FeatureMap);
} else {
// Produce the canonical string for this set of features.
for (const llvm::StringMap<bool>::value_type &Entry : FeatureMap)
@@ -3012,7 +3018,7 @@ bool CodeGenModule::GetCPUAndFeaturesAttributes(GlobalDecl GD,
TargetCPU,
getTarget().getTargetOpts().Features);
}
- HandleFeatureDelta(FeatureMap);
+ Features = getFeatureDeltaFromDefault(*this, TargetCPU, FeatureMap);
} else {
Features = getTarget().getTargetOpts().Features;
}
More information about the cfe-commits
mailing list