[llvm-branch-commits] [clang] [flang] [llvm] [AMDGPU] More radical feature initialization refactoring (PR #155222)
Yaxun Liu via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Tue Aug 26 07:37:35 PDT 2025
================
@@ -364,8 +364,320 @@ StringRef AMDGPU::getCanonicalArchName(const Triple &T, StringRef Arch) {
return T.isAMDGCN() ? getArchNameAMDGCN(ProcKind) : getArchNameR600(ProcKind);
}
-void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
- StringMap<bool> &Features) {
+static std::pair<FeatureError, StringRef>
+insertWaveSizeFeature(StringRef GPU, const Triple &T,
+ const StringMap<bool> &DefaultFeatures,
+ StringMap<bool> &Features) {
+ const bool IsNullGPU = GPU.empty();
+ const bool TargetHasWave32 = DefaultFeatures.count("wavefrontsize32");
+ const bool TargetHasWave64 = DefaultFeatures.count("wavefrontsize64");
+ const bool HaveWave32 = Features.count("wavefrontsize32");
+ const bool HaveWave64 = Features.count("wavefrontsize64");
+ if (HaveWave32 && HaveWave64)
+ return {AMDGPU::INVALID_FEATURE_COMBINATION,
+ "'wavefrontsize32' and 'wavefrontsize64' are mutually exclusive"};
+
+ if (HaveWave32 && !IsNullGPU && TargetHasWave64)
+ return {AMDGPU::UNSUPPORTED_TARGET_FEATURE, "wavefrontsize32"};
+
+ if (HaveWave64 && !IsNullGPU && TargetHasWave32)
+ return {AMDGPU::UNSUPPORTED_TARGET_FEATURE, "wavefrontsize64"};
+
+ // Don't assume any wavesize with an unknown subtarget.
+ // Default to wave32 if target supports both.
+ if (!IsNullGPU && !HaveWave32 && !HaveWave64 && !TargetHasWave32 &&
+ !TargetHasWave64)
+ Features.insert(std::make_pair("wavefrontsize32", true));
+
+ for (const auto &Entry : DefaultFeatures) {
+ if (!Features.count(Entry.getKey()))
+ Features[Entry.getKey()] = Entry.getValue();
+ }
+
+ return {NO_ERROR, StringRef()};
+}
+
+static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T,
+ StringMap<bool> &Features) {
+ AMDGPU::GPUKind Kind = parseArchAMDGCN(GPU);
+ switch (Kind) {
+ case GK_GFX1250:
+ Features["ci-insts"] = true;
+ Features["dot7-insts"] = true;
+ Features["dot8-insts"] = true;
+ Features["dl-insts"] = true;
+ Features["16-bit-insts"] = true;
+ Features["dpp"] = true;
+ Features["gfx8-insts"] = true;
+ Features["gfx9-insts"] = true;
+ Features["gfx10-insts"] = true;
+ Features["gfx10-3-insts"] = true;
+ Features["gfx11-insts"] = true;
+ Features["gfx12-insts"] = true;
+ Features["gfx1250-insts"] = true;
+ Features["bitop3-insts"] = true;
+ Features["prng-inst"] = true;
+ Features["tanh-insts"] = true;
+ Features["tensor-cvt-lut-insts"] = true;
+ Features["transpose-load-f4f6-insts"] = true;
+ Features["bf16-trans-insts"] = true;
+ Features["bf16-cvt-insts"] = true;
+ Features["fp8-conversion-insts"] = true;
+ Features["fp8e5m3-insts"] = true;
+ Features["permlane16-swap"] = true;
+ Features["ashr-pk-insts"] = true;
+ Features["atomic-buffer-pk-add-bf16-inst"] = true;
+ Features["vmem-pref-insts"] = true;
+ Features["atomic-fadd-rtn-insts"] = true;
+ Features["atomic-buffer-global-pk-add-f16-insts"] = true;
+ Features["atomic-flat-pk-add-16-insts"] = true;
+ Features["atomic-global-pk-add-bf16-inst"] = true;
+ Features["atomic-ds-pk-add-16-insts"] = true;
+ Features["setprio-inc-wg-inst"] = true;
+ Features["atomic-fmin-fmax-global-f32"] = true;
+ Features["atomic-fmin-fmax-global-f64"] = true;
+ Features["wavefrontsize32"] = true;
+ break;
+ case GK_GFX1201:
+ case GK_GFX1200:
+ case GK_GFX12_GENERIC:
+ Features["ci-insts"] = true;
+ Features["dot7-insts"] = true;
+ Features["dot8-insts"] = true;
+ Features["dot9-insts"] = true;
+ Features["dot10-insts"] = true;
+ Features["dot11-insts"] = true;
+ Features["dot12-insts"] = true;
+ Features["dl-insts"] = true;
+ Features["atomic-ds-pk-add-16-insts"] = true;
+ Features["atomic-flat-pk-add-16-insts"] = true;
+ Features["atomic-buffer-global-pk-add-f16-insts"] = true;
+ Features["atomic-buffer-pk-add-bf16-inst"] = true;
+ Features["atomic-global-pk-add-bf16-inst"] = true;
+ Features["16-bit-insts"] = true;
+ Features["dpp"] = true;
+ Features["gfx8-insts"] = true;
+ Features["gfx9-insts"] = true;
+ Features["gfx10-insts"] = true;
+ Features["gfx10-3-insts"] = true;
+ Features["gfx11-insts"] = true;
+ Features["gfx12-insts"] = true;
+ Features["atomic-fadd-rtn-insts"] = true;
+ Features["image-insts"] = true;
+ Features["fp8-conversion-insts"] = true;
+ Features["atomic-fmin-fmax-global-f32"] = true;
+ break;
+ case GK_GFX1153:
+ case GK_GFX1152:
+ case GK_GFX1151:
+ case GK_GFX1150:
+ case GK_GFX1103:
+ case GK_GFX1102:
+ case GK_GFX1101:
+ case GK_GFX1100:
+ case GK_GFX11_GENERIC:
+ Features["ci-insts"] = true;
+ Features["dot5-insts"] = true;
+ Features["dot7-insts"] = true;
+ Features["dot8-insts"] = true;
+ Features["dot9-insts"] = true;
+ Features["dot10-insts"] = true;
+ Features["dot12-insts"] = true;
+ Features["dl-insts"] = true;
+ Features["16-bit-insts"] = true;
+ Features["dpp"] = true;
+ Features["gfx8-insts"] = true;
+ Features["gfx9-insts"] = true;
+ Features["gfx10-insts"] = true;
+ Features["gfx10-3-insts"] = true;
+ Features["gfx11-insts"] = true;
+ Features["atomic-fadd-rtn-insts"] = true;
+ Features["image-insts"] = true;
+ Features["gws"] = true;
+ Features["atomic-fmin-fmax-global-f32"] = true;
+ break;
+ case GK_GFX1036:
+ case GK_GFX1035:
+ case GK_GFX1034:
+ case GK_GFX1033:
+ case GK_GFX1032:
+ case GK_GFX1031:
+ case GK_GFX1030:
+ case GK_GFX10_3_GENERIC:
+ Features["ci-insts"] = true;
+ Features["dot1-insts"] = true;
+ Features["dot2-insts"] = true;
+ Features["dot5-insts"] = true;
+ Features["dot6-insts"] = true;
+ Features["dot7-insts"] = true;
+ Features["dot10-insts"] = true;
+ Features["dl-insts"] = true;
+ Features["16-bit-insts"] = true;
+ Features["dpp"] = true;
+ Features["gfx8-insts"] = true;
+ Features["gfx9-insts"] = true;
+ Features["gfx10-insts"] = true;
+ Features["gfx10-3-insts"] = true;
+ Features["image-insts"] = true;
+ Features["s-memrealtime"] = true;
+ Features["s-memtime-inst"] = true;
+ Features["gws"] = true;
+ Features["vmem-to-lds-load-insts"] = true;
+ Features["atomic-fmin-fmax-global-f32"] = true;
+ Features["atomic-fmin-fmax-global-f64"] = true;
+ break;
+ case GK_GFX1012:
+ case GK_GFX1011:
+ Features["dot1-insts"] = true;
+ Features["dot2-insts"] = true;
+ Features["dot5-insts"] = true;
+ Features["dot6-insts"] = true;
+ Features["dot7-insts"] = true;
+ Features["dot10-insts"] = true;
+ [[fallthrough]];
+ case GK_GFX1013:
+ case GK_GFX1010:
+ case GK_GFX10_1_GENERIC:
+ Features["dl-insts"] = true;
+ Features["ci-insts"] = true;
+ Features["16-bit-insts"] = true;
+ Features["dpp"] = true;
+ Features["gfx8-insts"] = true;
+ Features["gfx9-insts"] = true;
+ Features["gfx10-insts"] = true;
+ Features["image-insts"] = true;
+ Features["s-memrealtime"] = true;
+ Features["s-memtime-inst"] = true;
+ Features["gws"] = true;
+ Features["vmem-to-lds-load-insts"] = true;
+ Features["atomic-fmin-fmax-global-f32"] = true;
+ Features["atomic-fmin-fmax-global-f64"] = true;
+ break;
+ case GK_GFX950:
+ Features["bitop3-insts"] = true;
+ Features["fp6bf6-cvt-scale-insts"] = true;
+ Features["fp4-cvt-scale-insts"] = true;
+ Features["bf8-cvt-scale-insts"] = true;
+ Features["fp8-cvt-scale-insts"] = true;
+ Features["f16bf16-to-fp6bf6-cvt-scale-insts"] = true;
+ Features["f32-to-f16bf16-cvt-sr-insts"] = true;
+ Features["prng-inst"] = true;
+ Features["permlane16-swap"] = true;
+ Features["permlane32-swap"] = true;
+ Features["ashr-pk-insts"] = true;
+ Features["dot12-insts"] = true;
+ Features["dot13-insts"] = true;
+ Features["atomic-buffer-pk-add-bf16-inst"] = true;
+ Features["gfx950-insts"] = true;
+ [[fallthrough]];
+ case GK_GFX942:
+ Features["fp8-insts"] = true;
+ Features["fp8-conversion-insts"] = true;
+ if (Kind != GK_GFX950)
+ Features["xf32-insts"] = true;
+ [[fallthrough]];
+ case GK_GFX9_4_GENERIC:
+ Features["gfx940-insts"] = true;
+ Features["atomic-ds-pk-add-16-insts"] = true;
+ Features["atomic-flat-pk-add-16-insts"] = true;
+ Features["atomic-global-pk-add-bf16-inst"] = true;
+ Features["gfx90a-insts"] = true;
+ Features["atomic-buffer-global-pk-add-f16-insts"] = true;
+ Features["atomic-fadd-rtn-insts"] = true;
+ Features["dot3-insts"] = true;
+ Features["dot4-insts"] = true;
+ Features["dot5-insts"] = true;
+ Features["dot6-insts"] = true;
+ Features["mai-insts"] = true;
+ Features["dl-insts"] = true;
+ Features["dot1-insts"] = true;
+ Features["dot2-insts"] = true;
+ Features["dot7-insts"] = true;
+ Features["dot10-insts"] = true;
+ Features["gfx9-insts"] = true;
+ Features["gfx8-insts"] = true;
+ Features["16-bit-insts"] = true;
+ Features["dpp"] = true;
+ Features["s-memrealtime"] = true;
+ Features["ci-insts"] = true;
+ Features["s-memtime-inst"] = true;
+ Features["gws"] = true;
+ Features["vmem-to-lds-load-insts"] = true;
+ Features["atomic-fmin-fmax-global-f64"] = true;
+ Features["wavefrontsize64"] = true;
+ break;
+ case GK_GFX90A:
+ Features["gfx90a-insts"] = true;
+ Features["atomic-buffer-global-pk-add-f16-insts"] = true;
+ Features["atomic-fadd-rtn-insts"] = true;
+ Features["atomic-fmin-fmax-global-f64"] = true;
+ [[fallthrough]];
+ case GK_GFX908:
+ Features["dot3-insts"] = true;
+ Features["dot4-insts"] = true;
+ Features["dot5-insts"] = true;
+ Features["dot6-insts"] = true;
+ Features["mai-insts"] = true;
+ [[fallthrough]];
+ case GK_GFX906:
+ Features["dl-insts"] = true;
+ Features["dot1-insts"] = true;
+ Features["dot2-insts"] = true;
+ Features["dot7-insts"] = true;
+ Features["dot10-insts"] = true;
+ [[fallthrough]];
+ case GK_GFX90C:
+ case GK_GFX909:
+ case GK_GFX904:
+ case GK_GFX902:
+ case GK_GFX900:
+ case GK_GFX9_GENERIC:
+ Features["gfx9-insts"] = true;
+ Features["vmem-to-lds-load-insts"] = true;
+ [[fallthrough]];
+ case GK_GFX810:
+ case GK_GFX805:
+ case GK_GFX803:
+ case GK_GFX802:
+ case GK_GFX801:
+ Features["gfx8-insts"] = true;
+ Features["16-bit-insts"] = true;
+ Features["dpp"] = true;
+ Features["s-memrealtime"] = true;
+ Features["ci-insts"] = true;
+ Features["image-insts"] = true;
+ Features["s-memtime-inst"] = true;
+ Features["gws"] = true;
+ Features["wavefrontsize64"] = true;
+ break;
+ case GK_GFX705:
+ case GK_GFX704:
+ case GK_GFX703:
+ case GK_GFX702:
+ case GK_GFX701:
+ case GK_GFX700:
+ Features["ci-insts"] = true;
+ [[fallthrough]];
+ case GK_GFX602:
+ case GK_GFX601:
+ case GK_GFX600:
+ Features["image-insts"] = true;
+ Features["s-memtime-inst"] = true;
+ Features["gws"] = true;
+ Features["atomic-fmin-fmax-global-f32"] = true;
+ Features["atomic-fmin-fmax-global-f64"] = true;
+ Features["wavefrontsize64"] = true;
+ break;
+ case GK_NONE:
+ break;
+ default:
+ llvm_unreachable("Unhandled GPU!");
+ }
+}
+
+std::pair<FeatureError, StringRef>
----------------
yxsamliu wrote:
can we add a comment that \p Features contains overriding target features and this function returns default target features with entries overridden by \p Features.
https://github.com/llvm/llvm-project/pull/155222
More information about the llvm-branch-commits
mailing list