[clang] [flang] [llvm] [AMDGPU] Adding instruction specific features (PR #167809)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 18 01:09:37 PST 2025
https://github.com/Shoreshen updated https://github.com/llvm/llvm-project/pull/167809
>From eccc49197174bfb20a26c9cad573df37614ed629 Mon Sep 17 00:00:00 2001
From: shore <372660931 at qq.com>
Date: Thu, 13 Nov 2025 10:18:26 +0800
Subject: [PATCH 01/11] Adding instruction specific features
---
llvm/lib/Target/AMDGPU/AMDGPU.td | 108 +++++++++++++++++--
llvm/lib/Target/AMDGPU/GCNSubtarget.h | 21 ++++
llvm/lib/Target/AMDGPU/VOP1Instructions.td | 14 +--
llvm/lib/Target/AMDGPU/VOP2Instructions.td | 2 +-
llvm/lib/Target/AMDGPU/VOP3Instructions.td | 22 ++--
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lerp.ll | 1 -
6 files changed, 142 insertions(+), 26 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index b008354cfd462..fe2a192f0f372 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -901,6 +901,48 @@ def FeaturePkFmacF16Inst : SubtargetFeature<"pk-fmac-f16-inst",
"Has v_pk_fmac_f16 instruction"
>;
+def FeatureVCUBEInsts : SubtargetFeature<"V_CUBE-Insts",
+ "HasVCUBEInsts",
+ "true",
+ "Has V_CUBE* instructions"
+>;
+
+def FeatureVLERPInsts : SubtargetFeature<"V_LERP-insts",
+ "HasVLERPInsts",
+ "true",
+ "Has V_LERP* instructions"
+>;
+
+def FeatureVSADInsts : SubtargetFeature<"V_SAD-insts",
+ "HasVSADInsts",
+ "true",
+ "Has V_SAD* instructions"
+>;
+
+def FeatureVQSADInsts : SubtargetFeature<"V_QSAD-insts",
+ "HasVQSADInsts",
+ "true",
+ "Has V_QSAD* instructions"
+>;
+
+def FeatureVCVTNORMInsts : SubtargetFeature<"V_CVT_NORM-insts",
+ "HasVCVTNORMInsts",
+ "true",
+ "Has V_CVT_NORM* instructions"
+>;
+
+def FeatureVCVTPKNORMVOP2Insts : SubtargetFeature<"V_CVT_PKNORM-VOP2-insts",
+ "HasVCVTPKNORMVOP2Insts",
+ "true",
+ "Has V_CVT_NORM* VOP2 instructions"
+>;
+
+def FeatureVCVTPKNORMVOP3Insts : SubtargetFeature<"V_CVT_PKNORM-VOP3-insts",
+ "HasVCVTPKNORMVOP3Insts",
+ "true",
+ "Has V_CVT_NORM* VOP3 instructions"
+>;
+
def FeatureAtomicDsPkAdd16Insts : SubtargetFeature<"atomic-ds-pk-add-16-insts",
"HasAtomicDsPkAdd16Insts",
"true",
@@ -1494,7 +1536,8 @@ def FeatureSouthernIslands : GCNSubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
FeatureTrigReducedRange, FeatureExtendedImageInsts, FeatureImageInsts,
FeatureGDS, FeatureGWS, FeatureDefaultComponentZero,
FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF64GlobalInsts,
- FeatureVmemWriteVgprInOrder
+ FeatureVmemWriteVgprInOrder, FeatureVCUBEInsts, FeatureVLERPInsts,
+ FeatureVSADInsts, FeatureVCVTPKNORMVOP2Insts
]
>;
@@ -1508,7 +1551,8 @@ def FeatureSeaIslands : GCNSubtargetFeatureGeneration<"SEA_ISLANDS",
FeatureImageInsts, FeatureGDS, FeatureGWS, FeatureDefaultComponentZero,
FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF64GlobalInsts,
FeatureAtomicFMinFMaxF32FlatInsts, FeatureAtomicFMinFMaxF64FlatInsts,
- FeatureVmemWriteVgprInOrder
+ FeatureVmemWriteVgprInOrder, FeatureVCUBEInsts, FeatureVLERPInsts,
+ FeatureVSADInsts, FeatureVQSADInsts, FeatureVCVTPKNORMVOP2Insts
]
>;
@@ -1524,7 +1568,9 @@ def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
FeatureGFX7GFX8GFX9Insts, FeatureSMemTimeInst, FeatureMadMacF32Insts,
FeatureDsSrc2Insts, FeatureExtendedImageInsts, FeatureFastDenormalF32,
FeatureUnalignedBufferAccess, FeatureImageInsts, FeatureGDS, FeatureGWS,
- FeatureDefaultComponentZero, FeatureVmemWriteVgprInOrder
+ FeatureDefaultComponentZero, FeatureVmemWriteVgprInOrder, FeatureVCUBEInsts,
+ FeatureVLERPInsts, FeatureVSADInsts, FeatureVQSADInsts,
+ FeatureVCVTPKNORMVOP2Insts
]
>;
@@ -1543,7 +1589,10 @@ def FeatureGFX9 : GCNSubtargetFeatureGeneration<"GFX9",
FeatureA16, FeatureSMemTimeInst, FeatureFastDenormalF32, FeatureSupportsXNACK,
FeatureUnalignedBufferAccess, FeatureUnalignedScratchAccess,
FeatureUnalignedDSAccess, FeatureNegativeScratchOffsetBug, FeatureGWS,
- FeatureDefaultComponentZero,FeatureVmemWriteVgprInOrder, FeatureMemToLDSLoad
+ FeatureDefaultComponentZero,FeatureVmemWriteVgprInOrder, FeatureMemToLDSLoad,
+ FeatureVCUBEInsts, FeatureVLERPInsts, FeatureVSADInsts, FeatureVQSADInsts,
+ FeatureVCVTNORMInsts, FeatureVCVTPKNORMVOP2Insts,
+ FeatureVCVTPKNORMVOP3Insts
]
>;
@@ -1567,7 +1616,10 @@ def FeatureGFX10 : GCNSubtargetFeatureGeneration<"GFX10",
FeatureDefaultComponentZero, FeatureMaxHardClauseLength63,
FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF64GlobalInsts,
FeatureAtomicFMinFMaxF32FlatInsts, FeatureAtomicFMinFMaxF64FlatInsts,
- FeatureVmemWriteVgprInOrder, FeatureMemToLDSLoad
+ FeatureVmemWriteVgprInOrder, FeatureMemToLDSLoad, FeatureVCUBEInsts,
+ FeatureVLERPInsts, FeatureVSADInsts, FeatureVQSADInsts,
+ FeatureVCVTNORMInsts, FeatureVCVTPKNORMVOP2Insts,
+ FeatureVCVTPKNORMVOP3Insts
]
>;
@@ -1590,7 +1642,9 @@ def FeatureGFX11 : GCNSubtargetFeatureGeneration<"GFX11",
FeatureUnalignedDSAccess, FeatureGDS, FeatureGWS,
FeatureDefaultComponentZero, FeatureMaxHardClauseLength32,
FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF32FlatInsts,
- FeatureVmemWriteVgprInOrder
+ FeatureVmemWriteVgprInOrder, FeatureVCUBEInsts, FeatureVLERPInsts,
+ FeatureVSADInsts, FeatureVQSADInsts, FeatureVCVTNORMInsts,
+ FeatureVCVTPKNORMVOP2Insts, FeatureVCVTPKNORMVOP3Insts
]
>;
@@ -2069,10 +2123,17 @@ def FeatureISAVersion12 : FeatureSet<
FeatureMemoryAtomicFAddF32DenormalSupport,
FeatureBVHDualAndBVH8Insts,
FeatureWaitsBeforeSystemScopeStores,
- FeatureD16Writes32BitVgpr
+ FeatureD16Writes32BitVgpr,
+ FeatureVCUBEInsts,
+ FeatureVLERPInsts,
+ FeatureVSADInsts,
+ FeatureVQSADInsts,
+ FeatureVCVTNORMInsts,
+ FeatureVCVTPKNORMVOP2Insts,
+ FeatureVCVTPKNORMVOP3Insts
]>;
-def FeatureISAVersion12_50 : FeatureSet<
+def FeatureISAVersion12_50_Common : FeatureSet<
[FeatureGFX12,
FeatureGFX1250Insts,
FeatureRequiresAlignedVGPRs,
@@ -2147,6 +2208,16 @@ def FeatureISAVersion12_50 : FeatureSet<
FeatureD16Writes32BitVgpr,
]>;
+def FeatureISAVersion12_50 : FeatureSet<
+ !listconcat(FeatureISAVersion12_50_Common.Features,
+ [FeatureVCUBEInsts,
+ FeatureVLERPInsts,
+ FeatureVSADInsts,
+ FeatureVQSADInsts,
+ FeatureVCVTNORMInsts,
+ FeatureVCVTPKNORMVOP2Insts,
+ FeatureVCVTPKNORMVOP3Insts])>;
+
def FeatureISAVersion12_51 : FeatureSet<
!listconcat(FeatureISAVersion12_50.Features,
[FeatureDPALU_DPP])>;
@@ -2816,6 +2887,27 @@ def HasFP8Insts : Predicate<"Subtarget->hasFP8Insts()">,
def HasFP8ConversionInsts : Predicate<"Subtarget->hasFP8ConversionInsts()">,
AssemblerPredicate<(all_of FeatureFP8ConversionInsts)>;
+def HasVCUBEInsts : Predicate<"Subtarget->hasVCUBEInsts()">,
+ AssemblerPredicate<(all_of FeatureVCUBEInsts)>;
+
+def HasVLERPInsts : Predicate<"Subtarget->hasVLERPInsts()">,
+ AssemblerPredicate<(all_of FeatureVLERPInsts)>;
+
+def HasVSADInsts : Predicate<"Subtarget->hasVSADInsts()">,
+ AssemblerPredicate<(all_of FeatureVSADInsts)>;
+
+def HasVQSADInsts : Predicate<"Subtarget->hasVQSADInsts()">,
+ AssemblerPredicate<(all_of FeatureVQSADInsts)>;
+
+def HasVCVTNORMInsts : Predicate<"Subtarget->hasVCVTNORMInsts()">,
+ AssemblerPredicate<(all_of FeatureVCVTNORMInsts)>;
+
+def HasVCVTPKNORMVOP2Insts : Predicate<"Subtarget->hasVCVTPKNORMVOP2Insts()">,
+ AssemblerPredicate<(all_of FeatureVCVTPKNORMVOP2Insts)>;
+
+def HasVCVTPKNORMVOP3Insts : Predicate<"Subtarget->hasVCVTPKNORMVOP3Insts()">,
+ AssemblerPredicate<(all_of FeatureVCVTPKNORMVOP3Insts)>;
+
def HasFP8E5M3Insts : Predicate<"Subtarget->hasFP8E5M3Insts()">,
AssemblerPredicate<(all_of FeatureFP8E5M3Insts)>;
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index f377b8aaf1333..862cee468b7d3 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -166,6 +166,13 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool HasMAIInsts = false;
bool HasFP8Insts = false;
bool HasFP8ConversionInsts = false;
+ bool HasVCUBEInsts = false;
+ bool HasVLERPInsts = false;
+ bool HasVSADInsts = false;
+ bool HasVQSADInsts = false;
+ bool HasVCVTNORMInsts = false;
+ bool HasVCVTPKNORMVOP2Insts = false;
+ bool HasVCVTPKNORMVOP3Insts = false;
bool HasFP8E5M3Insts = false;
bool HasCvtFP8Vop1Bug = false;
bool HasPkFmacF16Inst = false;
@@ -892,6 +899,20 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool hasFP8ConversionInsts() const { return HasFP8ConversionInsts; }
+ bool hasVCUBEInsts() const { return HasVCUBEInsts; }
+
+ bool hasVLERPInsts() const { return HasVLERPInsts; }
+
+ bool hasVSADInsts() const { return HasVSADInsts; }
+
+ bool hasVQSADInsts() const { return HasVQSADInsts; }
+
+ bool hasVCVTNORMInsts() const { return HasVCVTNORMInsts; }
+
+ bool hasVCVTPKNORMVOP2Insts() const { return HasVCVTPKNORMVOP2Insts; }
+
+ bool hasVCVTPKNORMVOP3Insts() const { return HasVCVTPKNORMVOP3Insts; }
+
bool hasFP8E5M3Insts() const { return HasFP8E5M3Insts; }
bool hasPkFmacF16Inst() const {
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 85adcab55b742..23095ba17cae8 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -616,15 +616,15 @@ let SubtargetPredicate = isGFX9Plus in {
let isReMaterializable = 1 in
defm V_SAT_PK_U8_I16 : VOP1Inst_t16<"v_sat_pk_u8_i16", VOP_I16_I32>;
-
- let mayRaiseFPException = 0 in {
- defm V_CVT_NORM_I16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_norm_i16_f16",
- VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16>;
- defm V_CVT_NORM_U16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_norm_u16_f16",
- VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16>;
- } // End mayRaiseFPException = 0
} // End SubtargetPredicate = isGFX9Plus
+let mayRaiseFPException = 0, SubtargetPredicate = HasVCVTNORMInsts in {
+defm V_CVT_NORM_I16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_norm_i16_f16",
+ VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16>;
+defm V_CVT_NORM_U16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_norm_u16_f16",
+ VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16>;
+} // End mayRaiseFPException = 0, SubtargetPredicate = HasVCVTNORMInsts
+
let SubtargetPredicate = isGFX9Only in {
defm V_SCREEN_PARTITION_4SE_B32 : VOP1Inst <"v_screen_partition_4se_b32", VOP_I32_I32>;
} // End SubtargetPredicate = isGFX9Only
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index d87d250a034f0..afd2d610b17de 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -971,7 +971,7 @@ defm V_MBCNT_HI_U32_B32 : VOP2Inst <"v_mbcnt_hi_u32_b32", VOP_I32_I32_I32, int_a
} // End IsNeverUniform = 1
defm V_LDEXP_F32 : VOP2Inst <"v_ldexp_f32", VOP_F32_F32_I32, any_fldexp>;
-let ReadsModeReg = 0, mayRaiseFPException = 0 in {
+let ReadsModeReg = 0, mayRaiseFPException = 0, SubtargetPredicate = HasVCVTPKNORMVOP2Insts in {
defm V_CVT_PKNORM_I16_F32 : VOP2Inst <"v_cvt_pknorm_i16_f32", VOP_V2I16_F32_F32, AMDGPUpknorm_i16_f32>;
defm V_CVT_PKNORM_U16_F32 : VOP2Inst <"v_cvt_pknorm_u16_f32", VOP_V2I16_F32_F32, AMDGPUpknorm_u16_f32>;
}
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 05ba76ab489d8..3d82866c1e5a7 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -185,7 +185,8 @@ defm V_FMA_LEGACY_F32 : VOP3Inst <"v_fma_legacy_f32",
defm V_MAD_I32_I24 : VOP3Inst <"v_mad_i32_i24", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
defm V_MAD_U32_U24 : VOP3Inst <"v_mad_u32_u24", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
defm V_FMA_F32 : VOP3Inst <"v_fma_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, any_fma>, VOPD_Component<0x13, "v_fma_f32">;
-defm V_LERP_U8 : VOP3Inst <"v_lerp_u8", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_lerp>;
+let SubtargetPredicate = HasVLERPInsts in
+ defm V_LERP_U8 : VOP3Inst <"v_lerp_u8", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_lerp>;
let SchedRW = [WriteIntMul] in {
let SubtargetPredicate = HasMadU32Inst in
@@ -258,12 +259,12 @@ defm V_DIV_FMAS_F64 : VOP3Inst <"v_div_fmas_f64", VOP_F64_F64_F64_F64_VCC>;
} // End isCommutable = 1
let isReMaterializable = 1 in {
-let mayRaiseFPException = 0 in {
+let mayRaiseFPException = 0, SubtargetPredicate = HasVCUBEInsts in {
defm V_CUBEID_F32 : VOP3Inst <"v_cubeid_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, int_amdgcn_cubeid>;
defm V_CUBESC_F32 : VOP3Inst <"v_cubesc_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, int_amdgcn_cubesc>;
defm V_CUBETC_F32 : VOP3Inst <"v_cubetc_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, int_amdgcn_cubetc>;
defm V_CUBEMA_F32 : VOP3Inst <"v_cubema_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, int_amdgcn_cubema>;
-} // End mayRaiseFPException
+} // mayRaiseFPException = 0, SubtargetPredicate = HasVCUBEInsts
defm V_BFE_U32 : VOP3Inst <"v_bfe_u32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUbfe_u32>;
defm V_BFE_I32 : VOP3Inst <"v_bfe_i32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUbfe_i32>;
@@ -306,12 +307,12 @@ let SubtargetPredicate = HasMinimum3Maximum3F32, ReadsModeReg = 0 in {
defm V_MAXIMUM3_F32 : VOP3Inst <"v_maximum3_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, AMDGPUfmaximum3>;
} // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0
-let isCommutable = 1 in {
+let isCommutable = 1, SubtargetPredicate = HasVSADInsts in {
defm V_SAD_U8 : VOP3Inst <"v_sad_u8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
defm V_SAD_HI_U8 : VOP3Inst <"v_sad_hi_u8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
defm V_SAD_U16 : VOP3Inst <"v_sad_u16", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
defm V_SAD_U32 : VOP3Inst <"v_sad_u32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
-} // End isCommutable = 1
+} // End isCommutable = 1, SubtargetPredicate = HasVSADInsts
defm V_CVT_PK_U8_F32 : VOP3Inst<"v_cvt_pk_u8_f32", VOP3_Profile<VOP_I32_F32_I32_I32>, int_amdgcn_cvt_pk_u8_f32>;
defm V_DIV_FIXUP_F32 : VOP3Inst <"v_div_fixup_f32", DIV_FIXUP_F32_PROF, AMDGPUdiv_fixup>;
@@ -424,7 +425,8 @@ def VOPProfileMQSAD : VOP3_Profile<VOP_V4I32_I64_I32_V4I32, VOP3_CLAMP> {
let SubtargetPredicate = isGFX7Plus in {
let Constraints = "@earlyclobber $vdst", SchedRW = [WriteQuarterRate32] in {
-defm V_QSAD_PK_U16_U8 : VOP3Inst <"v_qsad_pk_u16_u8", VOP3_Profile<VOP_I64_I64_I32_I64, VOP3_CLAMP>>;
+let SubtargetPredicate = HasVQSADInsts in
+ defm V_QSAD_PK_U16_U8 : VOP3Inst <"v_qsad_pk_u16_u8", VOP3_Profile<VOP_I64_I64_I32_I64, VOP3_CLAMP>>;
defm V_MQSAD_U32_U8 : VOP3Inst <"v_mqsad_u32_u8", VOPProfileMQSAD>;
} // End Constraints = "@earlyclobber $vdst", SchedRW = [WriteQuarterRate32]
} // End SubtargetPredicate = isGFX7Plus
@@ -789,9 +791,6 @@ let isCommutable = 1 in {
defm V_MAD_I32_I16 : VOP3Inst_t16 <"v_mad_i32_i16", VOP_I32_I16_I16_I32>;
} // End isCommutable = 1
-defm V_CVT_PKNORM_I16_F16 : VOP3Inst_t16 <"v_cvt_pknorm_i16_f16", VOP_B32_F16_F16>;
-defm V_CVT_PKNORM_U16_F16 : VOP3Inst_t16 <"v_cvt_pknorm_u16_f16", VOP_B32_F16_F16>;
-
defm V_PACK_B32_F16 : VOP3Inst_t16 <"v_pack_b32_f16", VOP_B32_F16_F16>;
let isReMaterializable = 1 in {
@@ -996,6 +995,11 @@ def : GCNPat<(DivergentBinFrag<or> (or_oneuse i64:$src0, i64:$src1), i64:$src2),
} // End SubtargetPredicate = isGFX9Plus
+let SubtargetPredicate = HasVCVTPKNORMVOP3Insts in {
+ defm V_CVT_PKNORM_I16_F16 : VOP3Inst_t16 <"v_cvt_pknorm_i16_f16", VOP_B32_F16_F16>;
+ defm V_CVT_PKNORM_U16_F16 : VOP3Inst_t16 <"v_cvt_pknorm_u16_f16", VOP_B32_F16_F16>;
+} // end SubtargetPredicate = HasVCVTPKNORMVOP3Insts
+
// FIXME: Probably should hardcode clamp bit in pseudo and avoid this.
class OpSelBinOpClampPat<SDPatternOperator node,
Instruction inst> : GCNPat<
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lerp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lerp.ll
index 43c69baaf3e7f..49169eec072b6 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lerp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lerp.ll
@@ -1,4 +1,3 @@
-; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefix=GCN %s
declare i32 @llvm.amdgcn.lerp(i32, i32, i32) #0
>From 33276544f6cd6a96f783bf66dfee81d26a3e8b96 Mon Sep 17 00:00:00 2001
From: shore <372660931 at qq.com>
Date: Fri, 14 Nov 2025 09:45:23 +0800
Subject: [PATCH 02/11] fix comments
---
llvm/lib/Target/AMDGPU/AMDGPU.td | 124 ++++++++++-----------
llvm/lib/Target/AMDGPU/GCNSubtarget.h | 28 ++---
llvm/lib/Target/AMDGPU/VOP1Instructions.td | 4 +-
llvm/lib/Target/AMDGPU/VOP2Instructions.td | 2 +-
llvm/lib/Target/AMDGPU/VOP3Instructions.td | 16 +--
5 files changed, 87 insertions(+), 87 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index fe2a192f0f372..c5d63e5000767 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -901,46 +901,46 @@ def FeaturePkFmacF16Inst : SubtargetFeature<"pk-fmac-f16-inst",
"Has v_pk_fmac_f16 instruction"
>;
-def FeatureVCUBEInsts : SubtargetFeature<"V_CUBE-Insts",
- "HasVCUBEInsts",
+def FeatureCubeInsts : SubtargetFeature<"V_CUBE-Insts",
+ "HasCubeInsts",
"true",
"Has V_CUBE* instructions"
>;
-def FeatureVLERPInsts : SubtargetFeature<"V_LERP-insts",
- "HasVLERPInsts",
+def FeatureLerpInst : SubtargetFeature<"V_LERP-insts",
+ "HasLerpInst",
"true",
- "Has V_LERP* instructions"
+ "Has v_lerp_u8 instruction"
>;
-def FeatureVSADInsts : SubtargetFeature<"V_SAD-insts",
- "HasVSADInsts",
+def FeatureSadInsts : SubtargetFeature<"V_SAD-insts",
+ "HasSadInsts",
"true",
"Has V_SAD* instructions"
>;
-def FeatureVQSADInsts : SubtargetFeature<"V_QSAD-insts",
- "HasVQSADInsts",
+def FeatureQsadInsts : SubtargetFeature<"V_QSAD-insts",
+ "HasQsadInsts",
"true",
"Has V_QSAD* instructions"
>;
-def FeatureVCVTNORMInsts : SubtargetFeature<"V_CVT_NORM-insts",
- "HasVCVTNORMInsts",
+def FeatureCvtNormInsts : SubtargetFeature<"V_CVT_NORM-insts",
+ "HasCvtNormInsts",
"true",
"Has V_CVT_NORM* instructions"
>;
-def FeatureVCVTPKNORMVOP2Insts : SubtargetFeature<"V_CVT_PKNORM-VOP2-insts",
- "HasVCVTPKNORMVOP2Insts",
+def FeatureCvtPkNormVOP2Insts : SubtargetFeature<"V_CVT_PKNORM-VOP2-insts",
+ "HasCvtPkNormVOP2Insts",
"true",
- "Has V_CVT_NORM* VOP2 instructions"
+ "Has V_CVT_PK_NORM_*_F32 instructions/Has V_CVT_PK_NORM_*_F16 instructions"
>;
-def FeatureVCVTPKNORMVOP3Insts : SubtargetFeature<"V_CVT_PKNORM-VOP3-insts",
- "HasVCVTPKNORMVOP3Insts",
+def FeatureCvtPkNormVOP3Insts : SubtargetFeature<"V_CVT_PKNORM-VOP3-insts",
+ "HasCvtPkNormVOP3Insts",
"true",
- "Has V_CVT_NORM* VOP3 instructions"
+ "Has V_CVT_PK_NORM_*_F32 instructions/Has V_CVT_PK_NORM_*_F16 instructions"
>;
def FeatureAtomicDsPkAdd16Insts : SubtargetFeature<"atomic-ds-pk-add-16-insts",
@@ -1536,8 +1536,8 @@ def FeatureSouthernIslands : GCNSubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
FeatureTrigReducedRange, FeatureExtendedImageInsts, FeatureImageInsts,
FeatureGDS, FeatureGWS, FeatureDefaultComponentZero,
FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF64GlobalInsts,
- FeatureVmemWriteVgprInOrder, FeatureVCUBEInsts, FeatureVLERPInsts,
- FeatureVSADInsts, FeatureVCVTPKNORMVOP2Insts
+ FeatureVmemWriteVgprInOrder, FeatureCubeInsts, FeatureLerpInst,
+ FeatureSadInsts, FeatureCvtPkNormVOP2Insts
]
>;
@@ -1551,8 +1551,8 @@ def FeatureSeaIslands : GCNSubtargetFeatureGeneration<"SEA_ISLANDS",
FeatureImageInsts, FeatureGDS, FeatureGWS, FeatureDefaultComponentZero,
FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF64GlobalInsts,
FeatureAtomicFMinFMaxF32FlatInsts, FeatureAtomicFMinFMaxF64FlatInsts,
- FeatureVmemWriteVgprInOrder, FeatureVCUBEInsts, FeatureVLERPInsts,
- FeatureVSADInsts, FeatureVQSADInsts, FeatureVCVTPKNORMVOP2Insts
+ FeatureVmemWriteVgprInOrder, FeatureCubeInsts, FeatureLerpInst,
+ FeatureSadInsts, FeatureQsadInsts, FeatureCvtPkNormVOP2Insts
]
>;
@@ -1568,9 +1568,9 @@ def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
FeatureGFX7GFX8GFX9Insts, FeatureSMemTimeInst, FeatureMadMacF32Insts,
FeatureDsSrc2Insts, FeatureExtendedImageInsts, FeatureFastDenormalF32,
FeatureUnalignedBufferAccess, FeatureImageInsts, FeatureGDS, FeatureGWS,
- FeatureDefaultComponentZero, FeatureVmemWriteVgprInOrder, FeatureVCUBEInsts,
- FeatureVLERPInsts, FeatureVSADInsts, FeatureVQSADInsts,
- FeatureVCVTPKNORMVOP2Insts
+ FeatureDefaultComponentZero, FeatureVmemWriteVgprInOrder, FeatureCubeInsts,
+ FeatureLerpInst, FeatureSadInsts, FeatureQsadInsts,
+ FeatureCvtPkNormVOP2Insts
]
>;
@@ -1590,9 +1590,9 @@ def FeatureGFX9 : GCNSubtargetFeatureGeneration<"GFX9",
FeatureUnalignedBufferAccess, FeatureUnalignedScratchAccess,
FeatureUnalignedDSAccess, FeatureNegativeScratchOffsetBug, FeatureGWS,
FeatureDefaultComponentZero,FeatureVmemWriteVgprInOrder, FeatureMemToLDSLoad,
- FeatureVCUBEInsts, FeatureVLERPInsts, FeatureVSADInsts, FeatureVQSADInsts,
- FeatureVCVTNORMInsts, FeatureVCVTPKNORMVOP2Insts,
- FeatureVCVTPKNORMVOP3Insts
+ FeatureCubeInsts, FeatureLerpInst, FeatureSadInsts, FeatureQsadInsts,
+ FeatureCvtNormInsts, FeatureCvtPkNormVOP2Insts,
+ FeatureCvtPkNormVOP3Insts
]
>;
@@ -1616,10 +1616,10 @@ def FeatureGFX10 : GCNSubtargetFeatureGeneration<"GFX10",
FeatureDefaultComponentZero, FeatureMaxHardClauseLength63,
FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF64GlobalInsts,
FeatureAtomicFMinFMaxF32FlatInsts, FeatureAtomicFMinFMaxF64FlatInsts,
- FeatureVmemWriteVgprInOrder, FeatureMemToLDSLoad, FeatureVCUBEInsts,
- FeatureVLERPInsts, FeatureVSADInsts, FeatureVQSADInsts,
- FeatureVCVTNORMInsts, FeatureVCVTPKNORMVOP2Insts,
- FeatureVCVTPKNORMVOP3Insts
+ FeatureVmemWriteVgprInOrder, FeatureMemToLDSLoad, FeatureCubeInsts,
+ FeatureLerpInst, FeatureSadInsts, FeatureQsadInsts,
+ FeatureCvtNormInsts, FeatureCvtPkNormVOP2Insts,
+ FeatureCvtPkNormVOP3Insts
]
>;
@@ -1642,9 +1642,9 @@ def FeatureGFX11 : GCNSubtargetFeatureGeneration<"GFX11",
FeatureUnalignedDSAccess, FeatureGDS, FeatureGWS,
FeatureDefaultComponentZero, FeatureMaxHardClauseLength32,
FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF32FlatInsts,
- FeatureVmemWriteVgprInOrder, FeatureVCUBEInsts, FeatureVLERPInsts,
- FeatureVSADInsts, FeatureVQSADInsts, FeatureVCVTNORMInsts,
- FeatureVCVTPKNORMVOP2Insts, FeatureVCVTPKNORMVOP3Insts
+ FeatureVmemWriteVgprInOrder, FeatureCubeInsts, FeatureLerpInst,
+ FeatureSadInsts, FeatureQsadInsts, FeatureCvtNormInsts,
+ FeatureCvtPkNormVOP2Insts, FeatureCvtPkNormVOP3Insts
]
>;
@@ -2124,13 +2124,13 @@ def FeatureISAVersion12 : FeatureSet<
FeatureBVHDualAndBVH8Insts,
FeatureWaitsBeforeSystemScopeStores,
FeatureD16Writes32BitVgpr,
- FeatureVCUBEInsts,
- FeatureVLERPInsts,
- FeatureVSADInsts,
- FeatureVQSADInsts,
- FeatureVCVTNORMInsts,
- FeatureVCVTPKNORMVOP2Insts,
- FeatureVCVTPKNORMVOP3Insts
+ FeatureCubeInsts,
+ FeatureLerpInst,
+ FeatureSadInsts,
+ FeatureQsadInsts,
+ FeatureCvtNormInsts,
+ FeatureCvtPkNormVOP2Insts,
+ FeatureCvtPkNormVOP3Insts
]>;
def FeatureISAVersion12_50_Common : FeatureSet<
@@ -2210,13 +2210,13 @@ def FeatureISAVersion12_50_Common : FeatureSet<
def FeatureISAVersion12_50 : FeatureSet<
!listconcat(FeatureISAVersion12_50_Common.Features,
- [FeatureVCUBEInsts,
- FeatureVLERPInsts,
- FeatureVSADInsts,
- FeatureVQSADInsts,
- FeatureVCVTNORMInsts,
- FeatureVCVTPKNORMVOP2Insts,
- FeatureVCVTPKNORMVOP3Insts])>;
+ [FeatureCubeInsts,
+ FeatureLerpInst,
+ FeatureSadInsts,
+ FeatureQsadInsts,
+ FeatureCvtNormInsts,
+ FeatureCvtPkNormVOP2Insts,
+ FeatureCvtPkNormVOP3Insts])>;
def FeatureISAVersion12_51 : FeatureSet<
!listconcat(FeatureISAVersion12_50.Features,
@@ -2887,26 +2887,26 @@ def HasFP8Insts : Predicate<"Subtarget->hasFP8Insts()">,
def HasFP8ConversionInsts : Predicate<"Subtarget->hasFP8ConversionInsts()">,
AssemblerPredicate<(all_of FeatureFP8ConversionInsts)>;
-def HasVCUBEInsts : Predicate<"Subtarget->hasVCUBEInsts()">,
- AssemblerPredicate<(all_of FeatureVCUBEInsts)>;
+def HasCubeInsts : Predicate<"Subtarget->hasCubeInsts()">,
+ AssemblerPredicate<(all_of FeatureCubeInsts)>;
-def HasVLERPInsts : Predicate<"Subtarget->hasVLERPInsts()">,
- AssemblerPredicate<(all_of FeatureVLERPInsts)>;
+def HasLerpInst : Predicate<"Subtarget->hasLerpInst()">,
+ AssemblerPredicate<(all_of FeatureLerpInst)>;
-def HasVSADInsts : Predicate<"Subtarget->hasVSADInsts()">,
- AssemblerPredicate<(all_of FeatureVSADInsts)>;
+def HasSadInsts : Predicate<"Subtarget->hasSadInsts()">,
+ AssemblerPredicate<(all_of FeatureSadInsts)>;
-def HasVQSADInsts : Predicate<"Subtarget->hasVQSADInsts()">,
- AssemblerPredicate<(all_of FeatureVQSADInsts)>;
+def HasQsadInsts : Predicate<"Subtarget->hasQsadInsts()">,
+ AssemblerPredicate<(all_of FeatureQsadInsts)>;
-def HasVCVTNORMInsts : Predicate<"Subtarget->hasVCVTNORMInsts()">,
- AssemblerPredicate<(all_of FeatureVCVTNORMInsts)>;
+def HasCvtNormInsts : Predicate<"Subtarget->hasCvtNormInsts()">,
+ AssemblerPredicate<(all_of FeatureCvtNormInsts)>;
-def HasVCVTPKNORMVOP2Insts : Predicate<"Subtarget->hasVCVTPKNORMVOP2Insts()">,
- AssemblerPredicate<(all_of FeatureVCVTPKNORMVOP2Insts)>;
+def HasCvtPkNormVOP2Insts : Predicate<"Subtarget->hasCvtPkNormVOP2Insts()">,
+ AssemblerPredicate<(all_of FeatureCvtPkNormVOP2Insts)>;
-def HasVCVTPKNORMVOP3Insts : Predicate<"Subtarget->hasVCVTPKNORMVOP3Insts()">,
- AssemblerPredicate<(all_of FeatureVCVTPKNORMVOP3Insts)>;
+def HasCvtPkNormVOP3Insts : Predicate<"Subtarget->hasCvtPkNormVOP3Insts()">,
+ AssemblerPredicate<(all_of FeatureCvtPkNormVOP3Insts)>;
def HasFP8E5M3Insts : Predicate<"Subtarget->hasFP8E5M3Insts()">,
AssemblerPredicate<(all_of FeatureFP8E5M3Insts)>;
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 862cee468b7d3..85260c4f123c7 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -166,13 +166,13 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool HasMAIInsts = false;
bool HasFP8Insts = false;
bool HasFP8ConversionInsts = false;
- bool HasVCUBEInsts = false;
- bool HasVLERPInsts = false;
- bool HasVSADInsts = false;
- bool HasVQSADInsts = false;
- bool HasVCVTNORMInsts = false;
- bool HasVCVTPKNORMVOP2Insts = false;
- bool HasVCVTPKNORMVOP3Insts = false;
+ bool HasCubeInsts = false;
+ bool HasLerpInst = false;
+ bool HasSadInsts = false;
+ bool HasQsadInsts = false;
+ bool HasCvtNormInsts = false;
+ bool HasCvtPkNormVOP2Insts = false;
+ bool HasCvtPkNormVOP3Insts = false;
bool HasFP8E5M3Insts = false;
bool HasCvtFP8Vop1Bug = false;
bool HasPkFmacF16Inst = false;
@@ -899,19 +899,19 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool hasFP8ConversionInsts() const { return HasFP8ConversionInsts; }
- bool hasVCUBEInsts() const { return HasVCUBEInsts; }
+ bool hasCubeInsts() const { return HasCubeInsts; }
- bool hasVLERPInsts() const { return HasVLERPInsts; }
+ bool hasLerpInst() const { return HasLerpInst; }
- bool hasVSADInsts() const { return HasVSADInsts; }
+ bool hasSadInsts() const { return HasSadInsts; }
- bool hasVQSADInsts() const { return HasVQSADInsts; }
+ bool hasQsadInsts() const { return HasQsadInsts; }
- bool hasVCVTNORMInsts() const { return HasVCVTNORMInsts; }
+ bool hasCvtNormInsts() const { return HasCvtNormInsts; }
- bool hasVCVTPKNORMVOP2Insts() const { return HasVCVTPKNORMVOP2Insts; }
+ bool hasCvtPkNormVOP2Insts() const { return HasCvtPkNormVOP2Insts; }
- bool hasVCVTPKNORMVOP3Insts() const { return HasVCVTPKNORMVOP3Insts; }
+ bool hasCvtPkNormVOP3Insts() const { return HasCvtPkNormVOP3Insts; }
bool hasFP8E5M3Insts() const { return HasFP8E5M3Insts; }
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 23095ba17cae8..1d1e95908fce6 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -618,12 +618,12 @@ let SubtargetPredicate = isGFX9Plus in {
defm V_SAT_PK_U8_I16 : VOP1Inst_t16<"v_sat_pk_u8_i16", VOP_I16_I32>;
} // End SubtargetPredicate = isGFX9Plus
-let mayRaiseFPException = 0, SubtargetPredicate = HasVCVTNORMInsts in {
+let mayRaiseFPException = 0, SubtargetPredicate = HasCvtNormInsts in {
defm V_CVT_NORM_I16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_norm_i16_f16",
VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16>;
defm V_CVT_NORM_U16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_norm_u16_f16",
VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16>;
-} // End mayRaiseFPException = 0, SubtargetPredicate = HasVCVTNORMInsts
+} // End mayRaiseFPException = 0, SubtargetPredicate = HasCvtNormInsts
let SubtargetPredicate = isGFX9Only in {
defm V_SCREEN_PARTITION_4SE_B32 : VOP1Inst <"v_screen_partition_4se_b32", VOP_I32_I32>;
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index afd2d610b17de..dbb7862ab4ab5 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -971,7 +971,7 @@ defm V_MBCNT_HI_U32_B32 : VOP2Inst <"v_mbcnt_hi_u32_b32", VOP_I32_I32_I32, int_a
} // End IsNeverUniform = 1
defm V_LDEXP_F32 : VOP2Inst <"v_ldexp_f32", VOP_F32_F32_I32, any_fldexp>;
-let ReadsModeReg = 0, mayRaiseFPException = 0, SubtargetPredicate = HasVCVTPKNORMVOP2Insts in {
+let ReadsModeReg = 0, mayRaiseFPException = 0, SubtargetPredicate = HasCvtPkNormVOP2Insts in {
defm V_CVT_PKNORM_I16_F32 : VOP2Inst <"v_cvt_pknorm_i16_f32", VOP_V2I16_F32_F32, AMDGPUpknorm_i16_f32>;
defm V_CVT_PKNORM_U16_F32 : VOP2Inst <"v_cvt_pknorm_u16_f32", VOP_V2I16_F32_F32, AMDGPUpknorm_u16_f32>;
}
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 3d82866c1e5a7..872bde501cd2d 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -185,7 +185,7 @@ defm V_FMA_LEGACY_F32 : VOP3Inst <"v_fma_legacy_f32",
defm V_MAD_I32_I24 : VOP3Inst <"v_mad_i32_i24", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
defm V_MAD_U32_U24 : VOP3Inst <"v_mad_u32_u24", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
defm V_FMA_F32 : VOP3Inst <"v_fma_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, any_fma>, VOPD_Component<0x13, "v_fma_f32">;
-let SubtargetPredicate = HasVLERPInsts in
+let SubtargetPredicate = HasLerpInst in
defm V_LERP_U8 : VOP3Inst <"v_lerp_u8", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_lerp>;
let SchedRW = [WriteIntMul] in {
@@ -259,12 +259,12 @@ defm V_DIV_FMAS_F64 : VOP3Inst <"v_div_fmas_f64", VOP_F64_F64_F64_F64_VCC>;
} // End isCommutable = 1
let isReMaterializable = 1 in {
-let mayRaiseFPException = 0, SubtargetPredicate = HasVCUBEInsts in {
+let mayRaiseFPException = 0, SubtargetPredicate = HasCubeInsts in {
defm V_CUBEID_F32 : VOP3Inst <"v_cubeid_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, int_amdgcn_cubeid>;
defm V_CUBESC_F32 : VOP3Inst <"v_cubesc_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, int_amdgcn_cubesc>;
defm V_CUBETC_F32 : VOP3Inst <"v_cubetc_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, int_amdgcn_cubetc>;
defm V_CUBEMA_F32 : VOP3Inst <"v_cubema_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, int_amdgcn_cubema>;
-} // mayRaiseFPException = 0, SubtargetPredicate = HasVCUBEInsts
+} // mayRaiseFPException = 0, SubtargetPredicate = HasCubeInsts
defm V_BFE_U32 : VOP3Inst <"v_bfe_u32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUbfe_u32>;
defm V_BFE_I32 : VOP3Inst <"v_bfe_i32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUbfe_i32>;
@@ -307,12 +307,12 @@ let SubtargetPredicate = HasMinimum3Maximum3F32, ReadsModeReg = 0 in {
defm V_MAXIMUM3_F32 : VOP3Inst <"v_maximum3_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, AMDGPUfmaximum3>;
} // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0
-let isCommutable = 1, SubtargetPredicate = HasVSADInsts in {
+let isCommutable = 1, SubtargetPredicate = HasSadInsts in {
defm V_SAD_U8 : VOP3Inst <"v_sad_u8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
defm V_SAD_HI_U8 : VOP3Inst <"v_sad_hi_u8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
defm V_SAD_U16 : VOP3Inst <"v_sad_u16", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
defm V_SAD_U32 : VOP3Inst <"v_sad_u32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
-} // End isCommutable = 1, SubtargetPredicate = HasVSADInsts
+} // End isCommutable = 1, SubtargetPredicate = HasSadInsts
defm V_CVT_PK_U8_F32 : VOP3Inst<"v_cvt_pk_u8_f32", VOP3_Profile<VOP_I32_F32_I32_I32>, int_amdgcn_cvt_pk_u8_f32>;
defm V_DIV_FIXUP_F32 : VOP3Inst <"v_div_fixup_f32", DIV_FIXUP_F32_PROF, AMDGPUdiv_fixup>;
@@ -425,7 +425,7 @@ def VOPProfileMQSAD : VOP3_Profile<VOP_V4I32_I64_I32_V4I32, VOP3_CLAMP> {
let SubtargetPredicate = isGFX7Plus in {
let Constraints = "@earlyclobber $vdst", SchedRW = [WriteQuarterRate32] in {
-let SubtargetPredicate = HasVQSADInsts in
+let SubtargetPredicate = HasQsadInsts in
defm V_QSAD_PK_U16_U8 : VOP3Inst <"v_qsad_pk_u16_u8", VOP3_Profile<VOP_I64_I64_I32_I64, VOP3_CLAMP>>;
defm V_MQSAD_U32_U8 : VOP3Inst <"v_mqsad_u32_u8", VOPProfileMQSAD>;
} // End Constraints = "@earlyclobber $vdst", SchedRW = [WriteQuarterRate32]
@@ -995,10 +995,10 @@ def : GCNPat<(DivergentBinFrag<or> (or_oneuse i64:$src0, i64:$src1), i64:$src2),
} // End SubtargetPredicate = isGFX9Plus
-let SubtargetPredicate = HasVCVTPKNORMVOP3Insts in {
+let SubtargetPredicate = HasCvtPkNormVOP3Insts in {
defm V_CVT_PKNORM_I16_F16 : VOP3Inst_t16 <"v_cvt_pknorm_i16_f16", VOP_B32_F16_F16>;
defm V_CVT_PKNORM_U16_F16 : VOP3Inst_t16 <"v_cvt_pknorm_u16_f16", VOP_B32_F16_F16>;
-} // end SubtargetPredicate = HasVCVTPKNORMVOP3Insts
+} // end SubtargetPredicate = HasCvtPkNormVOP3Insts
// FIXME: Probably should hardcode clamp bit in pseudo and avoid this.
class OpSelBinOpClampPat<SDPatternOperator node,
>From cd83519edfef4bc692b656b7c1a10cbb5727f17c Mon Sep 17 00:00:00 2001
From: shore <372660931 at qq.com>
Date: Tue, 18 Nov 2025 10:59:53 +0800
Subject: [PATCH 03/11] add feature to builtins
---
clang/include/clang/Basic/BuiltinsAMDGPU.def | 22 ++++++++++----------
llvm/lib/Target/AMDGPU/AMDGPU.td | 4 ++--
2 files changed, 13 insertions(+), 13 deletions(-)
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 2b6fcb1fd479b..0dfa9c13792cf 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -122,13 +122,13 @@ BUILTIN(__builtin_amdgcn_frexp_exp, "id", "nc")
BUILTIN(__builtin_amdgcn_frexp_expf, "if", "nc")
BUILTIN(__builtin_amdgcn_fract, "dd", "nc")
BUILTIN(__builtin_amdgcn_fractf, "ff", "nc")
-BUILTIN(__builtin_amdgcn_lerp, "UiUiUiUi", "nc")
+TARGET_BUILTIN(__builtin_amdgcn_lerp, "UiUiUiUi", "nc", "HasLerpInst")
BUILTIN(__builtin_amdgcn_class, "bdi", "nc")
BUILTIN(__builtin_amdgcn_classf, "bfi", "nc")
-BUILTIN(__builtin_amdgcn_cubeid, "ffff", "nc")
-BUILTIN(__builtin_amdgcn_cubesc, "ffff", "nc")
-BUILTIN(__builtin_amdgcn_cubetc, "ffff", "nc")
-BUILTIN(__builtin_amdgcn_cubema, "ffff", "nc")
+TARGET_BUILTIN(__builtin_amdgcn_cubeid, "ffff", "nc", "HasCubeInsts")
+TARGET_BUILTIN(__builtin_amdgcn_cubesc, "ffff", "nc", "HasCubeInsts")
+TARGET_BUILTIN(__builtin_amdgcn_cubetc, "ffff", "nc", "HasCubeInsts")
+TARGET_BUILTIN(__builtin_amdgcn_cubema, "ffff", "nc", "HasCubeInsts")
BUILTIN(__builtin_amdgcn_s_sleep, "vIi", "n")
BUILTIN(__builtin_amdgcn_s_incperflevel, "vIi", "n")
BUILTIN(__builtin_amdgcn_s_decperflevel, "vIi", "n")
@@ -149,17 +149,17 @@ BUILTIN(__builtin_amdgcn_alignbyte, "UiUiUiUi", "nc")
BUILTIN(__builtin_amdgcn_ubfe, "UiUiUiUi", "nc")
BUILTIN(__builtin_amdgcn_sbfe, "UiUiUiUi", "nc")
BUILTIN(__builtin_amdgcn_cvt_pkrtz, "E2hff", "nc")
-BUILTIN(__builtin_amdgcn_cvt_pknorm_i16, "E2sff", "nc")
-BUILTIN(__builtin_amdgcn_cvt_pknorm_u16, "E2Usff", "nc")
+TARGET_BUILTIN(__builtin_amdgcn_cvt_pknorm_i16, "E2sff", "nc", "HasCvtPkNormVOP2Insts")
+TARGET_BUILTIN(__builtin_amdgcn_cvt_pknorm_u16, "E2Usff", "nc", "HasCvtPkNormVOP2Insts")
BUILTIN(__builtin_amdgcn_cvt_pk_i16, "E2sii", "nc")
BUILTIN(__builtin_amdgcn_cvt_pk_u16, "E2UsUiUi", "nc")
BUILTIN(__builtin_amdgcn_cvt_pk_u8_f32, "UifUiUi", "nc")
BUILTIN(__builtin_amdgcn_cvt_off_f32_i4, "fi", "nc")
-BUILTIN(__builtin_amdgcn_sad_u8, "UiUiUiUi", "nc")
+TARGET_BUILTIN(__builtin_amdgcn_sad_u8, "UiUiUiUi", "nc", "HasSadInsts")
BUILTIN(__builtin_amdgcn_msad_u8, "UiUiUiUi", "nc")
-BUILTIN(__builtin_amdgcn_sad_hi_u8, "UiUiUiUi", "nc")
-BUILTIN(__builtin_amdgcn_sad_u16, "UiUiUiUi", "nc")
-BUILTIN(__builtin_amdgcn_qsad_pk_u16_u8, "WUiWUiUiWUi", "nc")
+TARGET_BUILTIN(__builtin_amdgcn_sad_hi_u8, "UiUiUiUi", "nc", "HasSadInsts")
+TARGET_BUILTIN(__builtin_amdgcn_sad_u16, "UiUiUiUi", "nc", "HasSadInsts")
+TARGET_BUILTIN(__builtin_amdgcn_qsad_pk_u16_u8, "WUiWUiUiWUi", "nc", "HasQsadInsts")
BUILTIN(__builtin_amdgcn_mqsad_pk_u16_u8, "WUiWUiUiWUi", "nc")
BUILTIN(__builtin_amdgcn_mqsad_u32_u8, "V4UiWUiUiV4Ui", "nc")
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index c5d63e5000767..cd8327563d9d6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -934,13 +934,13 @@ def FeatureCvtNormInsts : SubtargetFeature<"V_CVT_NORM-insts",
def FeatureCvtPkNormVOP2Insts : SubtargetFeature<"V_CVT_PKNORM-VOP2-insts",
"HasCvtPkNormVOP2Insts",
"true",
- "Has V_CVT_PK_NORM_*_F32 instructions/Has V_CVT_PK_NORM_*_F16 instructions"
+ "Has V_CVT_PK_NORM_* instructions/Has V_CVT_PK_NORM_*_F16 instructions"
>;
def FeatureCvtPkNormVOP3Insts : SubtargetFeature<"V_CVT_PKNORM-VOP3-insts",
"HasCvtPkNormVOP3Insts",
"true",
- "Has V_CVT_PK_NORM_*_F32 instructions/Has V_CVT_PK_NORM_*_F16 instructions"
+ "Has V_CVT_PK_NORM_* instructions/Has V_CVT_PK_NORM_*_F16 instructions"
>;
def FeatureAtomicDsPkAdd16Insts : SubtargetFeature<"atomic-ds-pk-add-16-insts",
>From 15e6eece5e39bd0c8fb1d1eb6e36151b0f45fdb7 Mon Sep 17 00:00:00 2001
From: shore <372660931 at qq.com>
Date: Tue, 18 Nov 2025 13:17:14 +0800
Subject: [PATCH 04/11] fix builtin features
---
clang/include/clang/Basic/BuiltinsAMDGPU.def | 22 ++---
.../CodeGenOpenCL/builtins-amdgcn-fiji.cl | 86 +++++++++++++++++++
clang/test/CodeGenOpenCL/builtins-amdgcn.cl | 67 ---------------
llvm/lib/Target/AMDGPU/AMDGPU.td | 14 +--
llvm/lib/TargetParser/TargetParser.cpp | 50 ++++++++++-
5 files changed, 153 insertions(+), 86 deletions(-)
create mode 100644 clang/test/CodeGenOpenCL/builtins-amdgcn-fiji.cl
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 0dfa9c13792cf..c349cdc6aef5b 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -122,13 +122,13 @@ BUILTIN(__builtin_amdgcn_frexp_exp, "id", "nc")
BUILTIN(__builtin_amdgcn_frexp_expf, "if", "nc")
BUILTIN(__builtin_amdgcn_fract, "dd", "nc")
BUILTIN(__builtin_amdgcn_fractf, "ff", "nc")
-TARGET_BUILTIN(__builtin_amdgcn_lerp, "UiUiUiUi", "nc", "HasLerpInst")
+TARGET_BUILTIN(__builtin_amdgcn_lerp, "UiUiUiUi", "nc", "lerp-inst")
BUILTIN(__builtin_amdgcn_class, "bdi", "nc")
BUILTIN(__builtin_amdgcn_classf, "bfi", "nc")
-TARGET_BUILTIN(__builtin_amdgcn_cubeid, "ffff", "nc", "HasCubeInsts")
-TARGET_BUILTIN(__builtin_amdgcn_cubesc, "ffff", "nc", "HasCubeInsts")
-TARGET_BUILTIN(__builtin_amdgcn_cubetc, "ffff", "nc", "HasCubeInsts")
-TARGET_BUILTIN(__builtin_amdgcn_cubema, "ffff", "nc", "HasCubeInsts")
+TARGET_BUILTIN(__builtin_amdgcn_cubeid, "ffff", "nc", "cube-insts")
+TARGET_BUILTIN(__builtin_amdgcn_cubesc, "ffff", "nc", "cube-insts")
+TARGET_BUILTIN(__builtin_amdgcn_cubetc, "ffff", "nc", "cube-insts")
+TARGET_BUILTIN(__builtin_amdgcn_cubema, "ffff", "nc", "cube-insts")
BUILTIN(__builtin_amdgcn_s_sleep, "vIi", "n")
BUILTIN(__builtin_amdgcn_s_incperflevel, "vIi", "n")
BUILTIN(__builtin_amdgcn_s_decperflevel, "vIi", "n")
@@ -149,17 +149,17 @@ BUILTIN(__builtin_amdgcn_alignbyte, "UiUiUiUi", "nc")
BUILTIN(__builtin_amdgcn_ubfe, "UiUiUiUi", "nc")
BUILTIN(__builtin_amdgcn_sbfe, "UiUiUiUi", "nc")
BUILTIN(__builtin_amdgcn_cvt_pkrtz, "E2hff", "nc")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_pknorm_i16, "E2sff", "nc", "HasCvtPkNormVOP2Insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_pknorm_u16, "E2Usff", "nc", "HasCvtPkNormVOP2Insts")
+TARGET_BUILTIN(__builtin_amdgcn_cvt_pknorm_i16, "E2sff", "nc", "cvt-pknorm-vop2-insts")
+TARGET_BUILTIN(__builtin_amdgcn_cvt_pknorm_u16, "E2Usff", "nc", "cvt-pknorm-vop2-insts")
BUILTIN(__builtin_amdgcn_cvt_pk_i16, "E2sii", "nc")
BUILTIN(__builtin_amdgcn_cvt_pk_u16, "E2UsUiUi", "nc")
BUILTIN(__builtin_amdgcn_cvt_pk_u8_f32, "UifUiUi", "nc")
BUILTIN(__builtin_amdgcn_cvt_off_f32_i4, "fi", "nc")
-TARGET_BUILTIN(__builtin_amdgcn_sad_u8, "UiUiUiUi", "nc", "HasSadInsts")
+TARGET_BUILTIN(__builtin_amdgcn_sad_u8, "UiUiUiUi", "nc", "sad-insts")
BUILTIN(__builtin_amdgcn_msad_u8, "UiUiUiUi", "nc")
-TARGET_BUILTIN(__builtin_amdgcn_sad_hi_u8, "UiUiUiUi", "nc", "HasSadInsts")
-TARGET_BUILTIN(__builtin_amdgcn_sad_u16, "UiUiUiUi", "nc", "HasSadInsts")
-TARGET_BUILTIN(__builtin_amdgcn_qsad_pk_u16_u8, "WUiWUiUiWUi", "nc", "HasQsadInsts")
+TARGET_BUILTIN(__builtin_amdgcn_sad_hi_u8, "UiUiUiUi", "nc", "sad-insts")
+TARGET_BUILTIN(__builtin_amdgcn_sad_u16, "UiUiUiUi", "nc", "sad-insts")
+TARGET_BUILTIN(__builtin_amdgcn_qsad_pk_u16_u8, "WUiWUiUiWUi", "nc", "qsad-insts")
BUILTIN(__builtin_amdgcn_mqsad_pk_u16_u8, "WUiWUiUiWUi", "nc")
BUILTIN(__builtin_amdgcn_mqsad_u32_u8, "V4UiWUiUiV4Ui", "nc")
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-fiji.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-fiji.cl
new file mode 100644
index 0000000000000..2178718f90d5a
--- /dev/null
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-fiji.cl
@@ -0,0 +1,86 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu fiji -emit-llvm -o - %s | FileCheck -enable-var-scope --check-prefixes=CHECK %s
+
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+typedef unsigned long ulong;
+typedef unsigned int uint;
+typedef unsigned short ushort;
+typedef half __attribute__((ext_vector_type(2))) half2;
+typedef short __attribute__((ext_vector_type(2))) short2;
+typedef ushort __attribute__((ext_vector_type(2))) ushort2;
+typedef uint __attribute__((ext_vector_type(4))) uint4;
+
+// CHECK-LABEL: @test_lerp
+// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.lerp
+void test_lerp(global int* out, int a, int b, int c)
+{
+ *out = __builtin_amdgcn_lerp(a, b, c);
+}
+
+// CHECK-LABEL: @test_cubeid(
+// CHECK: {{.*}}call{{.*}} float @llvm.amdgcn.cubeid(float %a, float %b, float %c)
+void test_cubeid(global float* out, float a, float b, float c) {
+ *out = __builtin_amdgcn_cubeid(a, b, c);
+}
+
+// CHECK-LABEL: @test_cubesc(
+// CHECK: {{.*}}call{{.*}} float @llvm.amdgcn.cubesc(float %a, float %b, float %c)
+void test_cubesc(global float* out, float a, float b, float c) {
+ *out = __builtin_amdgcn_cubesc(a, b, c);
+}
+
+// CHECK-LABEL: @test_cubetc(
+// CHECK: {{.*}}call{{.*}} float @llvm.amdgcn.cubetc(float %a, float %b, float %c)
+void test_cubetc(global float* out, float a, float b, float c) {
+ *out = __builtin_amdgcn_cubetc(a, b, c);
+}
+
+// CHECK-LABEL: @test_cubema(
+// CHECK: {{.*}}call{{.*}} float @llvm.amdgcn.cubema(float %a, float %b, float %c)
+void test_cubema(global float* out, float a, float b, float c) {
+ *out = __builtin_amdgcn_cubema(a, b, c);
+}
+
+// CHECK-LABEL: @test_cvt_pknorm_i16(
+// CHECK: tail call{{.*}} <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float %src0, float %src1)
+kernel void test_cvt_pknorm_i16(global short2* out, float src0, float src1) {
+ *out = __builtin_amdgcn_cvt_pknorm_i16(src0, src1);
+}
+
+// CHECK-LABEL: @test_cvt_pknorm_u16(
+// CHECK: tail call{{.*}} <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float %src0, float %src1)
+kernel void test_cvt_pknorm_u16(global ushort2* out, float src0, float src1) {
+ *out = __builtin_amdgcn_cvt_pknorm_u16(src0, src1);
+}
+
+// CHECK-LABEL: @test_sad_u8(
+// CHECK: tail call{{.*}} i32 @llvm.amdgcn.sad.u8(i32 %src0, i32 %src1, i32 %src2)
+kernel void test_sad_u8(global uint* out, uint src0, uint src1, uint src2) {
+ *out = __builtin_amdgcn_sad_u8(src0, src1, src2);
+}
+
+// CHECK-LABEL: test_msad_u8(
+// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.msad.u8(i32 %src0, i32 %src1, i32 %src2)
+kernel void test_msad_u8(global uint* out, uint src0, uint src1, uint src2) {
+ *out = __builtin_amdgcn_msad_u8(src0, src1, src2);
+}
+
+// CHECK-LABEL: test_sad_hi_u8(
+// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.sad.hi.u8(i32 %src0, i32 %src1, i32 %src2)
+kernel void test_sad_hi_u8(global uint* out, uint src0, uint src1, uint src2) {
+ *out = __builtin_amdgcn_sad_hi_u8(src0, src1, src2);
+}
+
+// CHECK-LABEL: @test_sad_u16(
+// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.sad.u16(i32 %src0, i32 %src1, i32 %src2)
+kernel void test_sad_u16(global uint* out, uint src0, uint src1, uint src2) {
+ *out = __builtin_amdgcn_sad_u16(src0, src1, src2);
+}
+
+// CHECK-LABEL: @test_qsad_pk_u16_u8(
+// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.qsad.pk.u16.u8(i64 %src0, i32 %src1, i64 %src2)
+kernel void test_qsad_pk_u16_u8(global ulong* out, ulong src0, uint src1, ulong src2) {
+ *out = __builtin_amdgcn_qsad_pk_u16_u8(src0, src1, src2);
+}
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
index ab0b0b936abdc..b92454de60c78 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
@@ -251,13 +251,6 @@ void test_fract_f64(global int* out, double a)
*out = __builtin_amdgcn_fract(a);
}
-// CHECK-LABEL: @test_lerp
-// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.lerp
-void test_lerp(global int* out, int a, int b, int c)
-{
- *out = __builtin_amdgcn_lerp(a, b, c);
-}
-
// CHECK-LABEL: @test_sicmp_i32
// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.icmp.i64.i32(i32 %a, i32 %b, i32 32)
void test_sicmp_i32(global ulong* out, int a, int b)
@@ -865,30 +858,6 @@ void test_s_setprio()
__builtin_amdgcn_s_setprio(3);
}
-// CHECK-LABEL: @test_cubeid(
-// CHECK: {{.*}}call{{.*}} float @llvm.amdgcn.cubeid(float %a, float %b, float %c)
-void test_cubeid(global float* out, float a, float b, float c) {
- *out = __builtin_amdgcn_cubeid(a, b, c);
-}
-
-// CHECK-LABEL: @test_cubesc(
-// CHECK: {{.*}}call{{.*}} float @llvm.amdgcn.cubesc(float %a, float %b, float %c)
-void test_cubesc(global float* out, float a, float b, float c) {
- *out = __builtin_amdgcn_cubesc(a, b, c);
-}
-
-// CHECK-LABEL: @test_cubetc(
-// CHECK: {{.*}}call{{.*}} float @llvm.amdgcn.cubetc(float %a, float %b, float %c)
-void test_cubetc(global float* out, float a, float b, float c) {
- *out = __builtin_amdgcn_cubetc(a, b, c);
-}
-
-// CHECK-LABEL: @test_cubema(
-// CHECK: {{.*}}call{{.*}} float @llvm.amdgcn.cubema(float %a, float %b, float %c)
-void test_cubema(global float* out, float a, float b, float c) {
- *out = __builtin_amdgcn_cubema(a, b, c);
-}
-
// CHECK-LABEL: @test_read_exec(
// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.ballot.i64(i1 true)
void test_read_exec(global ulong* out) {
@@ -1139,18 +1108,6 @@ kernel void test_cvt_pkrtz(global half2* out, float src0, float src1) {
*out = __builtin_amdgcn_cvt_pkrtz(src0, src1);
}
-// CHECK-LABEL: @test_cvt_pknorm_i16(
-// CHECK: tail call{{.*}} <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float %src0, float %src1)
-kernel void test_cvt_pknorm_i16(global short2* out, float src0, float src1) {
- *out = __builtin_amdgcn_cvt_pknorm_i16(src0, src1);
-}
-
-// CHECK-LABEL: @test_cvt_pknorm_u16(
-// CHECK: tail call{{.*}} <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float %src0, float %src1)
-kernel void test_cvt_pknorm_u16(global ushort2* out, float src0, float src1) {
- *out = __builtin_amdgcn_cvt_pknorm_u16(src0, src1);
-}
-
// CHECK-LABEL: @test_cvt_pk_i16(
// CHECK: tail call{{.*}} <2 x i16> @llvm.amdgcn.cvt.pk.i16(i32 %src0, i32 %src1)
kernel void test_cvt_pk_i16(global short2* out, int src0, int src1) {
@@ -1169,36 +1126,12 @@ kernel void test_cvt_pk_u8_f32(global uint* out, float src0, uint src1, uint src
*out = __builtin_amdgcn_cvt_pk_u8_f32(src0, src1, src2);
}
-// CHECK-LABEL: @test_sad_u8(
-// CHECK: tail call{{.*}} i32 @llvm.amdgcn.sad.u8(i32 %src0, i32 %src1, i32 %src2)
-kernel void test_sad_u8(global uint* out, uint src0, uint src1, uint src2) {
- *out = __builtin_amdgcn_sad_u8(src0, src1, src2);
-}
-
// CHECK-LABEL: test_msad_u8(
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.msad.u8(i32 %src0, i32 %src1, i32 %src2)
kernel void test_msad_u8(global uint* out, uint src0, uint src1, uint src2) {
*out = __builtin_amdgcn_msad_u8(src0, src1, src2);
}
-// CHECK-LABEL: test_sad_hi_u8(
-// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.sad.hi.u8(i32 %src0, i32 %src1, i32 %src2)
-kernel void test_sad_hi_u8(global uint* out, uint src0, uint src1, uint src2) {
- *out = __builtin_amdgcn_sad_hi_u8(src0, src1, src2);
-}
-
-// CHECK-LABEL: @test_sad_u16(
-// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.sad.u16(i32 %src0, i32 %src1, i32 %src2)
-kernel void test_sad_u16(global uint* out, uint src0, uint src1, uint src2) {
- *out = __builtin_amdgcn_sad_u16(src0, src1, src2);
-}
-
-// CHECK-LABEL: @test_qsad_pk_u16_u8(
-// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.qsad.pk.u16.u8(i64 %src0, i32 %src1, i64 %src2)
-kernel void test_qsad_pk_u16_u8(global ulong* out, ulong src0, uint src1, ulong src2) {
- *out = __builtin_amdgcn_qsad_pk_u16_u8(src0, src1, src2);
-}
-
// CHECK-LABEL: @test_mqsad_pk_u16_u8(
// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.mqsad.pk.u16.u8(i64 %src0, i32 %src1, i64 %src2)
kernel void test_mqsad_pk_u16_u8(global ulong* out, ulong src0, uint src1, ulong src2) {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index cd8327563d9d6..b2bbd3ba6c5ca 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -901,43 +901,43 @@ def FeaturePkFmacF16Inst : SubtargetFeature<"pk-fmac-f16-inst",
"Has v_pk_fmac_f16 instruction"
>;
-def FeatureCubeInsts : SubtargetFeature<"V_CUBE-Insts",
+def FeatureCubeInsts : SubtargetFeature<"cube-insts",
"HasCubeInsts",
"true",
"Has V_CUBE* instructions"
>;
-def FeatureLerpInst : SubtargetFeature<"V_LERP-insts",
+def FeatureLerpInst : SubtargetFeature<"lerp-inst",
"HasLerpInst",
"true",
"Has v_lerp_u8 instruction"
>;
-def FeatureSadInsts : SubtargetFeature<"V_SAD-insts",
+def FeatureSadInsts : SubtargetFeature<"sad-insts",
"HasSadInsts",
"true",
"Has V_SAD* instructions"
>;
-def FeatureQsadInsts : SubtargetFeature<"V_QSAD-insts",
+def FeatureQsadInsts : SubtargetFeature<"qsad-insts",
"HasQsadInsts",
"true",
"Has V_QSAD* instructions"
>;
-def FeatureCvtNormInsts : SubtargetFeature<"V_CVT_NORM-insts",
+def FeatureCvtNormInsts : SubtargetFeature<"cvt-norm-insts",
"HasCvtNormInsts",
"true",
"Has V_CVT_NORM* instructions"
>;
-def FeatureCvtPkNormVOP2Insts : SubtargetFeature<"V_CVT_PKNORM-VOP2-insts",
+def FeatureCvtPkNormVOP2Insts : SubtargetFeature<"cvt-pknorm-vop2-insts",
"HasCvtPkNormVOP2Insts",
"true",
"Has V_CVT_PK_NORM_* instructions/Has V_CVT_PK_NORM_*_F16 instructions"
>;
-def FeatureCvtPkNormVOP3Insts : SubtargetFeature<"V_CVT_PKNORM-VOP3-insts",
+def FeatureCvtPkNormVOP3Insts : SubtargetFeature<"cvt-pknorm-vop3-insts",
"HasCvtPkNormVOP3Insts",
"true",
"Has V_CVT_PK_NORM_* instructions/Has V_CVT_PK_NORM_*_F16 instructions"
diff --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp
index 96bef0e574a45..9a9e76d581432 100644
--- a/llvm/lib/TargetParser/TargetParser.cpp
+++ b/llvm/lib/TargetParser/TargetParser.cpp
@@ -447,6 +447,12 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T,
Features["atomic-fmin-fmax-global-f64"] = true;
Features["wavefrontsize32"] = true;
Features["clusters"] = true;
+ Features["cube-insts"] = true;
+ Features["lerp-inst"] = true;
+ ;
+ Features["sad-insts"] = true;
+ Features["qsad-insts"] = true;
+ Features["cvt-pknorm-vop2-insts"] = true;
break;
case GK_GFX1201:
case GK_GFX1200:
@@ -474,6 +480,12 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T,
Features["gfx12-insts"] = true;
Features["atomic-fadd-rtn-insts"] = true;
Features["image-insts"] = true;
+ Features["cube-insts"] = true;
+ Features["lerp-inst"] = true;
+ ;
+ Features["sad-insts"] = true;
+ Features["qsad-insts"] = true;
+ Features["cvt-pknorm-vop2-insts"] = true;
Features["fp8-conversion-insts"] = true;
Features["atomic-fmin-fmax-global-f32"] = true;
break;
@@ -503,6 +515,12 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T,
Features["gfx11-insts"] = true;
Features["atomic-fadd-rtn-insts"] = true;
Features["image-insts"] = true;
+ Features["cube-insts"] = true;
+ Features["lerp-inst"] = true;
+ ;
+ Features["sad-insts"] = true;
+ Features["qsad-insts"] = true;
+ Features["cvt-pknorm-vop2-insts"] = true;
Features["gws"] = true;
Features["atomic-fmin-fmax-global-f32"] = true;
break;
@@ -562,6 +580,12 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T,
Features["vmem-to-lds-load-insts"] = true;
Features["atomic-fmin-fmax-global-f32"] = true;
Features["atomic-fmin-fmax-global-f64"] = true;
+ Features["cube-insts"] = true;
+ Features["lerp-inst"] = true;
+ ;
+ Features["sad-insts"] = true;
+ Features["qsad-insts"] = true;
+ Features["cvt-pknorm-vop2-insts"] = true;
break;
case GK_GFX950:
Features["bitop3-insts"] = true;
@@ -615,6 +639,12 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T,
Features["vmem-to-lds-load-insts"] = true;
Features["atomic-fmin-fmax-global-f64"] = true;
Features["wavefrontsize64"] = true;
+ Features["cube-insts"] = true;
+ Features["lerp-inst"] = true;
+ ;
+ Features["sad-insts"] = true;
+ Features["qsad-insts"] = true;
+ Features["cvt-pknorm-vop2-insts"] = true;
break;
case GK_GFX90A:
Features["gfx90a-insts"] = true;
@@ -659,6 +689,12 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T,
Features["s-memtime-inst"] = true;
Features["gws"] = true;
Features["wavefrontsize64"] = true;
+ Features["cube-insts"] = true;
+ Features["lerp-inst"] = true;
+ ;
+ Features["sad-insts"] = true;
+ Features["qsad-insts"] = true;
+ Features["cvt-pknorm-vop2-insts"] = true;
break;
case GK_GFX705:
case GK_GFX704:
@@ -667,16 +703,28 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T,
case GK_GFX701:
case GK_GFX700:
Features["ci-insts"] = true;
- [[fallthrough]];
+ Features["cube-insts"] = true;
+ Features["lerp-inst"] = true;
+ ;
+ Features["sad-insts"] = true;
+ Features["qsad-insts"] = true;
+ Features["cvt-pknorm-vop2-insts"] = true;
+ break;
case GK_GFX602:
case GK_GFX601:
case GK_GFX600:
+ Features["ci-insts"] = true;
Features["image-insts"] = true;
Features["s-memtime-inst"] = true;
Features["gws"] = true;
Features["atomic-fmin-fmax-global-f32"] = true;
Features["atomic-fmin-fmax-global-f64"] = true;
Features["wavefrontsize64"] = true;
+ Features["cube-insts"] = true;
+ Features["lerp-inst"] = true;
+ ;
+ Features["sad-insts"] = true;
+ Features["cvt-pknorm-vop2-insts"] = true;
break;
case GK_NONE:
break;
>From c6ac0f1a79e59389fbbb944d84607a015a694a22 Mon Sep 17 00:00:00 2001
From: shore <372660931 at qq.com>
Date: Tue, 18 Nov 2025 13:43:18 +0800
Subject: [PATCH 05/11] fix format
---
llvm/lib/TargetParser/TargetParser.cpp | 13 +++++--------
1 file changed, 5 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp
index 9a9e76d581432..3f28780e2cb04 100644
--- a/llvm/lib/TargetParser/TargetParser.cpp
+++ b/llvm/lib/TargetParser/TargetParser.cpp
@@ -449,7 +449,6 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T,
Features["clusters"] = true;
Features["cube-insts"] = true;
Features["lerp-inst"] = true;
- ;
Features["sad-insts"] = true;
Features["qsad-insts"] = true;
Features["cvt-pknorm-vop2-insts"] = true;
@@ -482,7 +481,6 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T,
Features["image-insts"] = true;
Features["cube-insts"] = true;
Features["lerp-inst"] = true;
- ;
Features["sad-insts"] = true;
Features["qsad-insts"] = true;
Features["cvt-pknorm-vop2-insts"] = true;
@@ -517,7 +515,6 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T,
Features["image-insts"] = true;
Features["cube-insts"] = true;
Features["lerp-inst"] = true;
- ;
Features["sad-insts"] = true;
Features["qsad-insts"] = true;
Features["cvt-pknorm-vop2-insts"] = true;
@@ -553,6 +550,11 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T,
Features["vmem-to-lds-load-insts"] = true;
Features["atomic-fmin-fmax-global-f32"] = true;
Features["atomic-fmin-fmax-global-f64"] = true;
+ Features["cube-insts"] = true;
+ Features["lerp-inst"] = true;
+ Features["sad-insts"] = true;
+ Features["qsad-insts"] = true;
+ Features["cvt-pknorm-vop2-insts"] = true;
break;
case GK_GFX1012:
case GK_GFX1011:
@@ -582,7 +584,6 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T,
Features["atomic-fmin-fmax-global-f64"] = true;
Features["cube-insts"] = true;
Features["lerp-inst"] = true;
- ;
Features["sad-insts"] = true;
Features["qsad-insts"] = true;
Features["cvt-pknorm-vop2-insts"] = true;
@@ -641,7 +642,6 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T,
Features["wavefrontsize64"] = true;
Features["cube-insts"] = true;
Features["lerp-inst"] = true;
- ;
Features["sad-insts"] = true;
Features["qsad-insts"] = true;
Features["cvt-pknorm-vop2-insts"] = true;
@@ -691,7 +691,6 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T,
Features["wavefrontsize64"] = true;
Features["cube-insts"] = true;
Features["lerp-inst"] = true;
- ;
Features["sad-insts"] = true;
Features["qsad-insts"] = true;
Features["cvt-pknorm-vop2-insts"] = true;
@@ -705,7 +704,6 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T,
Features["ci-insts"] = true;
Features["cube-insts"] = true;
Features["lerp-inst"] = true;
- ;
Features["sad-insts"] = true;
Features["qsad-insts"] = true;
Features["cvt-pknorm-vop2-insts"] = true;
@@ -722,7 +720,6 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T,
Features["wavefrontsize64"] = true;
Features["cube-insts"] = true;
Features["lerp-inst"] = true;
- ;
Features["sad-insts"] = true;
Features["cvt-pknorm-vop2-insts"] = true;
break;
>From a3987b136105dd98ede771b3f17429f4493e19b6 Mon Sep 17 00:00:00 2001
From: shore <372660931 at qq.com>
Date: Tue, 18 Nov 2025 14:39:04 +0800
Subject: [PATCH 06/11] fix tests
---
clang/test/CodeGenOpenCL/amdgpu-features.cl | 98 ++++++++++-----------
llvm/lib/TargetParser/TargetParser.cpp | 6 ++
2 files changed, 55 insertions(+), 49 deletions(-)
diff --git a/clang/test/CodeGenOpenCL/amdgpu-features.cl b/clang/test/CodeGenOpenCL/amdgpu-features.cl
index 9bd096f3fcbc7..aae05dd623a63 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-features.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-features.cl
@@ -63,55 +63,55 @@
// NOCPU-WAVE32: "target-features"="+wavefrontsize32"
// NOCPU-WAVE64: "target-features"="+wavefrontsize64"
-// GFX600: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+s-memtime-inst,+wavefrontsize64
-// GFX601: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+s-memtime-inst,+wavefrontsize64
-// GFX602: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+s-memtime-inst,+wavefrontsize64
-// GFX700: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+s-memtime-inst,+wavefrontsize64"
-// GFX701: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+s-memtime-inst,+wavefrontsize64"
-// GFX702: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+s-memtime-inst,+wavefrontsize64"
-// GFX703: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+s-memtime-inst,+wavefrontsize64"
-// GFX704: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+s-memtime-inst,+wavefrontsize64"
-// GFX705: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+s-memtime-inst,+wavefrontsize64"
-// GFX801: "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
-// GFX802: "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
-// GFX803: "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
-// GFX805: "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
-// GFX810: "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
-// GFX900: "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
-// GFX902: "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
-// GFX904: "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
-// GFX906: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
-// GFX908: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
-// GFX909: "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
-// GFX90A: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
-// GFX90C: "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
-// GFX942: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,+xf32-insts"
-// GFX9_4_Generic: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
-// GFX950: "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+mai-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
-// GFX1010: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
-// GFX1011: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
-// GFX1012: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
-// GFX1013: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
-// GFX1030: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
-// GFX1031: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
-// GFX1032: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
-// GFX1033: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
-// GFX1034: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
-// GFX1035: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
-// GFX1036: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
-// GFX1100: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
-// GFX1101: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
-// GFX1102: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
-// GFX1103: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
-// GFX1150: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
-// GFX1151: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
-// GFX1152: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
-// GFX1153: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
-// GFX1200: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
-// GFX1201: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
-// GFX1250: "target-features"="+16-bit-insts,+add-min-max-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bitop3-insts,+ci-insts,+clusters,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+permlane16-swap,+pk-add-min-max-insts,+prng-inst,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+wavefrontsize32"
-// GFX1251: "target-features"="+16-bit-insts,+add-min-max-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bitop3-insts,+ci-insts,+clusters,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+permlane16-swap,+pk-add-min-max-insts,+prng-inst,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+wavefrontsize32"
+// GFX600: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+lerp-inst,+s-memtime-inst,+sad-insts,+wavefrontsize64
+// GFX601: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+lerp-inst,+s-memtime-inst,+sad-insts,+wavefrontsize64
+// GFX602: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+lerp-inst,+s-memtime-inst,+sad-insts,+wavefrontsize64
+// GFX700: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+lerp-inst,+qsad-insts,+s-memtime-inst,+sad-insts,+wavefrontsize64"
+// GFX701: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+lerp-inst,+qsad-insts,+s-memtime-inst,+sad-insts,+wavefrontsize64"
+// GFX702: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+lerp-inst,+qsad-insts,+s-memtime-inst,+sad-insts,+wavefrontsize64"
+// GFX703: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+lerp-inst,+qsad-insts,+s-memtime-inst,+sad-insts,+wavefrontsize64"
+// GFX704: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+lerp-inst,+qsad-insts,+s-memtime-inst,+sad-insts,+wavefrontsize64"
+// GFX705: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+lerp-inst,+qsad-insts,+s-memtime-inst,+sad-insts,+wavefrontsize64"
+// GFX801: "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dpp,+gfx8-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64"
+// GFX802: "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dpp,+gfx8-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64"
+// GFX803: "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dpp,+gfx8-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64"
+// GFX805: "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dpp,+gfx8-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64"
+// GFX810: "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dpp,+gfx8-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64"
+// GFX900: "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dpp,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64"
+// GFX902: "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dpp,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64"
+// GFX904: "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dpp,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64"
+// GFX906: "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64"
+// GFX908: "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+lerp-inst,+mai-insts,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64"
+// GFX909: "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dpp,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64"
+// GFX90A: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+lerp-inst,+mai-insts,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64"
+// GFX90C: "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dpp,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64"
+// GFX942: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+lerp-inst,+mai-insts,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64,+xf32-insts"
+// GFX9_4_Generic: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+lerp-inst,+mai-insts,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64"
+// GFX950: "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+lerp-inst,+mai-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64"
+// GFX1010: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize32"
+// GFX1011: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize32"
+// GFX1012: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize32"
+// GFX1013: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize32"
+// GFX1030: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize32"
+// GFX1031: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize32"
+// GFX1032: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize32"
+// GFX1033: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize32"
+// GFX1034: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize32"
+// GFX1035: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize32"
+// GFX1036: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize32"
+// GFX1100: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+sad-insts,+wavefrontsize32"
+// GFX1101: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+sad-insts,+wavefrontsize32"
+// GFX1102: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+sad-insts,+wavefrontsize32"
+// GFX1103: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+sad-insts,+wavefrontsize32"
+// GFX1150: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+sad-insts,+wavefrontsize32"
+// GFX1151: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+sad-insts,+wavefrontsize32"
+// GFX1152: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+sad-insts,+wavefrontsize32"
+// GFX1153: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+sad-insts,+wavefrontsize32"
+// GFX1200: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-global-pk-add-bf16-inst,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+sad-insts,+wavefrontsize32"
+// GFX1201: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-global-pk-add-bf16-inst,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+sad-insts,+wavefrontsize32"
+// GFX1250: "target-features"="+16-bit-insts,+add-min-max-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bitop3-insts,+ci-insts,+clusters,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+permlane16-swap,+pk-add-min-max-insts,+prng-inst,+qsad-insts,+sad-insts,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+wavefrontsize32"
+// GFX1251: "target-features"="+16-bit-insts,+add-min-max-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bitop3-insts,+ci-insts,+clusters,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+permlane16-swap,+pk-add-min-max-insts,+prng-inst,+qsad-insts,+sad-insts,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+wavefrontsize32"
-// GFX1103-W64: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize64"
+// GFX1103-W64: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+sad-insts,+wavefrontsize64"
kernel void test() {}
diff --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp
index 3f28780e2cb04..9439fa2b3a0ec 100644
--- a/llvm/lib/TargetParser/TargetParser.cpp
+++ b/llvm/lib/TargetParser/TargetParser.cpp
@@ -707,6 +707,12 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T,
Features["sad-insts"] = true;
Features["qsad-insts"] = true;
Features["cvt-pknorm-vop2-insts"] = true;
+ Features["image-insts"] = true;
+ Features["s-memtime-inst"] = true;
+ Features["gws"] = true;
+ Features["atomic-fmin-fmax-global-f32"] = true;
+ Features["atomic-fmin-fmax-global-f64"] = true;
+ Features["wavefrontsize64"] = true;
break;
case GK_GFX602:
case GK_GFX601:
>From 2b6be97afae2e4e225715cb7998b38da2bd0600a Mon Sep 17 00:00:00 2001
From: shore <372660931 at qq.com>
Date: Tue, 18 Nov 2025 14:45:04 +0800
Subject: [PATCH 07/11] fix description
---
llvm/lib/Target/AMDGPU/AMDGPU.td | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index b2bbd3ba6c5ca..5dea64844e64e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -904,7 +904,7 @@ def FeaturePkFmacF16Inst : SubtargetFeature<"pk-fmac-f16-inst",
def FeatureCubeInsts : SubtargetFeature<"cube-insts",
"HasCubeInsts",
"true",
- "Has V_CUBE* instructions"
+ "Has v_cube* instructions"
>;
def FeatureLerpInst : SubtargetFeature<"lerp-inst",
@@ -916,31 +916,31 @@ def FeatureLerpInst : SubtargetFeature<"lerp-inst",
def FeatureSadInsts : SubtargetFeature<"sad-insts",
"HasSadInsts",
"true",
- "Has V_SAD* instructions"
+ "Has v_sad* instructions"
>;
def FeatureQsadInsts : SubtargetFeature<"qsad-insts",
"HasQsadInsts",
"true",
- "Has V_QSAD* instructions"
+ "Has v_qsad* instructions"
>;
def FeatureCvtNormInsts : SubtargetFeature<"cvt-norm-insts",
"HasCvtNormInsts",
"true",
- "Has V_CVT_NORM* instructions"
+ "Has v_cvt_norm* instructions"
>;
def FeatureCvtPkNormVOP2Insts : SubtargetFeature<"cvt-pknorm-vop2-insts",
"HasCvtPkNormVOP2Insts",
"true",
- "Has V_CVT_PK_NORM_* instructions/Has V_CVT_PK_NORM_*_F16 instructions"
+ "Has v_cvt_pk_norm_*f32 instructions/Has v_cvt_pk_norm_*_f16 instructions"
>;
def FeatureCvtPkNormVOP3Insts : SubtargetFeature<"cvt-pknorm-vop3-insts",
"HasCvtPkNormVOP3Insts",
"true",
- "Has V_CVT_PK_NORM_* instructions/Has V_CVT_PK_NORM_*_F16 instructions"
+ "Has v_cvt_pk_norm_*f32 instructions/Has v_cvt_pk_norm_*_f16 instructions"
>;
def FeatureAtomicDsPkAdd16Insts : SubtargetFeature<"atomic-ds-pk-add-16-insts",
>From 3691fc903f591cb40d53f3e6b558c035292964a9 Mon Sep 17 00:00:00 2001
From: shore <372660931 at qq.com>
Date: Tue, 18 Nov 2025 15:36:09 +0800
Subject: [PATCH 08/11] fix failing tests
---
clang/test/CodeGenOpenCL/amdgpu-cluster-dims.cl | 4 ++--
clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl | 8 ++++----
.../amdgpu-readonly-features-written-with-no-target.cl | 6 +++---
3 files changed, 9 insertions(+), 9 deletions(-)
diff --git a/clang/test/CodeGenOpenCL/amdgpu-cluster-dims.cl b/clang/test/CodeGenOpenCL/amdgpu-cluster-dims.cl
index 14fbeb24a96c2..c5656c49c4761 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-cluster-dims.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-cluster-dims.cl
@@ -26,8 +26,8 @@ kernel void foo(global int *p) { *p = 1; }
// CHECK-NEXT: ret void
//
//.
-// CHECK: attributes #[[ATTR0]] = { convergent norecurse nounwind "amdgpu-cluster-dims"="0,0,0" "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1250" "target-features"="+16-bit-insts,+add-min-max-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bitop3-insts,+ci-insts,+clusters,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+permlane16-swap,+pk-add-min-max-insts,+prng-inst,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+wavefrontsize32" "uniform-work-group-size"="false" }
-// CHECK: attributes #[[ATTR1]] = { alwaysinline convergent norecurse nounwind "amdgpu-cluster-dims"="0,0,0" "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1250" "target-features"="+16-bit-insts,+add-min-max-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bitop3-insts,+ci-insts,+clusters,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+permlane16-swap,+pk-add-min-max-insts,+prng-inst,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+wavefrontsize32" }
+// CHECK: attributes #[[ATTR0]] = { convergent norecurse nounwind "amdgpu-cluster-dims"="0,0,0" "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1250" "target-features"="+16-bit-insts,+add-min-max-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bitop3-insts,+ci-insts,+clusters,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+permlane16-swap,+pk-add-min-max-insts,+prng-inst,+qsad-insts,+sad-insts,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+wavefrontsize32" "uniform-work-group-size"="false" }
+// CHECK: attributes #[[ATTR1]] = { alwaysinline convergent norecurse nounwind "amdgpu-cluster-dims"="0,0,0" "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1250" "target-features"="+16-bit-insts,+add-min-max-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bitop3-insts,+ci-insts,+clusters,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+permlane16-swap,+pk-add-min-max-insts,+prng-inst,+qsad-insts,+sad-insts,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+wavefrontsize32" }
// CHECK: attributes #[[ATTR2]] = { convergent nounwind }
//.
// CHECK: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600}
diff --git a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
index e9adac23a6530..2cbc9787a04b0 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
@@ -816,12 +816,12 @@ kernel void test_target_features_kernel(global int *i) {
// NOCPU: attributes #[[ATTR10]] = { convergent nounwind }
//.
// GFX900: attributes #[[ATTR0:[0-9]+]] = { "objc_arc_inert" }
-// GFX900: attributes #[[ATTR1]] = { convergent norecurse nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,-sram-ecc" }
-// GFX900: attributes #[[ATTR2]] = { convergent norecurse nounwind "amdgpu-flat-work-group-size"="1,256" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,-sram-ecc" "uniform-work-group-size"="false" }
-// GFX900: attributes #[[ATTR3]] = { alwaysinline convergent norecurse nounwind "amdgpu-flat-work-group-size"="1,256" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,-sram-ecc" }
+// GFX900: attributes #[[ATTR1]] = { convergent norecurse nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dpp,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64,-sram-ecc" }
+// GFX900: attributes #[[ATTR2]] = { convergent norecurse nounwind "amdgpu-flat-work-group-size"="1,256" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dpp,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64,-sram-ecc" "uniform-work-group-size"="false" }
+// GFX900: attributes #[[ATTR3]] = { alwaysinline convergent norecurse nounwind "amdgpu-flat-work-group-size"="1,256" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dpp,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64,-sram-ecc" }
// GFX900: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
// GFX900: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
-// GFX900: attributes #[[ATTR6]] = { convergent nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,-sram-ecc" }
+// GFX900: attributes #[[ATTR6]] = { convergent nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dpp,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64,-sram-ecc" }
// GFX900: attributes #[[ATTR7:[0-9]+]] = { nocallback nofree nosync nounwind willreturn }
// GFX900: attributes #[[ATTR8]] = { convergent nounwind }
// GFX900: attributes #[[ATTR9]] = { nounwind }
diff --git a/clang/test/CodeGenOpenCL/amdgpu-readonly-features-written-with-no-target.cl b/clang/test/CodeGenOpenCL/amdgpu-readonly-features-written-with-no-target.cl
index 1a0a30ca0b51e..2d50ce7cab2e0 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-readonly-features-written-with-no-target.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-readonly-features-written-with-no-target.cl
@@ -11,6 +11,6 @@
__attribute__((target("gws,image-insts,vmem-to-lds-load-insts"))) void test() {}
// NOCPU: "target-features"="+gws,+image-insts,+vmem-to-lds-load-insts"
-// GFX942: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,+xf32-insts"
-// GFX1100: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
-// GFX1200: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32
+// GFX942: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+lerp-inst,+mai-insts,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64,+xf32-insts"
+// GFX1100: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+sad-insts,+wavefrontsize32"
+// GFX1200: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-global-pk-add-bf16-inst,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+sad-insts,+wavefrontsize32"
>From 6a8d41af37e2c617580d20fc536644197c6ad88c Mon Sep 17 00:00:00 2001
From: shore <372660931 at qq.com>
Date: Tue, 18 Nov 2025 16:20:16 +0800
Subject: [PATCH 09/11] fix fialing test
---
clang/test/CodeGen/link-builtin-bitcode.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/clang/test/CodeGen/link-builtin-bitcode.c b/clang/test/CodeGen/link-builtin-bitcode.c
index 9a5b6de3c3b38..f6e45bf573705 100644
--- a/clang/test/CodeGen/link-builtin-bitcode.c
+++ b/clang/test/CodeGen/link-builtin-bitcode.c
@@ -43,7 +43,7 @@ int bar() { return no_attr() + attr_in_target() + attr_not_in_target() + attr_in
// CHECK-LABEL: @attr_incompatible
// CHECK-SAME: () #[[ATTR_INCOMPATIBLE:[0-9]+]] {
-// CHECK: attributes #[[ATTR_BAR]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" }
-// CHECK: attributes #[[ATTR_COMPATIBLE]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gws,+image-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+vmem-to-lds-load-insts,+wavefrontsize64" }
-// CHECK: attributes #[[ATTR_EXTEND]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+extended-image-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gws,+image-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+vmem-to-lds-load-insts,+wavefrontsize64" }
-// CHECK: attributes #[[ATTR_INCOMPATIBLE]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx90a-insts,+gws,+image-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+vmem-to-lds-load-insts,+wavefrontsize64,-gfx9-insts" }
+// CHECK: attributes #[[ATTR_BAR]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+lerp-inst,+mai-insts,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64" }
+// CHECK: attributes #[[ATTR_COMPATIBLE]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gws,+image-insts,+lerp-inst,+mai-insts,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+vmem-to-lds-load-insts,+wavefrontsize64" }
+// CHECK: attributes #[[ATTR_EXTEND]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+extended-image-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gws,+image-insts,+lerp-inst,+mai-insts,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+vmem-to-lds-load-insts,+wavefrontsize64" }
+// CHECK: attributes #[[ATTR_INCOMPATIBLE]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx90a-insts,+gws,+image-insts,+lerp-inst,+mai-insts,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+vmem-to-lds-load-insts,+wavefrontsize64,-gfx9-insts" }
>From 91bd9087906d802e750368c3303598ce41e5c711 Mon Sep 17 00:00:00 2001
From: shore <372660931 at qq.com>
Date: Tue, 18 Nov 2025 16:32:05 +0800
Subject: [PATCH 10/11] fix failing tests
---
clang/test/OpenMP/amdgcn-attributes.cpp | 4 ++--
flang/test/Lower/OpenMP/target_cpu_features.f90 | 4 ++--
llvm/lib/TargetParser/TargetParser.cpp | 1 -
3 files changed, 4 insertions(+), 5 deletions(-)
diff --git a/clang/test/OpenMP/amdgcn-attributes.cpp b/clang/test/OpenMP/amdgcn-attributes.cpp
index 2c9e16a4f5098..03f5c31e3157c 100644
--- a/clang/test/OpenMP/amdgcn-attributes.cpp
+++ b/clang/test/OpenMP/amdgcn-attributes.cpp
@@ -32,9 +32,9 @@ int callable(int x) {
}
// DEFAULT: attributes #0 = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,42" "kernel" "no-trapping-math"="true" "omp_target_thread_limit"="42" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }
-// CPU: attributes #0 = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,42" "kernel" "no-trapping-math"="true" "omp_target_thread_limit"="42" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" "uniform-work-group-size"="true" }
+// CPU: attributes #0 = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,42" "kernel" "no-trapping-math"="true" "omp_target_thread_limit"="42" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dpp,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64" "uniform-work-group-size"="true" }
// NOIEEE: attributes #0 = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,42" "amdgpu-ieee"="false" "kernel" "no-nans-fp-math"="true" "no-trapping-math"="true" "omp_target_thread_limit"="42" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }
// DEFAULT: attributes #2 = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
-// CPU: attributes #2 = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" }
+// CPU: attributes #2 = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dpp,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64" }
// NOIEEE: attributes #2 = { convergent mustprogress noinline nounwind optnone "amdgpu-ieee"="false" "no-nans-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
diff --git a/flang/test/Lower/OpenMP/target_cpu_features.f90 b/flang/test/Lower/OpenMP/target_cpu_features.f90
index 4532593156eab..341cfc7991d43 100644
--- a/flang/test/Lower/OpenMP/target_cpu_features.f90
+++ b/flang/test/Lower/OpenMP/target_cpu_features.f90
@@ -11,8 +11,8 @@
!AMDGCN-SAME: fir.target_features = #llvm.target_features<["+16-bit-insts", "+ci-insts",
!AMDGCN-SAME: "+dl-insts", "+dot1-insts", "+dot10-insts", "+dot2-insts", "+dot3-insts",
!AMDGCN-SAME: "+dot4-insts", "+dot5-insts", "+dot6-insts", "+dot7-insts", "+dpp",
-!AMDGCN-SAME: "+gfx8-insts", "+gfx9-insts", "+gws", "+image-insts", "+mai-insts",
-!AMDGCN-SAME: "+s-memrealtime", "+s-memtime-inst", "+vmem-to-lds-load-insts", "+wavefrontsize64"]>
+!AMDGCN-SAME: "+gfx8-insts", "+gfx9-insts", "+gws", "+image-insts", "+lerp-inst", "+mai-insts",
+!AMDGCN-SAME: "+qsad-insts", "+s-memrealtime", "+s-memtime-inst", "+sad-insts", "+vmem-to-lds-load-insts", "+wavefrontsize64"]>
!NVPTX: module attributes {
!NVPTX-SAME: fir.target_cpu = "sm_80"
diff --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp
index 9439fa2b3a0ec..28f3649a840d6 100644
--- a/llvm/lib/TargetParser/TargetParser.cpp
+++ b/llvm/lib/TargetParser/TargetParser.cpp
@@ -717,7 +717,6 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T,
case GK_GFX602:
case GK_GFX601:
case GK_GFX600:
- Features["ci-insts"] = true;
Features["image-insts"] = true;
Features["s-memtime-inst"] = true;
Features["gws"] = true;
>From c8d801121a8bce0a4f36b1ea851c2a304c5132ff Mon Sep 17 00:00:00 2001
From: shore <372660931 at qq.com>
Date: Tue, 18 Nov 2025 17:09:16 +0800
Subject: [PATCH 11/11] fix failing test
---
clang/test/CodeGenOpenCL/amdgpu-features.cl | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/clang/test/CodeGenOpenCL/amdgpu-features.cl b/clang/test/CodeGenOpenCL/amdgpu-features.cl
index aae05dd623a63..bd162b40b8e47 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-features.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-features.cl
@@ -63,9 +63,9 @@
// NOCPU-WAVE32: "target-features"="+wavefrontsize32"
// NOCPU-WAVE64: "target-features"="+wavefrontsize64"
-// GFX600: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+lerp-inst,+s-memtime-inst,+sad-insts,+wavefrontsize64
-// GFX601: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+lerp-inst,+s-memtime-inst,+sad-insts,+wavefrontsize64
-// GFX602: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+lerp-inst,+s-memtime-inst,+sad-insts,+wavefrontsize64
+// GFX600: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+cube-insts,+cvt-pknorm-vop2-insts,+lerp-inst,+s-memtime-inst,+sad-insts,+wavefrontsize64"
+// GFX601: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+cube-insts,+cvt-pknorm-vop2-insts,+lerp-inst,+s-memtime-inst,+sad-insts,+wavefrontsize64"
+// GFX602: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+cube-insts,+cvt-pknorm-vop2-insts,+lerp-inst,+s-memtime-inst,+sad-insts,+wavefrontsize64"
// GFX700: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+lerp-inst,+qsad-insts,+s-memtime-inst,+sad-insts,+wavefrontsize64"
// GFX701: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+lerp-inst,+qsad-insts,+s-memtime-inst,+sad-insts,+wavefrontsize64"
// GFX702: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+lerp-inst,+qsad-insts,+s-memtime-inst,+sad-insts,+wavefrontsize64"
More information about the llvm-commits
mailing list