[clang] [flang] [llvm] [AMDGPU] Adding instruction specific features (PR #167809)

via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 18 00:32:25 PST 2025


https://github.com/Shoreshen updated https://github.com/llvm/llvm-project/pull/167809

>From eccc49197174bfb20a26c9cad573df37614ed629 Mon Sep 17 00:00:00 2001
From: shore <372660931 at qq.com>
Date: Thu, 13 Nov 2025 10:18:26 +0800
Subject: [PATCH 01/10] Adding instruction specific features

---
 llvm/lib/Target/AMDGPU/AMDGPU.td             | 108 +++++++++++++++++--
 llvm/lib/Target/AMDGPU/GCNSubtarget.h        |  21 ++++
 llvm/lib/Target/AMDGPU/VOP1Instructions.td   |  14 +--
 llvm/lib/Target/AMDGPU/VOP2Instructions.td   |   2 +-
 llvm/lib/Target/AMDGPU/VOP3Instructions.td   |  22 ++--
 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lerp.ll |   1 -
 6 files changed, 142 insertions(+), 26 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index b008354cfd462..fe2a192f0f372 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -901,6 +901,48 @@ def FeaturePkFmacF16Inst : SubtargetFeature<"pk-fmac-f16-inst",
   "Has v_pk_fmac_f16 instruction"
 >;
 
+def FeatureVCUBEInsts : SubtargetFeature<"V_CUBE-Insts",
+  "HasVCUBEInsts",
+  "true",
+  "Has V_CUBE* instructions"
+>; 
+
+def FeatureVLERPInsts : SubtargetFeature<"V_LERP-insts",
+  "HasVLERPInsts",
+  "true",
+  "Has V_LERP* instructions"
+>; 
+
+def FeatureVSADInsts : SubtargetFeature<"V_SAD-insts",
+  "HasVSADInsts",
+  "true",
+  "Has V_SAD* instructions"
+>; 
+
+def FeatureVQSADInsts : SubtargetFeature<"V_QSAD-insts",
+  "HasVQSADInsts",
+  "true",
+  "Has V_QSAD* instructions"
+>; 
+
+def FeatureVCVTNORMInsts : SubtargetFeature<"V_CVT_NORM-insts",
+  "HasVCVTNORMInsts",
+  "true",
+  "Has V_CVT_NORM* instructions"
+>;
+
+def FeatureVCVTPKNORMVOP2Insts : SubtargetFeature<"V_CVT_PKNORM-VOP2-insts",
+  "HasVCVTPKNORMVOP2Insts",
+  "true",
+  "Has V_CVT_NORM* VOP2 instructions"
+>;
+
+def FeatureVCVTPKNORMVOP3Insts : SubtargetFeature<"V_CVT_PKNORM-VOP3-insts",
+  "HasVCVTPKNORMVOP3Insts",
+  "true",
+  "Has V_CVT_NORM* VOP3 instructions"
+>;
+
 def FeatureAtomicDsPkAdd16Insts : SubtargetFeature<"atomic-ds-pk-add-16-insts",
   "HasAtomicDsPkAdd16Insts",
   "true",
@@ -1494,7 +1536,8 @@ def FeatureSouthernIslands : GCNSubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
   FeatureTrigReducedRange, FeatureExtendedImageInsts, FeatureImageInsts,
   FeatureGDS, FeatureGWS, FeatureDefaultComponentZero,
   FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF64GlobalInsts,
-  FeatureVmemWriteVgprInOrder
+  FeatureVmemWriteVgprInOrder, FeatureVCUBEInsts, FeatureVLERPInsts,
+  FeatureVSADInsts, FeatureVCVTPKNORMVOP2Insts
   ]
 >;
 
@@ -1508,7 +1551,8 @@ def FeatureSeaIslands : GCNSubtargetFeatureGeneration<"SEA_ISLANDS",
   FeatureImageInsts, FeatureGDS, FeatureGWS, FeatureDefaultComponentZero,
   FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF64GlobalInsts,
   FeatureAtomicFMinFMaxF32FlatInsts, FeatureAtomicFMinFMaxF64FlatInsts,
-  FeatureVmemWriteVgprInOrder
+  FeatureVmemWriteVgprInOrder, FeatureVCUBEInsts, FeatureVLERPInsts,
+  FeatureVSADInsts, FeatureVQSADInsts, FeatureVCVTPKNORMVOP2Insts
   ]
 >;
 
@@ -1524,7 +1568,9 @@ def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
    FeatureGFX7GFX8GFX9Insts, FeatureSMemTimeInst, FeatureMadMacF32Insts,
    FeatureDsSrc2Insts, FeatureExtendedImageInsts, FeatureFastDenormalF32,
    FeatureUnalignedBufferAccess, FeatureImageInsts, FeatureGDS, FeatureGWS,
-   FeatureDefaultComponentZero, FeatureVmemWriteVgprInOrder
+   FeatureDefaultComponentZero, FeatureVmemWriteVgprInOrder, FeatureVCUBEInsts, 
+   FeatureVLERPInsts, FeatureVSADInsts, FeatureVQSADInsts, 
+   FeatureVCVTPKNORMVOP2Insts
   ]
 >;
 
@@ -1543,7 +1589,10 @@ def FeatureGFX9 : GCNSubtargetFeatureGeneration<"GFX9",
    FeatureA16, FeatureSMemTimeInst, FeatureFastDenormalF32, FeatureSupportsXNACK,
    FeatureUnalignedBufferAccess, FeatureUnalignedScratchAccess,
    FeatureUnalignedDSAccess, FeatureNegativeScratchOffsetBug, FeatureGWS,
-   FeatureDefaultComponentZero,FeatureVmemWriteVgprInOrder, FeatureMemToLDSLoad
+   FeatureDefaultComponentZero,FeatureVmemWriteVgprInOrder, FeatureMemToLDSLoad,
+   FeatureVCUBEInsts, FeatureVLERPInsts, FeatureVSADInsts, FeatureVQSADInsts, 
+   FeatureVCVTNORMInsts, FeatureVCVTPKNORMVOP2Insts, 
+   FeatureVCVTPKNORMVOP3Insts
   ]
 >;
 
@@ -1567,7 +1616,10 @@ def FeatureGFX10 : GCNSubtargetFeatureGeneration<"GFX10",
    FeatureDefaultComponentZero, FeatureMaxHardClauseLength63,
    FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF64GlobalInsts,
    FeatureAtomicFMinFMaxF32FlatInsts, FeatureAtomicFMinFMaxF64FlatInsts,
-   FeatureVmemWriteVgprInOrder, FeatureMemToLDSLoad
+   FeatureVmemWriteVgprInOrder, FeatureMemToLDSLoad, FeatureVCUBEInsts, 
+   FeatureVLERPInsts, FeatureVSADInsts, FeatureVQSADInsts, 
+   FeatureVCVTNORMInsts, FeatureVCVTPKNORMVOP2Insts, 
+   FeatureVCVTPKNORMVOP3Insts
   ]
 >;
 
@@ -1590,7 +1642,9 @@ def FeatureGFX11 : GCNSubtargetFeatureGeneration<"GFX11",
    FeatureUnalignedDSAccess, FeatureGDS, FeatureGWS,
    FeatureDefaultComponentZero, FeatureMaxHardClauseLength32,
    FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF32FlatInsts,
-   FeatureVmemWriteVgprInOrder
+   FeatureVmemWriteVgprInOrder, FeatureVCUBEInsts, FeatureVLERPInsts,
+   FeatureVSADInsts, FeatureVQSADInsts, FeatureVCVTNORMInsts, 
+   FeatureVCVTPKNORMVOP2Insts, FeatureVCVTPKNORMVOP3Insts
   ]
 >;
 
@@ -2069,10 +2123,17 @@ def FeatureISAVersion12 : FeatureSet<
    FeatureMemoryAtomicFAddF32DenormalSupport,
    FeatureBVHDualAndBVH8Insts,
    FeatureWaitsBeforeSystemScopeStores,
-   FeatureD16Writes32BitVgpr
+   FeatureD16Writes32BitVgpr,
+   FeatureVCUBEInsts, 
+   FeatureVLERPInsts, 
+   FeatureVSADInsts,
+   FeatureVQSADInsts, 
+   FeatureVCVTNORMInsts, 
+   FeatureVCVTPKNORMVOP2Insts, 
+   FeatureVCVTPKNORMVOP3Insts
    ]>;
 
-def FeatureISAVersion12_50 : FeatureSet<
+def FeatureISAVersion12_50_Common : FeatureSet<
   [FeatureGFX12,
    FeatureGFX1250Insts,
    FeatureRequiresAlignedVGPRs,
@@ -2147,6 +2208,16 @@ def FeatureISAVersion12_50 : FeatureSet<
    FeatureD16Writes32BitVgpr,
 ]>;
 
+def FeatureISAVersion12_50 : FeatureSet<
+  !listconcat(FeatureISAVersion12_50_Common.Features,
+  [FeatureVCUBEInsts, 
+   FeatureVLERPInsts, 
+   FeatureVSADInsts, 
+   FeatureVQSADInsts, 
+   FeatureVCVTNORMInsts, 
+   FeatureVCVTPKNORMVOP2Insts, 
+   FeatureVCVTPKNORMVOP3Insts])>;
+
 def FeatureISAVersion12_51 : FeatureSet<
   !listconcat(FeatureISAVersion12_50.Features,
   [FeatureDPALU_DPP])>;
@@ -2816,6 +2887,27 @@ def HasFP8Insts : Predicate<"Subtarget->hasFP8Insts()">,
 def HasFP8ConversionInsts : Predicate<"Subtarget->hasFP8ConversionInsts()">,
   AssemblerPredicate<(all_of FeatureFP8ConversionInsts)>;
 
+def HasVCUBEInsts : Predicate<"Subtarget->hasVCUBEInsts()">,
+  AssemblerPredicate<(all_of FeatureVCUBEInsts)>;
+
+def HasVLERPInsts : Predicate<"Subtarget->hasVLERPInsts()">,
+  AssemblerPredicate<(all_of FeatureVLERPInsts)>;
+
+def HasVSADInsts : Predicate<"Subtarget->hasVSADInsts()">,
+  AssemblerPredicate<(all_of FeatureVSADInsts)>;
+
+def HasVQSADInsts : Predicate<"Subtarget->hasVQSADInsts()">,
+  AssemblerPredicate<(all_of FeatureVQSADInsts)>;
+
+def HasVCVTNORMInsts : Predicate<"Subtarget->hasVCVTNORMInsts()">,
+  AssemblerPredicate<(all_of FeatureVCVTNORMInsts)>;
+
+def HasVCVTPKNORMVOP2Insts : Predicate<"Subtarget->hasVCVTPKNORMVOP2Insts()">,
+  AssemblerPredicate<(all_of FeatureVCVTPKNORMVOP2Insts)>;
+
+def HasVCVTPKNORMVOP3Insts : Predicate<"Subtarget->hasVCVTPKNORMVOP3Insts()">,
+  AssemblerPredicate<(all_of FeatureVCVTPKNORMVOP3Insts)>;
+
 def HasFP8E5M3Insts : Predicate<"Subtarget->hasFP8E5M3Insts()">,
   AssemblerPredicate<(all_of FeatureFP8E5M3Insts)>;
 
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index f377b8aaf1333..862cee468b7d3 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -166,6 +166,13 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
   bool HasMAIInsts = false;
   bool HasFP8Insts = false;
   bool HasFP8ConversionInsts = false;
+  bool HasVCUBEInsts = false;
+  bool HasVLERPInsts = false;
+  bool HasVSADInsts = false;
+  bool HasVQSADInsts = false;
+  bool HasVCVTNORMInsts = false;
+  bool HasVCVTPKNORMVOP2Insts = false;
+  bool HasVCVTPKNORMVOP3Insts = false;
   bool HasFP8E5M3Insts = false;
   bool HasCvtFP8Vop1Bug = false;
   bool HasPkFmacF16Inst = false;
@@ -892,6 +899,20 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
 
   bool hasFP8ConversionInsts() const { return HasFP8ConversionInsts; }
 
+  bool hasVCUBEInsts() const { return HasVCUBEInsts; }
+
+  bool hasVLERPInsts() const { return HasVLERPInsts; }
+
+  bool hasVSADInsts() const { return HasVSADInsts; }
+
+  bool hasVQSADInsts() const { return HasVQSADInsts; }
+
+  bool hasVCVTNORMInsts() const { return HasVCVTNORMInsts; }
+
+  bool hasVCVTPKNORMVOP2Insts() const { return HasVCVTPKNORMVOP2Insts; }
+
+  bool hasVCVTPKNORMVOP3Insts() const { return HasVCVTPKNORMVOP3Insts; }
+
   bool hasFP8E5M3Insts() const { return HasFP8E5M3Insts; }
 
   bool hasPkFmacF16Inst() const {
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 85adcab55b742..23095ba17cae8 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -616,15 +616,15 @@ let SubtargetPredicate = isGFX9Plus in {
 
   let isReMaterializable = 1 in
   defm V_SAT_PK_U8_I16    : VOP1Inst_t16<"v_sat_pk_u8_i16", VOP_I16_I32>;
-
-  let mayRaiseFPException = 0 in {
-  defm V_CVT_NORM_I16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_norm_i16_f16",
-      VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16>;
-  defm V_CVT_NORM_U16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_norm_u16_f16",
-      VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16>;
-  } // End mayRaiseFPException = 0
 } // End SubtargetPredicate = isGFX9Plus
 
+let mayRaiseFPException = 0, SubtargetPredicate = HasVCVTNORMInsts in {
+defm V_CVT_NORM_I16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_norm_i16_f16",
+    VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16>;
+defm V_CVT_NORM_U16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_norm_u16_f16",
+    VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16>;
+} // End mayRaiseFPException = 0, SubtargetPredicate = HasVCVTNORMInsts
+
 let SubtargetPredicate = isGFX9Only in {
   defm V_SCREEN_PARTITION_4SE_B32 : VOP1Inst <"v_screen_partition_4se_b32", VOP_I32_I32>;
 } // End SubtargetPredicate = isGFX9Only
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index d87d250a034f0..afd2d610b17de 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -971,7 +971,7 @@ defm V_MBCNT_HI_U32_B32 : VOP2Inst <"v_mbcnt_hi_u32_b32", VOP_I32_I32_I32, int_a
 } // End IsNeverUniform = 1
 defm V_LDEXP_F32 : VOP2Inst <"v_ldexp_f32", VOP_F32_F32_I32, any_fldexp>;
 
-let ReadsModeReg = 0, mayRaiseFPException = 0 in {
+let ReadsModeReg = 0, mayRaiseFPException = 0, SubtargetPredicate = HasVCVTPKNORMVOP2Insts in {
 defm V_CVT_PKNORM_I16_F32 : VOP2Inst <"v_cvt_pknorm_i16_f32", VOP_V2I16_F32_F32, AMDGPUpknorm_i16_f32>;
 defm V_CVT_PKNORM_U16_F32 : VOP2Inst <"v_cvt_pknorm_u16_f32", VOP_V2I16_F32_F32, AMDGPUpknorm_u16_f32>;
 }
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 05ba76ab489d8..3d82866c1e5a7 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -185,7 +185,8 @@ defm V_FMA_LEGACY_F32 : VOP3Inst <"v_fma_legacy_f32",
 defm V_MAD_I32_I24 : VOP3Inst <"v_mad_i32_i24", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
 defm V_MAD_U32_U24 : VOP3Inst <"v_mad_u32_u24", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
 defm V_FMA_F32 : VOP3Inst <"v_fma_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, any_fma>, VOPD_Component<0x13, "v_fma_f32">;
-defm V_LERP_U8 : VOP3Inst <"v_lerp_u8", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_lerp>;
+let SubtargetPredicate = HasVLERPInsts in
+  defm V_LERP_U8 : VOP3Inst <"v_lerp_u8", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_lerp>;
 
 let SchedRW = [WriteIntMul] in {
   let SubtargetPredicate = HasMadU32Inst in
@@ -258,12 +259,12 @@ defm V_DIV_FMAS_F64 : VOP3Inst <"v_div_fmas_f64", VOP_F64_F64_F64_F64_VCC>;
 } // End isCommutable = 1
 
 let isReMaterializable = 1 in {
-let mayRaiseFPException = 0 in {
+let mayRaiseFPException = 0, SubtargetPredicate = HasVCUBEInsts in {
 defm V_CUBEID_F32 : VOP3Inst <"v_cubeid_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, int_amdgcn_cubeid>;
 defm V_CUBESC_F32 : VOP3Inst <"v_cubesc_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, int_amdgcn_cubesc>;
 defm V_CUBETC_F32 : VOP3Inst <"v_cubetc_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, int_amdgcn_cubetc>;
 defm V_CUBEMA_F32 : VOP3Inst <"v_cubema_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, int_amdgcn_cubema>;
-} // End mayRaiseFPException
+} // mayRaiseFPException = 0, SubtargetPredicate = HasVCUBEInsts
 
 defm V_BFE_U32 : VOP3Inst <"v_bfe_u32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUbfe_u32>;
 defm V_BFE_I32 : VOP3Inst <"v_bfe_i32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUbfe_i32>;
@@ -306,12 +307,12 @@ let SubtargetPredicate = HasMinimum3Maximum3F32, ReadsModeReg = 0 in {
   defm V_MAXIMUM3_F32 : VOP3Inst <"v_maximum3_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, AMDGPUfmaximum3>;
 } // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0
 
-let isCommutable = 1 in {
+let isCommutable = 1, SubtargetPredicate = HasVSADInsts in {
   defm V_SAD_U8 : VOP3Inst <"v_sad_u8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
   defm V_SAD_HI_U8 : VOP3Inst <"v_sad_hi_u8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
   defm V_SAD_U16 : VOP3Inst <"v_sad_u16", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
   defm V_SAD_U32 : VOP3Inst <"v_sad_u32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
-} // End isCommutable = 1
+} // End isCommutable = 1, SubtargetPredicate = HasVSADInsts
 defm V_CVT_PK_U8_F32 : VOP3Inst<"v_cvt_pk_u8_f32", VOP3_Profile<VOP_I32_F32_I32_I32>, int_amdgcn_cvt_pk_u8_f32>;
 
 defm V_DIV_FIXUP_F32 : VOP3Inst <"v_div_fixup_f32", DIV_FIXUP_F32_PROF, AMDGPUdiv_fixup>;
@@ -424,7 +425,8 @@ def VOPProfileMQSAD : VOP3_Profile<VOP_V4I32_I64_I32_V4I32, VOP3_CLAMP> {
 
 let SubtargetPredicate = isGFX7Plus in {
 let Constraints = "@earlyclobber $vdst", SchedRW = [WriteQuarterRate32] in {
-defm V_QSAD_PK_U16_U8 : VOP3Inst <"v_qsad_pk_u16_u8", VOP3_Profile<VOP_I64_I64_I32_I64, VOP3_CLAMP>>;
+let SubtargetPredicate = HasVQSADInsts in
+  defm V_QSAD_PK_U16_U8 : VOP3Inst <"v_qsad_pk_u16_u8", VOP3_Profile<VOP_I64_I64_I32_I64, VOP3_CLAMP>>;
 defm V_MQSAD_U32_U8 : VOP3Inst <"v_mqsad_u32_u8", VOPProfileMQSAD>;
 } // End Constraints = "@earlyclobber $vdst", SchedRW = [WriteQuarterRate32]
 } // End SubtargetPredicate = isGFX7Plus
@@ -789,9 +791,6 @@ let isCommutable = 1 in {
   defm V_MAD_I32_I16 : VOP3Inst_t16 <"v_mad_i32_i16", VOP_I32_I16_I16_I32>;
 } // End isCommutable = 1
 
-defm V_CVT_PKNORM_I16_F16 : VOP3Inst_t16 <"v_cvt_pknorm_i16_f16", VOP_B32_F16_F16>;
-defm V_CVT_PKNORM_U16_F16 : VOP3Inst_t16 <"v_cvt_pknorm_u16_f16", VOP_B32_F16_F16>;
-
 defm V_PACK_B32_F16 : VOP3Inst_t16 <"v_pack_b32_f16", VOP_B32_F16_F16>;
 
 let isReMaterializable = 1 in {
@@ -996,6 +995,11 @@ def : GCNPat<(DivergentBinFrag<or> (or_oneuse i64:$src0, i64:$src1), i64:$src2),
 
 } // End SubtargetPredicate = isGFX9Plus
 
+let SubtargetPredicate = HasVCVTPKNORMVOP3Insts in {
+  defm V_CVT_PKNORM_I16_F16 : VOP3Inst_t16 <"v_cvt_pknorm_i16_f16", VOP_B32_F16_F16>;
+  defm V_CVT_PKNORM_U16_F16 : VOP3Inst_t16 <"v_cvt_pknorm_u16_f16", VOP_B32_F16_F16>;
+} // end SubtargetPredicate = HasVCVTPKNORMVOP3Insts
+
 // FIXME: Probably should hardcode clamp bit in pseudo and avoid this.
 class OpSelBinOpClampPat<SDPatternOperator node,
                          Instruction inst> : GCNPat<
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lerp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lerp.ll
index 43c69baaf3e7f..49169eec072b6 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lerp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lerp.ll
@@ -1,4 +1,3 @@
-; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=GCN %s
 ; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefix=GCN %s
 
 declare i32 @llvm.amdgcn.lerp(i32, i32, i32) #0

>From 33276544f6cd6a96f783bf66dfee81d26a3e8b96 Mon Sep 17 00:00:00 2001
From: shore <372660931 at qq.com>
Date: Fri, 14 Nov 2025 09:45:23 +0800
Subject: [PATCH 02/10] fix comments

---
 llvm/lib/Target/AMDGPU/AMDGPU.td           | 124 ++++++++++-----------
 llvm/lib/Target/AMDGPU/GCNSubtarget.h      |  28 ++---
 llvm/lib/Target/AMDGPU/VOP1Instructions.td |   4 +-
 llvm/lib/Target/AMDGPU/VOP2Instructions.td |   2 +-
 llvm/lib/Target/AMDGPU/VOP3Instructions.td |  16 +--
 5 files changed, 87 insertions(+), 87 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index fe2a192f0f372..c5d63e5000767 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -901,46 +901,46 @@ def FeaturePkFmacF16Inst : SubtargetFeature<"pk-fmac-f16-inst",
   "Has v_pk_fmac_f16 instruction"
 >;
 
-def FeatureVCUBEInsts : SubtargetFeature<"V_CUBE-Insts",
-  "HasVCUBEInsts",
+def FeatureCubeInsts : SubtargetFeature<"V_CUBE-Insts",
+  "HasCubeInsts",
   "true",
   "Has V_CUBE* instructions"
 >; 
 
-def FeatureVLERPInsts : SubtargetFeature<"V_LERP-insts",
-  "HasVLERPInsts",
+def FeatureLerpInst : SubtargetFeature<"V_LERP-insts",
+  "HasLerpInst",
   "true",
-  "Has V_LERP* instructions"
+  "Has v_lerp_u8 instruction"
 >; 
 
-def FeatureVSADInsts : SubtargetFeature<"V_SAD-insts",
-  "HasVSADInsts",
+def FeatureSadInsts : SubtargetFeature<"V_SAD-insts",
+  "HasSadInsts",
   "true",
   "Has V_SAD* instructions"
 >; 
 
-def FeatureVQSADInsts : SubtargetFeature<"V_QSAD-insts",
-  "HasVQSADInsts",
+def FeatureQsadInsts : SubtargetFeature<"V_QSAD-insts",
+  "HasQsadInsts",
   "true",
   "Has V_QSAD* instructions"
 >; 
 
-def FeatureVCVTNORMInsts : SubtargetFeature<"V_CVT_NORM-insts",
-  "HasVCVTNORMInsts",
+def FeatureCvtNormInsts : SubtargetFeature<"V_CVT_NORM-insts",
+  "HasCvtNormInsts",
   "true",
   "Has V_CVT_NORM* instructions"
 >;
 
-def FeatureVCVTPKNORMVOP2Insts : SubtargetFeature<"V_CVT_PKNORM-VOP2-insts",
-  "HasVCVTPKNORMVOP2Insts",
+def FeatureCvtPkNormVOP2Insts : SubtargetFeature<"V_CVT_PKNORM-VOP2-insts",
+  "HasCvtPkNormVOP2Insts",
   "true",
-  "Has V_CVT_NORM* VOP2 instructions"
+  "Has V_CVT_PKNORM_*_F32 instructions"
 >;
 
-def FeatureVCVTPKNORMVOP3Insts : SubtargetFeature<"V_CVT_PKNORM-VOP3-insts",
-  "HasVCVTPKNORMVOP3Insts",
+def FeatureCvtPkNormVOP3Insts : SubtargetFeature<"V_CVT_PKNORM-VOP3-insts",
+  "HasCvtPkNormVOP3Insts",
   "true",
-  "Has V_CVT_NORM* VOP3 instructions"
+  "Has V_CVT_PKNORM_*_F16 instructions"
 >;
 
 def FeatureAtomicDsPkAdd16Insts : SubtargetFeature<"atomic-ds-pk-add-16-insts",
@@ -1536,8 +1536,8 @@ def FeatureSouthernIslands : GCNSubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
   FeatureTrigReducedRange, FeatureExtendedImageInsts, FeatureImageInsts,
   FeatureGDS, FeatureGWS, FeatureDefaultComponentZero,
   FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF64GlobalInsts,
-  FeatureVmemWriteVgprInOrder, FeatureVCUBEInsts, FeatureVLERPInsts,
-  FeatureVSADInsts, FeatureVCVTPKNORMVOP2Insts
+  FeatureVmemWriteVgprInOrder, FeatureCubeInsts, FeatureLerpInst,
+  FeatureSadInsts, FeatureCvtPkNormVOP2Insts
   ]
 >;
 
@@ -1551,8 +1551,8 @@ def FeatureSeaIslands : GCNSubtargetFeatureGeneration<"SEA_ISLANDS",
   FeatureImageInsts, FeatureGDS, FeatureGWS, FeatureDefaultComponentZero,
   FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF64GlobalInsts,
   FeatureAtomicFMinFMaxF32FlatInsts, FeatureAtomicFMinFMaxF64FlatInsts,
-  FeatureVmemWriteVgprInOrder, FeatureVCUBEInsts, FeatureVLERPInsts,
-  FeatureVSADInsts, FeatureVQSADInsts, FeatureVCVTPKNORMVOP2Insts
+  FeatureVmemWriteVgprInOrder, FeatureCubeInsts, FeatureLerpInst,
+  FeatureSadInsts, FeatureQsadInsts, FeatureCvtPkNormVOP2Insts
   ]
 >;
 
@@ -1568,9 +1568,9 @@ def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
    FeatureGFX7GFX8GFX9Insts, FeatureSMemTimeInst, FeatureMadMacF32Insts,
    FeatureDsSrc2Insts, FeatureExtendedImageInsts, FeatureFastDenormalF32,
    FeatureUnalignedBufferAccess, FeatureImageInsts, FeatureGDS, FeatureGWS,
-   FeatureDefaultComponentZero, FeatureVmemWriteVgprInOrder, FeatureVCUBEInsts, 
-   FeatureVLERPInsts, FeatureVSADInsts, FeatureVQSADInsts, 
-   FeatureVCVTPKNORMVOP2Insts
+   FeatureDefaultComponentZero, FeatureVmemWriteVgprInOrder, FeatureCubeInsts, 
+   FeatureLerpInst, FeatureSadInsts, FeatureQsadInsts, 
+   FeatureCvtPkNormVOP2Insts
   ]
 >;
 
@@ -1590,9 +1590,9 @@ def FeatureGFX9 : GCNSubtargetFeatureGeneration<"GFX9",
    FeatureUnalignedBufferAccess, FeatureUnalignedScratchAccess,
    FeatureUnalignedDSAccess, FeatureNegativeScratchOffsetBug, FeatureGWS,
    FeatureDefaultComponentZero,FeatureVmemWriteVgprInOrder, FeatureMemToLDSLoad,
-   FeatureVCUBEInsts, FeatureVLERPInsts, FeatureVSADInsts, FeatureVQSADInsts, 
-   FeatureVCVTNORMInsts, FeatureVCVTPKNORMVOP2Insts, 
-   FeatureVCVTPKNORMVOP3Insts
+   FeatureCubeInsts, FeatureLerpInst, FeatureSadInsts, FeatureQsadInsts, 
+   FeatureCvtNormInsts, FeatureCvtPkNormVOP2Insts, 
+   FeatureCvtPkNormVOP3Insts
   ]
 >;
 
@@ -1616,10 +1616,10 @@ def FeatureGFX10 : GCNSubtargetFeatureGeneration<"GFX10",
    FeatureDefaultComponentZero, FeatureMaxHardClauseLength63,
    FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF64GlobalInsts,
    FeatureAtomicFMinFMaxF32FlatInsts, FeatureAtomicFMinFMaxF64FlatInsts,
-   FeatureVmemWriteVgprInOrder, FeatureMemToLDSLoad, FeatureVCUBEInsts, 
-   FeatureVLERPInsts, FeatureVSADInsts, FeatureVQSADInsts, 
-   FeatureVCVTNORMInsts, FeatureVCVTPKNORMVOP2Insts, 
-   FeatureVCVTPKNORMVOP3Insts
+   FeatureVmemWriteVgprInOrder, FeatureMemToLDSLoad, FeatureCubeInsts, 
+   FeatureLerpInst, FeatureSadInsts, FeatureQsadInsts, 
+   FeatureCvtNormInsts, FeatureCvtPkNormVOP2Insts, 
+   FeatureCvtPkNormVOP3Insts
   ]
 >;
 
@@ -1642,9 +1642,9 @@ def FeatureGFX11 : GCNSubtargetFeatureGeneration<"GFX11",
    FeatureUnalignedDSAccess, FeatureGDS, FeatureGWS,
    FeatureDefaultComponentZero, FeatureMaxHardClauseLength32,
    FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF32FlatInsts,
-   FeatureVmemWriteVgprInOrder, FeatureVCUBEInsts, FeatureVLERPInsts,
-   FeatureVSADInsts, FeatureVQSADInsts, FeatureVCVTNORMInsts, 
-   FeatureVCVTPKNORMVOP2Insts, FeatureVCVTPKNORMVOP3Insts
+   FeatureVmemWriteVgprInOrder, FeatureCubeInsts, FeatureLerpInst,
+   FeatureSadInsts, FeatureQsadInsts, FeatureCvtNormInsts, 
+   FeatureCvtPkNormVOP2Insts, FeatureCvtPkNormVOP3Insts
   ]
 >;
 
@@ -2124,13 +2124,13 @@ def FeatureISAVersion12 : FeatureSet<
    FeatureBVHDualAndBVH8Insts,
    FeatureWaitsBeforeSystemScopeStores,
    FeatureD16Writes32BitVgpr,
-   FeatureVCUBEInsts, 
-   FeatureVLERPInsts, 
-   FeatureVSADInsts,
-   FeatureVQSADInsts, 
-   FeatureVCVTNORMInsts, 
-   FeatureVCVTPKNORMVOP2Insts, 
-   FeatureVCVTPKNORMVOP3Insts
+   FeatureCubeInsts, 
+   FeatureLerpInst, 
+   FeatureSadInsts,
+   FeatureQsadInsts, 
+   FeatureCvtNormInsts, 
+   FeatureCvtPkNormVOP2Insts, 
+   FeatureCvtPkNormVOP3Insts
    ]>;
 
 def FeatureISAVersion12_50_Common : FeatureSet<
@@ -2210,13 +2210,13 @@ def FeatureISAVersion12_50_Common : FeatureSet<
 
 def FeatureISAVersion12_50 : FeatureSet<
   !listconcat(FeatureISAVersion12_50_Common.Features,
-  [FeatureVCUBEInsts, 
-   FeatureVLERPInsts, 
-   FeatureVSADInsts, 
-   FeatureVQSADInsts, 
-   FeatureVCVTNORMInsts, 
-   FeatureVCVTPKNORMVOP2Insts, 
-   FeatureVCVTPKNORMVOP3Insts])>;
+  [FeatureCubeInsts, 
+   FeatureLerpInst, 
+   FeatureSadInsts, 
+   FeatureQsadInsts, 
+   FeatureCvtNormInsts, 
+   FeatureCvtPkNormVOP2Insts, 
+   FeatureCvtPkNormVOP3Insts])>;
 
 def FeatureISAVersion12_51 : FeatureSet<
   !listconcat(FeatureISAVersion12_50.Features,
@@ -2887,26 +2887,26 @@ def HasFP8Insts : Predicate<"Subtarget->hasFP8Insts()">,
 def HasFP8ConversionInsts : Predicate<"Subtarget->hasFP8ConversionInsts()">,
   AssemblerPredicate<(all_of FeatureFP8ConversionInsts)>;
 
-def HasVCUBEInsts : Predicate<"Subtarget->hasVCUBEInsts()">,
-  AssemblerPredicate<(all_of FeatureVCUBEInsts)>;
+def HasCubeInsts : Predicate<"Subtarget->hasCubeInsts()">,
+  AssemblerPredicate<(all_of FeatureCubeInsts)>;
 
-def HasVLERPInsts : Predicate<"Subtarget->hasVLERPInsts()">,
-  AssemblerPredicate<(all_of FeatureVLERPInsts)>;
+def HasLerpInst : Predicate<"Subtarget->hasLerpInst()">,
+  AssemblerPredicate<(all_of FeatureLerpInst)>;
 
-def HasVSADInsts : Predicate<"Subtarget->hasVSADInsts()">,
-  AssemblerPredicate<(all_of FeatureVSADInsts)>;
+def HasSadInsts : Predicate<"Subtarget->hasSadInsts()">,
+  AssemblerPredicate<(all_of FeatureSadInsts)>;
 
-def HasVQSADInsts : Predicate<"Subtarget->hasVQSADInsts()">,
-  AssemblerPredicate<(all_of FeatureVQSADInsts)>;
+def HasQsadInsts : Predicate<"Subtarget->hasQsadInsts()">,
+  AssemblerPredicate<(all_of FeatureQsadInsts)>;
 
-def HasVCVTNORMInsts : Predicate<"Subtarget->hasVCVTNORMInsts()">,
-  AssemblerPredicate<(all_of FeatureVCVTNORMInsts)>;
+def HasCvtNormInsts : Predicate<"Subtarget->hasCvtNormInsts()">,
+  AssemblerPredicate<(all_of FeatureCvtNormInsts)>;
 
-def HasVCVTPKNORMVOP2Insts : Predicate<"Subtarget->hasVCVTPKNORMVOP2Insts()">,
-  AssemblerPredicate<(all_of FeatureVCVTPKNORMVOP2Insts)>;
+def HasCvtPkNormVOP2Insts : Predicate<"Subtarget->hasCvtPkNormVOP2Insts()">,
+  AssemblerPredicate<(all_of FeatureCvtPkNormVOP2Insts)>;
 
-def HasVCVTPKNORMVOP3Insts : Predicate<"Subtarget->hasVCVTPKNORMVOP3Insts()">,
-  AssemblerPredicate<(all_of FeatureVCVTPKNORMVOP3Insts)>;
+def HasCvtPkNormVOP3Insts : Predicate<"Subtarget->hasCvtPkNormVOP3Insts()">,
+  AssemblerPredicate<(all_of FeatureCvtPkNormVOP3Insts)>;
 
 def HasFP8E5M3Insts : Predicate<"Subtarget->hasFP8E5M3Insts()">,
   AssemblerPredicate<(all_of FeatureFP8E5M3Insts)>;
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 862cee468b7d3..85260c4f123c7 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -166,13 +166,13 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
   bool HasMAIInsts = false;
   bool HasFP8Insts = false;
   bool HasFP8ConversionInsts = false;
-  bool HasVCUBEInsts = false;
-  bool HasVLERPInsts = false;
-  bool HasVSADInsts = false;
-  bool HasVQSADInsts = false;
-  bool HasVCVTNORMInsts = false;
-  bool HasVCVTPKNORMVOP2Insts = false;
-  bool HasVCVTPKNORMVOP3Insts = false;
+  bool HasCubeInsts = false;
+  bool HasLerpInst = false;
+  bool HasSadInsts = false;
+  bool HasQsadInsts = false;
+  bool HasCvtNormInsts = false;
+  bool HasCvtPkNormVOP2Insts = false;
+  bool HasCvtPkNormVOP3Insts = false;
   bool HasFP8E5M3Insts = false;
   bool HasCvtFP8Vop1Bug = false;
   bool HasPkFmacF16Inst = false;
@@ -899,19 +899,19 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
 
   bool hasFP8ConversionInsts() const { return HasFP8ConversionInsts; }
 
-  bool hasVCUBEInsts() const { return HasVCUBEInsts; }
+  bool hasCubeInsts() const { return HasCubeInsts; }
 
-  bool hasVLERPInsts() const { return HasVLERPInsts; }
+  bool hasLerpInst() const { return HasLerpInst; }
 
-  bool hasVSADInsts() const { return HasVSADInsts; }
+  bool hasSadInsts() const { return HasSadInsts; }
 
-  bool hasVQSADInsts() const { return HasVQSADInsts; }
+  bool hasQsadInsts() const { return HasQsadInsts; }
 
-  bool hasVCVTNORMInsts() const { return HasVCVTNORMInsts; }
+  bool hasCvtNormInsts() const { return HasCvtNormInsts; }
 
-  bool hasVCVTPKNORMVOP2Insts() const { return HasVCVTPKNORMVOP2Insts; }
+  bool hasCvtPkNormVOP2Insts() const { return HasCvtPkNormVOP2Insts; }
 
-  bool hasVCVTPKNORMVOP3Insts() const { return HasVCVTPKNORMVOP3Insts; }
+  bool hasCvtPkNormVOP3Insts() const { return HasCvtPkNormVOP3Insts; }
 
   bool hasFP8E5M3Insts() const { return HasFP8E5M3Insts; }
 
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 23095ba17cae8..1d1e95908fce6 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -618,12 +618,12 @@ let SubtargetPredicate = isGFX9Plus in {
   defm V_SAT_PK_U8_I16    : VOP1Inst_t16<"v_sat_pk_u8_i16", VOP_I16_I32>;
 } // End SubtargetPredicate = isGFX9Plus
 
-let mayRaiseFPException = 0, SubtargetPredicate = HasVCVTNORMInsts in {
+let mayRaiseFPException = 0, SubtargetPredicate = HasCvtNormInsts in {
 defm V_CVT_NORM_I16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_norm_i16_f16",
     VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16>;
 defm V_CVT_NORM_U16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_norm_u16_f16",
     VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16>;
-} // End mayRaiseFPException = 0, SubtargetPredicate = HasVCVTNORMInsts
+} // End mayRaiseFPException = 0, SubtargetPredicate = HasCvtNormInsts
 
 let SubtargetPredicate = isGFX9Only in {
   defm V_SCREEN_PARTITION_4SE_B32 : VOP1Inst <"v_screen_partition_4se_b32", VOP_I32_I32>;
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index afd2d610b17de..dbb7862ab4ab5 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -971,7 +971,7 @@ defm V_MBCNT_HI_U32_B32 : VOP2Inst <"v_mbcnt_hi_u32_b32", VOP_I32_I32_I32, int_a
 } // End IsNeverUniform = 1
 defm V_LDEXP_F32 : VOP2Inst <"v_ldexp_f32", VOP_F32_F32_I32, any_fldexp>;
 
-let ReadsModeReg = 0, mayRaiseFPException = 0, SubtargetPredicate = HasVCVTPKNORMVOP2Insts in {
+let ReadsModeReg = 0, mayRaiseFPException = 0, SubtargetPredicate = HasCvtPkNormVOP2Insts in {
 defm V_CVT_PKNORM_I16_F32 : VOP2Inst <"v_cvt_pknorm_i16_f32", VOP_V2I16_F32_F32, AMDGPUpknorm_i16_f32>;
 defm V_CVT_PKNORM_U16_F32 : VOP2Inst <"v_cvt_pknorm_u16_f32", VOP_V2I16_F32_F32, AMDGPUpknorm_u16_f32>;
 }
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 3d82866c1e5a7..872bde501cd2d 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -185,7 +185,7 @@ defm V_FMA_LEGACY_F32 : VOP3Inst <"v_fma_legacy_f32",
 defm V_MAD_I32_I24 : VOP3Inst <"v_mad_i32_i24", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
 defm V_MAD_U32_U24 : VOP3Inst <"v_mad_u32_u24", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
 defm V_FMA_F32 : VOP3Inst <"v_fma_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, any_fma>, VOPD_Component<0x13, "v_fma_f32">;
-let SubtargetPredicate = HasVLERPInsts in
+let SubtargetPredicate = HasLerpInst in
   defm V_LERP_U8 : VOP3Inst <"v_lerp_u8", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_lerp>;
 
 let SchedRW = [WriteIntMul] in {
@@ -259,12 +259,12 @@ defm V_DIV_FMAS_F64 : VOP3Inst <"v_div_fmas_f64", VOP_F64_F64_F64_F64_VCC>;
 } // End isCommutable = 1
 
 let isReMaterializable = 1 in {
-let mayRaiseFPException = 0, SubtargetPredicate = HasVCUBEInsts in {
+let mayRaiseFPException = 0, SubtargetPredicate = HasCubeInsts in {
 defm V_CUBEID_F32 : VOP3Inst <"v_cubeid_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, int_amdgcn_cubeid>;
 defm V_CUBESC_F32 : VOP3Inst <"v_cubesc_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, int_amdgcn_cubesc>;
 defm V_CUBETC_F32 : VOP3Inst <"v_cubetc_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, int_amdgcn_cubetc>;
 defm V_CUBEMA_F32 : VOP3Inst <"v_cubema_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, int_amdgcn_cubema>;
-} // mayRaiseFPException = 0, SubtargetPredicate = HasVCUBEInsts
+} // End mayRaiseFPException = 0, SubtargetPredicate = HasCubeInsts
 
 defm V_BFE_U32 : VOP3Inst <"v_bfe_u32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUbfe_u32>;
 defm V_BFE_I32 : VOP3Inst <"v_bfe_i32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUbfe_i32>;
@@ -307,12 +307,12 @@ let SubtargetPredicate = HasMinimum3Maximum3F32, ReadsModeReg = 0 in {
   defm V_MAXIMUM3_F32 : VOP3Inst <"v_maximum3_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, AMDGPUfmaximum3>;
 } // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0
 
-let isCommutable = 1, SubtargetPredicate = HasVSADInsts in {
+let isCommutable = 1, SubtargetPredicate = HasSadInsts in {
   defm V_SAD_U8 : VOP3Inst <"v_sad_u8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
   defm V_SAD_HI_U8 : VOP3Inst <"v_sad_hi_u8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
   defm V_SAD_U16 : VOP3Inst <"v_sad_u16", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
   defm V_SAD_U32 : VOP3Inst <"v_sad_u32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
-} // End isCommutable = 1, SubtargetPredicate = HasVSADInsts
+} // End isCommutable = 1, SubtargetPredicate = HasSadInsts
 defm V_CVT_PK_U8_F32 : VOP3Inst<"v_cvt_pk_u8_f32", VOP3_Profile<VOP_I32_F32_I32_I32>, int_amdgcn_cvt_pk_u8_f32>;
 
 defm V_DIV_FIXUP_F32 : VOP3Inst <"v_div_fixup_f32", DIV_FIXUP_F32_PROF, AMDGPUdiv_fixup>;
@@ -425,7 +425,7 @@ def VOPProfileMQSAD : VOP3_Profile<VOP_V4I32_I64_I32_V4I32, VOP3_CLAMP> {
 
 let SubtargetPredicate = isGFX7Plus in {
 let Constraints = "@earlyclobber $vdst", SchedRW = [WriteQuarterRate32] in {
-let SubtargetPredicate = HasVQSADInsts in
+let SubtargetPredicate = HasQsadInsts in
   defm V_QSAD_PK_U16_U8 : VOP3Inst <"v_qsad_pk_u16_u8", VOP3_Profile<VOP_I64_I64_I32_I64, VOP3_CLAMP>>;
 defm V_MQSAD_U32_U8 : VOP3Inst <"v_mqsad_u32_u8", VOPProfileMQSAD>;
 } // End Constraints = "@earlyclobber $vdst", SchedRW = [WriteQuarterRate32]
@@ -995,10 +995,10 @@ def : GCNPat<(DivergentBinFrag<or> (or_oneuse i64:$src0, i64:$src1), i64:$src2),
 
 } // End SubtargetPredicate = isGFX9Plus
 
-let SubtargetPredicate = HasVCVTPKNORMVOP3Insts in {
+let SubtargetPredicate = HasCvtPkNormVOP3Insts in {
   defm V_CVT_PKNORM_I16_F16 : VOP3Inst_t16 <"v_cvt_pknorm_i16_f16", VOP_B32_F16_F16>;
   defm V_CVT_PKNORM_U16_F16 : VOP3Inst_t16 <"v_cvt_pknorm_u16_f16", VOP_B32_F16_F16>;
-} // end SubtargetPredicate = HasVCVTPKNORMVOP3Insts
+} // end SubtargetPredicate = HasCvtPkNormVOP3Insts
 
 // FIXME: Probably should hardcode clamp bit in pseudo and avoid this.
 class OpSelBinOpClampPat<SDPatternOperator node,

>From cd83519edfef4bc692b656b7c1a10cbb5727f17c Mon Sep 17 00:00:00 2001
From: shore <372660931 at qq.com>
Date: Tue, 18 Nov 2025 10:59:53 +0800
Subject: [PATCH 03/10] Add required target features to AMDGPU builtins

---
 clang/include/clang/Basic/BuiltinsAMDGPU.def | 22 ++++++++++----------
 llvm/lib/Target/AMDGPU/AMDGPU.td             |  4 ++--
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 2b6fcb1fd479b..0dfa9c13792cf 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -122,13 +122,13 @@ BUILTIN(__builtin_amdgcn_frexp_exp, "id", "nc")
 BUILTIN(__builtin_amdgcn_frexp_expf, "if", "nc")
 BUILTIN(__builtin_amdgcn_fract, "dd", "nc")
 BUILTIN(__builtin_amdgcn_fractf, "ff", "nc")
-BUILTIN(__builtin_amdgcn_lerp, "UiUiUiUi", "nc")
+TARGET_BUILTIN(__builtin_amdgcn_lerp, "UiUiUiUi", "nc", "HasLerpInst")
 BUILTIN(__builtin_amdgcn_class, "bdi", "nc")
 BUILTIN(__builtin_amdgcn_classf, "bfi", "nc")
-BUILTIN(__builtin_amdgcn_cubeid, "ffff", "nc")
-BUILTIN(__builtin_amdgcn_cubesc, "ffff", "nc")
-BUILTIN(__builtin_amdgcn_cubetc, "ffff", "nc")
-BUILTIN(__builtin_amdgcn_cubema, "ffff", "nc")
+TARGET_BUILTIN(__builtin_amdgcn_cubeid, "ffff", "nc", "HasCubeInsts")
+TARGET_BUILTIN(__builtin_amdgcn_cubesc, "ffff", "nc", "HasCubeInsts")
+TARGET_BUILTIN(__builtin_amdgcn_cubetc, "ffff", "nc", "HasCubeInsts")
+TARGET_BUILTIN(__builtin_amdgcn_cubema, "ffff", "nc", "HasCubeInsts")
 BUILTIN(__builtin_amdgcn_s_sleep, "vIi", "n")
 BUILTIN(__builtin_amdgcn_s_incperflevel, "vIi", "n")
 BUILTIN(__builtin_amdgcn_s_decperflevel, "vIi", "n")
@@ -149,17 +149,17 @@ BUILTIN(__builtin_amdgcn_alignbyte, "UiUiUiUi", "nc")
 BUILTIN(__builtin_amdgcn_ubfe, "UiUiUiUi", "nc")
 BUILTIN(__builtin_amdgcn_sbfe, "UiUiUiUi", "nc")
 BUILTIN(__builtin_amdgcn_cvt_pkrtz, "E2hff", "nc")
-BUILTIN(__builtin_amdgcn_cvt_pknorm_i16, "E2sff", "nc")
-BUILTIN(__builtin_amdgcn_cvt_pknorm_u16, "E2Usff", "nc")
+TARGET_BUILTIN(__builtin_amdgcn_cvt_pknorm_i16, "E2sff", "nc", "HasCvtPkNormVOP2Insts")
+TARGET_BUILTIN(__builtin_amdgcn_cvt_pknorm_u16, "E2Usff", "nc", "HasCvtPkNormVOP2Insts")
 BUILTIN(__builtin_amdgcn_cvt_pk_i16, "E2sii", "nc")
 BUILTIN(__builtin_amdgcn_cvt_pk_u16, "E2UsUiUi", "nc")
 BUILTIN(__builtin_amdgcn_cvt_pk_u8_f32, "UifUiUi", "nc")
 BUILTIN(__builtin_amdgcn_cvt_off_f32_i4, "fi", "nc")
-BUILTIN(__builtin_amdgcn_sad_u8, "UiUiUiUi", "nc")
+TARGET_BUILTIN(__builtin_amdgcn_sad_u8, "UiUiUiUi", "nc", "HasSadInsts")
 BUILTIN(__builtin_amdgcn_msad_u8, "UiUiUiUi", "nc")
-BUILTIN(__builtin_amdgcn_sad_hi_u8, "UiUiUiUi", "nc")
-BUILTIN(__builtin_amdgcn_sad_u16, "UiUiUiUi", "nc")
-BUILTIN(__builtin_amdgcn_qsad_pk_u16_u8, "WUiWUiUiWUi", "nc")
+TARGET_BUILTIN(__builtin_amdgcn_sad_hi_u8, "UiUiUiUi", "nc", "HasSadInsts")
+TARGET_BUILTIN(__builtin_amdgcn_sad_u16, "UiUiUiUi", "nc", "HasSadInsts")
+TARGET_BUILTIN(__builtin_amdgcn_qsad_pk_u16_u8, "WUiWUiUiWUi", "nc", "HasQsadInsts")
 BUILTIN(__builtin_amdgcn_mqsad_pk_u16_u8, "WUiWUiUiWUi", "nc")
 BUILTIN(__builtin_amdgcn_mqsad_u32_u8, "V4UiWUiUiV4Ui", "nc")
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index c5d63e5000767..cd8327563d9d6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -934,13 +934,13 @@ def FeatureCvtNormInsts : SubtargetFeature<"V_CVT_NORM-insts",
 def FeatureCvtPkNormVOP2Insts : SubtargetFeature<"V_CVT_PKNORM-VOP2-insts",
   "HasCvtPkNormVOP2Insts",
   "true",
-  "Has V_CVT_PK_NORM_*_F32 instructions/Has V_CVT_PK_NORM_*_F16 instructions"
+  "Has V_CVT_PK_NORM_* instructions/Has V_CVT_PK_NORM_*_F16 instructions"
 >;
 
 def FeatureCvtPkNormVOP3Insts : SubtargetFeature<"V_CVT_PKNORM-VOP3-insts",
   "HasCvtPkNormVOP3Insts",
   "true",
-  "Has V_CVT_PK_NORM_*_F32 instructions/Has V_CVT_PK_NORM_*_F16 instructions"
+  "Has V_CVT_PK_NORM_* instructions/Has V_CVT_PK_NORM_*_F16 instructions"
 >;
 
 def FeatureAtomicDsPkAdd16Insts : SubtargetFeature<"atomic-ds-pk-add-16-insts",

>From 15e6eece5e39bd0c8fb1d1eb6e36151b0f45fdb7 Mon Sep 17 00:00:00 2001
From: shore <372660931 at qq.com>
Date: Tue, 18 Nov 2025 13:17:14 +0800
Subject: [PATCH 04/10] Fix builtin feature names and set them per GPU in TargetParser

---
 clang/include/clang/Basic/BuiltinsAMDGPU.def  | 22 ++---
 .../CodeGenOpenCL/builtins-amdgcn-fiji.cl     | 86 +++++++++++++++++++
 clang/test/CodeGenOpenCL/builtins-amdgcn.cl   | 67 ---------------
 llvm/lib/Target/AMDGPU/AMDGPU.td              | 14 +--
 llvm/lib/TargetParser/TargetParser.cpp        | 50 ++++++++++-
 5 files changed, 153 insertions(+), 86 deletions(-)
 create mode 100644 clang/test/CodeGenOpenCL/builtins-amdgcn-fiji.cl

diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 0dfa9c13792cf..c349cdc6aef5b 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -122,13 +122,13 @@ BUILTIN(__builtin_amdgcn_frexp_exp, "id", "nc")
 BUILTIN(__builtin_amdgcn_frexp_expf, "if", "nc")
 BUILTIN(__builtin_amdgcn_fract, "dd", "nc")
 BUILTIN(__builtin_amdgcn_fractf, "ff", "nc")
-TARGET_BUILTIN(__builtin_amdgcn_lerp, "UiUiUiUi", "nc", "HasLerpInst")
+TARGET_BUILTIN(__builtin_amdgcn_lerp, "UiUiUiUi", "nc", "lerp-inst")
 BUILTIN(__builtin_amdgcn_class, "bdi", "nc")
 BUILTIN(__builtin_amdgcn_classf, "bfi", "nc")
-TARGET_BUILTIN(__builtin_amdgcn_cubeid, "ffff", "nc", "HasCubeInsts")
-TARGET_BUILTIN(__builtin_amdgcn_cubesc, "ffff", "nc", "HasCubeInsts")
-TARGET_BUILTIN(__builtin_amdgcn_cubetc, "ffff", "nc", "HasCubeInsts")
-TARGET_BUILTIN(__builtin_amdgcn_cubema, "ffff", "nc", "HasCubeInsts")
+TARGET_BUILTIN(__builtin_amdgcn_cubeid, "ffff", "nc", "cube-insts")
+TARGET_BUILTIN(__builtin_amdgcn_cubesc, "ffff", "nc", "cube-insts")
+TARGET_BUILTIN(__builtin_amdgcn_cubetc, "ffff", "nc", "cube-insts")
+TARGET_BUILTIN(__builtin_amdgcn_cubema, "ffff", "nc", "cube-insts")
 BUILTIN(__builtin_amdgcn_s_sleep, "vIi", "n")
 BUILTIN(__builtin_amdgcn_s_incperflevel, "vIi", "n")
 BUILTIN(__builtin_amdgcn_s_decperflevel, "vIi", "n")
@@ -149,17 +149,17 @@ BUILTIN(__builtin_amdgcn_alignbyte, "UiUiUiUi", "nc")
 BUILTIN(__builtin_amdgcn_ubfe, "UiUiUiUi", "nc")
 BUILTIN(__builtin_amdgcn_sbfe, "UiUiUiUi", "nc")
 BUILTIN(__builtin_amdgcn_cvt_pkrtz, "E2hff", "nc")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_pknorm_i16, "E2sff", "nc", "HasCvtPkNormVOP2Insts")
-TARGET_BUILTIN(__builtin_amdgcn_cvt_pknorm_u16, "E2Usff", "nc", "HasCvtPkNormVOP2Insts")
+TARGET_BUILTIN(__builtin_amdgcn_cvt_pknorm_i16, "E2sff", "nc", "cvt-pknorm-vop2-insts")
+TARGET_BUILTIN(__builtin_amdgcn_cvt_pknorm_u16, "E2Usff", "nc", "cvt-pknorm-vop2-insts")
 BUILTIN(__builtin_amdgcn_cvt_pk_i16, "E2sii", "nc")
 BUILTIN(__builtin_amdgcn_cvt_pk_u16, "E2UsUiUi", "nc")
 BUILTIN(__builtin_amdgcn_cvt_pk_u8_f32, "UifUiUi", "nc")
 BUILTIN(__builtin_amdgcn_cvt_off_f32_i4, "fi", "nc")
-TARGET_BUILTIN(__builtin_amdgcn_sad_u8, "UiUiUiUi", "nc", "HasSadInsts")
+TARGET_BUILTIN(__builtin_amdgcn_sad_u8, "UiUiUiUi", "nc", "sad-insts")
 BUILTIN(__builtin_amdgcn_msad_u8, "UiUiUiUi", "nc")
-TARGET_BUILTIN(__builtin_amdgcn_sad_hi_u8, "UiUiUiUi", "nc", "HasSadInsts")
-TARGET_BUILTIN(__builtin_amdgcn_sad_u16, "UiUiUiUi", "nc", "HasSadInsts")
-TARGET_BUILTIN(__builtin_amdgcn_qsad_pk_u16_u8, "WUiWUiUiWUi", "nc", "HasQsadInsts")
+TARGET_BUILTIN(__builtin_amdgcn_sad_hi_u8, "UiUiUiUi", "nc", "sad-insts")
+TARGET_BUILTIN(__builtin_amdgcn_sad_u16, "UiUiUiUi", "nc", "sad-insts")
+TARGET_BUILTIN(__builtin_amdgcn_qsad_pk_u16_u8, "WUiWUiUiWUi", "nc", "qsad-insts")
 BUILTIN(__builtin_amdgcn_mqsad_pk_u16_u8, "WUiWUiUiWUi", "nc")
 BUILTIN(__builtin_amdgcn_mqsad_u32_u8, "V4UiWUiUiV4Ui", "nc")
 
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-fiji.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-fiji.cl
new file mode 100644
index 0000000000000..2178718f90d5a
--- /dev/null
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-fiji.cl
@@ -0,0 +1,86 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu fiji -emit-llvm -o - %s | FileCheck -enable-var-scope --check-prefixes=CHECK %s
+
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+typedef unsigned long ulong;
+typedef unsigned int uint;
+typedef unsigned short ushort;
+typedef half __attribute__((ext_vector_type(2))) half2;
+typedef short __attribute__((ext_vector_type(2))) short2;
+typedef ushort __attribute__((ext_vector_type(2))) ushort2;
+typedef uint __attribute__((ext_vector_type(4))) uint4;
+
+// CHECK-LABEL: @test_lerp
+// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.lerp
+void test_lerp(global int* out, int a, int b, int c)
+{
+  *out = __builtin_amdgcn_lerp(a, b, c);
+}
+
+// CHECK-LABEL: @test_cubeid(
+// CHECK: {{.*}}call{{.*}} float @llvm.amdgcn.cubeid(float %a, float %b, float %c)
+void test_cubeid(global float* out, float a, float b, float c) {
+  *out = __builtin_amdgcn_cubeid(a, b, c);
+}
+
+// CHECK-LABEL: @test_cubesc(
+// CHECK: {{.*}}call{{.*}} float @llvm.amdgcn.cubesc(float %a, float %b, float %c)
+void test_cubesc(global float* out, float a, float b, float c) {
+  *out = __builtin_amdgcn_cubesc(a, b, c);
+}
+
+// CHECK-LABEL: @test_cubetc(
+// CHECK: {{.*}}call{{.*}} float @llvm.amdgcn.cubetc(float %a, float %b, float %c)
+void test_cubetc(global float* out, float a, float b, float c) {
+  *out = __builtin_amdgcn_cubetc(a, b, c);
+}
+
+// CHECK-LABEL: @test_cubema(
+// CHECK: {{.*}}call{{.*}} float @llvm.amdgcn.cubema(float %a, float %b, float %c)
+void test_cubema(global float* out, float a, float b, float c) {
+  *out = __builtin_amdgcn_cubema(a, b, c);
+}
+
+// CHECK-LABEL: @test_cvt_pknorm_i16(
+// CHECK: tail call{{.*}} <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float %src0, float %src1)
+kernel void test_cvt_pknorm_i16(global short2* out, float src0, float src1) {
+  *out = __builtin_amdgcn_cvt_pknorm_i16(src0, src1);
+}
+
+// CHECK-LABEL: @test_cvt_pknorm_u16(
+// CHECK: tail call{{.*}} <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float %src0, float %src1)
+kernel void test_cvt_pknorm_u16(global ushort2* out, float src0, float src1) {
+  *out = __builtin_amdgcn_cvt_pknorm_u16(src0, src1);
+}
+
+// CHECK-LABEL: @test_sad_u8(
+// CHECK: tail call{{.*}} i32 @llvm.amdgcn.sad.u8(i32 %src0, i32 %src1, i32 %src2)
+kernel void test_sad_u8(global uint* out, uint src0, uint src1, uint src2) {
+  *out = __builtin_amdgcn_sad_u8(src0, src1, src2);
+}
+
+// CHECK-LABEL: test_msad_u8(
+// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.msad.u8(i32 %src0, i32 %src1, i32 %src2)
+kernel void test_msad_u8(global uint* out, uint src0, uint src1, uint src2) {
+  *out = __builtin_amdgcn_msad_u8(src0, src1, src2);
+}
+
+// CHECK-LABEL: test_sad_hi_u8(
+// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.sad.hi.u8(i32 %src0, i32 %src1, i32 %src2)
+kernel void test_sad_hi_u8(global uint* out, uint src0, uint src1, uint src2) {
+  *out = __builtin_amdgcn_sad_hi_u8(src0, src1, src2);
+}
+
+// CHECK-LABEL: @test_sad_u16(
+// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.sad.u16(i32 %src0, i32 %src1, i32 %src2)
+kernel void test_sad_u16(global uint* out, uint src0, uint src1, uint src2) {
+  *out = __builtin_amdgcn_sad_u16(src0, src1, src2);
+}
+
+// CHECK-LABEL: @test_qsad_pk_u16_u8(
+// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.qsad.pk.u16.u8(i64 %src0, i32 %src1, i64 %src2)
+kernel void test_qsad_pk_u16_u8(global ulong* out, ulong src0, uint src1, ulong src2) {
+  *out = __builtin_amdgcn_qsad_pk_u16_u8(src0, src1, src2);
+}
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
index ab0b0b936abdc..b92454de60c78 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
@@ -251,13 +251,6 @@ void test_fract_f64(global int* out, double a)
   *out = __builtin_amdgcn_fract(a);
 }
 
-// CHECK-LABEL: @test_lerp
-// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.lerp
-void test_lerp(global int* out, int a, int b, int c)
-{
-  *out = __builtin_amdgcn_lerp(a, b, c);
-}
-
 // CHECK-LABEL: @test_sicmp_i32
 // CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.icmp.i64.i32(i32 %a, i32 %b, i32 32)
 void test_sicmp_i32(global ulong* out, int a, int b)
@@ -865,30 +858,6 @@ void test_s_setprio()
   __builtin_amdgcn_s_setprio(3);
 }
 
-// CHECK-LABEL: @test_cubeid(
-// CHECK: {{.*}}call{{.*}} float @llvm.amdgcn.cubeid(float %a, float %b, float %c)
-void test_cubeid(global float* out, float a, float b, float c) {
-  *out = __builtin_amdgcn_cubeid(a, b, c);
-}
-
-// CHECK-LABEL: @test_cubesc(
-// CHECK: {{.*}}call{{.*}} float @llvm.amdgcn.cubesc(float %a, float %b, float %c)
-void test_cubesc(global float* out, float a, float b, float c) {
-  *out = __builtin_amdgcn_cubesc(a, b, c);
-}
-
-// CHECK-LABEL: @test_cubetc(
-// CHECK: {{.*}}call{{.*}} float @llvm.amdgcn.cubetc(float %a, float %b, float %c)
-void test_cubetc(global float* out, float a, float b, float c) {
-  *out = __builtin_amdgcn_cubetc(a, b, c);
-}
-
-// CHECK-LABEL: @test_cubema(
-// CHECK: {{.*}}call{{.*}} float @llvm.amdgcn.cubema(float %a, float %b, float %c)
-void test_cubema(global float* out, float a, float b, float c) {
-  *out = __builtin_amdgcn_cubema(a, b, c);
-}
-
 // CHECK-LABEL: @test_read_exec(
 // CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.ballot.i64(i1 true)
 void test_read_exec(global ulong* out) {
@@ -1139,18 +1108,6 @@ kernel void test_cvt_pkrtz(global half2* out, float src0, float src1) {
   *out = __builtin_amdgcn_cvt_pkrtz(src0, src1);
 }
 
-// CHECK-LABEL: @test_cvt_pknorm_i16(
-// CHECK: tail call{{.*}} <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float %src0, float %src1)
-kernel void test_cvt_pknorm_i16(global short2* out, float src0, float src1) {
-  *out = __builtin_amdgcn_cvt_pknorm_i16(src0, src1);
-}
-
-// CHECK-LABEL: @test_cvt_pknorm_u16(
-// CHECK: tail call{{.*}} <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float %src0, float %src1)
-kernel void test_cvt_pknorm_u16(global ushort2* out, float src0, float src1) {
-  *out = __builtin_amdgcn_cvt_pknorm_u16(src0, src1);
-}
-
 // CHECK-LABEL: @test_cvt_pk_i16(
 // CHECK: tail call{{.*}} <2 x i16> @llvm.amdgcn.cvt.pk.i16(i32 %src0, i32 %src1)
 kernel void test_cvt_pk_i16(global short2* out, int src0, int src1) {
@@ -1169,36 +1126,12 @@ kernel void test_cvt_pk_u8_f32(global uint* out, float src0, uint src1, uint src
   *out = __builtin_amdgcn_cvt_pk_u8_f32(src0, src1, src2);
 }
 
-// CHECK-LABEL: @test_sad_u8(
-// CHECK: tail call{{.*}} i32 @llvm.amdgcn.sad.u8(i32 %src0, i32 %src1, i32 %src2)
-kernel void test_sad_u8(global uint* out, uint src0, uint src1, uint src2) {
-  *out = __builtin_amdgcn_sad_u8(src0, src1, src2);
-}
-
 // CHECK-LABEL: test_msad_u8(
 // CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.msad.u8(i32 %src0, i32 %src1, i32 %src2)
 kernel void test_msad_u8(global uint* out, uint src0, uint src1, uint src2) {
   *out = __builtin_amdgcn_msad_u8(src0, src1, src2);
 }
 
-// CHECK-LABEL: test_sad_hi_u8(
-// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.sad.hi.u8(i32 %src0, i32 %src1, i32 %src2)
-kernel void test_sad_hi_u8(global uint* out, uint src0, uint src1, uint src2) {
-  *out = __builtin_amdgcn_sad_hi_u8(src0, src1, src2);
-}
-
-// CHECK-LABEL: @test_sad_u16(
-// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.sad.u16(i32 %src0, i32 %src1, i32 %src2)
-kernel void test_sad_u16(global uint* out, uint src0, uint src1, uint src2) {
-  *out = __builtin_amdgcn_sad_u16(src0, src1, src2);
-}
-
-// CHECK-LABEL: @test_qsad_pk_u16_u8(
-// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.qsad.pk.u16.u8(i64 %src0, i32 %src1, i64 %src2)
-kernel void test_qsad_pk_u16_u8(global ulong* out, ulong src0, uint src1, ulong src2) {
-  *out = __builtin_amdgcn_qsad_pk_u16_u8(src0, src1, src2);
-}
-
 // CHECK-LABEL: @test_mqsad_pk_u16_u8(
 // CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.mqsad.pk.u16.u8(i64 %src0, i32 %src1, i64 %src2)
 kernel void test_mqsad_pk_u16_u8(global ulong* out, ulong src0, uint src1, ulong src2) {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index cd8327563d9d6..b2bbd3ba6c5ca 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -901,43 +901,43 @@ def FeaturePkFmacF16Inst : SubtargetFeature<"pk-fmac-f16-inst",
   "Has v_pk_fmac_f16 instruction"
 >;
 
-def FeatureCubeInsts : SubtargetFeature<"V_CUBE-Insts",
+def FeatureCubeInsts : SubtargetFeature<"cube-insts",
   "HasCubeInsts",
   "true",
   "Has V_CUBE* instructions"
 >; 
 
-def FeatureLerpInst : SubtargetFeature<"V_LERP-insts",
+def FeatureLerpInst : SubtargetFeature<"lerp-inst",
   "HasLerpInst",
   "true",
   "Has v_lerp_u8 instruction"
 >; 
 
-def FeatureSadInsts : SubtargetFeature<"V_SAD-insts",
+def FeatureSadInsts : SubtargetFeature<"sad-insts",
   "HasSadInsts",
   "true",
   "Has V_SAD* instructions"
 >; 
 
-def FeatureQsadInsts : SubtargetFeature<"V_QSAD-insts",
+def FeatureQsadInsts : SubtargetFeature<"qsad-insts",
   "HasQsadInsts",
   "true",
   "Has V_QSAD* instructions"
 >; 
 
-def FeatureCvtNormInsts : SubtargetFeature<"V_CVT_NORM-insts",
+def FeatureCvtNormInsts : SubtargetFeature<"cvt-norm-insts",
   "HasCvtNormInsts",
   "true",
   "Has V_CVT_NORM* instructions"
 >;
 
-def FeatureCvtPkNormVOP2Insts : SubtargetFeature<"V_CVT_PKNORM-VOP2-insts",
+def FeatureCvtPkNormVOP2Insts : SubtargetFeature<"cvt-pknorm-vop2-insts",
   "HasCvtPkNormVOP2Insts",
   "true",
   "Has V_CVT_PK_NORM_* instructions/Has V_CVT_PK_NORM_*_F16 instructions"
 >;
 
-def FeatureCvtPkNormVOP3Insts : SubtargetFeature<"V_CVT_PKNORM-VOP3-insts",
+def FeatureCvtPkNormVOP3Insts : SubtargetFeature<"cvt-pknorm-vop3-insts",
   "HasCvtPkNormVOP3Insts",
   "true",
   "Has V_CVT_PK_NORM_* instructions/Has V_CVT_PK_NORM_*_F16 instructions"
diff --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp
index 96bef0e574a45..9a9e76d581432 100644
--- a/llvm/lib/TargetParser/TargetParser.cpp
+++ b/llvm/lib/TargetParser/TargetParser.cpp
@@ -447,6 +447,12 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T,
     Features["atomic-fmin-fmax-global-f64"] = true;
     Features["wavefrontsize32"] = true;
     Features["clusters"] = true;
+    Features["cube-insts"] = true;
+    Features["lerp-inst"] = true;
+    ;
+    Features["sad-insts"] = true;
+    Features["qsad-insts"] = true;
+    Features["cvt-pknorm-vop2-insts"] = true;
     break;
   case GK_GFX1201:
   case GK_GFX1200:
@@ -474,6 +480,12 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T,
     Features["gfx12-insts"] = true;
     Features["atomic-fadd-rtn-insts"] = true;
     Features["image-insts"] = true;
+    Features["cube-insts"] = true;
+    Features["lerp-inst"] = true;
+    ;
+    Features["sad-insts"] = true;
+    Features["qsad-insts"] = true;
+    Features["cvt-pknorm-vop2-insts"] = true;
     Features["fp8-conversion-insts"] = true;
     Features["atomic-fmin-fmax-global-f32"] = true;
     break;
@@ -503,6 +515,12 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T,
     Features["gfx11-insts"] = true;
     Features["atomic-fadd-rtn-insts"] = true;
     Features["image-insts"] = true;
+    Features["cube-insts"] = true;
+    Features["lerp-inst"] = true;
+    ;
+    Features["sad-insts"] = true;
+    Features["qsad-insts"] = true;
+    Features["cvt-pknorm-vop2-insts"] = true;
     Features["gws"] = true;
     Features["atomic-fmin-fmax-global-f32"] = true;
     break;
@@ -562,6 +580,12 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T,
     Features["vmem-to-lds-load-insts"] = true;
     Features["atomic-fmin-fmax-global-f32"] = true;
     Features["atomic-fmin-fmax-global-f64"] = true;
+    Features["cube-insts"] = true;
+    Features["lerp-inst"] = true;
+    ;
+    Features["sad-insts"] = true;
+    Features["qsad-insts"] = true;
+    Features["cvt-pknorm-vop2-insts"] = true;
     break;
   case GK_GFX950:
     Features["bitop3-insts"] = true;
@@ -615,6 +639,12 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T,
     Features["vmem-to-lds-load-insts"] = true;
     Features["atomic-fmin-fmax-global-f64"] = true;
     Features["wavefrontsize64"] = true;
+    Features["cube-insts"] = true;
+    Features["lerp-inst"] = true;
+    ;
+    Features["sad-insts"] = true;
+    Features["qsad-insts"] = true;
+    Features["cvt-pknorm-vop2-insts"] = true;
     break;
   case GK_GFX90A:
     Features["gfx90a-insts"] = true;
@@ -659,6 +689,12 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T,
     Features["s-memtime-inst"] = true;
     Features["gws"] = true;
     Features["wavefrontsize64"] = true;
+    Features["cube-insts"] = true;
+    Features["lerp-inst"] = true;
+    ;
+    Features["sad-insts"] = true;
+    Features["qsad-insts"] = true;
+    Features["cvt-pknorm-vop2-insts"] = true;
     break;
   case GK_GFX705:
   case GK_GFX704:
@@ -667,16 +703,28 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T,
   case GK_GFX701:
   case GK_GFX700:
     Features["ci-insts"] = true;
-    [[fallthrough]];
+    Features["cube-insts"] = true;
+    Features["lerp-inst"] = true;
+    ;
+    Features["sad-insts"] = true;
+    Features["qsad-insts"] = true;
+    Features["cvt-pknorm-vop2-insts"] = true;
+    break;
   case GK_GFX602:
   case GK_GFX601:
   case GK_GFX600:
+    Features["ci-insts"] = true;
     Features["image-insts"] = true;
     Features["s-memtime-inst"] = true;
     Features["gws"] = true;
     Features["atomic-fmin-fmax-global-f32"] = true;
     Features["atomic-fmin-fmax-global-f64"] = true;
     Features["wavefrontsize64"] = true;
+    Features["cube-insts"] = true;
+    Features["lerp-inst"] = true;
+    ;
+    Features["sad-insts"] = true;
+    Features["cvt-pknorm-vop2-insts"] = true;
     break;
   case GK_NONE:
     break;

>From c6ac0f1a79e59389fbbb944d84607a015a694a22 Mon Sep 17 00:00:00 2001
From: shore <372660931 at qq.com>
Date: Tue, 18 Nov 2025 13:43:18 +0800
Subject: [PATCH 05/10] Fix format: remove stray ';' statements, add feature entries to one more GPU case

---
 llvm/lib/TargetParser/TargetParser.cpp | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp
index 9a9e76d581432..3f28780e2cb04 100644
--- a/llvm/lib/TargetParser/TargetParser.cpp
+++ b/llvm/lib/TargetParser/TargetParser.cpp
@@ -449,7 +449,6 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T,
     Features["clusters"] = true;
     Features["cube-insts"] = true;
     Features["lerp-inst"] = true;
-    ;
     Features["sad-insts"] = true;
     Features["qsad-insts"] = true;
     Features["cvt-pknorm-vop2-insts"] = true;
@@ -482,7 +481,6 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T,
     Features["image-insts"] = true;
     Features["cube-insts"] = true;
     Features["lerp-inst"] = true;
-    ;
     Features["sad-insts"] = true;
     Features["qsad-insts"] = true;
     Features["cvt-pknorm-vop2-insts"] = true;
@@ -517,7 +515,6 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T,
     Features["image-insts"] = true;
     Features["cube-insts"] = true;
     Features["lerp-inst"] = true;
-    ;
     Features["sad-insts"] = true;
     Features["qsad-insts"] = true;
     Features["cvt-pknorm-vop2-insts"] = true;
@@ -553,6 +550,11 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T,
     Features["vmem-to-lds-load-insts"] = true;
     Features["atomic-fmin-fmax-global-f32"] = true;
     Features["atomic-fmin-fmax-global-f64"] = true;
+    Features["cube-insts"] = true;
+    Features["lerp-inst"] = true;
+    Features["sad-insts"] = true;
+    Features["qsad-insts"] = true;
+    Features["cvt-pknorm-vop2-insts"] = true;
     break;
   case GK_GFX1012:
   case GK_GFX1011:
@@ -582,7 +584,6 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T,
     Features["atomic-fmin-fmax-global-f64"] = true;
     Features["cube-insts"] = true;
     Features["lerp-inst"] = true;
-    ;
     Features["sad-insts"] = true;
     Features["qsad-insts"] = true;
     Features["cvt-pknorm-vop2-insts"] = true;
@@ -641,7 +642,6 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T,
     Features["wavefrontsize64"] = true;
     Features["cube-insts"] = true;
     Features["lerp-inst"] = true;
-    ;
     Features["sad-insts"] = true;
     Features["qsad-insts"] = true;
     Features["cvt-pknorm-vop2-insts"] = true;
@@ -691,7 +691,6 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T,
     Features["wavefrontsize64"] = true;
     Features["cube-insts"] = true;
     Features["lerp-inst"] = true;
-    ;
     Features["sad-insts"] = true;
     Features["qsad-insts"] = true;
     Features["cvt-pknorm-vop2-insts"] = true;
@@ -705,7 +704,6 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T,
     Features["ci-insts"] = true;
     Features["cube-insts"] = true;
     Features["lerp-inst"] = true;
-    ;
     Features["sad-insts"] = true;
     Features["qsad-insts"] = true;
     Features["cvt-pknorm-vop2-insts"] = true;
@@ -722,7 +720,6 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T,
     Features["wavefrontsize64"] = true;
     Features["cube-insts"] = true;
     Features["lerp-inst"] = true;
-    ;
     Features["sad-insts"] = true;
     Features["cvt-pknorm-vop2-insts"] = true;
     break;

>From a3987b136105dd98ede771b3f17429f4493e19b6 Mon Sep 17 00:00:00 2001
From: shore <372660931 at qq.com>
Date: Tue, 18 Nov 2025 14:39:04 +0800
Subject: [PATCH 06/10] fix tests

---
 clang/test/CodeGenOpenCL/amdgpu-features.cl | 98 ++++++++++-----------
 llvm/lib/TargetParser/TargetParser.cpp      |  6 ++
 2 files changed, 55 insertions(+), 49 deletions(-)

diff --git a/clang/test/CodeGenOpenCL/amdgpu-features.cl b/clang/test/CodeGenOpenCL/amdgpu-features.cl
index 9bd096f3fcbc7..aae05dd623a63 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-features.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-features.cl
@@ -63,55 +63,55 @@
 // NOCPU-WAVE32: "target-features"="+wavefrontsize32"
 // NOCPU-WAVE64: "target-features"="+wavefrontsize64"
 
-// GFX600: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+s-memtime-inst,+wavefrontsize64
-// GFX601: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+s-memtime-inst,+wavefrontsize64
-// GFX602: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+s-memtime-inst,+wavefrontsize64
-// GFX700: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+s-memtime-inst,+wavefrontsize64"
-// GFX701: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+s-memtime-inst,+wavefrontsize64"
-// GFX702: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+s-memtime-inst,+wavefrontsize64"
-// GFX703: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+s-memtime-inst,+wavefrontsize64"
-// GFX704: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+s-memtime-inst,+wavefrontsize64"
-// GFX705: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+s-memtime-inst,+wavefrontsize64"
-// GFX801: "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
-// GFX802: "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
-// GFX803: "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
-// GFX805: "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
-// GFX810: "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
-// GFX900: "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
-// GFX902: "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
-// GFX904: "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
-// GFX906: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
-// GFX908: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
-// GFX909: "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
-// GFX90A: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
-// GFX90C: "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
-// GFX942: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,+xf32-insts"
-// GFX9_4_Generic: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
-// GFX950: "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+mai-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
-// GFX1010: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
-// GFX1011: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
-// GFX1012: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
-// GFX1013: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
-// GFX1030: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
-// GFX1031: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
-// GFX1032: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
-// GFX1033: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
-// GFX1034: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
-// GFX1035: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
-// GFX1036: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
-// GFX1100: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
-// GFX1101: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
-// GFX1102: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
-// GFX1103: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
-// GFX1150: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
-// GFX1151: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
-// GFX1152: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
-// GFX1153: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
-// GFX1200: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
-// GFX1201: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
-// GFX1250: "target-features"="+16-bit-insts,+add-min-max-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bitop3-insts,+ci-insts,+clusters,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+permlane16-swap,+pk-add-min-max-insts,+prng-inst,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+wavefrontsize32"
-// GFX1251: "target-features"="+16-bit-insts,+add-min-max-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bitop3-insts,+ci-insts,+clusters,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+permlane16-swap,+pk-add-min-max-insts,+prng-inst,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+wavefrontsize32"
+// GFX600: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+lerp-inst,+s-memtime-inst,+sad-insts,+wavefrontsize64
+// GFX601: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+lerp-inst,+s-memtime-inst,+sad-insts,+wavefrontsize64
+// GFX602: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+lerp-inst,+s-memtime-inst,+sad-insts,+wavefrontsize64
+// GFX700: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+lerp-inst,+qsad-insts,+s-memtime-inst,+sad-insts,+wavefrontsize64"
+// GFX701: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+lerp-inst,+qsad-insts,+s-memtime-inst,+sad-insts,+wavefrontsize64"
+// GFX702: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+lerp-inst,+qsad-insts,+s-memtime-inst,+sad-insts,+wavefrontsize64"
+// GFX703: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+lerp-inst,+qsad-insts,+s-memtime-inst,+sad-insts,+wavefrontsize64"
+// GFX704: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+lerp-inst,+qsad-insts,+s-memtime-inst,+sad-insts,+wavefrontsize64"
+// GFX705: "target-features"="+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+lerp-inst,+qsad-insts,+s-memtime-inst,+sad-insts,+wavefrontsize64"
+// GFX801: "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dpp,+gfx8-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64"
+// GFX802: "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dpp,+gfx8-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64"
+// GFX803: "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dpp,+gfx8-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64"
+// GFX805: "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dpp,+gfx8-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64"
+// GFX810: "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dpp,+gfx8-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64"
+// GFX900: "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dpp,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64"
+// GFX902: "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dpp,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64"
+// GFX904: "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dpp,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64"
+// GFX906: "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64"
+// GFX908: "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+lerp-inst,+mai-insts,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64"
+// GFX909: "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dpp,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64"
+// GFX90A: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+lerp-inst,+mai-insts,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64"
+// GFX90C: "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dpp,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64"
+// GFX942: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+lerp-inst,+mai-insts,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64,+xf32-insts"
+// GFX9_4_Generic: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+lerp-inst,+mai-insts,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64"
+// GFX950: "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+lerp-inst,+mai-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64"
+// GFX1010: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize32"
+// GFX1011: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize32"
+// GFX1012: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize32"
+// GFX1013: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize32"
+// GFX1030: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize32"
+// GFX1031: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize32"
+// GFX1032: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize32"
+// GFX1033: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize32"
+// GFX1034: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize32"
+// GFX1035: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize32"
+// GFX1036: "target-features"="+16-bit-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize32"
+// GFX1100: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+sad-insts,+wavefrontsize32"
+// GFX1101: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+sad-insts,+wavefrontsize32"
+// GFX1102: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+sad-insts,+wavefrontsize32"
+// GFX1103: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+sad-insts,+wavefrontsize32"
+// GFX1150: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+sad-insts,+wavefrontsize32"
+// GFX1151: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+sad-insts,+wavefrontsize32"
+// GFX1152: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+sad-insts,+wavefrontsize32"
+// GFX1153: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+sad-insts,+wavefrontsize32"
+// GFX1200: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-global-pk-add-bf16-inst,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+sad-insts,+wavefrontsize32"
+// GFX1201: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-global-pk-add-bf16-inst,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+sad-insts,+wavefrontsize32"
+// GFX1250: "target-features"="+16-bit-insts,+add-min-max-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bitop3-insts,+ci-insts,+clusters,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+permlane16-swap,+pk-add-min-max-insts,+prng-inst,+qsad-insts,+sad-insts,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+wavefrontsize32"
+// GFX1251: "target-features"="+16-bit-insts,+add-min-max-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bitop3-insts,+ci-insts,+clusters,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+permlane16-swap,+pk-add-min-max-insts,+prng-inst,+qsad-insts,+sad-insts,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+wavefrontsize32"
 
-// GFX1103-W64: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize64"
+// GFX1103-W64: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+sad-insts,+wavefrontsize64"
 
 kernel void test() {}
diff --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp
index 3f28780e2cb04..9439fa2b3a0ec 100644
--- a/llvm/lib/TargetParser/TargetParser.cpp
+++ b/llvm/lib/TargetParser/TargetParser.cpp
@@ -707,6 +707,12 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T,
     Features["sad-insts"] = true;
     Features["qsad-insts"] = true;
     Features["cvt-pknorm-vop2-insts"] = true;
+    Features["image-insts"] = true;
+    Features["s-memtime-inst"] = true;
+    Features["gws"] = true;
+    Features["atomic-fmin-fmax-global-f32"] = true;
+    Features["atomic-fmin-fmax-global-f64"] = true;
+    Features["wavefrontsize64"] = true;
     break;
   case GK_GFX602:
   case GK_GFX601:

>From 2b6be97afae2e4e225715cb7998b38da2bd0600a Mon Sep 17 00:00:00 2001
From: shore <372660931 at qq.com>
Date: Tue, 18 Nov 2025 14:45:04 +0800
Subject: [PATCH 07/10] fix description

---
 llvm/lib/Target/AMDGPU/AMDGPU.td | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index b2bbd3ba6c5ca..5dea64844e64e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -904,7 +904,7 @@ def FeaturePkFmacF16Inst : SubtargetFeature<"pk-fmac-f16-inst",
 def FeatureCubeInsts : SubtargetFeature<"cube-insts",
   "HasCubeInsts",
   "true",
-  "Has V_CUBE* instructions"
+  "Has v_cube* instructions"
 >; 
 
 def FeatureLerpInst : SubtargetFeature<"lerp-inst",
@@ -916,31 +916,31 @@ def FeatureLerpInst : SubtargetFeature<"lerp-inst",
 def FeatureSadInsts : SubtargetFeature<"sad-insts",
   "HasSadInsts",
   "true",
-  "Has V_SAD* instructions"
+  "Has v_sad* instructions"
 >; 
 
 def FeatureQsadInsts : SubtargetFeature<"qsad-insts",
   "HasQsadInsts",
   "true",
-  "Has V_QSAD* instructions"
+  "Has v_qsad* instructions"
 >; 
 
 def FeatureCvtNormInsts : SubtargetFeature<"cvt-norm-insts",
   "HasCvtNormInsts",
   "true",
-  "Has V_CVT_NORM* instructions"
+  "Has v_cvt_norm* instructions"
 >;
 
 def FeatureCvtPkNormVOP2Insts : SubtargetFeature<"cvt-pknorm-vop2-insts",
   "HasCvtPkNormVOP2Insts",
   "true",
-  "Has V_CVT_PK_NORM_* instructions/Has V_CVT_PK_NORM_*_F16 instructions"
+  "Has v_cvt_pk_norm_*f32 instructions/Has v_cvt_pk_norm_*_f16 instructions"
 >;
 
 def FeatureCvtPkNormVOP3Insts : SubtargetFeature<"cvt-pknorm-vop3-insts",
   "HasCvtPkNormVOP3Insts",
   "true",
-  "Has V_CVT_PK_NORM_* instructions/Has V_CVT_PK_NORM_*_F16 instructions"
+  "Has v_cvt_pk_norm_*f32 instructions/Has v_cvt_pk_norm_*_f16 instructions"
 >;
 
 def FeatureAtomicDsPkAdd16Insts : SubtargetFeature<"atomic-ds-pk-add-16-insts",

>From 3691fc903f591cb40d53f3e6b558c035292964a9 Mon Sep 17 00:00:00 2001
From: shore <372660931 at qq.com>
Date: Tue, 18 Nov 2025 15:36:09 +0800
Subject: [PATCH 08/10] fix failing tests

---
 clang/test/CodeGenOpenCL/amdgpu-cluster-dims.cl           | 4 ++--
 clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl         | 8 ++++----
 .../amdgpu-readonly-features-written-with-no-target.cl    | 6 +++---
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/clang/test/CodeGenOpenCL/amdgpu-cluster-dims.cl b/clang/test/CodeGenOpenCL/amdgpu-cluster-dims.cl
index 14fbeb24a96c2..c5656c49c4761 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-cluster-dims.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-cluster-dims.cl
@@ -26,8 +26,8 @@ kernel void foo(global int *p) { *p = 1; }
 // CHECK-NEXT:    ret void
 //
 //.
-// CHECK: attributes #[[ATTR0]] = { convergent norecurse nounwind "amdgpu-cluster-dims"="0,0,0" "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1250" "target-features"="+16-bit-insts,+add-min-max-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bitop3-insts,+ci-insts,+clusters,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+permlane16-swap,+pk-add-min-max-insts,+prng-inst,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+wavefrontsize32" "uniform-work-group-size"="false" }
-// CHECK: attributes #[[ATTR1]] = { alwaysinline convergent norecurse nounwind "amdgpu-cluster-dims"="0,0,0" "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1250" "target-features"="+16-bit-insts,+add-min-max-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bitop3-insts,+ci-insts,+clusters,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+permlane16-swap,+pk-add-min-max-insts,+prng-inst,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+wavefrontsize32" }
+// CHECK: attributes #[[ATTR0]] = { convergent norecurse nounwind "amdgpu-cluster-dims"="0,0,0" "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1250" "target-features"="+16-bit-insts,+add-min-max-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bitop3-insts,+ci-insts,+clusters,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+permlane16-swap,+pk-add-min-max-insts,+prng-inst,+qsad-insts,+sad-insts,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+wavefrontsize32" "uniform-work-group-size"="false" }
+// CHECK: attributes #[[ATTR1]] = { alwaysinline convergent norecurse nounwind "amdgpu-cluster-dims"="0,0,0" "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1250" "target-features"="+16-bit-insts,+add-min-max-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bitop3-insts,+ci-insts,+clusters,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+permlane16-swap,+pk-add-min-max-insts,+prng-inst,+qsad-insts,+sad-insts,+setprio-inc-wg-inst,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+wavefrontsize32" }
 // CHECK: attributes #[[ATTR2]] = { convergent nounwind }
 //.
 // CHECK: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600}
diff --git a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
index e9adac23a6530..2cbc9787a04b0 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
@@ -816,12 +816,12 @@ kernel void test_target_features_kernel(global int *i) {
 // NOCPU: attributes #[[ATTR10]] = { convergent nounwind }
 //.
 // GFX900: attributes #[[ATTR0:[0-9]+]] = { "objc_arc_inert" }
-// GFX900: attributes #[[ATTR1]] = { convergent norecurse nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,-sram-ecc" }
-// GFX900: attributes #[[ATTR2]] = { convergent norecurse nounwind "amdgpu-flat-work-group-size"="1,256" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,-sram-ecc" "uniform-work-group-size"="false" }
-// GFX900: attributes #[[ATTR3]] = { alwaysinline convergent norecurse nounwind "amdgpu-flat-work-group-size"="1,256" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,-sram-ecc" }
+// GFX900: attributes #[[ATTR1]] = { convergent norecurse nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dpp,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64,-sram-ecc" }
+// GFX900: attributes #[[ATTR2]] = { convergent norecurse nounwind "amdgpu-flat-work-group-size"="1,256" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dpp,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64,-sram-ecc" "uniform-work-group-size"="false" }
+// GFX900: attributes #[[ATTR3]] = { alwaysinline convergent norecurse nounwind "amdgpu-flat-work-group-size"="1,256" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dpp,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64,-sram-ecc" }
 // GFX900: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
 // GFX900: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
-// GFX900: attributes #[[ATTR6]] = { convergent nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,-sram-ecc" }
+// GFX900: attributes #[[ATTR6]] = { convergent nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dpp,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64,-sram-ecc" }
 // GFX900: attributes #[[ATTR7:[0-9]+]] = { nocallback nofree nosync nounwind willreturn }
 // GFX900: attributes #[[ATTR8]] = { convergent nounwind }
 // GFX900: attributes #[[ATTR9]] = { nounwind }
diff --git a/clang/test/CodeGenOpenCL/amdgpu-readonly-features-written-with-no-target.cl b/clang/test/CodeGenOpenCL/amdgpu-readonly-features-written-with-no-target.cl
index 1a0a30ca0b51e..2d50ce7cab2e0 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-readonly-features-written-with-no-target.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-readonly-features-written-with-no-target.cl
@@ -11,6 +11,6 @@
 __attribute__((target("gws,image-insts,vmem-to-lds-load-insts"))) void test() {}
 
 // NOCPU: "target-features"="+gws,+image-insts,+vmem-to-lds-load-insts"
-// GFX942: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,+xf32-insts"
-// GFX1100: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
-// GFX1200: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32
+// GFX942: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+lerp-inst,+mai-insts,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64,+xf32-insts"
+// GFX1100: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f32,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+sad-insts,+wavefrontsize32"
+// GFX1200: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-global-pk-add-bf16-inst,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+sad-insts,+wavefrontsize32"

>From 6a8d41af37e2c617580d20fc536644197c6ad88c Mon Sep 17 00:00:00 2001
From: shore <372660931 at qq.com>
Date: Tue, 18 Nov 2025 16:20:16 +0800
Subject: [PATCH 09/10] fix failing test

---
 clang/test/CodeGen/link-builtin-bitcode.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/clang/test/CodeGen/link-builtin-bitcode.c b/clang/test/CodeGen/link-builtin-bitcode.c
index 9a5b6de3c3b38..f6e45bf573705 100644
--- a/clang/test/CodeGen/link-builtin-bitcode.c
+++ b/clang/test/CodeGen/link-builtin-bitcode.c
@@ -43,7 +43,7 @@ int bar() { return no_attr() + attr_in_target() + attr_not_in_target() + attr_in
 // CHECK-LABEL: @attr_incompatible
 // CHECK-SAME: () #[[ATTR_INCOMPATIBLE:[0-9]+]] {
 
-// CHECK: attributes #[[ATTR_BAR]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" }
-// CHECK: attributes #[[ATTR_COMPATIBLE]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gws,+image-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+vmem-to-lds-load-insts,+wavefrontsize64" }
-// CHECK: attributes #[[ATTR_EXTEND]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+extended-image-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gws,+image-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+vmem-to-lds-load-insts,+wavefrontsize64" }
-// CHECK: attributes #[[ATTR_INCOMPATIBLE]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f64,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx90a-insts,+gws,+image-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+vmem-to-lds-load-insts,+wavefrontsize64,-gfx9-insts" }
+// CHECK: attributes #[[ATTR_BAR]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+lerp-inst,+mai-insts,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64" }
+// CHECK: attributes #[[ATTR_COMPATIBLE]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gws,+image-insts,+lerp-inst,+mai-insts,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+vmem-to-lds-load-insts,+wavefrontsize64" }
+// CHECK: attributes #[[ATTR_EXTEND]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+extended-image-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gws,+image-insts,+lerp-inst,+mai-insts,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+vmem-to-lds-load-insts,+wavefrontsize64" }
+// CHECK: attributes #[[ATTR_INCOMPATIBLE]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+atomic-fmin-fmax-global-f64,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx90a-insts,+gws,+image-insts,+lerp-inst,+mai-insts,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+vmem-to-lds-load-insts,+wavefrontsize64,-gfx9-insts" }

>From 91bd9087906d802e750368c3303598ce41e5c711 Mon Sep 17 00:00:00 2001
From: shore <372660931 at qq.com>
Date: Tue, 18 Nov 2025 16:32:05 +0800
Subject: [PATCH 10/10] fix failing tests

---
 clang/test/OpenMP/amdgcn-attributes.cpp         | 4 ++--
 flang/test/Lower/OpenMP/target_cpu_features.f90 | 4 ++--
 llvm/lib/TargetParser/TargetParser.cpp          | 1 -
 3 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/clang/test/OpenMP/amdgcn-attributes.cpp b/clang/test/OpenMP/amdgcn-attributes.cpp
index 2c9e16a4f5098..03f5c31e3157c 100644
--- a/clang/test/OpenMP/amdgcn-attributes.cpp
+++ b/clang/test/OpenMP/amdgcn-attributes.cpp
@@ -32,9 +32,9 @@ int callable(int x) {
 }
 
 // DEFAULT: attributes #0 = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,42" "kernel" "no-trapping-math"="true" "omp_target_thread_limit"="42" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }
-// CPU: attributes #0 = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,42" "kernel" "no-trapping-math"="true" "omp_target_thread_limit"="42" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" "uniform-work-group-size"="true" }
+// CPU: attributes #0 = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,42" "kernel" "no-trapping-math"="true" "omp_target_thread_limit"="42" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dpp,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64" "uniform-work-group-size"="true" }
 // NOIEEE: attributes #0 = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,42" "amdgpu-ieee"="false" "kernel" "no-nans-fp-math"="true" "no-trapping-math"="true" "omp_target_thread_limit"="42" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }
 
 // DEFAULT: attributes #2 = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
-// CPU: attributes #2 = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" }
+// CPU: attributes #2 = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dpp,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64" }
 // NOIEEE: attributes #2 = { convergent mustprogress noinline nounwind optnone "amdgpu-ieee"="false" "no-nans-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
diff --git a/flang/test/Lower/OpenMP/target_cpu_features.f90 b/flang/test/Lower/OpenMP/target_cpu_features.f90
index 4532593156eab..341cfc7991d43 100644
--- a/flang/test/Lower/OpenMP/target_cpu_features.f90
+++ b/flang/test/Lower/OpenMP/target_cpu_features.f90
@@ -11,8 +11,8 @@
 !AMDGCN-SAME: fir.target_features = #llvm.target_features<["+16-bit-insts", "+ci-insts",
 !AMDGCN-SAME: "+dl-insts", "+dot1-insts", "+dot10-insts", "+dot2-insts", "+dot3-insts",
 !AMDGCN-SAME: "+dot4-insts", "+dot5-insts", "+dot6-insts", "+dot7-insts", "+dpp",
-!AMDGCN-SAME: "+gfx8-insts", "+gfx9-insts", "+gws", "+image-insts", "+mai-insts",
-!AMDGCN-SAME: "+s-memrealtime", "+s-memtime-inst", "+vmem-to-lds-load-insts", "+wavefrontsize64"]>
+!AMDGCN-SAME: "+gfx8-insts", "+gfx9-insts", "+gws", "+image-insts", "+lerp-inst", "+mai-insts",
+!AMDGCN-SAME: "+qsad-insts", "+s-memrealtime", "+s-memtime-inst", "+sad-insts", "+vmem-to-lds-load-insts", "+wavefrontsize64"]>
 
 !NVPTX: module attributes {
 !NVPTX-SAME: fir.target_cpu = "sm_80"
diff --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp
index 9439fa2b3a0ec..28f3649a840d6 100644
--- a/llvm/lib/TargetParser/TargetParser.cpp
+++ b/llvm/lib/TargetParser/TargetParser.cpp
@@ -717,7 +717,6 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T,
   case GK_GFX602:
   case GK_GFX601:
   case GK_GFX600:
-    Features["ci-insts"] = true;
     Features["image-insts"] = true;
     Features["s-memtime-inst"] = true;
     Features["gws"] = true;



More information about the llvm-commits mailing list