[llvm] [AMDGPU] Adding instruction-specific features (PR #167809)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Nov 16 17:09:41 PST 2025
https://github.com/Shoreshen updated https://github.com/llvm/llvm-project/pull/167809
From eccc49197174bfb20a26c9cad573df37614ed629 Mon Sep 17 00:00:00 2001
From: shore <372660931 at qq.com>
Date: Thu, 13 Nov 2025 10:18:26 +0800
Subject: [PATCH 1/2] Adding instruction-specific features
---
llvm/lib/Target/AMDGPU/AMDGPU.td | 108 +++++++++++++++++--
llvm/lib/Target/AMDGPU/GCNSubtarget.h | 21 ++++
llvm/lib/Target/AMDGPU/VOP1Instructions.td | 14 +--
llvm/lib/Target/AMDGPU/VOP2Instructions.td | 2 +-
llvm/lib/Target/AMDGPU/VOP3Instructions.td | 22 ++--
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lerp.ll | 1 -
6 files changed, 142 insertions(+), 26 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index b008354cfd462..fe2a192f0f372 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -901,6 +901,48 @@ def FeaturePkFmacF16Inst : SubtargetFeature<"pk-fmac-f16-inst",
"Has v_pk_fmac_f16 instruction"
>;
+def FeatureVCUBEInsts : SubtargetFeature<"V_CUBE-Insts",
+ "HasVCUBEInsts",
+ "true",
+ "Has V_CUBE* instructions"
+>;
+
+def FeatureVLERPInsts : SubtargetFeature<"V_LERP-insts",
+ "HasVLERPInsts",
+ "true",
+ "Has V_LERP* instructions"
+>;
+
+def FeatureVSADInsts : SubtargetFeature<"V_SAD-insts",
+ "HasVSADInsts",
+ "true",
+ "Has V_SAD* instructions"
+>;
+
+def FeatureVQSADInsts : SubtargetFeature<"V_QSAD-insts",
+ "HasVQSADInsts",
+ "true",
+ "Has V_QSAD* instructions"
+>;
+
+def FeatureVCVTNORMInsts : SubtargetFeature<"V_CVT_NORM-insts",
+ "HasVCVTNORMInsts",
+ "true",
+ "Has V_CVT_NORM* instructions"
+>;
+
+def FeatureVCVTPKNORMVOP2Insts : SubtargetFeature<"V_CVT_PKNORM-VOP2-insts",
+ "HasVCVTPKNORMVOP2Insts",
+ "true",
+ "Has V_CVT_NORM* VOP2 instructions"
+>;
+
+def FeatureVCVTPKNORMVOP3Insts : SubtargetFeature<"V_CVT_PKNORM-VOP3-insts",
+ "HasVCVTPKNORMVOP3Insts",
+ "true",
+ "Has V_CVT_NORM* VOP3 instructions"
+>;
+
def FeatureAtomicDsPkAdd16Insts : SubtargetFeature<"atomic-ds-pk-add-16-insts",
"HasAtomicDsPkAdd16Insts",
"true",
@@ -1494,7 +1536,8 @@ def FeatureSouthernIslands : GCNSubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
FeatureTrigReducedRange, FeatureExtendedImageInsts, FeatureImageInsts,
FeatureGDS, FeatureGWS, FeatureDefaultComponentZero,
FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF64GlobalInsts,
- FeatureVmemWriteVgprInOrder
+ FeatureVmemWriteVgprInOrder, FeatureVCUBEInsts, FeatureVLERPInsts,
+ FeatureVSADInsts, FeatureVCVTPKNORMVOP2Insts
]
>;
@@ -1508,7 +1551,8 @@ def FeatureSeaIslands : GCNSubtargetFeatureGeneration<"SEA_ISLANDS",
FeatureImageInsts, FeatureGDS, FeatureGWS, FeatureDefaultComponentZero,
FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF64GlobalInsts,
FeatureAtomicFMinFMaxF32FlatInsts, FeatureAtomicFMinFMaxF64FlatInsts,
- FeatureVmemWriteVgprInOrder
+ FeatureVmemWriteVgprInOrder, FeatureVCUBEInsts, FeatureVLERPInsts,
+ FeatureVSADInsts, FeatureVQSADInsts, FeatureVCVTPKNORMVOP2Insts
]
>;
@@ -1524,7 +1568,9 @@ def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
FeatureGFX7GFX8GFX9Insts, FeatureSMemTimeInst, FeatureMadMacF32Insts,
FeatureDsSrc2Insts, FeatureExtendedImageInsts, FeatureFastDenormalF32,
FeatureUnalignedBufferAccess, FeatureImageInsts, FeatureGDS, FeatureGWS,
- FeatureDefaultComponentZero, FeatureVmemWriteVgprInOrder
+ FeatureDefaultComponentZero, FeatureVmemWriteVgprInOrder, FeatureVCUBEInsts,
+ FeatureVLERPInsts, FeatureVSADInsts, FeatureVQSADInsts,
+ FeatureVCVTPKNORMVOP2Insts
]
>;
@@ -1543,7 +1589,10 @@ def FeatureGFX9 : GCNSubtargetFeatureGeneration<"GFX9",
FeatureA16, FeatureSMemTimeInst, FeatureFastDenormalF32, FeatureSupportsXNACK,
FeatureUnalignedBufferAccess, FeatureUnalignedScratchAccess,
FeatureUnalignedDSAccess, FeatureNegativeScratchOffsetBug, FeatureGWS,
- FeatureDefaultComponentZero,FeatureVmemWriteVgprInOrder, FeatureMemToLDSLoad
+ FeatureDefaultComponentZero,FeatureVmemWriteVgprInOrder, FeatureMemToLDSLoad,
+ FeatureVCUBEInsts, FeatureVLERPInsts, FeatureVSADInsts, FeatureVQSADInsts,
+ FeatureVCVTNORMInsts, FeatureVCVTPKNORMVOP2Insts,
+ FeatureVCVTPKNORMVOP3Insts
]
>;
@@ -1567,7 +1616,10 @@ def FeatureGFX10 : GCNSubtargetFeatureGeneration<"GFX10",
FeatureDefaultComponentZero, FeatureMaxHardClauseLength63,
FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF64GlobalInsts,
FeatureAtomicFMinFMaxF32FlatInsts, FeatureAtomicFMinFMaxF64FlatInsts,
- FeatureVmemWriteVgprInOrder, FeatureMemToLDSLoad
+ FeatureVmemWriteVgprInOrder, FeatureMemToLDSLoad, FeatureVCUBEInsts,
+ FeatureVLERPInsts, FeatureVSADInsts, FeatureVQSADInsts,
+ FeatureVCVTNORMInsts, FeatureVCVTPKNORMVOP2Insts,
+ FeatureVCVTPKNORMVOP3Insts
]
>;
@@ -1590,7 +1642,9 @@ def FeatureGFX11 : GCNSubtargetFeatureGeneration<"GFX11",
FeatureUnalignedDSAccess, FeatureGDS, FeatureGWS,
FeatureDefaultComponentZero, FeatureMaxHardClauseLength32,
FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF32FlatInsts,
- FeatureVmemWriteVgprInOrder
+ FeatureVmemWriteVgprInOrder, FeatureVCUBEInsts, FeatureVLERPInsts,
+ FeatureVSADInsts, FeatureVQSADInsts, FeatureVCVTNORMInsts,
+ FeatureVCVTPKNORMVOP2Insts, FeatureVCVTPKNORMVOP3Insts
]
>;
@@ -2069,10 +2123,17 @@ def FeatureISAVersion12 : FeatureSet<
FeatureMemoryAtomicFAddF32DenormalSupport,
FeatureBVHDualAndBVH8Insts,
FeatureWaitsBeforeSystemScopeStores,
- FeatureD16Writes32BitVgpr
+ FeatureD16Writes32BitVgpr,
+ FeatureVCUBEInsts,
+ FeatureVLERPInsts,
+ FeatureVSADInsts,
+ FeatureVQSADInsts,
+ FeatureVCVTNORMInsts,
+ FeatureVCVTPKNORMVOP2Insts,
+ FeatureVCVTPKNORMVOP3Insts
]>;
-def FeatureISAVersion12_50 : FeatureSet<
+def FeatureISAVersion12_50_Common : FeatureSet<
[FeatureGFX12,
FeatureGFX1250Insts,
FeatureRequiresAlignedVGPRs,
@@ -2147,6 +2208,16 @@ def FeatureISAVersion12_50 : FeatureSet<
FeatureD16Writes32BitVgpr,
]>;
+def FeatureISAVersion12_50 : FeatureSet<
+ !listconcat(FeatureISAVersion12_50_Common.Features,
+ [FeatureVCUBEInsts,
+ FeatureVLERPInsts,
+ FeatureVSADInsts,
+ FeatureVQSADInsts,
+ FeatureVCVTNORMInsts,
+ FeatureVCVTPKNORMVOP2Insts,
+ FeatureVCVTPKNORMVOP3Insts])>;
+
def FeatureISAVersion12_51 : FeatureSet<
!listconcat(FeatureISAVersion12_50.Features,
[FeatureDPALU_DPP])>;
@@ -2816,6 +2887,27 @@ def HasFP8Insts : Predicate<"Subtarget->hasFP8Insts()">,
def HasFP8ConversionInsts : Predicate<"Subtarget->hasFP8ConversionInsts()">,
AssemblerPredicate<(all_of FeatureFP8ConversionInsts)>;
+def HasVCUBEInsts : Predicate<"Subtarget->hasVCUBEInsts()">,
+ AssemblerPredicate<(all_of FeatureVCUBEInsts)>;
+
+def HasVLERPInsts : Predicate<"Subtarget->hasVLERPInsts()">,
+ AssemblerPredicate<(all_of FeatureVLERPInsts)>;
+
+def HasVSADInsts : Predicate<"Subtarget->hasVSADInsts()">,
+ AssemblerPredicate<(all_of FeatureVSADInsts)>;
+
+def HasVQSADInsts : Predicate<"Subtarget->hasVQSADInsts()">,
+ AssemblerPredicate<(all_of FeatureVQSADInsts)>;
+
+def HasVCVTNORMInsts : Predicate<"Subtarget->hasVCVTNORMInsts()">,
+ AssemblerPredicate<(all_of FeatureVCVTNORMInsts)>;
+
+def HasVCVTPKNORMVOP2Insts : Predicate<"Subtarget->hasVCVTPKNORMVOP2Insts()">,
+ AssemblerPredicate<(all_of FeatureVCVTPKNORMVOP2Insts)>;
+
+def HasVCVTPKNORMVOP3Insts : Predicate<"Subtarget->hasVCVTPKNORMVOP3Insts()">,
+ AssemblerPredicate<(all_of FeatureVCVTPKNORMVOP3Insts)>;
+
def HasFP8E5M3Insts : Predicate<"Subtarget->hasFP8E5M3Insts()">,
AssemblerPredicate<(all_of FeatureFP8E5M3Insts)>;
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index f377b8aaf1333..862cee468b7d3 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -166,6 +166,13 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool HasMAIInsts = false;
bool HasFP8Insts = false;
bool HasFP8ConversionInsts = false;
+ bool HasVCUBEInsts = false;
+ bool HasVLERPInsts = false;
+ bool HasVSADInsts = false;
+ bool HasVQSADInsts = false;
+ bool HasVCVTNORMInsts = false;
+ bool HasVCVTPKNORMVOP2Insts = false;
+ bool HasVCVTPKNORMVOP3Insts = false;
bool HasFP8E5M3Insts = false;
bool HasCvtFP8Vop1Bug = false;
bool HasPkFmacF16Inst = false;
@@ -892,6 +899,20 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool hasFP8ConversionInsts() const { return HasFP8ConversionInsts; }
+ bool hasVCUBEInsts() const { return HasVCUBEInsts; }
+
+ bool hasVLERPInsts() const { return HasVLERPInsts; }
+
+ bool hasVSADInsts() const { return HasVSADInsts; }
+
+ bool hasVQSADInsts() const { return HasVQSADInsts; }
+
+ bool hasVCVTNORMInsts() const { return HasVCVTNORMInsts; }
+
+ bool hasVCVTPKNORMVOP2Insts() const { return HasVCVTPKNORMVOP2Insts; }
+
+ bool hasVCVTPKNORMVOP3Insts() const { return HasVCVTPKNORMVOP3Insts; }
+
bool hasFP8E5M3Insts() const { return HasFP8E5M3Insts; }
bool hasPkFmacF16Inst() const {
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 85adcab55b742..23095ba17cae8 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -616,15 +616,15 @@ let SubtargetPredicate = isGFX9Plus in {
let isReMaterializable = 1 in
defm V_SAT_PK_U8_I16 : VOP1Inst_t16<"v_sat_pk_u8_i16", VOP_I16_I32>;
-
- let mayRaiseFPException = 0 in {
- defm V_CVT_NORM_I16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_norm_i16_f16",
- VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16>;
- defm V_CVT_NORM_U16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_norm_u16_f16",
- VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16>;
- } // End mayRaiseFPException = 0
} // End SubtargetPredicate = isGFX9Plus
+let mayRaiseFPException = 0, SubtargetPredicate = HasVCVTNORMInsts in {
+defm V_CVT_NORM_I16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_norm_i16_f16",
+ VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16>;
+defm V_CVT_NORM_U16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_norm_u16_f16",
+ VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16>;
+} // End mayRaiseFPException = 0, SubtargetPredicate = HasVCVTNORMInsts
+
let SubtargetPredicate = isGFX9Only in {
defm V_SCREEN_PARTITION_4SE_B32 : VOP1Inst <"v_screen_partition_4se_b32", VOP_I32_I32>;
} // End SubtargetPredicate = isGFX9Only
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index d87d250a034f0..afd2d610b17de 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -971,7 +971,7 @@ defm V_MBCNT_HI_U32_B32 : VOP2Inst <"v_mbcnt_hi_u32_b32", VOP_I32_I32_I32, int_a
} // End IsNeverUniform = 1
defm V_LDEXP_F32 : VOP2Inst <"v_ldexp_f32", VOP_F32_F32_I32, any_fldexp>;
-let ReadsModeReg = 0, mayRaiseFPException = 0 in {
+let ReadsModeReg = 0, mayRaiseFPException = 0, SubtargetPredicate = HasVCVTPKNORMVOP2Insts in {
defm V_CVT_PKNORM_I16_F32 : VOP2Inst <"v_cvt_pknorm_i16_f32", VOP_V2I16_F32_F32, AMDGPUpknorm_i16_f32>;
defm V_CVT_PKNORM_U16_F32 : VOP2Inst <"v_cvt_pknorm_u16_f32", VOP_V2I16_F32_F32, AMDGPUpknorm_u16_f32>;
}
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 05ba76ab489d8..3d82866c1e5a7 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -185,7 +185,8 @@ defm V_FMA_LEGACY_F32 : VOP3Inst <"v_fma_legacy_f32",
defm V_MAD_I32_I24 : VOP3Inst <"v_mad_i32_i24", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
defm V_MAD_U32_U24 : VOP3Inst <"v_mad_u32_u24", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
defm V_FMA_F32 : VOP3Inst <"v_fma_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, any_fma>, VOPD_Component<0x13, "v_fma_f32">;
-defm V_LERP_U8 : VOP3Inst <"v_lerp_u8", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_lerp>;
+let SubtargetPredicate = HasVLERPInsts in
+ defm V_LERP_U8 : VOP3Inst <"v_lerp_u8", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_lerp>;
let SchedRW = [WriteIntMul] in {
let SubtargetPredicate = HasMadU32Inst in
@@ -258,12 +259,12 @@ defm V_DIV_FMAS_F64 : VOP3Inst <"v_div_fmas_f64", VOP_F64_F64_F64_F64_VCC>;
} // End isCommutable = 1
let isReMaterializable = 1 in {
-let mayRaiseFPException = 0 in {
+let mayRaiseFPException = 0, SubtargetPredicate = HasVCUBEInsts in {
defm V_CUBEID_F32 : VOP3Inst <"v_cubeid_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, int_amdgcn_cubeid>;
defm V_CUBESC_F32 : VOP3Inst <"v_cubesc_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, int_amdgcn_cubesc>;
defm V_CUBETC_F32 : VOP3Inst <"v_cubetc_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, int_amdgcn_cubetc>;
defm V_CUBEMA_F32 : VOP3Inst <"v_cubema_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, int_amdgcn_cubema>;
-} // End mayRaiseFPException
+} // mayRaiseFPException = 0, SubtargetPredicate = HasVCUBEInsts
defm V_BFE_U32 : VOP3Inst <"v_bfe_u32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUbfe_u32>;
defm V_BFE_I32 : VOP3Inst <"v_bfe_i32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUbfe_i32>;
@@ -306,12 +307,12 @@ let SubtargetPredicate = HasMinimum3Maximum3F32, ReadsModeReg = 0 in {
defm V_MAXIMUM3_F32 : VOP3Inst <"v_maximum3_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, AMDGPUfmaximum3>;
} // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0
-let isCommutable = 1 in {
+let isCommutable = 1, SubtargetPredicate = HasVSADInsts in {
defm V_SAD_U8 : VOP3Inst <"v_sad_u8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
defm V_SAD_HI_U8 : VOP3Inst <"v_sad_hi_u8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
defm V_SAD_U16 : VOP3Inst <"v_sad_u16", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
defm V_SAD_U32 : VOP3Inst <"v_sad_u32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
-} // End isCommutable = 1
+} // End isCommutable = 1, SubtargetPredicate = HasVSADInsts
defm V_CVT_PK_U8_F32 : VOP3Inst<"v_cvt_pk_u8_f32", VOP3_Profile<VOP_I32_F32_I32_I32>, int_amdgcn_cvt_pk_u8_f32>;
defm V_DIV_FIXUP_F32 : VOP3Inst <"v_div_fixup_f32", DIV_FIXUP_F32_PROF, AMDGPUdiv_fixup>;
@@ -424,7 +425,8 @@ def VOPProfileMQSAD : VOP3_Profile<VOP_V4I32_I64_I32_V4I32, VOP3_CLAMP> {
let SubtargetPredicate = isGFX7Plus in {
let Constraints = "@earlyclobber $vdst", SchedRW = [WriteQuarterRate32] in {
-defm V_QSAD_PK_U16_U8 : VOP3Inst <"v_qsad_pk_u16_u8", VOP3_Profile<VOP_I64_I64_I32_I64, VOP3_CLAMP>>;
+let SubtargetPredicate = HasVQSADInsts in
+ defm V_QSAD_PK_U16_U8 : VOP3Inst <"v_qsad_pk_u16_u8", VOP3_Profile<VOP_I64_I64_I32_I64, VOP3_CLAMP>>;
defm V_MQSAD_U32_U8 : VOP3Inst <"v_mqsad_u32_u8", VOPProfileMQSAD>;
} // End Constraints = "@earlyclobber $vdst", SchedRW = [WriteQuarterRate32]
} // End SubtargetPredicate = isGFX7Plus
@@ -789,9 +791,6 @@ let isCommutable = 1 in {
defm V_MAD_I32_I16 : VOP3Inst_t16 <"v_mad_i32_i16", VOP_I32_I16_I16_I32>;
} // End isCommutable = 1
-defm V_CVT_PKNORM_I16_F16 : VOP3Inst_t16 <"v_cvt_pknorm_i16_f16", VOP_B32_F16_F16>;
-defm V_CVT_PKNORM_U16_F16 : VOP3Inst_t16 <"v_cvt_pknorm_u16_f16", VOP_B32_F16_F16>;
-
defm V_PACK_B32_F16 : VOP3Inst_t16 <"v_pack_b32_f16", VOP_B32_F16_F16>;
let isReMaterializable = 1 in {
@@ -996,6 +995,11 @@ def : GCNPat<(DivergentBinFrag<or> (or_oneuse i64:$src0, i64:$src1), i64:$src2),
} // End SubtargetPredicate = isGFX9Plus
+let SubtargetPredicate = HasVCVTPKNORMVOP3Insts in {
+ defm V_CVT_PKNORM_I16_F16 : VOP3Inst_t16 <"v_cvt_pknorm_i16_f16", VOP_B32_F16_F16>;
+ defm V_CVT_PKNORM_U16_F16 : VOP3Inst_t16 <"v_cvt_pknorm_u16_f16", VOP_B32_F16_F16>;
+} // end SubtargetPredicate = HasVCVTPKNORMVOP3Insts
+
// FIXME: Probably should hardcode clamp bit in pseudo and avoid this.
class OpSelBinOpClampPat<SDPatternOperator node,
Instruction inst> : GCNPat<
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lerp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lerp.ll
index 43c69baaf3e7f..49169eec072b6 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lerp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lerp.ll
@@ -1,4 +1,3 @@
-; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefix=GCN %s
declare i32 @llvm.amdgcn.lerp(i32, i32, i32) #0
From 33276544f6cd6a96f783bf66dfee81d26a3e8b96 Mon Sep 17 00:00:00 2001
From: shore <372660931 at qq.com>
Date: Fri, 14 Nov 2025 09:45:23 +0800
Subject: [PATCH 2/2] fix comments
---
llvm/lib/Target/AMDGPU/AMDGPU.td | 124 ++++++++++-----------
llvm/lib/Target/AMDGPU/GCNSubtarget.h | 28 ++---
llvm/lib/Target/AMDGPU/VOP1Instructions.td | 4 +-
llvm/lib/Target/AMDGPU/VOP2Instructions.td | 2 +-
llvm/lib/Target/AMDGPU/VOP3Instructions.td | 16 +--
5 files changed, 87 insertions(+), 87 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index fe2a192f0f372..c5d63e5000767 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -901,46 +901,46 @@ def FeaturePkFmacF16Inst : SubtargetFeature<"pk-fmac-f16-inst",
"Has v_pk_fmac_f16 instruction"
>;
-def FeatureVCUBEInsts : SubtargetFeature<"V_CUBE-Insts",
- "HasVCUBEInsts",
+def FeatureCubeInsts : SubtargetFeature<"V_CUBE-Insts",
+ "HasCubeInsts",
"true",
"Has V_CUBE* instructions"
>;
-def FeatureVLERPInsts : SubtargetFeature<"V_LERP-insts",
- "HasVLERPInsts",
+def FeatureLerpInst : SubtargetFeature<"V_LERP-insts",
+ "HasLerpInst",
"true",
- "Has V_LERP* instructions"
+ "Has v_lerp_u8 instruction"
>;
-def FeatureVSADInsts : SubtargetFeature<"V_SAD-insts",
- "HasVSADInsts",
+def FeatureSadInsts : SubtargetFeature<"V_SAD-insts",
+ "HasSadInsts",
"true",
"Has V_SAD* instructions"
>;
-def FeatureVQSADInsts : SubtargetFeature<"V_QSAD-insts",
- "HasVQSADInsts",
+def FeatureQsadInsts : SubtargetFeature<"V_QSAD-insts",
+ "HasQsadInsts",
"true",
"Has V_QSAD* instructions"
>;
-def FeatureVCVTNORMInsts : SubtargetFeature<"V_CVT_NORM-insts",
- "HasVCVTNORMInsts",
+def FeatureCvtNormInsts : SubtargetFeature<"V_CVT_NORM-insts",
+ "HasCvtNormInsts",
"true",
"Has V_CVT_NORM* instructions"
>;
-def FeatureVCVTPKNORMVOP2Insts : SubtargetFeature<"V_CVT_PKNORM-VOP2-insts",
- "HasVCVTPKNORMVOP2Insts",
+def FeatureCvtPkNormVOP2Insts : SubtargetFeature<"V_CVT_PKNORM-VOP2-insts",
+ "HasCvtPkNormVOP2Insts",
"true",
- "Has V_CVT_NORM* VOP2 instructions"
+ "Has V_CVT_PK_NORM_*_F32 instructions/Has V_CVT_PK_NORM_*_F16 instructions"
>;
-def FeatureVCVTPKNORMVOP3Insts : SubtargetFeature<"V_CVT_PKNORM-VOP3-insts",
- "HasVCVTPKNORMVOP3Insts",
+def FeatureCvtPkNormVOP3Insts : SubtargetFeature<"V_CVT_PKNORM-VOP3-insts",
+ "HasCvtPkNormVOP3Insts",
"true",
- "Has V_CVT_NORM* VOP3 instructions"
+ "Has V_CVT_PK_NORM_*_F32 instructions/Has V_CVT_PK_NORM_*_F16 instructions"
>;
def FeatureAtomicDsPkAdd16Insts : SubtargetFeature<"atomic-ds-pk-add-16-insts",
@@ -1536,8 +1536,8 @@ def FeatureSouthernIslands : GCNSubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
FeatureTrigReducedRange, FeatureExtendedImageInsts, FeatureImageInsts,
FeatureGDS, FeatureGWS, FeatureDefaultComponentZero,
FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF64GlobalInsts,
- FeatureVmemWriteVgprInOrder, FeatureVCUBEInsts, FeatureVLERPInsts,
- FeatureVSADInsts, FeatureVCVTPKNORMVOP2Insts
+ FeatureVmemWriteVgprInOrder, FeatureCubeInsts, FeatureLerpInst,
+ FeatureSadInsts, FeatureCvtPkNormVOP2Insts
]
>;
@@ -1551,8 +1551,8 @@ def FeatureSeaIslands : GCNSubtargetFeatureGeneration<"SEA_ISLANDS",
FeatureImageInsts, FeatureGDS, FeatureGWS, FeatureDefaultComponentZero,
FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF64GlobalInsts,
FeatureAtomicFMinFMaxF32FlatInsts, FeatureAtomicFMinFMaxF64FlatInsts,
- FeatureVmemWriteVgprInOrder, FeatureVCUBEInsts, FeatureVLERPInsts,
- FeatureVSADInsts, FeatureVQSADInsts, FeatureVCVTPKNORMVOP2Insts
+ FeatureVmemWriteVgprInOrder, FeatureCubeInsts, FeatureLerpInst,
+ FeatureSadInsts, FeatureQsadInsts, FeatureCvtPkNormVOP2Insts
]
>;
@@ -1568,9 +1568,9 @@ def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
FeatureGFX7GFX8GFX9Insts, FeatureSMemTimeInst, FeatureMadMacF32Insts,
FeatureDsSrc2Insts, FeatureExtendedImageInsts, FeatureFastDenormalF32,
FeatureUnalignedBufferAccess, FeatureImageInsts, FeatureGDS, FeatureGWS,
- FeatureDefaultComponentZero, FeatureVmemWriteVgprInOrder, FeatureVCUBEInsts,
- FeatureVLERPInsts, FeatureVSADInsts, FeatureVQSADInsts,
- FeatureVCVTPKNORMVOP2Insts
+ FeatureDefaultComponentZero, FeatureVmemWriteVgprInOrder, FeatureCubeInsts,
+ FeatureLerpInst, FeatureSadInsts, FeatureQsadInsts,
+ FeatureCvtPkNormVOP2Insts
]
>;
@@ -1590,9 +1590,9 @@ def FeatureGFX9 : GCNSubtargetFeatureGeneration<"GFX9",
FeatureUnalignedBufferAccess, FeatureUnalignedScratchAccess,
FeatureUnalignedDSAccess, FeatureNegativeScratchOffsetBug, FeatureGWS,
FeatureDefaultComponentZero,FeatureVmemWriteVgprInOrder, FeatureMemToLDSLoad,
- FeatureVCUBEInsts, FeatureVLERPInsts, FeatureVSADInsts, FeatureVQSADInsts,
- FeatureVCVTNORMInsts, FeatureVCVTPKNORMVOP2Insts,
- FeatureVCVTPKNORMVOP3Insts
+ FeatureCubeInsts, FeatureLerpInst, FeatureSadInsts, FeatureQsadInsts,
+ FeatureCvtNormInsts, FeatureCvtPkNormVOP2Insts,
+ FeatureCvtPkNormVOP3Insts
]
>;
@@ -1616,10 +1616,10 @@ def FeatureGFX10 : GCNSubtargetFeatureGeneration<"GFX10",
FeatureDefaultComponentZero, FeatureMaxHardClauseLength63,
FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF64GlobalInsts,
FeatureAtomicFMinFMaxF32FlatInsts, FeatureAtomicFMinFMaxF64FlatInsts,
- FeatureVmemWriteVgprInOrder, FeatureMemToLDSLoad, FeatureVCUBEInsts,
- FeatureVLERPInsts, FeatureVSADInsts, FeatureVQSADInsts,
- FeatureVCVTNORMInsts, FeatureVCVTPKNORMVOP2Insts,
- FeatureVCVTPKNORMVOP3Insts
+ FeatureVmemWriteVgprInOrder, FeatureMemToLDSLoad, FeatureCubeInsts,
+ FeatureLerpInst, FeatureSadInsts, FeatureQsadInsts,
+ FeatureCvtNormInsts, FeatureCvtPkNormVOP2Insts,
+ FeatureCvtPkNormVOP3Insts
]
>;
@@ -1642,9 +1642,9 @@ def FeatureGFX11 : GCNSubtargetFeatureGeneration<"GFX11",
FeatureUnalignedDSAccess, FeatureGDS, FeatureGWS,
FeatureDefaultComponentZero, FeatureMaxHardClauseLength32,
FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF32FlatInsts,
- FeatureVmemWriteVgprInOrder, FeatureVCUBEInsts, FeatureVLERPInsts,
- FeatureVSADInsts, FeatureVQSADInsts, FeatureVCVTNORMInsts,
- FeatureVCVTPKNORMVOP2Insts, FeatureVCVTPKNORMVOP3Insts
+ FeatureVmemWriteVgprInOrder, FeatureCubeInsts, FeatureLerpInst,
+ FeatureSadInsts, FeatureQsadInsts, FeatureCvtNormInsts,
+ FeatureCvtPkNormVOP2Insts, FeatureCvtPkNormVOP3Insts
]
>;
@@ -2124,13 +2124,13 @@ def FeatureISAVersion12 : FeatureSet<
FeatureBVHDualAndBVH8Insts,
FeatureWaitsBeforeSystemScopeStores,
FeatureD16Writes32BitVgpr,
- FeatureVCUBEInsts,
- FeatureVLERPInsts,
- FeatureVSADInsts,
- FeatureVQSADInsts,
- FeatureVCVTNORMInsts,
- FeatureVCVTPKNORMVOP2Insts,
- FeatureVCVTPKNORMVOP3Insts
+ FeatureCubeInsts,
+ FeatureLerpInst,
+ FeatureSadInsts,
+ FeatureQsadInsts,
+ FeatureCvtNormInsts,
+ FeatureCvtPkNormVOP2Insts,
+ FeatureCvtPkNormVOP3Insts
]>;
def FeatureISAVersion12_50_Common : FeatureSet<
@@ -2210,13 +2210,13 @@ def FeatureISAVersion12_50_Common : FeatureSet<
def FeatureISAVersion12_50 : FeatureSet<
!listconcat(FeatureISAVersion12_50_Common.Features,
- [FeatureVCUBEInsts,
- FeatureVLERPInsts,
- FeatureVSADInsts,
- FeatureVQSADInsts,
- FeatureVCVTNORMInsts,
- FeatureVCVTPKNORMVOP2Insts,
- FeatureVCVTPKNORMVOP3Insts])>;
+ [FeatureCubeInsts,
+ FeatureLerpInst,
+ FeatureSadInsts,
+ FeatureQsadInsts,
+ FeatureCvtNormInsts,
+ FeatureCvtPkNormVOP2Insts,
+ FeatureCvtPkNormVOP3Insts])>;
def FeatureISAVersion12_51 : FeatureSet<
!listconcat(FeatureISAVersion12_50.Features,
@@ -2887,26 +2887,26 @@ def HasFP8Insts : Predicate<"Subtarget->hasFP8Insts()">,
def HasFP8ConversionInsts : Predicate<"Subtarget->hasFP8ConversionInsts()">,
AssemblerPredicate<(all_of FeatureFP8ConversionInsts)>;
-def HasVCUBEInsts : Predicate<"Subtarget->hasVCUBEInsts()">,
- AssemblerPredicate<(all_of FeatureVCUBEInsts)>;
+def HasCubeInsts : Predicate<"Subtarget->hasCubeInsts()">,
+ AssemblerPredicate<(all_of FeatureCubeInsts)>;
-def HasVLERPInsts : Predicate<"Subtarget->hasVLERPInsts()">,
- AssemblerPredicate<(all_of FeatureVLERPInsts)>;
+def HasLerpInst : Predicate<"Subtarget->hasLerpInst()">,
+ AssemblerPredicate<(all_of FeatureLerpInst)>;
-def HasVSADInsts : Predicate<"Subtarget->hasVSADInsts()">,
- AssemblerPredicate<(all_of FeatureVSADInsts)>;
+def HasSadInsts : Predicate<"Subtarget->hasSadInsts()">,
+ AssemblerPredicate<(all_of FeatureSadInsts)>;
-def HasVQSADInsts : Predicate<"Subtarget->hasVQSADInsts()">,
- AssemblerPredicate<(all_of FeatureVQSADInsts)>;
+def HasQsadInsts : Predicate<"Subtarget->hasQsadInsts()">,
+ AssemblerPredicate<(all_of FeatureQsadInsts)>;
-def HasVCVTNORMInsts : Predicate<"Subtarget->hasVCVTNORMInsts()">,
- AssemblerPredicate<(all_of FeatureVCVTNORMInsts)>;
+def HasCvtNormInsts : Predicate<"Subtarget->hasCvtNormInsts()">,
+ AssemblerPredicate<(all_of FeatureCvtNormInsts)>;
-def HasVCVTPKNORMVOP2Insts : Predicate<"Subtarget->hasVCVTPKNORMVOP2Insts()">,
- AssemblerPredicate<(all_of FeatureVCVTPKNORMVOP2Insts)>;
+def HasCvtPkNormVOP2Insts : Predicate<"Subtarget->hasCvtPkNormVOP2Insts()">,
+ AssemblerPredicate<(all_of FeatureCvtPkNormVOP2Insts)>;
-def HasVCVTPKNORMVOP3Insts : Predicate<"Subtarget->hasVCVTPKNORMVOP3Insts()">,
- AssemblerPredicate<(all_of FeatureVCVTPKNORMVOP3Insts)>;
+def HasCvtPkNormVOP3Insts : Predicate<"Subtarget->hasCvtPkNormVOP3Insts()">,
+ AssemblerPredicate<(all_of FeatureCvtPkNormVOP3Insts)>;
def HasFP8E5M3Insts : Predicate<"Subtarget->hasFP8E5M3Insts()">,
AssemblerPredicate<(all_of FeatureFP8E5M3Insts)>;
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 862cee468b7d3..85260c4f123c7 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -166,13 +166,13 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool HasMAIInsts = false;
bool HasFP8Insts = false;
bool HasFP8ConversionInsts = false;
- bool HasVCUBEInsts = false;
- bool HasVLERPInsts = false;
- bool HasVSADInsts = false;
- bool HasVQSADInsts = false;
- bool HasVCVTNORMInsts = false;
- bool HasVCVTPKNORMVOP2Insts = false;
- bool HasVCVTPKNORMVOP3Insts = false;
+ bool HasCubeInsts = false;
+ bool HasLerpInst = false;
+ bool HasSadInsts = false;
+ bool HasQsadInsts = false;
+ bool HasCvtNormInsts = false;
+ bool HasCvtPkNormVOP2Insts = false;
+ bool HasCvtPkNormVOP3Insts = false;
bool HasFP8E5M3Insts = false;
bool HasCvtFP8Vop1Bug = false;
bool HasPkFmacF16Inst = false;
@@ -899,19 +899,19 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool hasFP8ConversionInsts() const { return HasFP8ConversionInsts; }
- bool hasVCUBEInsts() const { return HasVCUBEInsts; }
+ bool hasCubeInsts() const { return HasCubeInsts; }
- bool hasVLERPInsts() const { return HasVLERPInsts; }
+ bool hasLerpInst() const { return HasLerpInst; }
- bool hasVSADInsts() const { return HasVSADInsts; }
+ bool hasSadInsts() const { return HasSadInsts; }
- bool hasVQSADInsts() const { return HasVQSADInsts; }
+ bool hasQsadInsts() const { return HasQsadInsts; }
- bool hasVCVTNORMInsts() const { return HasVCVTNORMInsts; }
+ bool hasCvtNormInsts() const { return HasCvtNormInsts; }
- bool hasVCVTPKNORMVOP2Insts() const { return HasVCVTPKNORMVOP2Insts; }
+ bool hasCvtPkNormVOP2Insts() const { return HasCvtPkNormVOP2Insts; }
- bool hasVCVTPKNORMVOP3Insts() const { return HasVCVTPKNORMVOP3Insts; }
+ bool hasCvtPkNormVOP3Insts() const { return HasCvtPkNormVOP3Insts; }
bool hasFP8E5M3Insts() const { return HasFP8E5M3Insts; }
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 23095ba17cae8..1d1e95908fce6 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -618,12 +618,12 @@ let SubtargetPredicate = isGFX9Plus in {
defm V_SAT_PK_U8_I16 : VOP1Inst_t16<"v_sat_pk_u8_i16", VOP_I16_I32>;
} // End SubtargetPredicate = isGFX9Plus
-let mayRaiseFPException = 0, SubtargetPredicate = HasVCVTNORMInsts in {
+let mayRaiseFPException = 0, SubtargetPredicate = HasCvtNormInsts in {
defm V_CVT_NORM_I16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_norm_i16_f16",
VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16>;
defm V_CVT_NORM_U16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_norm_u16_f16",
VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16>;
-} // End mayRaiseFPException = 0, SubtargetPredicate = HasVCVTNORMInsts
+} // End mayRaiseFPException = 0, SubtargetPredicate = HasCvtNormInsts
let SubtargetPredicate = isGFX9Only in {
defm V_SCREEN_PARTITION_4SE_B32 : VOP1Inst <"v_screen_partition_4se_b32", VOP_I32_I32>;
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index afd2d610b17de..dbb7862ab4ab5 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -971,7 +971,7 @@ defm V_MBCNT_HI_U32_B32 : VOP2Inst <"v_mbcnt_hi_u32_b32", VOP_I32_I32_I32, int_a
} // End IsNeverUniform = 1
defm V_LDEXP_F32 : VOP2Inst <"v_ldexp_f32", VOP_F32_F32_I32, any_fldexp>;
-let ReadsModeReg = 0, mayRaiseFPException = 0, SubtargetPredicate = HasVCVTPKNORMVOP2Insts in {
+let ReadsModeReg = 0, mayRaiseFPException = 0, SubtargetPredicate = HasCvtPkNormVOP2Insts in {
defm V_CVT_PKNORM_I16_F32 : VOP2Inst <"v_cvt_pknorm_i16_f32", VOP_V2I16_F32_F32, AMDGPUpknorm_i16_f32>;
defm V_CVT_PKNORM_U16_F32 : VOP2Inst <"v_cvt_pknorm_u16_f32", VOP_V2I16_F32_F32, AMDGPUpknorm_u16_f32>;
}
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 3d82866c1e5a7..872bde501cd2d 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -185,7 +185,7 @@ defm V_FMA_LEGACY_F32 : VOP3Inst <"v_fma_legacy_f32",
defm V_MAD_I32_I24 : VOP3Inst <"v_mad_i32_i24", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
defm V_MAD_U32_U24 : VOP3Inst <"v_mad_u32_u24", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
defm V_FMA_F32 : VOP3Inst <"v_fma_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, any_fma>, VOPD_Component<0x13, "v_fma_f32">;
-let SubtargetPredicate = HasVLERPInsts in
+let SubtargetPredicate = HasLerpInst in
defm V_LERP_U8 : VOP3Inst <"v_lerp_u8", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_lerp>;
let SchedRW = [WriteIntMul] in {
@@ -259,12 +259,12 @@ defm V_DIV_FMAS_F64 : VOP3Inst <"v_div_fmas_f64", VOP_F64_F64_F64_F64_VCC>;
} // End isCommutable = 1
let isReMaterializable = 1 in {
-let mayRaiseFPException = 0, SubtargetPredicate = HasVCUBEInsts in {
+let mayRaiseFPException = 0, SubtargetPredicate = HasCubeInsts in {
defm V_CUBEID_F32 : VOP3Inst <"v_cubeid_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, int_amdgcn_cubeid>;
defm V_CUBESC_F32 : VOP3Inst <"v_cubesc_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, int_amdgcn_cubesc>;
defm V_CUBETC_F32 : VOP3Inst <"v_cubetc_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, int_amdgcn_cubetc>;
defm V_CUBEMA_F32 : VOP3Inst <"v_cubema_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, int_amdgcn_cubema>;
-} // mayRaiseFPException = 0, SubtargetPredicate = HasVCUBEInsts
+} // mayRaiseFPException = 0, SubtargetPredicate = HasCubeInsts
defm V_BFE_U32 : VOP3Inst <"v_bfe_u32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUbfe_u32>;
defm V_BFE_I32 : VOP3Inst <"v_bfe_i32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUbfe_i32>;
@@ -307,12 +307,12 @@ let SubtargetPredicate = HasMinimum3Maximum3F32, ReadsModeReg = 0 in {
defm V_MAXIMUM3_F32 : VOP3Inst <"v_maximum3_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, AMDGPUfmaximum3>;
} // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0
-let isCommutable = 1, SubtargetPredicate = HasVSADInsts in {
+let isCommutable = 1, SubtargetPredicate = HasSadInsts in {
defm V_SAD_U8 : VOP3Inst <"v_sad_u8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
defm V_SAD_HI_U8 : VOP3Inst <"v_sad_hi_u8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
defm V_SAD_U16 : VOP3Inst <"v_sad_u16", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
defm V_SAD_U32 : VOP3Inst <"v_sad_u32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
-} // End isCommutable = 1, SubtargetPredicate = HasVSADInsts
+} // End isCommutable = 1, SubtargetPredicate = HasSadInsts
defm V_CVT_PK_U8_F32 : VOP3Inst<"v_cvt_pk_u8_f32", VOP3_Profile<VOP_I32_F32_I32_I32>, int_amdgcn_cvt_pk_u8_f32>;
defm V_DIV_FIXUP_F32 : VOP3Inst <"v_div_fixup_f32", DIV_FIXUP_F32_PROF, AMDGPUdiv_fixup>;
@@ -425,7 +425,7 @@ def VOPProfileMQSAD : VOP3_Profile<VOP_V4I32_I64_I32_V4I32, VOP3_CLAMP> {
let SubtargetPredicate = isGFX7Plus in {
let Constraints = "@earlyclobber $vdst", SchedRW = [WriteQuarterRate32] in {
-let SubtargetPredicate = HasVQSADInsts in
+let SubtargetPredicate = HasQsadInsts in
defm V_QSAD_PK_U16_U8 : VOP3Inst <"v_qsad_pk_u16_u8", VOP3_Profile<VOP_I64_I64_I32_I64, VOP3_CLAMP>>;
defm V_MQSAD_U32_U8 : VOP3Inst <"v_mqsad_u32_u8", VOPProfileMQSAD>;
} // End Constraints = "@earlyclobber $vdst", SchedRW = [WriteQuarterRate32]
@@ -995,10 +995,10 @@ def : GCNPat<(DivergentBinFrag<or> (or_oneuse i64:$src0, i64:$src1), i64:$src2),
} // End SubtargetPredicate = isGFX9Plus
-let SubtargetPredicate = HasVCVTPKNORMVOP3Insts in {
+let SubtargetPredicate = HasCvtPkNormVOP3Insts in {
defm V_CVT_PKNORM_I16_F16 : VOP3Inst_t16 <"v_cvt_pknorm_i16_f16", VOP_B32_F16_F16>;
defm V_CVT_PKNORM_U16_F16 : VOP3Inst_t16 <"v_cvt_pknorm_u16_f16", VOP_B32_F16_F16>;
-} // end SubtargetPredicate = HasVCVTPKNORMVOP3Insts
+} // end SubtargetPredicate = HasCvtPkNormVOP3Insts
// FIXME: Probably should hardcode clamp bit in pseudo and avoid this.
class OpSelBinOpClampPat<SDPatternOperator node,
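For reference, a minimal C++ sketch (not part of the patch) of how backend code
would typically consult the new per-instruction feature bits once this lands.
The getter is one of those added to GCNSubtarget.h in PATCH 2/2; the helper
function wrapped around it is purely illustrative:

  // Illustrative only -- assumes this sits inside the AMDGPU backend,
  // where the GCNSubtarget.h changes from this patch are visible.
  #include "GCNSubtarget.h"

  using namespace llvm;

  // Hypothetical helper: gate selection of v_lerp_u8 on the dedicated
  // feature bit rather than on a hard-coded generation check.
  static bool canSelectLerpU8(const GCNSubtarget &ST) {
    return ST.hasLerpInst(); // getter introduced by this patch
  }

The TableGen hunks above follow the same idea: each V_CUBE*, V_LERP*, V_SAD*,
V_QSAD*, V_CVT_NORM* and V_CVT_PKNORM* definition is now guarded by its own
SubtargetPredicate, so future targets can include or drop these instructions
individually instead of relying on generation predicates.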