[llvm] 0a80631 - AMDGPU: Ensure both wavesize features are not set (#159234)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 25 02:46:39 PDT 2025
Author: Matt Arsenault
Date: 2025-09-25T09:46:34Z
New Revision: 0a80631142db9eb2c357dee304d51c1ef1acc590
URL: https://github.com/llvm/llvm-project/commit/0a80631142db9eb2c357dee304d51c1ef1acc590
DIFF: https://github.com/llvm/llvm-project/commit/0a80631142db9eb2c357dee304d51c1ef1acc590.diff
LOG: AMDGPU: Ensure both wavesize features are not set (#159234)
Make sure we cannot be in a mode with both wavesizes. This
prevents assertions in a future change. This should probably
just be an error, but we do not have a good way to report
errors from the MCSubtargetInfo constructor.
Added:
llvm/test/MC/AMDGPU/wavesize-feature-unsupported-target.s
llvm/test/MC/Disassembler/AMDGPU/gfx1250_wave64_feature.s
llvm/test/MC/Disassembler/AMDGPU/gfx9_wave32_feature.txt
Modified:
llvm/lib/Target/AMDGPU/AMDGPU.td
llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
llvm/lib/Target/AMDGPU/GCNSubtarget.h
llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
llvm/lib/Target/AMDGPU/SIInstrInfo.td
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
llvm/test/MC/Disassembler/AMDGPU/gfx10_vopc.txt
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index b2d1011eb506c..eaa1870f4be28 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1238,6 +1238,19 @@ def FeatureSetPrioIncWgInst : SubtargetFeature<"setprio-inc-wg-inst",
// Subtarget Features (options and debugging)
//===------------------------------------------------------------===//
+// Ugly hack to accomodate assembling modules with mixed
+// wavesizes. Ideally we would have a mapping symbol in assembly which
+// would keep track of which sections of code should be treated as
+// wave32 and wave64. Instead what users do is assemble with both
+// wavesizes enabled. We translate this into this special mode so this
+// only influences assembler behavior and nothing else.
+def FeatureAssemblerPermissiveWavesize : SubtargetFeature<
+ "assembler-permissive-wavesize",
+ "AssemblerPermissiveWavesize",
+ "true",
+ "allow parsing wave32 and wave64 variants of instructions"
+>;
+
class FeatureMaxPrivateElementSize<int size> : SubtargetFeature<
"max-private-element-size-"#size,
"MaxPrivateElementSize",
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 21dfdfd6bed04..dfbde85231a6e 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -1246,6 +1246,12 @@ raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
// AsmParser
//===----------------------------------------------------------------------===//
+// TODO: define GET_SUBTARGET_FEATURE_NAME
+#define GET_REGISTER_MATCHER
+#include "AMDGPUGenAsmMatcher.inc"
+#undef GET_REGISTER_MATCHER
+#undef GET_SUBTARGET_FEATURE_NAME
+
// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
// .amdgpu_hsa_kernel or at EOF.
@@ -1536,6 +1542,10 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
return AMDGPU::isGFX10_BEncoding(getSTI());
}
+ bool isWave32() const { return getAvailableFeatures()[Feature_isWave32Bit]; }
+
+ bool isWave64() const { return getAvailableFeatures()[Feature_isWave64Bit]; }
+
bool hasInv2PiInlineImm() const {
return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
}
@@ -1603,6 +1613,8 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
return &MII;
}
+ // FIXME: This should not be used. Instead, should use queries derived from
+ // getAvailableFeatures().
const FeatureBitset &getFeatureBits() const {
return getSTI().getFeatureBits();
}
@@ -2259,9 +2271,8 @@ bool AMDGPUOperand::isSDWAInt32Operand() const {
}
bool AMDGPUOperand::isBoolReg() const {
- auto FB = AsmParser->getFeatureBits();
- return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrc_b64()) ||
- (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrc_b32()));
+ return isReg() && ((AsmParser->isWave64() && isSCSrc_b64()) ||
+ (AsmParser->isWave32() && isSCSrc_b32()));
}
uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
@@ -5025,9 +5036,8 @@ bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
// Check if VCC register matches wavefront size
bool AMDGPUAsmParser::validateVccOperand(MCRegister Reg) const {
- auto FB = getFeatureBits();
- return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
- (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
+ return (Reg == AMDGPU::VCC && isWave64()) ||
+ (Reg == AMDGPU::VCC_LO && isWave32());
}
// One unique literal can be used. VOP3 literal is only allowed in GFX10+
@@ -5717,7 +5727,7 @@ bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
// Check if this instruction may be used with a
diff erent wavesize.
if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
-
+ // FIXME: Use getAvailableFeatures, and do not manually recompute
FeatureBitset FeaturesWS32 = getFeatureBits();
FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64)
.flip(AMDGPU::FeatureWavefrontSize32);
@@ -6472,10 +6482,10 @@ bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
if (C.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
if (!isGFX10Plus())
return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
- if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
+ if (!isWave32())
return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
} else {
- if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
+ if (!isWave64())
return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
}
}
@@ -6484,10 +6494,10 @@ bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
if (C.wavefront_size == 5) {
if (!isGFX10Plus())
return TokError("wavefront_size=5 is only allowed on GFX10+");
- if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
+ if (!isWave32())
return TokError("wavefront_size=5 requires +WavefrontSize32");
} else if (C.wavefront_size == 6) {
- if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
+ if (!isWave64())
return TokError("wavefront_size=6 requires +WavefrontSize64");
}
}
@@ -10390,7 +10400,6 @@ LLVMInitializeAMDGPUAsmParser() {
RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}
-#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#define GET_MNEMONIC_CHECKER
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index f5367f3b88920..a54d6651c25c1 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -99,6 +99,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool EnableDS128 = false;
bool EnablePRTStrictNull = false;
bool DumpCode = false;
+ bool AssemblerPermissiveWavesize = false;
// Subtarget statically properties set by tablegen
bool FP64 = false;
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
index f2e2d0ed3f8a6..013cfeb364048 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
@@ -82,20 +82,36 @@ createAMDGPUMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
MCSubtargetInfo *STI =
createAMDGPUMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS);
+ bool IsWave64 = STI->hasFeature(AMDGPU::FeatureWavefrontSize64);
+ bool IsWave32 = STI->hasFeature(AMDGPU::FeatureWavefrontSize32);
+
// FIXME: We should error for the default target.
if (STI->getFeatureBits().none())
STI->ToggleFeature(AMDGPU::FeatureSouthernIslands);
- if (!STI->hasFeature(AMDGPU::FeatureWavefrontSize64) &&
- !STI->hasFeature(AMDGPU::FeatureWavefrontSize32)) {
+ if (!IsWave64 && !IsWave32) {
// If there is no default wave size it must be a generation before gfx10,
// these have FeatureWavefrontSize64 in their definition already. For gfx10+
// set wave32 as a default.
STI->ToggleFeature(AMDGPU::isGFX10Plus(*STI)
? AMDGPU::FeatureWavefrontSize32
: AMDGPU::FeatureWavefrontSize64);
+ } else if (IsWave64 && IsWave32) {
+ // The wave size is mutually exclusive. If both somehow end up set, wave32
+ // wins if supported.
+ STI->ToggleFeature(AMDGPU::supportsWave32(*STI)
+ ? AMDGPU::FeatureWavefrontSize64
+ : AMDGPU::FeatureWavefrontSize32);
+
+ // If both wavesizes were manually requested, hack in a feature to permit
+ // assembling modules with mixed wavesizes.
+ STI->ToggleFeature(AMDGPU::FeatureAssemblerPermissiveWavesize);
}
+ assert((STI->hasFeature(AMDGPU::FeatureWavefrontSize64) !=
+ STI->hasFeature(AMDGPU::FeatureWavefrontSize32)) &&
+ "wavesize features are mutually exclusive");
+
return STI;
}
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index fb2cd04b364d7..18a53931a6390 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -7,9 +7,11 @@
//===----------------------------------------------------------------------===//
def isWave32 : Predicate<"Subtarget->isWave32()">,
- AssemblerPredicate <(all_of FeatureWavefrontSize32)>;
+ AssemblerPredicate <(any_of FeatureWavefrontSize32,
+ FeatureAssemblerPermissiveWavesize)>;
def isWave64 : Predicate<"Subtarget->isWave64()">,
- AssemblerPredicate <(all_of FeatureWavefrontSize64)>;
+ AssemblerPredicate <(any_of FeatureWavefrontSize64,
+ FeatureAssemblerPermissiveWavesize)>;
class AMDGPUMnemonicAlias<string From, string To, string VariantName = "">
: MnemonicAlias<From, To, VariantName>, PredicateControl;
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 37b0262966160..2b9c063f42a5e 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -1568,6 +1568,11 @@ bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI);
bool hasMAIInsts(const MCSubtargetInfo &STI);
bool hasVOPD(const MCSubtargetInfo &STI);
bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI);
+
+inline bool supportsWave32(const MCSubtargetInfo &STI) {
+ return AMDGPU::isGFX10Plus(STI) && !AMDGPU::isGFX1250(STI);
+}
+
int getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR);
unsigned hasKernargPreload(const MCSubtargetInfo &STI);
bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST);
diff --git a/llvm/test/MC/AMDGPU/wavesize-feature-unsupported-target.s b/llvm/test/MC/AMDGPU/wavesize-feature-unsupported-target.s
new file mode 100644
index 0000000000000..3a8656c392ff5
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/wavesize-feature-unsupported-target.s
@@ -0,0 +1,23 @@
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+wavefrontsize64 -o - %s | FileCheck -check-prefix=GFX1250 %s
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx900 -mattr=+wavefrontsize32 -o - %s | FileCheck -check-prefix=GFX900 %s
+
+// Make sure setting both modes is supported at the same time.
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,+wavefrontsize64 %s | FileCheck -check-prefixes=GFX10 %s
+
+// Test that there is no assertion when using an explicit
+// wavefrontsize attribute on a target which does not support it.
+
+// GFX1250: v_add_f64_e32 v[0:1], 1.0, v[0:1]
+// GFX900: v_add_f64 v[0:1], 1.0, v[0:1]
+// GFX10: v_add_f64 v[0:1], 1.0, v[0:1]
+v_add_f64 v[0:1], 1.0, v[0:1]
+
+// GFX1250: v_cmp_eq_u32_e64 s[0:1], 1.0, s1
+// GFX900: v_cmp_eq_u32_e64 s[0:1], 1.0, s1
+// GFX10: v_cmp_eq_u32_e64 s[0:1], 1.0, s1
+v_cmp_eq_u32_e64 s[0:1], 1.0, s1
+
+// GFX1250: v_cndmask_b32_e64 v1, v2, v3, s[0:1]
+// GFX900: v_cndmask_b32_e64 v1, v2, v3, s[0:1]
+// GFX10: v_cndmask_b32_e64 v1, v2, v3, s[0:1]
+v_cndmask_b32 v1, v2, v3, s[0:1]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx10_vopc.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx10_vopc.txt
index 2156a682337e8..336f4b2e88f47 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx10_vopc.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx10_vopc.txt
@@ -1,6 +1,6 @@
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32 -disassemble -show-encoding < %s | FileCheck -strict-whitespace -check-prefix=W32 %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -strict-whitespace -check-prefix=W64 %s
-
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -strict-whitespace -check-prefix=W32 %s
# W32: v_cmp_class_f32_e32 vcc_lo, -1, v2 ; encoding: [0xc1,0x04,0x10,0x7d]
# W64: v_cmp_class_f32_e32 vcc, -1, v2 ; encoding: [0xc1,0x04,0x10,0x7d]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_wave64_feature.s b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_wave64_feature.s
new file mode 100644
index 0000000000000..bdea636a9efe3
--- /dev/null
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_wave64_feature.s
@@ -0,0 +1,13 @@
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+wavefrontsize64 -disassemble -o - %s | FileCheck %s
+
+# Make sure there's no assertion when trying to use an unsupported
+# wave64 on a wave32-only target
+
+# CHECK: v_add_f64_e32 v[0:1], 1.0, v[0:1]
+0xf2,0x00,0x00,0x04
+
+# CHECK: v_cmp_eq_u32_e64 s[0:1], 1.0, s1
+0x00,0x00,0x4a,0xd4,0xf2,0x02,0x00,0x00
+
+# CHECK: v_cndmask_b32_e64 v1, v2, v3, s[0:1]
+0x01,0x00,0x01,0xd5,0x02,0x07,0x02,0x00
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx9_wave32_feature.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx9_wave32_feature.txt
new file mode 100644
index 0000000000000..40494b3dfa1ea
--- /dev/null
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx9_wave32_feature.txt
@@ -0,0 +1,13 @@
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx900 -mattr=+wavefrontsize32 -disassemble -o - %s | FileCheck %s
+
+# Make sure there's no assertion when trying to use an unsupported
+# wave32 on a wave64-only target
+
+# CHECK: v_add_f64 v[0:1], 1.0, v[0:1]
+0x00,0x00,0x80,0xd2,0xf2,0x00,0x02,0x00
+
+# CHECK: v_cmp_eq_u32_e64 s[0:1], 1.0, s1
+0x00,0x00,0xca,0xd0,0xf2,0x02,0x00,0x00
+
+# CHECK: v_cndmask_b32_e64 v1, v2, v3, s[0:1]
+0x01,0x00,0x00,0xd1,0x02,0x07,0x02,0x00
More information about the llvm-commits
mailing list