[llvm] [AMDGPU] CodeGen for GFX12 VBUFFER instructions (PR #75492)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 14 08:24:12 PST 2023
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Mirko Brkušanin (mbrkusanin)
<details>
<summary>Changes</summary>
---
Patch is 1.67 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/75492.diff
58 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/AMDGPU.td (+12)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUGISel.td (+5)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp (+27-8)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h (+1)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp (+35-9)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h (+3)
- (modified) llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp (+1-1)
- (modified) llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp (+1-1)
- (modified) llvm/lib/Target/AMDGPU/BUFInstructions.td (+240-112)
- (modified) llvm/lib/Target/AMDGPU/GCNSubtarget.h (+3)
- (modified) llvm/lib/Target/AMDGPU/SIFrameLowering.cpp (+2-2)
- (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+55-23)
- (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.cpp (+19-4)
- (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.h (+2-4)
- (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.td (+10-2)
- (modified) llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp (+28-6)
- (modified) llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp (+7-8)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f32-no-rtn.ll (+58)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f32-rtn.ll (+62)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll (+322-1)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.add.ll (+408-200)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.cmpswap.ll (+332-163)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.f16.ll (+118)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.ll (+292-143)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.ll (+1463-718)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f16.ll (+235)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f32.ll (+537-266)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll (+1368-674)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.f16.ll (+160)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.ll (+366-179)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.f16.ll (+256)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.i8.ll (+164)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.ll (+1100-542)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.add.ll (+409-201)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.cmpswap.ll (+355-175)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.f16.ll (+146)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.ll (+506-248)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.ll (+642-313)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f16.ll (+126)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f32.ll (+294-144)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.ll (+438-214)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.f16.ll (+147)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.ll (+180)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/mubuf-global.ll (+510)
- (modified) llvm/test/CodeGen/AMDGPU/atomic_optimizations_buffer.ll (+578-7)
- (modified) llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll (+957-7)
- (modified) llvm/test/CodeGen/AMDGPU/atomic_optimizations_raw_buffer.ll (+481-7)
- (modified) llvm/test/CodeGen/AMDGPU/atomic_optimizations_struct_buffer.ll (+547-7)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.load.ll (+289)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.load.ll (+83)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.store.d16.ll (+68)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.store.ll (+88-4)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.buffer.load.ll (+273)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.format.ll (+207)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.load.ll (+113)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.store.d16.ll (+76)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.store.ll (+115)
- (added) llvm/test/CodeGen/AMDGPU/merge-tbuffer-gfx12.mir (+1529)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 799e102d56174d..89403ac3df4e78 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -816,6 +816,12 @@ def FeatureVGPRSingleUseHintInsts : SubtargetFeature<"vgpr-singleuse-hint",
"Has single-use VGPR hint instructions"
>;
+def FeatureHasRestrictedSOffset : SubtargetFeature<"restricted-soffset",
+ "HasRestrictedSOffset",
+ "true",
+ "Has restricted SOffset (immediate not supported)."
+>;
+
//===------------------------------------------------------------===//
// Subtarget Features (options and debugging)
//===------------------------------------------------------------===//
@@ -1461,6 +1467,7 @@ def FeatureISAVersion12 : FeatureSet<
FeaturePackedTID,
FeatureVcmpxPermlaneHazard,
FeatureSALUFloatInsts,
+ FeatureHasRestrictedSOffset,
FeatureVGPRSingleUseHintInsts,
FeatureMADIntraFwdBug]>;
@@ -1773,6 +1780,11 @@ def HasUnpackedD16VMem : Predicate<"Subtarget->hasUnpackedD16VMem()">,
def HasPackedD16VMem : Predicate<"!Subtarget->hasUnpackedD16VMem()">,
AssemblerPredicate<(all_of (not FeatureUnpackedD16VMem))>;
+def HasRestrictedSOffset : Predicate<"Subtarget->hasRestrictedSOffset()">,
+ AssemblerPredicate<(all_of FeatureHasRestrictedSOffset)>;
+def HasUnrestrictedSOffset : Predicate<"!Subtarget->hasRestrictedSOffset()">,
+ AssemblerPredicate<(all_of (not FeatureHasRestrictedSOffset))>;
+
def D16PreservesUnusedBits :
Predicate<"Subtarget->d16PreservesUnusedBits()">,
AssemblerPredicate<(all_of FeatureGFX9Insts, (not FeatureSRAMECC))>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index c61aab4a45c6ad..2b85024a9b40be 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -105,6 +105,11 @@ def gi_global_saddr :
def gi_mubuf_scratch_offset :
GIComplexOperandMatcher<s32, "selectMUBUFScratchOffset">,
GIComplexPatternEquiv<MUBUFScratchOffset>;
+
+def gi_buf_soffset :
+ GIComplexOperandMatcher<s32, "selectBUFSOffset">,
+ GIComplexPatternEquiv<BUFSOffset>;
+
def gi_mubuf_scratch_offen :
GIComplexOperandMatcher<s32, "selectMUBUFScratchOffen">,
GIComplexPatternEquiv<MUBUFScratchOffen>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 66ba08ef0dc12a..98d90814f223c3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -1319,7 +1319,9 @@ bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr, SDValue &VAddr,
Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
- SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
+ SOffset = Subtarget->hasRestrictedSOffset()
+ ? CurDAG->getRegister(AMDGPU::SGPR_NULL, MVT::i32)
+ : CurDAG->getTargetConstant(0, DL, MVT::i32);
ConstantSDNode *C1 = nullptr;
SDValue N0 = Addr;
@@ -1374,7 +1376,8 @@ bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr, SDValue &VAddr,
return true;
}
- if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue())) {
+ const SIInstrInfo *TII = Subtarget->getInstrInfo();
+ if (TII->isLegalMUBUFImmOffset(C1->getZExtValue())) {
// Legal offset for instruction.
Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
return true;
@@ -1448,7 +1451,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
AMDGPUTargetMachine::getNullPointerValue(AMDGPUAS::PRIVATE_ADDRESS);
// Don't fold null pointer.
if (Imm != NullPtr) {
- const uint32_t MaxOffset = SIInstrInfo::getMaxMUBUFImmOffset();
+ const uint32_t MaxOffset = SIInstrInfo::getMaxMUBUFImmOffset(*Subtarget);
SDValue HighBits =
CurDAG->getTargetConstant(Imm & ~MaxOffset, DL, MVT::i32);
MachineSDNode *MovHighBits = CurDAG->getMachineNode(
@@ -1482,8 +1485,9 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
// Therefore it should be safe to fold any VGPR offset on gfx9 into the
// MUBUF vaddr, but not on older subtargets which can only do this if the
// sign bit is known 0.
+ const SIInstrInfo *TII = Subtarget->getInstrInfo();
ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
- if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue()) &&
+ if (TII->isLegalMUBUFImmOffset(C1->getZExtValue()) &&
(!Subtarget->privateMemoryResourceIsRangeChecked() ||
CurDAG->SignBitIsZero(N0))) {
std::tie(VAddr, SOffset) = foldFrameIndex(N0);
@@ -1515,6 +1519,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
SDValue &Offset) const {
const SIRegisterInfo *TRI =
static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
+ const SIInstrInfo *TII = Subtarget->getInstrInfo();
MachineFunction &MF = CurDAG->getMachineFunction();
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
SDLoc DL(Addr);
@@ -1531,14 +1536,14 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
if (Addr.getOpcode() == ISD::ADD) {
// Add (CopyFromReg <sgpr>) <constant>
CAddr = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
- if (!CAddr || !SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue()))
+ if (!CAddr || !TII->isLegalMUBUFImmOffset(CAddr->getZExtValue()))
return false;
if (!IsCopyFromSGPR(*TRI, Addr.getOperand(0)))
return false;
SOffset = Addr.getOperand(0);
} else if ((CAddr = dyn_cast<ConstantSDNode>(Addr)) &&
- SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue())) {
+ TII->isLegalMUBUFImmOffset(CAddr->getZExtValue())) {
// <constant>
SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
} else {
@@ -1555,8 +1560,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
SDValue &SOffset, SDValue &Offset
) const {
SDValue Ptr, VAddr, Offen, Idxen, Addr64;
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
+ const SIInstrInfo *TII = Subtarget->getInstrInfo();
if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64))
return false;
@@ -1577,6 +1581,21 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
return false;
}
+bool AMDGPUDAGToDAGISel::SelectBUFSOffset(SDValue ByteOffsetNode,
+ SDValue &SOffset) const {
+ if (Subtarget->hasRestrictedSOffset()) {
+ if (auto SOffsetConst = dyn_cast<ConstantSDNode>(ByteOffsetNode)) {
+ if (SOffsetConst->isZero()) {
+ SOffset = CurDAG->getRegister(AMDGPU::SGPR_NULL, MVT::i32);
+ return true;
+ }
+ }
+ }
+
+ SOffset = ByteOffsetNode;
+ return true;
+}
+
// Find a load or store from corresponding pattern root.
// Roots may be build_vector, bitconvert or their combinations.
static MemSDNode* findMemSDNode(SDNode *N) {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
index 618c5e02c09406..374108af08cd5c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -179,6 +179,7 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
SDValue &Offset) const;
+ bool SelectBUFSOffset(SDValue Addr, SDValue &SOffset) const;
bool SelectFlatOffsetImpl(SDNode *N, SDValue Addr, SDValue &VAddr,
SDValue &Offset, uint64_t FlatVariant) const;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 75fac09d0b99fa..3aff8bd86bf766 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -3223,6 +3223,7 @@ bool AMDGPUInstructionSelector::selectG_INSERT_VECTOR_ELT(
}
bool AMDGPUInstructionSelector::selectBufferLoadLds(MachineInstr &MI) const {
+ assert(!AMDGPU::isGFX12Plus(STI));
unsigned Opc;
unsigned Size = MI.getOperand(3).getImm();
@@ -3289,8 +3290,8 @@ bool AMDGPUInstructionSelector::selectBufferLoadLds(MachineInstr &MI) const {
MIB.add(MI.getOperand(5 + OpOffset)); // soffset
MIB.add(MI.getOperand(6 + OpOffset)); // imm offset
unsigned Aux = MI.getOperand(7 + OpOffset).getImm();
- MIB.addImm(Aux & AMDGPU::CPol::ALL); // cpol
- MIB.addImm((Aux >> 3) & 1); // swz
+ MIB.addImm(Aux & AMDGPU::CPol::ALL); // cpol
+ MIB.addImm(Aux & AMDGPU::CPol::SWZ_pregfx12 ? 1 : 0); // swz
MachineMemOperand *LoadMMO = *MI.memoperands_begin();
MachinePointerInfo LoadPtrI = LoadMMO->getPointerInfo();
@@ -4430,7 +4431,7 @@ AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const {
// TODO: Should this be inside the render function? The iterator seems to
// move.
- const uint32_t MaxOffset = SIInstrInfo::getMaxMUBUFImmOffset();
+ const uint32_t MaxOffset = SIInstrInfo::getMaxMUBUFImmOffset(*Subtarget);
BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
HighBits)
.addImm(Offset & ~MaxOffset);
@@ -4462,7 +4463,7 @@ AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const {
int64_t ConstOffset;
std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(VAddr, *MRI);
if (ConstOffset != 0) {
- if (SIInstrInfo::isLegalMUBUFImmOffset(ConstOffset) &&
+ if (TII.isLegalMUBUFImmOffset(ConstOffset) &&
(!STI.privateMemoryResourceIsRangeChecked() ||
KB->signBitIsZero(PtrBase))) {
const MachineInstr *PtrBaseDef = MRI->getVRegDef(PtrBase);
@@ -4541,6 +4542,11 @@ bool AMDGPUInstructionSelector::isFlatScratchBaseLegal(Register Addr) const {
if (isNoUnsignedWrap(AddrMI))
return true;
+ // Starting with GFX12, VADDR and SADDR fields in VSCRATCH can use negative
+ // values.
+ if (AMDGPU::isGFX12Plus(STI))
+ return true;
+
Register LHS = AddrMI->getOperand(1).getReg();
Register RHS = AddrMI->getOperand(2).getReg();
@@ -4645,7 +4651,7 @@ AMDGPUInstructionSelector::selectMUBUFScratchOffset(
if (mi_match(Reg, *MRI,
m_GPtrAdd(m_Reg(BasePtr),
m_any_of(m_ICst(Offset), m_Copy(m_ICst(Offset)))))) {
- if (!SIInstrInfo::isLegalMUBUFImmOffset(Offset))
+ if (!TII.isLegalMUBUFImmOffset(Offset))
return {};
MachineInstr *BasePtrDef = getDefIgnoringCopies(BasePtr, *MRI);
Register WaveBase = getWaveAddress(BasePtrDef);
@@ -4664,7 +4670,7 @@ AMDGPUInstructionSelector::selectMUBUFScratchOffset(
}
if (!mi_match(Root.getReg(), *MRI, m_ICst(Offset)) ||
- !SIInstrInfo::isLegalMUBUFImmOffset(Offset))
+ !TII.isLegalMUBUFImmOffset(Offset))
return {};
return {{
@@ -4907,7 +4913,7 @@ bool AMDGPUInstructionSelector::shouldUseAddr64(MUBUFAddressData Addr) const {
/// component.
void AMDGPUInstructionSelector::splitIllegalMUBUFOffset(
MachineIRBuilder &B, Register &SOffset, int64_t &ImmOffset) const {
- if (SIInstrInfo::isLegalMUBUFImmOffset(ImmOffset))
+ if (TII.isLegalMUBUFImmOffset(ImmOffset))
return;
// Illegal offset, store it in soffset.
@@ -5016,6 +5022,8 @@ AMDGPUInstructionSelector::selectMUBUFAddr64(MachineOperand &Root) const {
[=](MachineInstrBuilder &MIB) { // soffset
if (SOffset)
MIB.addReg(SOffset);
+ else if (STI.hasRestrictedSOffset())
+ MIB.addReg(AMDGPU::SGPR_NULL);
else
MIB.addImm(0);
},
@@ -5044,6 +5052,8 @@ AMDGPUInstructionSelector::selectMUBUFOffset(MachineOperand &Root) const {
[=](MachineInstrBuilder &MIB) { // soffset
if (SOffset)
MIB.addReg(SOffset);
+ else if (STI.hasRestrictedSOffset())
+ MIB.addReg(AMDGPU::SGPR_NULL);
else
MIB.addImm(0);
},
@@ -5054,6 +5064,17 @@ AMDGPUInstructionSelector::selectMUBUFOffset(MachineOperand &Root) const {
}};
}
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectBUFSOffset(MachineOperand &Root) const {
+
+ Register SOffset = Root.getReg();
+
+ if (STI.hasRestrictedSOffset() && mi_match(SOffset, *MRI, m_ZeroInt()))
+ SOffset = AMDGPU::SGPR_NULL;
+
+ return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(SOffset); }}};
+}
+
/// Get an immediate that must be 32-bits, and treated as zero extended.
static std::optional<uint64_t>
getConstantZext32Val(Register Reg, const MachineRegisterInfo &MRI) {
@@ -5448,14 +5469,19 @@ void AMDGPUInstructionSelector::renderExtractCPol(MachineInstrBuilder &MIB,
const MachineInstr &MI,
int OpIdx) const {
assert(OpIdx >= 0 && "expected to match an immediate operand");
- MIB.addImm(MI.getOperand(OpIdx).getImm() & AMDGPU::CPol::ALL);
+ MIB.addImm(MI.getOperand(OpIdx).getImm() &
+ (AMDGPU::isGFX12Plus(STI) ? AMDGPU::CPol::ALL
+ : AMDGPU::CPol::ALL_pregfx12));
}
void AMDGPUInstructionSelector::renderExtractSWZ(MachineInstrBuilder &MIB,
const MachineInstr &MI,
int OpIdx) const {
assert(OpIdx >= 0 && "expected to match an immediate operand");
- MIB.addImm((MI.getOperand(OpIdx).getImm() >> 3) & 1);
+ const bool Swizzle = MI.getOperand(OpIdx).getImm() &
+ (AMDGPU::isGFX12Plus(STI) ? AMDGPU::CPol::SWZ
+ : AMDGPU::CPol::SWZ_pregfx12);
+ MIB.addImm(Swizzle);
}
void AMDGPUInstructionSelector::renderSetGLC(MachineInstrBuilder &MIB,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index 00ff1747ce57a3..ab7cc0a6beb8c2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -292,6 +292,9 @@ class AMDGPUInstructionSelector final : public InstructionSelector {
bool selectMUBUFOffsetImpl(MachineOperand &Root, Register &RSrcReg,
Register &SOffset, int64_t &Offset) const;
+ InstructionSelector::ComplexRendererFns
+ selectBUFSOffset(MachineOperand &Root) const;
+
InstructionSelector::ComplexRendererFns
selectMUBUFAddr64(MachineOperand &Root) const;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 489b4f5a8d86a5..d35c033723e604 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -5412,7 +5412,7 @@ bool AMDGPULegalizerInfo::legalizeIsAddrSpace(MachineInstr &MI,
std::pair<Register, unsigned>
AMDGPULegalizerInfo::splitBufferOffsets(MachineIRBuilder &B,
Register OrigOffset) const {
- const unsigned MaxImm = SIInstrInfo::getMaxMUBUFImmOffset();
+ const unsigned MaxImm = SIInstrInfo::getMaxMUBUFImmOffset(ST);
Register BaseReg;
unsigned ImmOffset;
const LLT S32 = LLT::scalar(32);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 03b6d19b2b3c06..afda6a058922d8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -1784,7 +1784,7 @@ getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg) {
std::pair<Register, unsigned>
AMDGPURegisterBankInfo::splitBufferOffsets(MachineIRBuilder &B,
Register OrigOffset) const {
- const unsigned MaxImm = SIInstrInfo::getMaxMUBUFImmOffset();
+ const unsigned MaxImm = SIInstrInfo::getMaxMUBUFImmOffset(Subtarget);
Register BaseReg;
unsigned ImmOffset;
const LLT S32 = LLT::scalar(32);
diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td
index 9832d89c6ac6f7..43d35fa5291ca0 100644
--- a/llvm/lib/Target/AMDGPU/BUFInstructions.td
+++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td
@@ -12,6 +12,8 @@ def MUBUFOffset : ComplexPattern<iPTR, 3, "SelectMUBUFOffset">;
def MUBUFScratchOffen : ComplexPattern<iPTR, 4, "SelectMUBUFScratchOffen", [], [SDNPWantParent]>;
def MUBUFScratchOffset : ComplexPattern<iPTR, 3, "SelectMUBUFScratchOffset", [], [SDNPWantParent], 20>;
+def BUFSOffset : ComplexPattern<iPTR, 1, "SelectBUFSOffset">;
+
def BUFAddrKind {
int Offset = 0;
int OffEn = 1;
@@ -521,11 +523,18 @@ class MUBUF_Addr64_Load_Pat <Instruction inst,
(load_vt (inst i64:$vaddr, v4i32:$srsrc, i32:$soffset, i32:$offset))
>;
-multiclass MUBUF_Pseudo_Load_Pats<string BaseInst, ValueType load_vt = i32, SDPatternOperator ld = null_frag> {
+multiclass MUBUF_Pseudo_Load_Pats_Common<string BaseInst, ValueType load_vt = i32, SDPatternOperator ld = null_frag> {
def : MUBUF_Offset_Load_Pat<!cast<Instruction>(BaseInst#"_OFFSET"), load_vt, ld>;
def : MUBUF_Addr64_Load_Pat<!cast<Instruction>(BaseInst#"_ADDR64"), load_vt, ld>;
}
+multiclass MUBUF_Pseudo_Load_Pats<string BaseInst, ValueType load_vt = i32, SDPatternOperator ld = null_frag>{
+ let SubtargetPredicate = HasUnrestrictedSOffset in {
+ defm : MUBUF_Pseudo_Load_Pats_Common<BaseInst, load_vt, ld>;
+ }
+ defm : MUBUF_Pseudo_Load_Pats_Common<BaseInst # "_VBUFFER", load_vt, ld>;
+}
+
multiclass MUBUF_Pseudo_Loads_Helper<string opName, ValueType load_vt,
bit TiedDest, bit isLds, bit isTFE, bit hasGFX12Enc> {
defvar legal_load_vt = !if(!eq(load_vt, v3f16), v4f16, load_vt);
@@ -606,7 +615,7 @@ class MUBUF_Store_Pseudo <string opName,
let tfe = isTFE;
}
-multiclass MUBUF_Pseudo_Store_Pats<string BaseInst, ValueType store_vt = i32, SDPatternOperator st = null_frag> {
+multiclass MUBUF_Pseudo_Store_Pats_Common<string BaseInst, ValueType store_vt = i32, SDPatternOperator st = null_frag> {
def : GCNPat <
(st store_vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset, i32:$offset)),
@@ -617,6 +626,13 @@ multiclass MUBUF_Pseudo_Store_Pats<string BaseInst, ValueType store_vt = i32, SD
(!cast<MUBUF_Pseudo>(BaseInst # _ADDR64) store_vt:$vdata, i64:$vaddr, v4i32:$srsrc, i32:$soffset, i32:$offset)>;
}
+multiclass MUBUF_Pseudo_Store_Pats<string BaseInst, ValueType store_vt = i32, SDPatternOperator st = null_frag> {
+ let SubtargetPredicate = HasUnrestrictedSOffset in {
+ defm : MUBUF_Pseudo_Store_Pats_Common<BaseInst, store_vt, st>;
+ }
+ defm : MUBUF_Pseudo_Store_Pats_Common<BaseInst # "_VBUFFER", store_vt, st>;
+}
+
multiclass MUBUF_Pseudo_Stores_Helper<string opName, ValueType store_vt,
bit isTFE, bit hasGFX12Enc> {
defvar legal_store_vt = !if(!eq(store_vt, v3f16), v4f16, store_vt);
@@ -1314,33 +1330,33 @@ def BUFFER_GL1_INV : MUBUF_Invalidate<"buffer_gl1_inv">;
// buffer_load/store_format patterns
//===----------------------------------------------------------------------===//
-multiclass MUBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
+multiclass MUBUF_LoadIntrinsicPat_Common<SDPatternOperator name, ValueType vt,
string opcode, ValueType memoryVt = vt> {
defvar st = !if(!eq(memoryVt, vt), name, mubuf_intrinsic_load<name, memoryVt>);
def : GCNPat<
- (vt (st v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset,
+ (vt (st v4i32:$rsrc, 0, 0, (BUFSOffset i32:$soffset), timm:$offset,
timm:$auxiliary, 0)),
(!cast<MUBUF_Pseudo>(opcode # _OFFSET) SReg_128:$rsrc, SCSrc_b32:$soffset, timm:$offset,
(extract_cpol $auxiliary), (extract_swz $auxiliary))
>;
def : GCNPat<
- (vt (st v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset,
+ (vt (st v4i32:$rsrc, 0, i32:$voffset, (BUFSOffset i32:$soffset), timm:$offset,
timm:$auxiliary, 0)),
(!cast<MUBUF_Pseudo>(opcode # _OFFEN) VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset, timm:$offset,
(extract_cpol $auxili...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/75492
More information about the llvm-commits
mailing list