[llvm] c9f4df5 - [AMDGPU] Move splitMUBUFOffset from AMDGPUBaseInfo to SIInstrInfo
Jay Foad via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 22 08:19:12 PST 2023
Author: Jay Foad
Date: 2023-02-22T16:19:05Z
New Revision: c9f4df57caea7e2269d8e9264e02d780bf8eebef
URL: https://github.com/llvm/llvm-project/commit/c9f4df57caea7e2269d8e9264e02d780bf8eebef
DIFF: https://github.com/llvm/llvm-project/commit/c9f4df57caea7e2269d8e9264e02d780bf8eebef.diff
LOG: [AMDGPU] Move splitMUBUFOffset from AMDGPUBaseInfo to SIInstrInfo
Moving this out of AMDGPUBaseInfo enforces that AMDGPUBaseInfo should
not be calling into GCNSubtarget.
Differential Revision: https://reviews.llvm.org/D144564
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
llvm/lib/Target/AMDGPU/SIInstrInfo.h
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index d462ee5ce1a26..7e4dfd9ee75ea 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -1233,31 +1233,18 @@ bool AMDGPURegisterBankInfo::applyMappingImage(
return true;
}
-static Register getSrcRegIgnoringCopies(const MachineRegisterInfo &MRI,
- Register Reg) {
- MachineInstr *Def = getDefIgnoringCopies(Reg, MRI);
- if (!Def)
- return Reg;
-
- // TODO: Guard against this being an implicit def
- return Def->getOperand(0).getReg();
-}
-
// Analyze a combined offset from an llvm.amdgcn.s.buffer intrinsic and store
// the three offsets (voffset, soffset and instoffset)
-static unsigned setBufferOffsets(MachineIRBuilder &B,
- const AMDGPURegisterBankInfo &RBI,
- Register CombinedOffset, Register &VOffsetReg,
- Register &SOffsetReg, int64_t &InstOffsetVal,
- Align Alignment) {
+unsigned AMDGPURegisterBankInfo::setBufferOffsets(
+ MachineIRBuilder &B, Register CombinedOffset, Register &VOffsetReg,
+ Register &SOffsetReg, int64_t &InstOffsetVal, Align Alignment) const {
const LLT S32 = LLT::scalar(32);
MachineRegisterInfo *MRI = B.getMRI();
if (std::optional<int64_t> Imm =
getIConstantVRegSExtVal(CombinedOffset, *MRI)) {
uint32_t SOffset, ImmOffset;
- if (AMDGPU::splitMUBUFOffset(*Imm, SOffset, ImmOffset, &RBI.Subtarget,
- Alignment)) {
+ if (TII->splitMUBUFOffset(*Imm, SOffset, ImmOffset, Alignment)) {
VOffsetReg = B.buildConstant(S32, 0).getReg(0);
SOffsetReg = B.buildConstant(S32, SOffset).getReg(0);
InstOffsetVal = ImmOffset;
@@ -1275,9 +1262,9 @@ static unsigned setBufferOffsets(MachineIRBuilder &B,
AMDGPU::getBaseWithConstantOffset(*MRI, CombinedOffset);
uint32_t SOffset, ImmOffset;
- if ((int)Offset > 0 && AMDGPU::splitMUBUFOffset(Offset, SOffset, ImmOffset,
- &RBI.Subtarget, Alignment)) {
- if (RBI.getRegBank(Base, *MRI, *RBI.TRI) == &AMDGPU::VGPRRegBank) {
+ if ((int)Offset > 0 &&
+ TII->splitMUBUFOffset(Offset, SOffset, ImmOffset, Alignment)) {
+ if (getRegBank(Base, *MRI, *TRI) == &AMDGPU::VGPRRegBank) {
VOffsetReg = Base;
SOffsetReg = B.buildConstant(S32, SOffset).getReg(0);
B.getMRI()->setRegBank(SOffsetReg, AMDGPU::SGPRRegBank);
@@ -1298,11 +1285,11 @@ static unsigned setBufferOffsets(MachineIRBuilder &B,
// Handle the variable sgpr + vgpr case.
MachineInstr *Add = getOpcodeDef(AMDGPU::G_ADD, CombinedOffset, *MRI);
if (Add && (int)Offset >= 0) {
- Register Src0 = getSrcRegIgnoringCopies(*MRI, Add->getOperand(1).getReg());
- Register Src1 = getSrcRegIgnoringCopies(*MRI, Add->getOperand(2).getReg());
+ Register Src0 = getSrcRegIgnoringCopies(Add->getOperand(1).getReg(), *MRI);
+ Register Src1 = getSrcRegIgnoringCopies(Add->getOperand(2).getReg(), *MRI);
- const RegisterBank *Src0Bank = RBI.getRegBank(Src0, *MRI, *RBI.TRI);
- const RegisterBank *Src1Bank = RBI.getRegBank(Src1, *MRI, *RBI.TRI);
+ const RegisterBank *Src0Bank = getRegBank(Src0, *MRI, *TRI);
+ const RegisterBank *Src1Bank = getRegBank(Src1, *MRI, *TRI);
if (Src0Bank == &AMDGPU::VGPRRegBank && Src1Bank == &AMDGPU::SGPRRegBank) {
VOffsetReg = Src0;
@@ -1319,7 +1306,7 @@ static unsigned setBufferOffsets(MachineIRBuilder &B,
// Ensure we have a VGPR for the combined offset. This could be an issue if we
// have an SGPR offset and a VGPR resource.
- if (RBI.getRegBank(CombinedOffset, *MRI, *RBI.TRI) == &AMDGPU::VGPRRegBank) {
+ if (getRegBank(CombinedOffset, *MRI, *TRI) == &AMDGPU::VGPRRegBank) {
VOffsetReg = CombinedOffset;
} else {
VOffsetReg = B.buildCopy(S32, CombinedOffset).getReg(0);
@@ -1369,8 +1356,8 @@ bool AMDGPURegisterBankInfo::applyMappingSBufferLoad(
Register VOffset;
int64_t ImmOffset = 0;
- unsigned MMOOffset = setBufferOffsets(B, *this, MI.getOperand(2).getReg(),
- VOffset, SOffset, ImmOffset, Alignment);
+ unsigned MMOOffset = setBufferOffsets(B, MI.getOperand(2).getReg(), VOffset,
+ SOffset, ImmOffset, Alignment);
// TODO: 96-bit loads were widened to 128-bit results. Shrink the result if we
// can, but we need to track an MMO for that.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
index c9741c2202e65..63c4e7e923995 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
@@ -82,6 +82,9 @@ class AMDGPURegisterBankInfo final : public AMDGPUGenRegisterBankInfo {
applyMappingImage(MachineInstr &MI,
const OperandsMapper &OpdMapper,
MachineRegisterInfo &MRI, int RSrcIdx) const;
+ unsigned setBufferOffsets(MachineIRBuilder &B, Register CombinedOffset,
+ Register &VOffsetReg, Register &SOffsetReg,
+ int64_t &InstOffsetVal, Align Alignment) const;
bool applyMappingSBufferLoad(const OperandsMapper &OpdMapper) const;
bool applyMappingBFE(const OperandsMapper &OpdMapper, bool Signed) const;
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index c33250904529b..c247abf60360b 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -8545,12 +8545,12 @@ std::pair<SDValue, SDValue> SITargetLowering::splitBufferOffsets(
void SITargetLowering::setBufferOffsets(SDValue CombinedOffset,
SelectionDAG &DAG, SDValue *Offsets,
Align Alignment) const {
+ const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
SDLoc DL(CombinedOffset);
if (auto C = dyn_cast<ConstantSDNode>(CombinedOffset)) {
uint32_t Imm = C->getZExtValue();
uint32_t SOffset, ImmOffset;
- if (AMDGPU::splitMUBUFOffset(Imm, SOffset, ImmOffset, Subtarget,
- Alignment)) {
+ if (TII->splitMUBUFOffset(Imm, SOffset, ImmOffset, Alignment)) {
Offsets[0] = DAG.getConstant(0, DL, MVT::i32);
Offsets[1] = DAG.getConstant(SOffset, DL, MVT::i32);
Offsets[2] = DAG.getTargetConstant(ImmOffset, DL, MVT::i32);
@@ -8562,8 +8562,8 @@ void SITargetLowering::setBufferOffsets(SDValue CombinedOffset,
SDValue N1 = CombinedOffset.getOperand(1);
uint32_t SOffset, ImmOffset;
int Offset = cast<ConstantSDNode>(N1)->getSExtValue();
- if (Offset >= 0 && AMDGPU::splitMUBUFOffset(Offset, SOffset, ImmOffset,
- Subtarget, Alignment)) {
+ if (Offset >= 0 &&
+ TII->splitMUBUFOffset(Offset, SOffset, ImmOffset, Alignment)) {
Offsets[0] = N0;
Offsets[1] = DAG.getConstant(SOffset, DL, MVT::i32);
Offsets[2] = DAG.getTargetConstant(ImmOffset, DL, MVT::i32);
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 4955c794f53ba..b9b5091ba914d 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -7897,6 +7897,51 @@ bool SIInstrInfo::isBufferSMRD(const MachineInstr &MI) const {
return RI.getRegClass(RCID)->hasSubClassEq(&AMDGPU::SGPR_128RegClass);
}
+// Given Imm, split it into the values to put into the SOffset and ImmOffset
+// fields in an MUBUF instruction. Return false if it is not possible (due to a
+// hardware bug needing a workaround).
+//
+// The required alignment ensures that individual address components remain
+// aligned if they are aligned to begin with. It also ensures that additional
+// offsets within the given alignment can be added to the resulting ImmOffset.
+bool SIInstrInfo::splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset,
+ uint32_t &ImmOffset, Align Alignment) const {
+ const uint32_t MaxImm = alignDown(4095, Alignment.value());
+ uint32_t Overflow = 0;
+
+ if (Imm > MaxImm) {
+ if (Imm <= MaxImm + 64) {
+ // Use an SOffset inline constant for 4..64
+ Overflow = Imm - MaxImm;
+ Imm = MaxImm;
+ } else {
+ // Try to keep the same value in SOffset for adjacent loads, so that
+ // the corresponding register contents can be re-used.
+ //
+ // Load values with all low-bits (except for alignment bits) set into
+ // SOffset, so that a larger range of values can be covered using
+ // s_movk_i32.
+ //
+ // Atomic operations fail to work correctly when individual address
+ // components are unaligned, even if their sum is aligned.
+ uint32_t High = (Imm + Alignment.value()) & ~4095;
+ uint32_t Low = (Imm + Alignment.value()) & 4095;
+ Imm = Low;
+ Overflow = High - Alignment.value();
+ }
+ }
+
+ // There is a hardware bug in SI and CI which prevents address clamping in
+ // MUBUF instructions from working correctly with SOffsets. The immediate
+ // offset is unaffected.
+ if (Overflow > 0 && ST.getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS)
+ return false;
+
+ ImmOffset = Imm;
+ SOffset = Overflow;
+ return true;
+}
+
// Depending on the used address space and instructions, some immediate offsets
// are allowed and some are not.
// In general, flat instruction offsets can only be non-negative, global and
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index ce02b250084e6..9dfa986955061 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -1135,6 +1135,9 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
return isUInt<12>(Imm);
}
+ bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
+ Align Alignment = Align(4)) const;
+
/// Returns if \p Offset is legal for the subtarget as the offset to a FLAT
/// encoded instruction. If \p Signed, this is for an instruction that
/// interprets the offset as signed.
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index fcc46625014cc..0643364de7bf1 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -10,15 +10,17 @@
#include "AMDGPU.h"
#include "AMDGPUAsmUtils.h"
#include "AMDKernelCodeT.h"
-#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/CommandLine.h"
@@ -2587,52 +2589,6 @@ unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST) {
return 13;
}
-// Given Imm, split it into the values to put into the SOffset and ImmOffset
-// fields in an MUBUF instruction. Return false if it is not possible (due to a
-// hardware bug needing a workaround).
-//
-// The required alignment ensures that individual address components remain
-// aligned if they are aligned to begin with. It also ensures that additional
-// offsets within the given alignment can be added to the resulting ImmOffset.
-bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
- const GCNSubtarget *Subtarget, Align Alignment) {
- const uint32_t MaxImm = alignDown(4095, Alignment.value());
- uint32_t Overflow = 0;
-
- if (Imm > MaxImm) {
- if (Imm <= MaxImm + 64) {
- // Use an SOffset inline constant for 4..64
- Overflow = Imm - MaxImm;
- Imm = MaxImm;
- } else {
- // Try to keep the same value in SOffset for adjacent loads, so that
- // the corresponding register contents can be re-used.
- //
- // Load values with all low-bits (except for alignment bits) set into
- // SOffset, so that a larger range of values can be covered using
- // s_movk_i32.
- //
- // Atomic operations fail to work correctly when individual address
- // components are unaligned, even if their sum is aligned.
- uint32_t High = (Imm + Alignment.value()) & ~4095;
- uint32_t Low = (Imm + Alignment.value()) & 4095;
- Imm = Low;
- Overflow = High - Alignment.value();
- }
- }
-
- // There is a hardware bug in SI and CI which prevents address clamping in
- // MUBUF instructions from working correctly with SOffsets. The immediate
- // offset is unaffected.
- if (Overflow > 0 &&
- Subtarget->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS)
- return false;
-
- ImmOffset = Imm;
- SOffset = Overflow;
- return true;
-}
-
SIModeRegisterDefaults::SIModeRegisterDefaults(const Function &F) {
*this = getDefaultForCallingConv(F.getCallingConv());
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index ef332e162de70..1a99f4dc43568 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -26,7 +26,6 @@ namespace llvm {
struct Align;
class Argument;
class Function;
-class GCNSubtarget;
class GlobalValue;
class MCInstrInfo;
class MCRegisterClass;
@@ -1301,10 +1300,6 @@ unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST);
/// not the encoded offset.
bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
-bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
- const GCNSubtarget *Subtarget,
- Align Alignment = Align(4));
-
LLVM_READNONE
inline bool isLegal64BitDPPControl(unsigned DC) {
return DC >= DPP::ROW_NEWBCAST_FIRST && DC <= DPP::ROW_NEWBCAST_LAST;
More information about the llvm-commits mailing list