[llvm] a3d7b31 - [AMDGPU][NFC] Add getMaxMUBUFImmOffset
Piotr Sobczak via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 23 02:35:23 PST 2023
Author: Piotr Sobczak
Date: 2023-02-23T11:29:59+01:00
New Revision: a3d7b3121cfd038c21c3b19002aa1d695de993c3
URL: https://github.com/llvm/llvm-project/commit/a3d7b3121cfd038c21c3b19002aa1d695de993c3
DIFF: https://github.com/llvm/llvm-project/commit/a3d7b3121cfd038c21c3b19002aa1d695de993c3.diff
LOG: [AMDGPU][NFC] Add getMaxMUBUFImmOffset
Replace magic constant 4095 with the function getMaxMUBUFImmOffset().
Differential Revision: https://reviews.llvm.org/D144623
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
llvm/lib/Target/AMDGPU/SIInstrInfo.h
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 437598314ff02..35ff1c2f3ad6f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -1375,13 +1375,15 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
AMDGPUTargetMachine::getNullPointerValue(AMDGPUAS::PRIVATE_ADDRESS);
// Don't fold null pointer.
if (Imm != NullPtr) {
- SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32);
+ const uint32_t MaxOffset = SIInstrInfo::getMaxMUBUFImmOffset();
+ SDValue HighBits =
+ CurDAG->getTargetConstant(Imm & ~MaxOffset, DL, MVT::i32);
MachineSDNode *MovHighBits = CurDAG->getMachineNode(
AMDGPU::V_MOV_B32_e32, DL, MVT::i32, HighBits);
VAddr = SDValue(MovHighBits, 0);
SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
- ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16);
+ ImmOffset = CurDAG->getTargetConstant(Imm & MaxOffset, DL, MVT::i16);
return true;
}
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 583d8535f9f21..edc841a29fdcf 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -4195,9 +4195,10 @@ AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const {
// TODO: Should this be inside the render function? The iterator seems to
// move.
+ const uint32_t MaxOffset = SIInstrInfo::getMaxMUBUFImmOffset();
BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
HighBits)
- .addImm(Offset & ~4095);
+ .addImm(Offset & ~MaxOffset);
return {{[=](MachineInstrBuilder &MIB) { // rsrc
MIB.addReg(Info->getScratchRSrcReg());
@@ -4211,7 +4212,7 @@ AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const {
MIB.addImm(0);
},
[=](MachineInstrBuilder &MIB) { // offset
- MIB.addImm(Offset & 4095);
+ MIB.addImm(Offset & MaxOffset);
}}};
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 52f062927e11d..f5805c05a7813 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -4255,7 +4255,7 @@ bool AMDGPULegalizerInfo::legalizeIsAddrSpace(MachineInstr &MI,
std::pair<Register, unsigned>
AMDGPULegalizerInfo::splitBufferOffsets(MachineIRBuilder &B,
Register OrigOffset) const {
- const unsigned MaxImm = 4095;
+ const unsigned MaxImm = SIInstrInfo::getMaxMUBUFImmOffset();
Register BaseReg;
unsigned ImmOffset;
const LLT S32 = LLT::scalar(32);
@@ -4268,13 +4268,14 @@ AMDGPULegalizerInfo::splitBufferOffsets(MachineIRBuilder &B,
if (MRI.getType(BaseReg).isPointer())
BaseReg = B.buildPtrToInt(MRI.getType(OrigOffset), BaseReg).getReg(0);
- // If the immediate value is too big for the immoffset field, put the value
- // and -4096 into the immoffset field so that the value that is copied/added
- // for the voffset field is a multiple of 4096, and it stands more chance
- // of being CSEd with the copy/add for another similar load/store.
- // However, do not do that rounding down to a multiple of 4096 if that is a
- // negative number, as it appears to be illegal to have a negative offset
- // in the vgpr, even if adding the immediate offset makes it positive.
+ // If the immediate value is too big for the immoffset field, put only bits
+ // that would normally fit in the immoffset field. The remaining value that
+ // is copied/added for the voffset field is a multiple of a large power of
+ // 2, and it stands more chance of being CSEd with the copy/add for another
+ // similar load/store.
+ // However, do not do that rounding down if that is a negative
+ // number, as it appears to be illegal to have a negative offset in the
+ // vgpr, even if adding the immediate offset makes it positive.
unsigned Overflow = ImmOffset & ~MaxImm;
ImmOffset -= Overflow;
if ((int32_t)Overflow < 0) {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 7e4dfd9ee75ea..baad6e05510bb 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -1791,7 +1791,7 @@ getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg) {
std::pair<Register, unsigned>
AMDGPURegisterBankInfo::splitBufferOffsets(MachineIRBuilder &B,
Register OrigOffset) const {
- const unsigned MaxImm = 4095;
+ const unsigned MaxImm = SIInstrInfo::getMaxMUBUFImmOffset();
Register BaseReg;
unsigned ImmOffset;
const LLT S32 = LLT::scalar(32);
@@ -1802,13 +1802,14 @@ AMDGPURegisterBankInfo::splitBufferOffsets(MachineIRBuilder &B,
unsigned C1 = 0;
if (ImmOffset != 0) {
- // If the immediate value is too big for the immoffset field, put the value
- // and -4096 into the immoffset field so that the value that is copied/added
- // for the voffset field is a multiple of 4096, and it stands more chance
- // of being CSEd with the copy/add for another similar load/store.
- // However, do not do that rounding down to a multiple of 4096 if that is a
- // negative number, as it appears to be illegal to have a negative offset
- // in the vgpr, even if adding the immediate offset makes it positive.
+ // If the immediate value is too big for the immoffset field, put only bits
+ // that would normally fit in the immoffset field. The remaining value that
+ // is copied/added for the voffset field is a multiple of a large power of
+ // 2, and it stands more chance of being CSEd with the copy/add for another
+ // similar load/store.
+ // However, do not do that rounding down if that is a negative
+ // number, as it appears to be illegal to have a negative offset in the
+ // vgpr, even if adding the immediate offset makes it positive.
unsigned Overflow = ImmOffset & ~MaxImm;
ImmOffset -= Overflow;
if ((int32_t)Overflow < 0) {
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index c247abf60360b..1eb2e2975418d 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -8495,7 +8495,7 @@ SDValue SITargetLowering::makeV_ILLEGAL(SDValue Op, SelectionDAG & DAG) const {
std::pair<SDValue, SDValue> SITargetLowering::splitBufferOffsets(
SDValue Offset, SelectionDAG &DAG) const {
SDLoc DL(Offset);
- const unsigned MaxImm = 4095;
+ const unsigned MaxImm = SIInstrInfo::getMaxMUBUFImmOffset();
SDValue N0 = Offset;
ConstantSDNode *C1 = nullptr;
@@ -8508,13 +8508,14 @@ std::pair<SDValue, SDValue> SITargetLowering::splitBufferOffsets(
if (C1) {
unsigned ImmOffset = C1->getZExtValue();
- // If the immediate value is too big for the immoffset field, put the value
- // and -4096 into the immoffset field so that the value that is copied/added
- // for the voffset field is a multiple of 4096, and it stands more chance
- // of being CSEd with the copy/add for another similar load/store.
- // However, do not do that rounding down to a multiple of 4096 if that is a
- // negative number, as it appears to be illegal to have a negative offset
- // in the vgpr, even if adding the immediate offset makes it positive.
+ // If the immediate value is too big for the immoffset field, put only bits
+ // that would normally fit in the immoffset field. The remaining value that
+ // is copied/added for the voffset field is a large power of 2, and it
+ // stands more chance of being CSEd with the copy/add for another similar
+ // load/store.
+ // However, do not do that rounding down if that is a negative
+ // number, as it appears to be illegal to have a negative offset in the
+ // vgpr, even if adding the immediate offset makes it positive.
unsigned Overflow = ImmOffset & ~MaxImm;
ImmOffset -= Overflow;
if ((int32_t)Overflow < 0) {
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index b9b5091ba914d..463d89b087ce9 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -7874,6 +7874,8 @@ const MCInstrDesc &SIInstrInfo::getKillTerminatorFromPseudo(unsigned Opcode) con
}
}
+unsigned SIInstrInfo::getMaxMUBUFImmOffset() { return (1 << 12) - 1; }
+
void SIInstrInfo::fixImplicitOperands(MachineInstr &MI) const {
if (!ST.isWave32())
return;
@@ -7906,7 +7908,8 @@ bool SIInstrInfo::isBufferSMRD(const MachineInstr &MI) const {
// offsets within the given alignment can be added to the resulting ImmOffset.
bool SIInstrInfo::splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset,
uint32_t &ImmOffset, Align Alignment) const {
- const uint32_t MaxImm = alignDown(4095, Alignment.value());
+ const uint32_t MaxOffset = SIInstrInfo::getMaxMUBUFImmOffset();
+ const uint32_t MaxImm = alignDown(MaxOffset, Alignment.value());
uint32_t Overflow = 0;
if (Imm > MaxImm) {
@@ -7924,8 +7927,8 @@ bool SIInstrInfo::splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset,
//
// Atomic operations fail to work correctly when individual address
// components are unaligned, even if their sum is aligned.
- uint32_t High = (Imm + Alignment.value()) & ~4095;
- uint32_t Low = (Imm + Alignment.value()) & 4095;
+ uint32_t High = (Imm + Alignment.value()) & ~MaxOffset;
+ uint32_t Low = (Imm + Alignment.value()) & MaxOffset;
Imm = Low;
Overflow = High - Alignment.value();
}
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index 9dfa986955061..be1bc0d507e3e 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -1135,6 +1135,8 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
return isUInt<12>(Imm);
}
+ static unsigned getMaxMUBUFImmOffset();
+
bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
Align Alignment = Align(4)) const;
More information about the llvm-commits
mailing list