[llvm] [AMDGPU] Folding imm offset in more cases for scratch access (PR #70634)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 28 17:43:51 PST 2023
================
@@ -4486,14 +4494,83 @@ bool AMDGPUInstructionSelector::isDSOffset2Legal(Register Base, int64_t Offset0,
return KB->signBitIsZero(Base);
}
+// Return whether the operation has the NoUnsignedWrap property.
+bool isNoUnsignedWrap(MachineInstr *Addr) {
+ return Addr->getOpcode() == TargetOpcode::G_OR ||
+ (Addr->getOpcode() == TargetOpcode::G_PTR_ADD &&
+ Addr->getFlag(MachineInstr::NoUWrap));
+}
+
+// Check that the base address of flat scratch load/store in the form of `base +
+// offset` is legal to be put in SGPR/VGPR (i.e. unsigned per hardware
+// requirement). We always treat the first operand as the base address here.
bool AMDGPUInstructionSelector::isFlatScratchBaseLegal(
- Register Base, uint64_t FlatVariant) const {
+ Register Addr, uint64_t FlatVariant) const {
if (FlatVariant != SIInstrFlags::FlatScratch)
return true;
- // When value in 32-bit Base can be negative calculate scratch offset using
- // 32-bit add instruction, otherwise use Base(unsigned) + offset.
- return KB->signBitIsZero(Base);
+ auto AddrDef = getDefSrcRegIgnoringCopies(Addr, *MRI);
+ auto *AddrMI = AddrDef->MI;
+
+ if (isNoUnsignedWrap(AddrMI))
+ return true;
+
+ Register LHS = AddrMI->getOperand(1).getReg();
+ Register RHS = AddrMI->getOperand(2).getReg();
+
+ if (AddrMI->getOpcode() == TargetOpcode::G_PTR_ADD) {
+ auto RhsValReg = getIConstantVRegValWithLookThrough(RHS, *MRI);
----------------
arsenm wrote:
I think auto hurts here
https://github.com/llvm/llvm-project/pull/70634
More information about the llvm-commits
mailing list