[llvm] [AMDGPU] Folding imm offset in more cases for scratch access (PR #70634)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 28 17:43:51 PST 2023
================
@@ -4486,14 +4494,83 @@ bool AMDGPUInstructionSelector::isDSOffset2Legal(Register Base, int64_t Offset0,
return KB->signBitIsZero(Base);
}
+// Return whether the operation has the NoUnsignedWrap property.
+bool isNoUnsignedWrap(MachineInstr *Addr) {
+ return Addr->getOpcode() == TargetOpcode::G_OR ||
+ (Addr->getOpcode() == TargetOpcode::G_PTR_ADD &&
+ Addr->getFlag(MachineInstr::NoUWrap));
+}
+
+// Check that the base address of flat scratch load/store in the form of `base +
+// offset` is legal to be put in SGPR/VGPR (i.e. unsigned per hardware
+// requirement). We always treat the first operand as the base address here.
bool AMDGPUInstructionSelector::isFlatScratchBaseLegal(
- Register Base, uint64_t FlatVariant) const {
+ Register Addr, uint64_t FlatVariant) const {
if (FlatVariant != SIInstrFlags::FlatScratch)
return true;
- // When value in 32-bit Base can be negative calculate scratch offset using
- // 32-bit add instruction, otherwise use Base(unsigned) + offset.
- return KB->signBitIsZero(Base);
+ auto AddrDef = getDefSrcRegIgnoringCopies(Addr, *MRI);
+ auto *AddrMI = AddrDef->MI;
+
+ if (isNoUnsignedWrap(AddrMI))
+ return true;
+
+ Register LHS = AddrMI->getOperand(1).getReg();
+ Register RHS = AddrMI->getOperand(2).getReg();
+
+ if (AddrMI->getOpcode() == TargetOpcode::G_PTR_ADD) {
+ auto RhsValReg = getIConstantVRegValWithLookThrough(RHS, *MRI);
----------------
arsenm wrote:
I think auto hurts here
https://github.com/llvm/llvm-project/pull/70634
More information about the llvm-commits
mailing list