[llvm] f336453 - [AMDGPU][CodeGen] Support (soffset + offset) s_buffer_load's.

Mon Sep 5 04:56:36 PDT 2022

Author: Ivan Kosarev
Date: 2022-09-05T12:53:05+01:00
New Revision: f33645301e9d5213b68f3a152cd4d362876157d4

URL: https://github.com/llvm/llvm-project/commit/f33645301e9d5213b68f3a152cd4d362876157d4
DIFF: https://github.com/llvm/llvm-project/commit/f33645301e9d5213b68f3a152cd4d362876157d4.diff

LOG: [AMDGPU][CodeGen] Support (soffset + offset) s_buffer_load's.

Reviewed By: foad

Differential Revision: https://reviews.llvm.org/D130263

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUGISel.td
    llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
    llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
    llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
    llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
    llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
    llvm/lib/Target/AMDGPU/SMInstructions.td
    llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index 229dfb62ef6ea..aa36045491701 100644

--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -149,6 +149,10 @@ def gi_smrd_buffer_imm32 :
     GIComplexOperandMatcher<s64, "selectSMRDBufferImm32">,
     GIComplexPatternEquiv<SMRDBufferImm32>;
 
+def gi_smrd_buffer_sgpr_imm :
+    GIComplexOperandMatcher<s64, "selectSMRDBufferSgprImm">,
+    GIComplexPatternEquiv<SMRDBufferSgprImm>;
+
 // Separate load nodes are defined to glue m0 initialization in
 // SelectionDAG. The GISel selector can just insert m0 initialization
 // directly before selecting a glue-less load, so hide this

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index deded50afd6d3..5972e9093ae90 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -1884,12 +1884,15 @@ bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr,
   return true;
 }
 
-// Match an immediate (if Imm is true) or an SGPR (if Imm is false)
-// offset. If Imm32Only is true, match only 32-bit immediate offsets
-// available on CI.
-bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue Addr, SDValue ByteOffsetNode,
+// Match an immediate (if Offset is not null) or an SGPR (if SOffset is
+// not null) offset. If Imm32Only is true, match only 32-bit immediate
+// offsets available on CI.
+bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
                                           SDValue *SOffset, SDValue *Offset,
-                                          bool Imm32Only) const {
+                                          bool Imm32Only, bool IsBuffer) const {
+  assert((!SOffset || !Offset) &&
+         "Cannot match both soffset and offset at the same time!");
+
   ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
   if (!C) {
     if (!SOffset)
@@ -1909,10 +1912,12 @@ bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue Addr, SDValue ByteOffsetNode,
   }
 
   SDLoc SL(ByteOffsetNode);
-  // GFX9 and GFX10 have signed byte immediate offsets.
-  int64_t ByteOffset = C->getSExtValue();
+
+  // GFX9 and GFX10 have signed byte immediate offsets. The immediate
+  // offset for S_BUFFER instructions is unsigned.
+  int64_t ByteOffset = IsBuffer ? C->getZExtValue() : C->getSExtValue();
   Optional<int64_t> EncodedOffset =
-      AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset, false);
+      AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset, IsBuffer);
   if (EncodedOffset && Offset && !Imm32Only) {
     *Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32);
     return true;
@@ -1966,16 +1971,15 @@ SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(SDValue Addr) const {
                                         Ops), 0);
 }
 
-// Match a base and an immediate (if Imm is true) or an SGPR
-// (if Imm is false) offset. If Imm32Only is true, match only 32-bit
-// immediate offsets available on CI.
+// Match a base and an immediate (if Offset is not null) or an SGPR (if
+// SOffset is not null) or an immediate+SGPR offset. If Imm32Only is
+// true, match only 32-bit immediate offsets available on CI.
 bool AMDGPUDAGToDAGISel::SelectSMRDBaseOffset(SDValue Addr, SDValue &SBase,
                                               SDValue *SOffset, SDValue *Offset,
-                                              bool Imm32Only) const {
-  SDLoc SL(Addr);
-
+                                              bool Imm32Only,
+                                              bool IsBuffer) const {
   if (SOffset && Offset) {
-    assert(!Imm32Only);
+    assert(!Imm32Only && !IsBuffer);
     SDValue B;
     return SelectSMRDBaseOffset(Addr, B, nullptr, Offset) &&
            SelectSMRDBaseOffset(B, SBase, SOffset, nullptr);
@@ -1983,32 +1987,25 @@ bool AMDGPUDAGToDAGISel::SelectSMRDBaseOffset(SDValue Addr, SDValue &SBase,
 
   // A 32-bit (address + offset) should not cause unsigned 32-bit integer
   // wraparound, because s_load instructions perform the addition in 64 bits.
-  if ((Addr.getValueType() != MVT::i32 ||
-       Addr->getFlags().hasNoUnsignedWrap())) {
-    SDValue N0, N1;
-    // Extract the base and offset if possible.
-    if (CurDAG->isBaseWithConstantOffset(Addr) ||
-        Addr.getOpcode() == ISD::ADD) {
-      N0 = Addr.getOperand(0);
-      N1 = Addr.getOperand(1);
-    } else if (getBaseWithOffsetUsingSplitOR(*CurDAG, Addr, N0, N1)) {
-      assert(N0 && N1 && isa<ConstantSDNode>(N1));
-    }
-    if (N0 && N1) {
-      if (SelectSMRDOffset(N0, N1, SOffset, Offset, Imm32Only)) {
-        SBase = N0;
-        return true;
-      }
-      if (SelectSMRDOffset(N1, N0, SOffset, Offset, Imm32Only)) {
-        SBase = N1;
-        return true;
-      }
-    }
+  if (Addr.getValueType() == MVT::i32 && !Addr->getFlags().hasNoUnsignedWrap())
     return false;
+
+  SDValue N0, N1;
+  // Extract the base and offset if possible.
+  if (CurDAG->isBaseWithConstantOffset(Addr) || Addr.getOpcode() == ISD::ADD) {
+    N0 = Addr.getOperand(0);
+    N1 = Addr.getOperand(1);
+  } else if (getBaseWithOffsetUsingSplitOR(*CurDAG, Addr, N0, N1)) {
+    assert(N0 && N1 && isa<ConstantSDNode>(N1));
   }
-  if (Offset && !SOffset) {
-    SBase = Addr;
-    *Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
+  if (!N0 || !N1)
+    return false;
+  if (SelectSMRDOffset(N1, SOffset, Offset, Imm32Only, IsBuffer)) {
+    SBase = N0;
+    return true;
+  }
+  if (SelectSMRDOffset(N0, SOffset, Offset, Imm32Only, IsBuffer)) {
+    SBase = N1;
     return true;
   }
   return false;
@@ -2017,10 +2014,18 @@ bool AMDGPUDAGToDAGISel::SelectSMRDBaseOffset(SDValue Addr, SDValue &SBase,
 bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
                                     SDValue *SOffset, SDValue *Offset,
                                     bool Imm32Only) const {
-  if (!SelectSMRDBaseOffset(Addr, SBase, SOffset, Offset, Imm32Only))
-    return false;
-  SBase = Expand32BitAddress(SBase);
-  return true;
+  if (SelectSMRDBaseOffset(Addr, SBase, SOffset, Offset, Imm32Only)) {
+    SBase = Expand32BitAddress(SBase);
+    return true;
+  }
+
+  if (Addr.getValueType() == MVT::i32 && Offset && !SOffset) {
+    SBase = Expand32BitAddress(Addr);
+    *Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
+    return true;
+  }
+
+  return false;
 }
 
 bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
@@ -2046,33 +2051,26 @@ bool AMDGPUDAGToDAGISel::SelectSMRDSgprImm(SDValue Addr, SDValue &SBase,
   return SelectSMRD(Addr, SBase, &SOffset, &Offset);
 }
 
-bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
-                                             SDValue &Offset) const {
-  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr)) {
-    // The immediate offset for S_BUFFER instructions is unsigned.
-    if (auto Imm =
-            AMDGPU::getSMRDEncodedOffset(*Subtarget, C->getZExtValue(), true)) {
-      Offset = CurDAG->getTargetConstant(*Imm, SDLoc(Addr), MVT::i32);
-      return true;
-    }
-  }
-
-  return false;
+bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue N, SDValue &Offset) const {
+  return SelectSMRDOffset(N, /* SOffset */ nullptr, &Offset,
+                          /* Imm32Only */ false, /* IsBuffer */ true);
 }
 
-bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
+bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue N,
                                                SDValue &Offset) const {
   assert(Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS);
-
-  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr)) {
-    if (auto Imm = AMDGPU::getSMRDEncodedLiteralOffset32(*Subtarget,
-                                                         C->getZExtValue())) {
-      Offset = CurDAG->getTargetConstant(*Imm, SDLoc(Addr), MVT::i32);
-      return true;
-    }
-  }
-
-  return false;
+  return SelectSMRDOffset(N, /* SOffset */ nullptr, &Offset,
+                          /* Imm32Only */ true, /* IsBuffer */ true);
+}
+
+bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgprImm(SDValue N, SDValue &SOffset,
+                                                 SDValue &Offset) const {
+  // Match the (soffset + offset) pair as a 32-bit register base and
+  // an immediate offset.
+  return N.getValueType() == MVT::i32 &&
+         SelectSMRDBaseOffset(N, /* SBase */ SOffset, /* SOffset*/ nullptr,
+                              &Offset, /* Imm32Only */ false,
+                              /* IsBuffer */ true);
 }
 
 bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
index fda2bfac71fcf..b5e39f6ed777f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -193,11 +193,13 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
   bool SelectScratchSVAddr(SDNode *N, SDValue Addr, SDValue &VAddr,
                            SDValue &SAddr, SDValue &Offset) const;
 
-  bool SelectSMRDOffset(SDValue Base, SDValue ByteOffsetNode, SDValue *SOffset,
-                        SDValue *Offset, bool Imm32Only = false) const;
+  bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue *SOffset,
+                        SDValue *Offset, bool Imm32Only = false,
+                        bool IsBuffer = false) const;
   SDValue Expand32BitAddress(SDValue Addr) const;
   bool SelectSMRDBaseOffset(SDValue Addr, SDValue &SBase, SDValue *SOffset,
-                            SDValue *Offset, bool Imm32Only = false) const;
+                            SDValue *Offset, bool Imm32Only = false,
+                            bool IsBuffer = false) const;
   bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue *SOffset,
                   SDValue *Offset, bool Imm32Only = false) const;
   bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
@@ -205,8 +207,10 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
   bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &SOffset) const;
   bool SelectSMRDSgprImm(SDValue Addr, SDValue &SBase, SDValue &SOffset,
                          SDValue &Offset) const;
-  bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
-  bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
+  bool SelectSMRDBufferImm(SDValue N, SDValue &Offset) const;
+  bool SelectSMRDBufferImm32(SDValue N, SDValue &Offset) const;
+  bool SelectSMRDBufferSgprImm(SDValue N, SDValue &SOffset,
+                               SDValue &Offset) const;
   bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;
 
   bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index c1468bd71c8a3..d85bf107f34e8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -4911,6 +4911,27 @@ AMDGPUInstructionSelector::selectSMRDBufferImm32(MachineOperand &Root) const {
   return {{ [=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedImm); }  }};
 }
 
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectSMRDBufferSgprImm(MachineOperand &Root) const {
+  // Match the (soffset + offset) pair as a 32-bit register base and
+  // an immediate offset.
+  Register SOffset;
+  unsigned Offset;
+  std::tie(SOffset, Offset) =
+      AMDGPU::getBaseWithConstantOffset(*MRI, Root.getReg());
+  if (!SOffset)
+    return None;
+
+  Optional<int64_t> EncodedOffset =
+      AMDGPU::getSMRDEncodedOffset(STI, Offset, /* IsBuffer */ true);
+  if (!EncodedOffset)
+    return None;
+
+  assert(MRI->getType(SOffset) == LLT::scalar(32));
+  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(SOffset); },
+           [=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedOffset); }}};
+}
+
 void AMDGPUInstructionSelector::renderTruncImm32(MachineInstrBuilder &MIB,
                                                  const MachineInstr &MI,
                                                  int OpIdx) const {

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index 5baf55d234802..d8caaed2810ff 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -294,6 +294,7 @@ class AMDGPUInstructionSelector final : public InstructionSelector {
 
   ComplexRendererFns selectSMRDBufferImm(MachineOperand &Root) const;
   ComplexRendererFns selectSMRDBufferImm32(MachineOperand &Root) const;
+  ComplexRendererFns selectSMRDBufferSgprImm(MachineOperand &Root) const;
 
   void renderTruncImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
                         int OpIdx = -1) const;

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 04f413e463840..ba94c597d33dc 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -1806,6 +1806,7 @@ AMDGPURegisterBankInfo::splitBufferOffsets(MachineIRBuilder &B,
   unsigned ImmOffset;
   const LLT S32 = LLT::scalar(32);
 
+  // TODO: Use AMDGPU::getBaseWithConstantOffset() instead.
   std::tie(BaseReg, ImmOffset) = getBaseWithConstantOffset(*B.getMRI(),
                                                            OrigOffset);
 

diff  --git a/llvm/lib/Target/AMDGPU/SMInstructions.td b/llvm/lib/Target/AMDGPU/SMInstructions.td
index 4699b0ecf2ecb..f271f6d42857d 100644
--- a/llvm/lib/Target/AMDGPU/SMInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SMInstructions.td
@@ -834,6 +834,7 @@ def SMRDSgpr        : ComplexPattern<iPTR, 2, "SelectSMRDSgpr">;
 def SMRDSgprImm     : ComplexPattern<iPTR, 3, "SelectSMRDSgprImm">;
 def SMRDBufferImm   : ComplexPattern<iPTR, 1, "SelectSMRDBufferImm">;
 def SMRDBufferImm32 : ComplexPattern<iPTR, 1, "SelectSMRDBufferImm32">;
+def SMRDBufferSgprImm : ComplexPattern<iPTR, 2, "SelectSMRDBufferSgprImm">;
 
 multiclass SMRD_Pattern <string Instr, ValueType vt> {
 
@@ -889,9 +890,18 @@ multiclass SMLoad_Pattern <string Instr, ValueType vt> {
 
   // 3. Offset loaded in an 32bit SGPR
   def : GCNPat <
-    (SIsbuffer_load v4i32:$sbase, i32:$offset, timm:$cachepolicy),
-    (vt (!cast<SM_Pseudo>(Instr#"_SGPR") SReg_128:$sbase, SReg_32:$offset, (extract_cpol $cachepolicy)))
+    (SIsbuffer_load v4i32:$sbase, i32:$soffset, timm:$cachepolicy),
+    (vt (!cast<SM_Pseudo>(Instr#"_SGPR") SReg_128:$sbase, SReg_32:$soffset, (extract_cpol $cachepolicy)))
   >;
+
+  // 4. Offset as an 32-bit SGPR + immediate
+  def : GCNPat <
+    (SIsbuffer_load v4i32:$sbase, (SMRDBufferSgprImm i32:$soffset, i32:$offset),
+                    timm:$cachepolicy),
+    (vt (!cast<SM_Pseudo>(Instr#"_SGPR_IMM") SReg_128:$sbase, SReg_32:$soffset, i32imm:$offset,
+                                             (extract_cpol $cachepolicy)))> {
+    let OtherPredicates = [isGFX9Plus];
+  }
 }
 
 // Global and constant loads can be selected to either MUBUF or SMRD

diff  --git a/llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll b/llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll
index 1858dafab3674..bcf446eea0b75 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll
@@ -88,8 +88,32 @@ define amdgpu_ps void @test_sgpr_plus_imm_offset_x2(i8 addrspace(4)* inreg %base
   ret void
 }
 
+; GCN-LABEL: name: test_buffer_load_sgpr_plus_imm_offset
+; SDAG-DAG: %[[BASE0:.*]]:sgpr_32 = COPY $sgpr0
+; SDAG-DAG: %[[BASE1:.*]]:sgpr_32 = COPY $sgpr1
+; SDAG-DAG: %[[BASE2:.*]]:sgpr_32 = COPY $sgpr2
+; SDAG-DAG: %[[BASE3:.*]]:sgpr_32 = COPY $sgpr3
+; SDAG-DAG: %[[OFFSET:.*]]:sgpr_32 = COPY $sgpr4
+; SDAG-DAG: %[[BASE:.*]]:sgpr_128 = REG_SEQUENCE %[[BASE0]], %subreg.sub0, %[[BASE1]], %subreg.sub1, %[[BASE2]], %subreg.sub2, %[[BASE3]], %subreg.sub3
+; SDAG: S_BUFFER_LOAD_DWORD_SGPR_IMM killed %[[BASE]], %[[OFFSET]], 77,
+; GISEL-DAG: %[[BASE0:.*]]:sreg_32 = COPY $sgpr0
+; GISEL-DAG: %[[BASE1:.*]]:sreg_32 = COPY $sgpr1
+; GISEL-DAG: %[[BASE2:.*]]:sreg_32 = COPY $sgpr2
+; GISEL-DAG: %[[BASE3:.*]]:sreg_32 = COPY $sgpr3
+; GISEL-DAG: %[[OFFSET:.*]]:sreg_32 = COPY $sgpr4
+; GISEL-DAG: %[[BASE:.*]]:sgpr_128 = REG_SEQUENCE %[[BASE0]], %subreg.sub0, %[[BASE1]], %subreg.sub1, %[[BASE2]], %subreg.sub2, %[[BASE3]], %subreg.sub3
+; GISEL: S_BUFFER_LOAD_DWORD_SGPR_IMM %[[BASE]], %[[OFFSET]], 77,
+define amdgpu_cs void @test_buffer_load_sgpr_plus_imm_offset(<4 x i32> inreg %base, i32 inreg %i, i32 addrspace(1)* inreg %out) {
+  %off = add nuw nsw i32 %i, 77
+  %v = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %base, i32 %off, i32 0)
+  store i32 %v, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
 declare void @llvm.amdgcn.raw.buffer.store.v4i32(<4 x i32>, <4 x i32>, i32, i32, i32 immarg) #1
 
+declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i32 immarg) nounwind readnone willreturn
+
 ; Function Attrs: nounwind readnone speculatable
 declare i32 @llvm.amdgcn.reloc.constant(metadata) #3