[llvm] f336453 - [AMDGPU][CodeGen] Support (soffset + offset) s_buffer_load's.
Ivan Kosarev via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 5 04:56:36 PDT 2022
Author: Ivan Kosarev
Date: 2022-09-05T12:53:05+01:00
New Revision: f33645301e9d5213b68f3a152cd4d362876157d4
URL: https://github.com/llvm/llvm-project/commit/f33645301e9d5213b68f3a152cd4d362876157d4
DIFF: https://github.com/llvm/llvm-project/commit/f33645301e9d5213b68f3a152cd4d362876157d4.diff
LOG: [AMDGPU][CodeGen] Support (soffset + offset) s_buffer_load's.
Reviewed By: foad
Differential Revision: https://reviews.llvm.org/D130263
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUGISel.td
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
llvm/lib/Target/AMDGPU/SMInstructions.td
llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index 229dfb62ef6ea..aa36045491701 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -149,6 +149,10 @@ def gi_smrd_buffer_imm32 :
GIComplexOperandMatcher<s64, "selectSMRDBufferImm32">,
GIComplexPatternEquiv<SMRDBufferImm32>;
+def gi_smrd_buffer_sgpr_imm :
+ GIComplexOperandMatcher<s64, "selectSMRDBufferSgprImm">,
+ GIComplexPatternEquiv<SMRDBufferSgprImm>;
+
// Separate load nodes are defined to glue m0 initialization in
// SelectionDAG. The GISel selector can just insert m0 initialization
// directly before selecting a glue-less load, so hide this
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index deded50afd6d3..5972e9093ae90 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -1884,12 +1884,15 @@ bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr,
return true;
}
-// Match an immediate (if Imm is true) or an SGPR (if Imm is false)
-// offset. If Imm32Only is true, match only 32-bit immediate offsets
-// available on CI.
-bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue Addr, SDValue ByteOffsetNode,
+// Match an immediate (if Offset is not null) or an SGPR (if SOffset is
+// not null) offset. If Imm32Only is true, match only 32-bit immediate
+// offsets available on CI.
+bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
SDValue *SOffset, SDValue *Offset,
- bool Imm32Only) const {
+ bool Imm32Only, bool IsBuffer) const {
+ assert((!SOffset || !Offset) &&
+ "Cannot match both soffset and offset at the same time!");
+
ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
if (!C) {
if (!SOffset)
@@ -1909,10 +1912,12 @@ bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue Addr, SDValue ByteOffsetNode,
}
SDLoc SL(ByteOffsetNode);
- // GFX9 and GFX10 have signed byte immediate offsets.
- int64_t ByteOffset = C->getSExtValue();
+
+ // GFX9 and GFX10 have signed byte immediate offsets. The immediate
+ // offset for S_BUFFER instructions is unsigned.
+ int64_t ByteOffset = IsBuffer ? C->getZExtValue() : C->getSExtValue();
Optional<int64_t> EncodedOffset =
- AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset, false);
+ AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset, IsBuffer);
if (EncodedOffset && Offset && !Imm32Only) {
*Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32);
return true;
@@ -1966,16 +1971,15 @@ SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(SDValue Addr) const {
Ops), 0);
}
-// Match a base and an immediate (if Imm is true) or an SGPR
-// (if Imm is false) offset. If Imm32Only is true, match only 32-bit
-// immediate offsets available on CI.
+// Match a base and an immediate (if Offset is not null) or an SGPR (if
+// SOffset is not null) or an immediate+SGPR offset. If Imm32Only is
+// true, match only 32-bit immediate offsets available on CI.
bool AMDGPUDAGToDAGISel::SelectSMRDBaseOffset(SDValue Addr, SDValue &SBase,
SDValue *SOffset, SDValue *Offset,
- bool Imm32Only) const {
- SDLoc SL(Addr);
-
+ bool Imm32Only,
+ bool IsBuffer) const {
if (SOffset && Offset) {
- assert(!Imm32Only);
+ assert(!Imm32Only && !IsBuffer);
SDValue B;
return SelectSMRDBaseOffset(Addr, B, nullptr, Offset) &&
SelectSMRDBaseOffset(B, SBase, SOffset, nullptr);
@@ -1983,32 +1987,25 @@ bool AMDGPUDAGToDAGISel::SelectSMRDBaseOffset(SDValue Addr, SDValue &SBase,
// A 32-bit (address + offset) should not cause unsigned 32-bit integer
// wraparound, because s_load instructions perform the addition in 64 bits.
- if ((Addr.getValueType() != MVT::i32 ||
- Addr->getFlags().hasNoUnsignedWrap())) {
- SDValue N0, N1;
- // Extract the base and offset if possible.
- if (CurDAG->isBaseWithConstantOffset(Addr) ||
- Addr.getOpcode() == ISD::ADD) {
- N0 = Addr.getOperand(0);
- N1 = Addr.getOperand(1);
- } else if (getBaseWithOffsetUsingSplitOR(*CurDAG, Addr, N0, N1)) {
- assert(N0 && N1 && isa<ConstantSDNode>(N1));
- }
- if (N0 && N1) {
- if (SelectSMRDOffset(N0, N1, SOffset, Offset, Imm32Only)) {
- SBase = N0;
- return true;
- }
- if (SelectSMRDOffset(N1, N0, SOffset, Offset, Imm32Only)) {
- SBase = N1;
- return true;
- }
- }
+ if (Addr.getValueType() == MVT::i32 && !Addr->getFlags().hasNoUnsignedWrap())
return false;
+
+ SDValue N0, N1;
+ // Extract the base and offset if possible.
+ if (CurDAG->isBaseWithConstantOffset(Addr) || Addr.getOpcode() == ISD::ADD) {
+ N0 = Addr.getOperand(0);
+ N1 = Addr.getOperand(1);
+ } else if (getBaseWithOffsetUsingSplitOR(*CurDAG, Addr, N0, N1)) {
+ assert(N0 && N1 && isa<ConstantSDNode>(N1));
}
- if (Offset && !SOffset) {
- SBase = Addr;
- *Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
+ if (!N0 || !N1)
+ return false;
+ if (SelectSMRDOffset(N1, SOffset, Offset, Imm32Only, IsBuffer)) {
+ SBase = N0;
+ return true;
+ }
+ if (SelectSMRDOffset(N0, SOffset, Offset, Imm32Only, IsBuffer)) {
+ SBase = N1;
return true;
}
return false;
@@ -2017,10 +2014,18 @@ bool AMDGPUDAGToDAGISel::SelectSMRDBaseOffset(SDValue Addr, SDValue &SBase,
bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
SDValue *SOffset, SDValue *Offset,
bool Imm32Only) const {
- if (!SelectSMRDBaseOffset(Addr, SBase, SOffset, Offset, Imm32Only))
- return false;
- SBase = Expand32BitAddress(SBase);
- return true;
+ if (SelectSMRDBaseOffset(Addr, SBase, SOffset, Offset, Imm32Only)) {
+ SBase = Expand32BitAddress(SBase);
+ return true;
+ }
+
+ if (Addr.getValueType() == MVT::i32 && Offset && !SOffset) {
+ SBase = Expand32BitAddress(Addr);
+ *Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
+ return true;
+ }
+
+ return false;
}
bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
@@ -2046,33 +2051,26 @@ bool AMDGPUDAGToDAGISel::SelectSMRDSgprImm(SDValue Addr, SDValue &SBase,
return SelectSMRD(Addr, SBase, &SOffset, &Offset);
}
-bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
- SDValue &Offset) const {
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr)) {
- // The immediate offset for S_BUFFER instructions is unsigned.
- if (auto Imm =
- AMDGPU::getSMRDEncodedOffset(*Subtarget, C->getZExtValue(), true)) {
- Offset = CurDAG->getTargetConstant(*Imm, SDLoc(Addr), MVT::i32);
- return true;
- }
- }
-
- return false;
+bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue N, SDValue &Offset) const {
+ return SelectSMRDOffset(N, /* SOffset */ nullptr, &Offset,
+ /* Imm32Only */ false, /* IsBuffer */ true);
}
-bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
+bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue N,
SDValue &Offset) const {
assert(Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS);
-
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr)) {
- if (auto Imm = AMDGPU::getSMRDEncodedLiteralOffset32(*Subtarget,
- C->getZExtValue())) {
- Offset = CurDAG->getTargetConstant(*Imm, SDLoc(Addr), MVT::i32);
- return true;
- }
- }
-
- return false;
+ return SelectSMRDOffset(N, /* SOffset */ nullptr, &Offset,
+ /* Imm32Only */ true, /* IsBuffer */ true);
+}
+
+bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgprImm(SDValue N, SDValue &SOffset,
+ SDValue &Offset) const {
+ // Match the (soffset + offset) pair as a 32-bit register base and
+ // an immediate offset.
+ return N.getValueType() == MVT::i32 &&
+ SelectSMRDBaseOffset(N, /* SBase */ SOffset, /* SOffset*/ nullptr,
+ &Offset, /* Imm32Only */ false,
+ /* IsBuffer */ true);
}
bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
index fda2bfac71fcf..b5e39f6ed777f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -193,11 +193,13 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
bool SelectScratchSVAddr(SDNode *N, SDValue Addr, SDValue &VAddr,
SDValue &SAddr, SDValue &Offset) const;
- bool SelectSMRDOffset(SDValue Base, SDValue ByteOffsetNode, SDValue *SOffset,
- SDValue *Offset, bool Imm32Only = false) const;
+ bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue *SOffset,
+ SDValue *Offset, bool Imm32Only = false,
+ bool IsBuffer = false) const;
SDValue Expand32BitAddress(SDValue Addr) const;
bool SelectSMRDBaseOffset(SDValue Addr, SDValue &SBase, SDValue *SOffset,
- SDValue *Offset, bool Imm32Only = false) const;
+ SDValue *Offset, bool Imm32Only = false,
+ bool IsBuffer = false) const;
bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue *SOffset,
SDValue *Offset, bool Imm32Only = false) const;
bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
@@ -205,8 +207,10 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &SOffset) const;
bool SelectSMRDSgprImm(SDValue Addr, SDValue &SBase, SDValue &SOffset,
SDValue &Offset) const;
- bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
- bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
+ bool SelectSMRDBufferImm(SDValue N, SDValue &Offset) const;
+ bool SelectSMRDBufferImm32(SDValue N, SDValue &Offset) const;
+ bool SelectSMRDBufferSgprImm(SDValue N, SDValue &SOffset,
+ SDValue &Offset) const;
bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;
bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index c1468bd71c8a3..d85bf107f34e8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -4911,6 +4911,27 @@ AMDGPUInstructionSelector::selectSMRDBufferImm32(MachineOperand &Root) const {
return {{ [=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedImm); } }};
}
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectSMRDBufferSgprImm(MachineOperand &Root) const {
+ // Match the (soffset + offset) pair as a 32-bit register base and
+ // an immediate offset.
+ Register SOffset;
+ unsigned Offset;
+ std::tie(SOffset, Offset) =
+ AMDGPU::getBaseWithConstantOffset(*MRI, Root.getReg());
+ if (!SOffset)
+ return None;
+
+ Optional<int64_t> EncodedOffset =
+ AMDGPU::getSMRDEncodedOffset(STI, Offset, /* IsBuffer */ true);
+ if (!EncodedOffset)
+ return None;
+
+ assert(MRI->getType(SOffset) == LLT::scalar(32));
+ return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(SOffset); },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedOffset); }}};
+}
+
void AMDGPUInstructionSelector::renderTruncImm32(MachineInstrBuilder &MIB,
const MachineInstr &MI,
int OpIdx) const {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index 5baf55d234802..d8caaed2810ff 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -294,6 +294,7 @@ class AMDGPUInstructionSelector final : public InstructionSelector {
ComplexRendererFns selectSMRDBufferImm(MachineOperand &Root) const;
ComplexRendererFns selectSMRDBufferImm32(MachineOperand &Root) const;
+ ComplexRendererFns selectSMRDBufferSgprImm(MachineOperand &Root) const;
void renderTruncImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
int OpIdx = -1) const;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 04f413e463840..ba94c597d33dc 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -1806,6 +1806,7 @@ AMDGPURegisterBankInfo::splitBufferOffsets(MachineIRBuilder &B,
unsigned ImmOffset;
const LLT S32 = LLT::scalar(32);
+ // TODO: Use AMDGPU::getBaseWithConstantOffset() instead.
std::tie(BaseReg, ImmOffset) = getBaseWithConstantOffset(*B.getMRI(),
OrigOffset);
diff --git a/llvm/lib/Target/AMDGPU/SMInstructions.td b/llvm/lib/Target/AMDGPU/SMInstructions.td
index 4699b0ecf2ecb..f271f6d42857d 100644
--- a/llvm/lib/Target/AMDGPU/SMInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SMInstructions.td
@@ -834,6 +834,7 @@ def SMRDSgpr : ComplexPattern<iPTR, 2, "SelectSMRDSgpr">;
def SMRDSgprImm : ComplexPattern<iPTR, 3, "SelectSMRDSgprImm">;
def SMRDBufferImm : ComplexPattern<iPTR, 1, "SelectSMRDBufferImm">;
def SMRDBufferImm32 : ComplexPattern<iPTR, 1, "SelectSMRDBufferImm32">;
+def SMRDBufferSgprImm : ComplexPattern<iPTR, 2, "SelectSMRDBufferSgprImm">;
multiclass SMRD_Pattern <string Instr, ValueType vt> {
@@ -889,9 +890,18 @@ multiclass SMLoad_Pattern <string Instr, ValueType vt> {
// 3. Offset loaded in an 32bit SGPR
def : GCNPat <
- (SIsbuffer_load v4i32:$sbase, i32:$offset, timm:$cachepolicy),
- (vt (!cast<SM_Pseudo>(Instr#"_SGPR") SReg_128:$sbase, SReg_32:$offset, (extract_cpol $cachepolicy)))
+ (SIsbuffer_load v4i32:$sbase, i32:$soffset, timm:$cachepolicy),
+ (vt (!cast<SM_Pseudo>(Instr#"_SGPR") SReg_128:$sbase, SReg_32:$soffset, (extract_cpol $cachepolicy)))
>;
+
+ // 4. Offset as a 32-bit SGPR + immediate
+ def : GCNPat <
+ (SIsbuffer_load v4i32:$sbase, (SMRDBufferSgprImm i32:$soffset, i32:$offset),
+ timm:$cachepolicy),
+ (vt (!cast<SM_Pseudo>(Instr#"_SGPR_IMM") SReg_128:$sbase, SReg_32:$soffset, i32imm:$offset,
+ (extract_cpol $cachepolicy)))> {
+ let OtherPredicates = [isGFX9Plus];
+ }
}
// Global and constant loads can be selected to either MUBUF or SMRD
diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll b/llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll
index 1858dafab3674..bcf446eea0b75 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll
@@ -88,8 +88,32 @@ define amdgpu_ps void @test_sgpr_plus_imm_offset_x2(i8 addrspace(4)* inreg %base
ret void
}
+; GCN-LABEL: name: test_buffer_load_sgpr_plus_imm_offset
+; SDAG-DAG: %[[BASE0:.*]]:sgpr_32 = COPY $sgpr0
+; SDAG-DAG: %[[BASE1:.*]]:sgpr_32 = COPY $sgpr1
+; SDAG-DAG: %[[BASE2:.*]]:sgpr_32 = COPY $sgpr2
+; SDAG-DAG: %[[BASE3:.*]]:sgpr_32 = COPY $sgpr3
+; SDAG-DAG: %[[OFFSET:.*]]:sgpr_32 = COPY $sgpr4
+; SDAG-DAG: %[[BASE:.*]]:sgpr_128 = REG_SEQUENCE %[[BASE0]], %subreg.sub0, %[[BASE1]], %subreg.sub1, %[[BASE2]], %subreg.sub2, %[[BASE3]], %subreg.sub3
+; SDAG: S_BUFFER_LOAD_DWORD_SGPR_IMM killed %[[BASE]], %[[OFFSET]], 77,
+; GISEL-DAG: %[[BASE0:.*]]:sreg_32 = COPY $sgpr0
+; GISEL-DAG: %[[BASE1:.*]]:sreg_32 = COPY $sgpr1
+; GISEL-DAG: %[[BASE2:.*]]:sreg_32 = COPY $sgpr2
+; GISEL-DAG: %[[BASE3:.*]]:sreg_32 = COPY $sgpr3
+; GISEL-DAG: %[[OFFSET:.*]]:sreg_32 = COPY $sgpr4
+; GISEL-DAG: %[[BASE:.*]]:sgpr_128 = REG_SEQUENCE %[[BASE0]], %subreg.sub0, %[[BASE1]], %subreg.sub1, %[[BASE2]], %subreg.sub2, %[[BASE3]], %subreg.sub3
+; GISEL: S_BUFFER_LOAD_DWORD_SGPR_IMM %[[BASE]], %[[OFFSET]], 77,
+define amdgpu_cs void @test_buffer_load_sgpr_plus_imm_offset(<4 x i32> inreg %base, i32 inreg %i, i32 addrspace(1)* inreg %out) {
+ %off = add nuw nsw i32 %i, 77
+ %v = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %base, i32 %off, i32 0)
+ store i32 %v, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
declare void @llvm.amdgcn.raw.buffer.store.v4i32(<4 x i32>, <4 x i32>, i32, i32, i32 immarg) #1
+declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i32 immarg) nounwind readnone willreturn
+
; Function Attrs: nounwind readnone speculatable
declare i32 @llvm.amdgcn.reloc.constant(metadata) #3
More information about the llvm-commits
mailing list