[llvm] 432cbd7 - [AMDGPU][CodeGen] Support (register + immediate) SMRD offsets.
Ivan Kosarev via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 18 03:31:08 PDT 2022
Author: Ivan Kosarev
Date: 2022-07-18T11:29:31+01:00
New Revision: 432cbd7827202e4316225820f3f90417b9fcce6c
URL: https://github.com/llvm/llvm-project/commit/432cbd7827202e4316225820f3f90417b9fcce6c
DIFF: https://github.com/llvm/llvm-project/commit/432cbd7827202e4316225820f3f90417b9fcce6c.diff
LOG: [AMDGPU][CodeGen] Support (register + immediate) SMRD offsets.
Reviewed By: arsenm
Differential Revision: https://reviews.llvm.org/D129381
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUGISel.td
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
llvm/lib/Target/AMDGPU/SMInstructions.td
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir
llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index 5747fc0ca8e60..229dfb62ef6ea 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -88,6 +88,10 @@ def gi_smrd_sgpr :
GIComplexOperandMatcher<s64, "selectSmrdSgpr">,
GIComplexPatternEquiv<SMRDSgpr>;
+def gi_smrd_sgpr_imm :
+ GIComplexOperandMatcher<s64, "selectSmrdSgprImm">,
+ GIComplexPatternEquiv<SMRDSgprImm>;
+
def gi_flat_offset :
GIComplexOperandMatcher<s64, "selectFlatOffset">,
GIComplexPatternEquiv<FlatOffset>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 2c171be433725..86c2c6d94d7c7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -1886,21 +1886,21 @@ bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr,
// Match an immediate (if Offset is not null) or an SGPR (if SOffset is
// not null) offset. If Imm32Only is true, match only 32-bit immediate
// offsets available on CI.
-bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
- SDValue &Offset, bool Imm,
+bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue Addr, SDValue ByteOffsetNode,
+ SDValue *SOffset, SDValue *Offset,
bool Imm32Only) const {
ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
if (!C) {
- if (Imm)
+ if (!SOffset)
return false;
if (ByteOffsetNode.getValueType().isScalarInteger() &&
ByteOffsetNode.getValueType().getSizeInBits() == 32) {
- Offset = ByteOffsetNode;
+ *SOffset = ByteOffsetNode;
return true;
}
if (ByteOffsetNode.getOpcode() == ISD::ZERO_EXTEND) {
if (ByteOffsetNode.getOperand(0).getValueType().getSizeInBits() == 32) {
- Offset = ByteOffsetNode.getOperand(0);
+ *SOffset = ByteOffsetNode.getOperand(0);
return true;
}
}
@@ -1912,8 +1912,8 @@ bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
int64_t ByteOffset = C->getSExtValue();
Optional<int64_t> EncodedOffset =
AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset, false);
- if (EncodedOffset && Imm && !Imm32Only) {
- Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32);
+ if (EncodedOffset && Offset && !Imm32Only) {
+ *Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32);
return true;
}
@@ -1922,17 +1922,17 @@ bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
return false;
EncodedOffset = AMDGPU::getSMRDEncodedLiteralOffset32(*Subtarget, ByteOffset);
- if (EncodedOffset && Imm32Only) {
- Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32);
+ if (EncodedOffset && Offset && Imm32Only) {
+ *Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32);
return true;
}
if (!isUInt<32>(ByteOffset) && !isInt<32>(ByteOffset))
return false;
- if (!Imm) {
+ if (SOffset) {
SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
- Offset = SDValue(
+ *SOffset = SDValue(
CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, C32Bit), 0);
return true;
}
@@ -1968,11 +1968,18 @@ SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(SDValue Addr) const {
// Match a base and an immediate (if Offset is not null) or an SGPR (if
// SOffset is not null) or both offsets. If Imm32Only is true, match only
// 32-bit immediate offsets available on CI.
-bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
- SDValue &Offset, bool Imm,
- bool Imm32Only) const {
+bool AMDGPUDAGToDAGISel::SelectSMRDBaseOffset(SDValue Addr, SDValue &SBase,
+ SDValue *SOffset, SDValue *Offset,
+ bool Imm32Only) const {
SDLoc SL(Addr);
+ if (SOffset && Offset) {
+ assert(!Imm32Only);
+ SDValue B;
+ return SelectSMRDBaseOffset(Addr, B, nullptr, Offset) &&
+ SelectSMRDBaseOffset(B, SBase, SOffset, nullptr);
+ }
+
// A 32-bit (address + offset) should not cause unsigned 32-bit integer
// wraparound, because s_load instructions perform the addition in 64 bits.
if ((Addr.getValueType() != MVT::i32 ||
@@ -1987,38 +1994,55 @@ bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
assert(N0 && N1 && isa<ConstantSDNode>(N1));
}
if (N0 && N1) {
- if (SelectSMRDOffset(N1, Offset, Imm, Imm32Only)) {
- SBase = Expand32BitAddress(N0);
+ if (SelectSMRDOffset(N0, N1, SOffset, Offset, Imm32Only)) {
+ SBase = N0;
return true;
}
- if (SelectSMRDOffset(N0, Offset, Imm, Imm32Only)) {
- SBase = Expand32BitAddress(N1);
+ if (SelectSMRDOffset(N1, N0, SOffset, Offset, Imm32Only)) {
+ SBase = N1;
return true;
}
}
return false;
}
- if (!Imm)
+ if (Offset && !SOffset) {
+ SBase = Addr;
+ *Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
+ return true;
+ }
+ return false;
+}
+
+bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
+ SDValue *SOffset, SDValue *Offset,
+ bool Imm32Only) const {
+ if (!SelectSMRDBaseOffset(Addr, SBase, SOffset, Offset, Imm32Only))
return false;
- SBase = Expand32BitAddress(Addr);
- Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
+ SBase = Expand32BitAddress(SBase);
return true;
}
bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
SDValue &Offset) const {
- return SelectSMRD(Addr, SBase, Offset, /* Imm */ true);
+ return SelectSMRD(Addr, SBase, /* SOffset */ nullptr, &Offset);
}
bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
SDValue &Offset) const {
assert(Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS);
- return SelectSMRD(Addr, SBase, Offset, /* Imm */ true, /* Imm32Only */ true);
+ return SelectSMRD(Addr, SBase, /* SOffset */ nullptr, &Offset,
+ /* Imm32Only */ true);
}
bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
- SDValue &Offset) const {
- return SelectSMRD(Addr, SBase, Offset, /* Imm */ false);
+ SDValue &SOffset) const {
+ return SelectSMRD(Addr, SBase, &SOffset, /* Offset */ nullptr);
+}
+
+bool AMDGPUDAGToDAGISel::SelectSMRDSgprImm(SDValue Addr, SDValue &SBase,
+ SDValue &SOffset,
+ SDValue &Offset) const {
+ return SelectSMRD(Addr, SBase, &SOffset, &Offset);
}
bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
index 7894b8eb5b67f..fda2bfac71fcf 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -193,14 +193,18 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
bool SelectScratchSVAddr(SDNode *N, SDValue Addr, SDValue &VAddr,
SDValue &SAddr, SDValue &Offset) const;
- bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset, bool Imm,
- bool Imm32Only) const;
+ bool SelectSMRDOffset(SDValue Base, SDValue ByteOffsetNode, SDValue *SOffset,
+ SDValue *Offset, bool Imm32Only = false) const;
SDValue Expand32BitAddress(SDValue Addr) const;
- bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset, bool Imm,
- bool Imm32Only = false) const;
+ bool SelectSMRDBaseOffset(SDValue Addr, SDValue &SBase, SDValue *SOffset,
+ SDValue *Offset, bool Imm32Only = false) const;
+ bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue *SOffset,
+ SDValue *Offset, bool Imm32Only = false) const;
bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
- bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
+ bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &SOffset) const;
+ bool SelectSMRDSgprImm(SDValue Addr, SDValue &SBase, SDValue &SOffset,
+ SDValue &Offset) const;
bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 70fae9d784a21..278a85c41ba97 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -2361,7 +2361,7 @@ void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,
if (PtrMI->getOpcode() != TargetOpcode::G_PTR_ADD)
return;
- GEPInfo GEPInfo(*PtrMI);
+ GEPInfo GEPInfo;
for (unsigned i = 1; i != 3; ++i) {
const MachineOperand &GEPOp = PtrMI->getOperand(i);
@@ -3800,25 +3800,82 @@ AMDGPUInstructionSelector::selectVINTERPModsHi(MachineOperand &Root) const {
}};
}
-InstructionSelector::ComplexRendererFns
-AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const {
+bool AMDGPUInstructionSelector::selectSmrdOffset(MachineOperand &Root,
+ Register &Base,
+ Register *SOffset,
+ int64_t *Offset) const {
+ MachineInstr *MI = Root.getParent();
+ MachineBasicBlock *MBB = MI->getParent();
+
+ // FIXME: We should shrink the GEP if the offset is known to be <= 32-bits,
+ // then we can select all ptr + 32-bit offsets.
SmallVector<GEPInfo, 4> AddrInfo;
- getAddrModeInfo(*Root.getParent(), *MRI, AddrInfo);
+ getAddrModeInfo(*MI, *MRI, AddrInfo);
- if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
- return None;
+ if (AddrInfo.empty())
+ return false;
- const GEPInfo &GEPInfo = AddrInfo[0];
+ const GEPInfo &GEPI = AddrInfo[0];
Optional<int64_t> EncodedImm =
- AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm, false);
- if (!EncodedImm)
+ AMDGPU::getSMRDEncodedOffset(STI, GEPI.Imm, false);
+
+ if (SOffset && Offset) {
+ if (GEPI.SgprParts.size() == 1 && GEPI.Imm != 0 && EncodedImm &&
+ AddrInfo.size() > 1) {
+ const GEPInfo &GEPI2 = AddrInfo[1];
+ if (GEPI2.SgprParts.size() == 2 && GEPI2.Imm == 0) {
+ if (Register OffsetReg =
+ matchZeroExtendFromS32(*MRI, GEPI2.SgprParts[1])) {
+ Base = GEPI2.SgprParts[0];
+ *SOffset = OffsetReg;
+ *Offset = *EncodedImm;
+ return true;
+ }
+ }
+ }
+ return false;
+ }
+
+ if (Offset && GEPI.SgprParts.size() == 1 && EncodedImm) {
+ Base = GEPI.SgprParts[0];
+ *Offset = *EncodedImm;
+ return true;
+ }
+
+ // SGPR offset is unsigned.
+ if (SOffset && GEPI.SgprParts.size() == 1 && isUInt<32>(GEPI.Imm) &&
+ GEPI.Imm != 0) {
+ // If we make it this far we have a load with a 32-bit immediate offset.
+ // It is OK to select this using a sgpr offset, because we have already
+ // failed trying to select this load into one of the _IMM variants since
+ // the _IMM Patterns are considered before the _SGPR patterns.
+ Base = GEPI.SgprParts[0];
+ *SOffset = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), *SOffset)
+ .addImm(GEPI.Imm);
+ return true;
+ }
+ // Require exactly {base, offset}: SgprParts[1] is read below.
+ if (SOffset && GEPI.SgprParts.size() == 2 && GEPI.Imm == 0) {
+ if (Register OffsetReg = matchZeroExtendFromS32(*MRI, GEPI.SgprParts[1])) {
+ Base = GEPI.SgprParts[0];
+ *SOffset = OffsetReg;
+ return true;
+ }
+ }
+
+ return false;
+}
+
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const {
+ Register Base;
+ int64_t Offset;
+ if (!selectSmrdOffset(Root, Base, /* SOffset= */ nullptr, &Offset))
return None;
- unsigned PtrReg = GEPInfo.SgprParts[0];
- return {{
- [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
- [=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedImm); }
- }};
+ return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Base); },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }}};
}
InstructionSelector::ComplexRendererFns
@@ -3844,43 +3901,24 @@ AMDGPUInstructionSelector::selectSmrdImm32(MachineOperand &Root) const {
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const {
- MachineInstr *MI = Root.getParent();
- MachineBasicBlock *MBB = MI->getParent();
-
- SmallVector<GEPInfo, 4> AddrInfo;
- getAddrModeInfo(*MI, *MRI, AddrInfo);
-
- // FIXME: We should shrink the GEP if the offset is known to be <= 32-bits,
- // then we can select all ptr + 32-bit offsets.
- if (AddrInfo.empty())
+ Register Base, SOffset;
+ if (!selectSmrdOffset(Root, Base, &SOffset, /* Offset= */ nullptr))
return None;
- const GEPInfo &GEPInfo = AddrInfo[0];
- Register PtrReg = GEPInfo.SgprParts[0];
-
- // SGPR offset is unsigned.
- if (AddrInfo[0].SgprParts.size() == 1 && isUInt<32>(GEPInfo.Imm) &&
- GEPInfo.Imm != 0) {
- // If we make it this far we have a load with an 32-bit immediate offset.
- // It is OK to select this using a sgpr offset, because we have already
- // failed trying to select this load into one of the _IMM variants since
- // the _IMM Patterns are considered before the _SGPR patterns.
- Register OffsetReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
- BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), OffsetReg)
- .addImm(GEPInfo.Imm);
- return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
- [=](MachineInstrBuilder &MIB) { MIB.addReg(OffsetReg); }}};
- }
+ return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Base); },
+ [=](MachineInstrBuilder &MIB) { MIB.addReg(SOffset); }}};
+}
- if (AddrInfo[0].SgprParts.size() == 2 && GEPInfo.Imm == 0) {
- if (Register OffsetReg =
- matchZeroExtendFromS32(*MRI, GEPInfo.SgprParts[1])) {
- return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
- [=](MachineInstrBuilder &MIB) { MIB.addReg(OffsetReg); }}};
- }
- }
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectSmrdSgprImm(MachineOperand &Root) const {
+ Register Base, SOffset;
+ int64_t Offset;
+ if (!selectSmrdOffset(Root, Base, &SOffset, &Offset))
+ return None;
- return None;
+ return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Base); },
+ [=](MachineInstrBuilder &MIB) { MIB.addReg(SOffset); },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }}};
}
std::pair<Register, int>
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index 22672ba59e766..5baf55d234802 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -63,11 +63,9 @@ class AMDGPUInstructionSelector final : public InstructionSelector {
private:
struct GEPInfo {
- const MachineInstr &GEP;
SmallVector<unsigned, 2> SgprParts;
SmallVector<unsigned, 2> VgprParts;
- int64_t Imm;
- GEPInfo(const MachineInstr &GEP) : GEP(GEP), Imm(0) { }
+ int64_t Imm = 0;
};
bool isSGPR(Register Reg) const;
@@ -200,12 +198,16 @@ class AMDGPUInstructionSelector final : public InstructionSelector {
InstructionSelector::ComplexRendererFns
selectVINTERPModsHi(MachineOperand &Root) const;
+ bool selectSmrdOffset(MachineOperand &Root, Register &Base, Register *SOffset,
+ int64_t *Offset) const;
InstructionSelector::ComplexRendererFns
selectSmrdImm(MachineOperand &Root) const;
InstructionSelector::ComplexRendererFns
selectSmrdImm32(MachineOperand &Root) const;
InstructionSelector::ComplexRendererFns
selectSmrdSgpr(MachineOperand &Root) const;
+ InstructionSelector::ComplexRendererFns
+ selectSmrdSgprImm(MachineOperand &Root) const;
std::pair<Register, int> selectFlatOffsetImpl(MachineOperand &Root,
uint64_t FlatVariant) const;
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 799d34e32d272..0c6ee3348e999 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -220,16 +220,23 @@ bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1,
AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::sbase) == -1)
return false;
- assert(getNumOperandsNoGlue(Load0) == getNumOperandsNoGlue(Load1));
+ unsigned NumOps = getNumOperandsNoGlue(Load0);
+ if (NumOps != getNumOperandsNoGlue(Load1))
+ return false;
// Check base reg.
if (Load0->getOperand(0) != Load1->getOperand(0))
return false;
+ // Match register offsets, if both register and immediate offsets present.
+ assert(NumOps == 4 || NumOps == 5);
+ if (NumOps == 5 && Load0->getOperand(1) != Load1->getOperand(1))
+ return false;
+
const ConstantSDNode *Load0Offset =
- dyn_cast<ConstantSDNode>(Load0->getOperand(1));
+ dyn_cast<ConstantSDNode>(Load0->getOperand(NumOps - 3));
const ConstantSDNode *Load1Offset =
- dyn_cast<ConstantSDNode>(Load1->getOperand(1));
+ dyn_cast<ConstantSDNode>(Load1->getOperand(NumOps - 3));
if (!Load0Offset || !Load1Offset)
return false;
diff --git a/llvm/lib/Target/AMDGPU/SMInstructions.td b/llvm/lib/Target/AMDGPU/SMInstructions.td
index 882d13402a192..da4d9448d278f 100644
--- a/llvm/lib/Target/AMDGPU/SMInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SMInstructions.td
@@ -883,6 +883,7 @@ def smrd_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{ return isUniformL
def SMRDImm : ComplexPattern<iPTR, 2, "SelectSMRDImm">;
def SMRDImm32 : ComplexPattern<iPTR, 2, "SelectSMRDImm32">;
def SMRDSgpr : ComplexPattern<iPTR, 2, "SelectSMRDSgpr">;
+def SMRDSgprImm : ComplexPattern<iPTR, 3, "SelectSMRDSgprImm">;
def SMRDBufferImm : ComplexPattern<iPTR, 1, "SelectSMRDBufferImm">;
def SMRDBufferImm32 : ComplexPattern<iPTR, 1, "SelectSMRDBufferImm32">;
@@ -903,11 +904,18 @@ multiclass SMRD_Pattern <string Instr, ValueType vt> {
// 3. SGPR offset
def : GCNPat <
- (smrd_load (SMRDSgpr i64:$sbase, i32:$offset)),
- (vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $offset, 0))
+ (smrd_load (SMRDSgpr i64:$sbase, i32:$soffset)),
+ (vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $soffset, 0))
>;
- // 4. No offset
+ // 4. SGPR+IMM offset
+ def : GCNPat <
+ (smrd_load (SMRDSgprImm i64:$sbase, i32:$soffset, i32:$offset)),
+ (vt (!cast<SM_Pseudo>(Instr#"_SGPR_IMM") $sbase, $soffset, $offset, 0))> {
+ let OtherPredicates = [isGFX9Plus];
+ }
+
+ // 5. No offset
def : GCNPat <
(vt (smrd_load (i64 SReg_64:$sbase))),
(vt (!cast<SM_Pseudo>(Instr#"_IMM") i64:$sbase, 0, 0))
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir
index 530f42dff2aaf..8ca6f24793a10 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir
@@ -1,12 +1,14 @@
# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GCN,SI,SICI,SIVI
# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GCN,CI,SICI
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GCN,VI,SIVI
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GCN,GFX9
--- |
define amdgpu_kernel void @smrd_imm(i32 addrspace(4)* %const0) { ret void }
define amdgpu_kernel void @smrd_wide() { ret void }
define amdgpu_kernel void @constant_address_positive() { ret void }
define amdgpu_kernel void @smrd_sgpr() { ret void }
+ define amdgpu_kernel void @smrd_sgpr_imm() { ret void }
...
---
@@ -232,3 +234,28 @@ body: |
%5:sgpr(s32) = G_LOAD %4 :: (dereferenceable invariant load (s32), align 4, addrspace 4)
S_ENDPGM 0, implicit %5
...
+
+---
+
+# Test a load with a (register + immediate) offset.
+# GCN-LABEL: name: smrd_sgpr_imm{{$}}
+# GFX9-DAG: %[[BASE:.*]]:sreg_64 = COPY $sgpr0_sgpr1
+# GFX9-DAG: %[[OFFSET:.*]]:sreg_32 = COPY $sgpr2
+# GFX9: S_LOAD_DWORD_SGPR_IMM %[[BASE]], %[[OFFSET]], 16,
+
+name: smrd_sgpr_imm
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1, $sgpr2
+ %0:sgpr(p4) = COPY $sgpr0_sgpr1
+ %1:sgpr(s32) = COPY $sgpr2
+ %2:sgpr(s64) = G_ZEXT %1:sgpr(s32)
+ %4:sgpr(p4) = G_PTR_ADD %0, %2
+ %5:sgpr(s64) = G_CONSTANT i64 16
+ %6:sgpr(p4) = G_PTR_ADD %4, %5
+ %7:sgpr(s32) = G_LOAD %6 :: (dereferenceable invariant load (s32), align 4, addrspace 4)
+ S_ENDPGM 0, implicit %7
+...
diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll b/llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll
index 232c88eb19c03..e98d6d0fcaf35 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll
@@ -1,5 +1,5 @@
-; RUN: llc -march=amdgcn -global-isel=0 -verify-machineinstrs -stop-after=amdgpu-isel -o - %s | FileCheck -check-prefixes=GCN,SDAG %s
-; RUN: llc -march=amdgcn -global-isel=1 -verify-machineinstrs -stop-after=amdgpu-isel -o - %s | FileCheck -check-prefixes=GCN,GISEL %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -global-isel=0 -verify-machineinstrs -stop-after=finalize-isel -o - %s | FileCheck -check-prefixes=GCN,SDAG %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -global-isel=1 -verify-machineinstrs -stop-after=finalize-isel -o - %s | FileCheck -check-prefixes=GCN,GISEL %s
@0 = external dso_local addrspace(4) constant [4 x <2 x float>]
@1 = external dso_local addrspace(4) constant i32
@@ -7,10 +7,9 @@
; Test that DAG->DAG ISel is able to pick up the S_LOAD_DWORDX4_SGPR instruction that fetches the offset
; from a register.
; GCN-LABEL: name: test_load_zext
-; SDAG: %[[OFFSET:[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @DescriptorBuffer
+; GCN: %[[OFFSET:[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @DescriptorBuffer
; SDAG: %{{[0-9]+}}:sgpr_128 = S_LOAD_DWORDX4_SGPR killed %{{[0-9]+}}, killed %[[OFFSET]], 0 :: (invariant load (s128) from %ir.13, addrspace 4)
-; GISEL: $[[OFFSET:.*]] = S_MOV_B32 target-flags(amdgpu-abs32-lo) @DescriptorBuffer
-; GISEL: S_LOAD_DWORDX4_SGPR killed renamable {{.*}}, killed renamable $[[OFFSET]], 0 :: (invariant load (<4 x s32>) from {{.*}}, addrspace 4)
+; GISEL: %{{[0-9]+}}:sgpr_128 = S_LOAD_DWORDX4_SGPR %{{[0-9]+}}, %[[OFFSET]], 0 :: (invariant load (<4 x s32>) from {{.*}}, addrspace 4)
define amdgpu_cs void @test_load_zext(i32 inreg %0, i32 inreg %1, i32 inreg %resNode0, i32 inreg %resNode1, <3 x i32> inreg %2, i32 inreg %3, <3 x i32> %4) local_unnamed_addr #2 {
.entry:
%5 = call i64 @llvm.amdgcn.s.getpc() #3
@@ -31,14 +30,10 @@ define amdgpu_cs void @test_load_zext(i32 inreg %0, i32 inreg %1, i32 inreg %res
; Make sure we match constant bases with register offsets, in which case
; the base may be the RHS operand of the load in SDAG.
; GCN-LABEL: name: test_complex_reg_offset
-; SDAG-DAG: %[[BASE:.*]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @0 + 4,
-; SDAG-DAG: %[[OFFSET:.*]]:sreg_32 = S_LSHL_B32
+; GCN-DAG: %[[BASE:.*]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @0 + 4,
+; GCN-DAG: %[[OFFSET:.*]]:sreg_32 = S_LSHL_B32
; SDAG: S_LOAD_DWORD_SGPR killed %[[BASE]], killed %[[OFFSET]],
-; GISEL-DAG: $[[BASE0:.*]] = S_ADD_U32 internal $sgpr0, target-flags(amdgpu-rel32-lo) @0 + 4,
-; GISEL-DAG: $[[BASE1:.*]] = S_ADDC_U32 internal $sgpr1, target-flags(amdgpu-rel32-hi) @0 + 12,
-; GISEL-DAG: $[[OFFSET:.*]] = S_LSHL_B32
-; GISEL-NOT: [[OFFSET]] =
-; GISEL: S_LOAD_DWORD_SGPR killed renamable $[[BASE0]]_[[BASE1]], killed renamable $[[OFFSET]],
+; GISEL: S_LOAD_DWORD_SGPR %[[BASE]], %[[OFFSET]],
define amdgpu_ps void @test_complex_reg_offset(float addrspace(1)* %out) {
%i = load i32, i32 addrspace(4)* @1
%i1 = and i32 %i, 3
@@ -49,6 +44,50 @@ define amdgpu_ps void @test_complex_reg_offset(float addrspace(1)* %out) {
ret void
}
+; GCN-LABEL: name: test_sgpr_plus_imm_offset
+; SDAG-DAG: %[[BASE0:.*]]:sgpr_32 = COPY $sgpr0
+; SDAG-DAG: %[[BASE1:.*]]:sgpr_32 = COPY $sgpr1
+; SDAG-DAG: %[[OFFSET:.*]]:sgpr_32 = COPY $sgpr2
+; SDAG-DAG: %[[BASE:.*]]:sgpr_64 = REG_SEQUENCE %[[BASE0]], %subreg.sub0, %[[BASE1]], %subreg.sub1
+; SDAG: S_LOAD_DWORD_SGPR_IMM killed %[[BASE]], %[[OFFSET]], 16,
+; GISEL-DAG: %[[BASE0:.*]]:sreg_32 = COPY $sgpr0
+; GISEL-DAG: %[[BASE1:.*]]:sreg_32 = COPY $sgpr1
+; GISEL-DAG: %[[OFFSET:.*]]:sreg_32 = COPY $sgpr2
+; GISEL-DAG: %[[BASE:.*]]:sreg_64 = REG_SEQUENCE %[[BASE0]], %subreg.sub0, %[[BASE1]], %subreg.sub1
+; GISEL: S_LOAD_DWORD_SGPR_IMM %[[BASE]], %[[OFFSET]], 16,
+define amdgpu_ps void @test_sgpr_plus_imm_offset(i8 addrspace(4)* inreg %base, i32 inreg %offset,
+ i32 addrspace(1)* inreg %out) {
+ %v1 = getelementptr i8, i8 addrspace(4)* %base, i64 16
+ %v2 = zext i32 %offset to i64
+ %v3 = getelementptr i8, i8 addrspace(4)* %v1, i64 %v2
+ %v4 = bitcast i8 addrspace(4)* %v3 to i32 addrspace(4)*
+ %v5 = load i32, i32 addrspace(4)* %v4, align 4
+ store i32 %v5, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: name: test_sgpr_plus_imm_offset_x2
+; SDAG-DAG: %[[BASE0:.*]]:sgpr_32 = COPY $sgpr0
+; SDAG-DAG: %[[BASE1:.*]]:sgpr_32 = COPY $sgpr1
+; SDAG-DAG: %[[OFFSET:.*]]:sgpr_32 = COPY $sgpr2
+; SDAG-DAG: %[[BASE:.*]]:sgpr_64 = REG_SEQUENCE %[[BASE0]], %subreg.sub0, %[[BASE1]], %subreg.sub1
+; SDAG: S_LOAD_DWORDX2_SGPR_IMM killed %[[BASE]], %[[OFFSET]], 16,
+; GISEL-DAG: %[[BASE0:.*]]:sreg_32 = COPY $sgpr0
+; GISEL-DAG: %[[BASE1:.*]]:sreg_32 = COPY $sgpr1
+; GISEL-DAG: %[[OFFSET:.*]]:sreg_32 = COPY $sgpr2
+; GISEL-DAG: %[[BASE:.*]]:sreg_64 = REG_SEQUENCE %[[BASE0]], %subreg.sub0, %[[BASE1]], %subreg.sub1
+; GISEL: S_LOAD_DWORDX2_SGPR_IMM %[[BASE]], %[[OFFSET]], 16,
+define amdgpu_ps void @test_sgpr_plus_imm_offset_x2(i8 addrspace(4)* inreg %base, i32 inreg %offset,
+ <2 x i32> addrspace(1)* inreg %out) {
+ %v1 = getelementptr i8, i8 addrspace(4)* %base, i64 16
+ %v2 = zext i32 %offset to i64
+ %v3 = getelementptr i8, i8 addrspace(4)* %v1, i64 %v2
+ %v4 = bitcast i8 addrspace(4)* %v3 to <2 x i32> addrspace(4)*
+ %v5 = load <2 x i32>, <2 x i32> addrspace(4)* %v4, align 4
+ store <2 x i32> %v5, <2 x i32> addrspace(1)* %out, align 4
+ ret void
+}
+
declare void @llvm.amdgcn.raw.buffer.store.v4i32(<4 x i32>, <4 x i32>, i32, i32, i32 immarg) #1
; Function Attrs: nounwind readnone speculatable
More information about the llvm-commits
mailing list