[PATCH] R600/SI: Consider adjacent offsets in getLdStBaseRegImmOfs
Tom Stellard
tom at stellard.net
Tue Jul 29 17:20:21 PDT 2014
On Tue, Jul 29, 2014 at 10:37:50PM +0000, Matt Arsenault wrote:
> For a write2, use the size of the data0 operand, rather than the first non-result operand, to get the element size.
>
> http://reviews.llvm.org/D4711
>
> Files:
> lib/Target/R600/SIInstrInfo.cpp
>
> Index: lib/Target/R600/SIInstrInfo.cpp
> ===================================================================
> --- lib/Target/R600/SIInstrInfo.cpp
> +++ lib/Target/R600/SIInstrInfo.cpp
> @@ -37,25 +37,52 @@
> const TargetRegisterInfo *TRI) const {
> unsigned Opc = LdSt->getOpcode();
> if (isDS(Opc)) {
> -
> const MachineOperand *OffsetImm = getNamedOperand(*LdSt,
> AMDGPU::OpName::offset);
> + if (OffsetImm) {
> + // Normal, single offset LDS instruction.
> + const MachineOperand *AddrReg = getNamedOperand(*LdSt,
> + AMDGPU::OpName::addr);
> +
> + BaseReg = AddrReg->getReg();
> + Offset = OffsetImm->getImm();
> + return true;
> + }
>
> - if (!OffsetImm) {
> - // The 2 offset instructions use offset0 and offset1 instead. This
> - // function only handles simple instructions with only a single offset, so
> - // we ignore them.
> + // The 2 offset instructions use offset0 and offset1 instead. We can treat
> + // these as a load with a single offset if the 2 offsets are consecutive. We
> + // will use this for some partially aligned loads.
> + const MachineOperand *Offset0Imm = getNamedOperand(*LdSt,
> + AMDGPU::OpName::offset0);
> + const MachineOperand *Offset1Imm = getNamedOperand(*LdSt,
> + AMDGPU::OpName::offset1);
> +
> + uint8_t Offset0 = Offset0Imm->getImm();
> + uint8_t Offset1 = Offset1Imm->getImm();
> + assert(Offset1 > Offset0);
> +
> + if (Offset1 - Offset0 == 1) {
> + // Each of these offsets is in element sized units, so we need to convert
> + // to bytes of the individual reads.
I think the offsets are always byte offsets. Have you tested this
with piglit?
-Tom
> +
> + unsigned EltSize;
> + if (LdSt->mayLoad())
> + EltSize = getOpRegClass(*LdSt, 0)->getSize() / 2;
> + else {
> + assert(LdSt->mayStore());
> + int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
> + EltSize = getOpRegClass(*LdSt, Data0Idx)->getSize();
> + }
>
> - // TODO: Handle consecutive offsets as a single load.
> - return false;
> - }
>
> - const MachineOperand *AddrReg = getNamedOperand(*LdSt,
> - AMDGPU::OpName::addr);
> + const MachineOperand *AddrReg = getNamedOperand(*LdSt,
> + AMDGPU::OpName::addr);
> + BaseReg = AddrReg->getReg();
> + Offset = EltSize * Offset0;
> + return true;
> + }
>
> - BaseReg = AddrReg->getReg();
> - Offset = OffsetImm->getImm();
> - return true;
> + return false;
> }
>
> if (isMUBUF(Opc) || isMTBUF(Opc)) {
> Index: lib/Target/R600/SIInstrInfo.cpp
> ===================================================================
> --- lib/Target/R600/SIInstrInfo.cpp
> +++ lib/Target/R600/SIInstrInfo.cpp
> @@ -37,25 +37,52 @@
> const TargetRegisterInfo *TRI) const {
> unsigned Opc = LdSt->getOpcode();
> if (isDS(Opc)) {
> -
> const MachineOperand *OffsetImm = getNamedOperand(*LdSt,
> AMDGPU::OpName::offset);
> + if (OffsetImm) {
> + // Normal, single offset LDS instruction.
> + const MachineOperand *AddrReg = getNamedOperand(*LdSt,
> + AMDGPU::OpName::addr);
> +
> + BaseReg = AddrReg->getReg();
> + Offset = OffsetImm->getImm();
> + return true;
> + }
>
> - if (!OffsetImm) {
> - // The 2 offset instructions use offset0 and offset1 instead. This
> - // function only handles simple instructions with only a single offset, so
> - // we ignore them.
> + // The 2 offset instructions use offset0 and offset1 instead. We can treat
> + // these as a load with a single offset if the 2 offsets are consecutive. We
> + // will use this for some partially aligned loads.
> + const MachineOperand *Offset0Imm = getNamedOperand(*LdSt,
> + AMDGPU::OpName::offset0);
> + const MachineOperand *Offset1Imm = getNamedOperand(*LdSt,
> + AMDGPU::OpName::offset1);
> +
> + uint8_t Offset0 = Offset0Imm->getImm();
> + uint8_t Offset1 = Offset1Imm->getImm();
> + assert(Offset1 > Offset0);
> +
> + if (Offset1 - Offset0 == 1) {
> + // Each of these offsets is in element sized units, so we need to convert
> + // to bytes of the individual reads.
> +
> + unsigned EltSize;
> + if (LdSt->mayLoad())
> + EltSize = getOpRegClass(*LdSt, 0)->getSize() / 2;
> + else {
> + assert(LdSt->mayStore());
> + int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
> + EltSize = getOpRegClass(*LdSt, Data0Idx)->getSize();
> + }
>
> - // TODO: Handle consecutive offsets as a single load.
> - return false;
> - }
>
> - const MachineOperand *AddrReg = getNamedOperand(*LdSt,
> - AMDGPU::OpName::addr);
> + const MachineOperand *AddrReg = getNamedOperand(*LdSt,
> + AMDGPU::OpName::addr);
> + BaseReg = AddrReg->getReg();
> + Offset = EltSize * Offset0;
> + return true;
> + }
>
> - BaseReg = AddrReg->getReg();
> - Offset = OffsetImm->getImm();
> - return true;
> + return false;
> }
>
> if (isMUBUF(Opc) || isMTBUF(Opc)) {
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
More information about the llvm-commits
mailing list