[PATCH] R600/SI: Consider adjacent offsets in getLdStBaseRegImmOfs

Tue Jul 29 17:20:21 PDT 2014

On Tue, Jul 29, 2014 at 10:37:50PM +0000, Matt Arsenault wrote:
> For a write2, use the size of the data0 operand, rather than the first non-result operand, to get the element size.
> 
> http://reviews.llvm.org/D4711
> 
> Files:
>   lib/Target/R600/SIInstrInfo.cpp
> 
> Index: lib/Target/R600/SIInstrInfo.cpp
> ===================================================================
> --- lib/Target/R600/SIInstrInfo.cpp
> +++ lib/Target/R600/SIInstrInfo.cpp
> @@ -37,25 +37,52 @@
>                                         const TargetRegisterInfo *TRI) const {
>    unsigned Opc = LdSt->getOpcode();
>    if (isDS(Opc)) {
> -
>      const MachineOperand *OffsetImm = getNamedOperand(*LdSt,
>                                                        AMDGPU::OpName::offset);
> +    if (OffsetImm) {
> +      // Normal, single offset LDS instruction.
> +      const MachineOperand *AddrReg = getNamedOperand(*LdSt,
> +                                                      AMDGPU::OpName::addr);
> +
> +      BaseReg = AddrReg->getReg();
> +      Offset = OffsetImm->getImm();
> +      return true;
> +    }
>  
> -    if (!OffsetImm) {
> -      // The 2 offset instructions use offset0 and offset1 instead. This
> -      // function only handles simple instructions with only a single offset, so
> -      // we ignore them.
> +    // The 2 offset instructions use offset0 and offset1 instead. We can treat
> +    // these as a load with a single offset if the 2 offsets are consecutive. We
> +    // will use this for some partially aligned loads.
> +    const MachineOperand *Offset0Imm = getNamedOperand(*LdSt,
> +                                                       AMDGPU::OpName::offset0);
> +    const MachineOperand *Offset1Imm = getNamedOperand(*LdSt,
> +                                                       AMDGPU::OpName::offset1);
> +
> +    uint8_t Offset0 = Offset0Imm->getImm();
> +    uint8_t Offset1 = Offset1Imm->getImm();
> +    assert(Offset1 > Offset0);
> +
> +    if (Offset1 - Offset0 == 1) {
> +      // Each of these offsets is in element sized units, so we need to convert
> +      // to bytes of the individual reads.

I think the offsets are always byte offsets.  Have you tested this
with piglit?

-Tom

> +
> +      unsigned EltSize;
> +      if (LdSt->mayLoad())
> +        EltSize = getOpRegClass(*LdSt, 0)->getSize() / 2;
> +      else {
> +        assert(LdSt->mayStore());
> +        int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
> +        EltSize = getOpRegClass(*LdSt, Data0Idx)->getSize();
> +      }
>  
> -      // TODO: Handle consecutive offsets as a single load.
> -      return false;
> -    }
>  
> -    const MachineOperand *AddrReg = getNamedOperand(*LdSt,
> -                                                    AMDGPU::OpName::addr);
> +      const MachineOperand *AddrReg = getNamedOperand(*LdSt,
> +                                                      AMDGPU::OpName::addr);
> +      BaseReg = AddrReg->getReg();
> +      Offset = EltSize * Offset0;
> +      return true;
> +    }
>  
> -    BaseReg = AddrReg->getReg();
> -    Offset = OffsetImm->getImm();
> -    return true;
> +    return false;
>    }
>  
>    if (isMUBUF(Opc) || isMTBUF(Opc)) {

> Index: lib/Target/R600/SIInstrInfo.cpp
> ===================================================================
> --- lib/Target/R600/SIInstrInfo.cpp
> +++ lib/Target/R600/SIInstrInfo.cpp
> @@ -37,25 +37,52 @@
>                                         const TargetRegisterInfo *TRI) const {
>    unsigned Opc = LdSt->getOpcode();
>    if (isDS(Opc)) {
> -
>      const MachineOperand *OffsetImm = getNamedOperand(*LdSt,
>                                                        AMDGPU::OpName::offset);
> +    if (OffsetImm) {
> +      // Normal, single offset LDS instruction.
> +      const MachineOperand *AddrReg = getNamedOperand(*LdSt,
> +                                                      AMDGPU::OpName::addr);
> +
> +      BaseReg = AddrReg->getReg();
> +      Offset = OffsetImm->getImm();
> +      return true;
> +    }
>  
> -    if (!OffsetImm) {
> -      // The 2 offset instructions use offset0 and offset1 instead. This
> -      // function only handles simple instructions with only a single offset, so
> -      // we ignore them.
> +    // The 2 offset instructions use offset0 and offset1 instead. We can treat
> +    // these as a load with a single offset if the 2 offsets are consecutive. We
> +    // will use this for some partially aligned loads.
> +    const MachineOperand *Offset0Imm = getNamedOperand(*LdSt,
> +                                                       AMDGPU::OpName::offset0);
> +    const MachineOperand *Offset1Imm = getNamedOperand(*LdSt,
> +                                                       AMDGPU::OpName::offset1);
> +
> +    uint8_t Offset0 = Offset0Imm->getImm();
> +    uint8_t Offset1 = Offset1Imm->getImm();
> +    assert(Offset1 > Offset0);
> +
> +    if (Offset1 - Offset0 == 1) {
> +      // Each of these offsets is in element sized units, so we need to convert
> +      // to bytes of the individual reads.
> +
> +      unsigned EltSize;
> +      if (LdSt->mayLoad())
> +        EltSize = getOpRegClass(*LdSt, 0)->getSize() / 2;
> +      else {
> +        assert(LdSt->mayStore());
> +        int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
> +        EltSize = getOpRegClass(*LdSt, Data0Idx)->getSize();
> +      }
>  
> -      // TODO: Handle consecutive offsets as a single load.
> -      return false;
> -    }
>  
> -    const MachineOperand *AddrReg = getNamedOperand(*LdSt,
> -                                                    AMDGPU::OpName::addr);
> +      const MachineOperand *AddrReg = getNamedOperand(*LdSt,
> +                                                      AMDGPU::OpName::addr);
> +      BaseReg = AddrReg->getReg();
> +      Offset = EltSize * Offset0;
> +      return true;
> +    }
>  
> -    BaseReg = AddrReg->getReg();
> -    Offset = OffsetImm->getImm();
> -    return true;
> +    return false;
>    }
>  
>    if (isMUBUF(Opc) || isMTBUF(Opc)) {

> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits