[PATCH] R600/SI: Implement add i64 support

Tom Stellard tom at stellard.net
Fri Nov 15 10:19:05 PST 2013


Hi Matt,

Overall, these look good; just a few comments.  I want to test these
before they are pushed, and I will let you know when I am done.

-Tom

On Thu, Nov 14, 2013 at 03:52:08PM -0800, Matt Arsenault wrote:
> This set of patches implements i64 add for SI and fixes problems I ran into when working on it. It doesn't yet enable it since I still need to fix expanding global pointer adds in most cases where it isn't necessary.

> From 5a652197362274458354fb1110a9894cf123981f Mon Sep 17 00:00:00 2001
> From: Matt Arsenault <Matthew.Arsenault at amd.com>
> Date: Tue, 15 Oct 2013 13:24:45 -0700
> Subject: [PATCH 01/10] R600/SI: Replace uses of SCC with VCC.
> 
> When replacing scalar operations with vector instructions,
> the wrong implicit output register was used.

Has this patch been obsoleted by #8?

> ---
>  lib/Target/R600/SIInstrInfo.cpp | 9 +++++++++
>  1 file changed, 9 insertions(+)
> 
> diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp
> index 11710b4..0b782d8 100644
> --- a/lib/Target/R600/SIInstrInfo.cpp
> +++ b/lib/Target/R600/SIInstrInfo.cpp
> @@ -450,6 +450,13 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
>        }
>        legalizeOpWithMove(MI, Src1Idx);
>      }
> +
> +    // Replace uses of SCC with VCC.
> +    for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) {
> +      MachineOperand &Op = MI->getOperand(I);
> +      if (Op.isReg() && Op.getReg() == AMDGPU::SCC)
> +        Op.setReg(AMDGPU::VCC);
> +    }
>    }
>  
>    // Legalize VOP3
> @@ -466,6 +473,8 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
>          if (!RI.isSGPRClass(MRI.getRegClass(MO.getReg())))
>            continue; // VGPRs are legal
>  
> +        assert(MO.getReg() != AMDGPU::SCC && "SCC operand to VOP3 instruction");
> +
>          if (SGPRReg == AMDGPU::NoRegister || SGPRReg == MO.getReg()) {
>            SGPRReg = MO.getReg();
>            // We can use one SGPR in each VOP3 instruction.
> -- 
> 1.8.4.1
> 


> From 9d95888414300adb50ed1f3b88e57c4f40cf3d9b Mon Sep 17 00:00:00 2001
> From: Matt Arsenault <Matthew.Arsenault at amd.com>
> Date: Thu, 17 Oct 2013 13:21:20 -0700
> Subject: [PATCH 10/10] R600/SI: Fix moveToVALU when the first operand is VSrc.
> 
> Moving into a VSrc register class doesn't always work, since the new
> register could later be allocated to an SGPR.
> ---
>  lib/Target/R600/SIInstrInfo.cpp | 36 ++++++++++++++++++++++++++++++++++--
>  lib/Target/R600/SIInstrInfo.h   |  6 ++++++
>  test/CodeGen/R600/add_i64.ll    | 21 ++++++++++-----------
>  3 files changed, 50 insertions(+), 13 deletions(-)
> 
> diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp
> index 7f23ecf..d29f3ae 100644
> --- a/lib/Target/R600/SIInstrInfo.cpp
> +++ b/lib/Target/R600/SIInstrInfo.cpp
> @@ -386,6 +386,38 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) {
>    }
>  }
>  
> +unsigned SIInstrInfo::getVRegClassID(unsigned ScalarRegClassID) {

I added a function called SIRegisterInfo::getEquivalentVGPRClass() which
does exactly the same thing.  I would prefer to use that function, since
it doesn't require such a large switch statement.  You will need to add
a case for SCC/VCC to it, though.
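
For reference, here is a rough, untested sketch of what I have in mind
at the call site in legalizeOpWithMove().  The exact signature of
getEquivalentVGPRClass() and its behavior for classes that are already
VGPR classes are assumptions on my part, not something taken from this
patch:

  // Map the operand's scalar register class to the equivalent VGPR
  // class via SIRegisterInfo instead of a new switch in SIInstrInfo.
  unsigned RCID = get(MI->getOpcode()).OpInfo[OpIdx].RegClass;
  const TargetRegisterClass *RC = RI.getRegClass(RCID);
  // Assumed to return the matching VReg_* class for SReg_*/SSrc_*/VSrc_*
  // inputs, and the input class itself if it is already a VGPR class.
  const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(RC);
  unsigned Reg = MRI.createVirtualRegister(VRC);

The SCC/VCC case you would add to getEquivalentVGPRClass() would then
look something like this (hypothetical; the class names are derived from
the *RegClassID names in the quoted patch, and SRC stands for the
function's source-class parameter):

  if (SRC == &AMDGPU::SCCRegRegClass)
    return &AMDGPU::VCCRegRegClass;

That keeps the scalar-to-vector class mapping in SIRegisterInfo rather
than duplicating it in SIInstrInfo.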

> +  switch (ScalarRegClassID) {
> +  case AMDGPU::SCCRegRegClassID:
> +    return AMDGPU::VCCRegRegClassID;
> +
> +  case AMDGPU::SReg_32RegClassID:
> +  case AMDGPU::SSrc_32RegClassID:
> +  case AMDGPU::SGPR_32RegClassID:
> +  case AMDGPU::VReg_32RegClassID:
> +  case AMDGPU::VSrc_32RegClassID:
> +  case AMDGPU::VGPR_32RegClassID:
> +    return AMDGPU::VReg_32RegClassID;
> +
> +  case AMDGPU::SReg_64RegClassID:
> +  case AMDGPU::SSrc_64RegClassID:
> +  case AMDGPU::SGPR_64RegClassID:
> +  case AMDGPU::VReg_64RegClassID:
> +  case AMDGPU::VSrc_64RegClassID:
> +    return AMDGPU::VReg_64RegClassID;
> +
> +  case AMDGPU::SReg_128RegClassID:
> +  case AMDGPU::VReg_128RegClassID:
> +    return AMDGPU::VReg_128RegClassID;
> +
> +  case AMDGPU::SReg_256RegClassID:
> +  case AMDGPU::VReg_256RegClassID:
> +    return AMDGPU::VReg_256RegClassID;
> +  default:
> +    llvm_unreachable("Bad SI register class ID");
> +  }
> +}
> +
>  bool SIInstrInfo::isSALUOpSupportedOnVALU(const MachineInstr &MI) const {
>    return getVALUOp(MI) != AMDGPU::INSTRUCTION_LIST_END;
>  }
> @@ -417,7 +449,6 @@ void SIInstrInfo::legalizeOpWithMove(MachineInstr *MI, unsigned OpIdx) const {
>    MachineOperand &MO = MI->getOperand(OpIdx);
>    MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
>    unsigned RCID = get(MI->getOpcode()).OpInfo[OpIdx].RegClass;
> -  // XXX - This shouldn't be VSrc
>    const TargetRegisterClass *RC = RI.getRegClass(RCID);
>    unsigned Opcode = AMDGPU::V_MOV_B32_e32;
>    if (MO.isReg()) {
> @@ -426,7 +457,8 @@ void SIInstrInfo::legalizeOpWithMove(MachineInstr *MI, unsigned OpIdx) const {
>      Opcode = AMDGPU::S_MOV_B32;
>    }
>  
> -  unsigned Reg = MRI.createVirtualRegister(RI.getRegClass(RCID));
> +  unsigned VRCID = getVRegClassID(RCID);
> +  unsigned Reg = MRI.createVirtualRegister(RI.getRegClass(VRCID));
>    BuildMI(*MI->getParent(), I, MI->getParent()->findDebugLoc(I), get(Opcode),
>            Reg).addOperand(MO);
>    MO.ChangeToRegister(Reg, false);
> diff --git a/lib/Target/R600/SIInstrInfo.h b/lib/Target/R600/SIInstrInfo.h
> index 4af6348..7cb1857 100644
> --- a/lib/Target/R600/SIInstrInfo.h
> +++ b/lib/Target/R600/SIInstrInfo.h
> @@ -69,6 +69,12 @@ public:
>  
>    bool isSALUInstr(const MachineInstr &MI) const;
>    static unsigned getVALUOp(const MachineInstr &MI);
> +
> +  /// \brief For the register class ID of a scalar register operand, get the
> +  /// appropriately sized vector register class to move it into. This is
> +  /// mostly used to replace VSrc operands with VReg operands. We don't want
> +  /// to move into a VSrc, since that could later be replaced with an SGPR.
> +  static unsigned getVRegClassID(unsigned ScalarRCID);
>    bool isSALUOpSupportedOnVALU(const MachineInstr &MI) const;
>  
>    /// \brief Return the correct register class for \p OpNo.  For target-specific
> diff --git a/test/CodeGen/R600/add_i64.ll b/test/CodeGen/R600/add_i64.ll
> index c02bb52..3ef044a 100644
> --- a/test/CodeGen/R600/add_i64.ll
> +++ b/test/CodeGen/R600/add_i64.ll
> @@ -15,26 +15,25 @@ define void @test_i64_vreg(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noa
>    ret void
>  }
>  
> -; SI-LABEL: @one_sgpr:
> -define void @one_sgpr(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i64 addrspace(1)* noalias %in_bar, i64 %a) {
> +; Check that the SGPR add operand is correctly moved to a VGPR.
> +; SI-LABEL: @sgpr_operand:
> +define void @sgpr_operand(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i64 addrspace(1)* noalias %in_bar, i64 %a) {
>    %foo = load i64 addrspace(1)* %in, align 8
>    %result = add i64 %foo, %a
>    store i64 %result, i64 addrspace(1)* %out
>    ret void
>  }
>  
> -; FIXME: This case is broken
> -;
>  ; Swap the arguments. Check that the SGPR -> VGPR copy works with the
> ; SGPR as the other operand.
>  ;
> -; XXXSI-LABEL: @one_sgpr_reversed:
> -; define void @one_sgpr_reversed(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i64 %a) {
> -;   %foo = load i64 addrspace(1)* %in, align 8
> -;   %result = add i64 %a, %foo
> -;   store i64 %result, i64 addrspace(1)* %out
> -;   ret void
> -; }
> +; SI-LABEL: @sgpr_operand_reversed:
> +define void @sgpr_operand_reversed(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i64 %a) {
> +  %foo = load i64 addrspace(1)* %in, align 8
> +  %result = add i64 %a, %foo
> +  store i64 %result, i64 addrspace(1)* %out
> +  ret void
> +}
>  
>  
>  ; SI-LABEL: @test_v2i64_sreg:
> -- 
> 1.8.4.1
> 



