[PATCH] R600/SI: Implement add i64 support
Tom Stellard
tom at stellard.net
Fri Nov 15 10:19:05 PST 2013
Hi Matt,
Overall, these look good, just a few comments. I want to test these
before they are pushed. I will let you know when I am done with this.
-Tom
On Thu, Nov 14, 2013 at 03:52:08PM -0800, Matt Arsenault wrote:
> This set of patches implements i64 add for SI and fixes problems I ran into when working on it. It doesn?t yet enable it since I still need to fix expanding global pointer adds in most cases where it isn?t necessary.
> From 5a652197362274458354fb1110a9894cf123981f Mon Sep 17 00:00:00 2001
> From: Matt Arsenault <Matthew.Arsenault at amd.com>
> Date: Tue, 15 Oct 2013 13:24:45 -0700
> Subject: [PATCH 01/10] R600/SI: Replace uses of SCC with VCC.
>
> When replacing scalar operations with vector,
> the wrong implicit output register was used.
Has this patch been obsoleted by #8 ?
> ---
> lib/Target/R600/SIInstrInfo.cpp | 9 +++++++++
> 1 file changed, 9 insertions(+)
>
> diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp
> index 11710b4..0b782d8 100644
> --- a/lib/Target/R600/SIInstrInfo.cpp
> +++ b/lib/Target/R600/SIInstrInfo.cpp
> @@ -450,6 +450,13 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
> }
> legalizeOpWithMove(MI, Src1Idx);
> }
> +
> + // Replace uses of SCC with VCC.
> + for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) {
> + MachineOperand &Op = MI->getOperand(I);
> + if (Op.isReg() && Op.getReg() == AMDGPU::SCC)
> + Op.setReg(AMDGPU::VCC);
> + }
> }
>
> // Legalize VOP3
> @@ -466,6 +473,8 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
> if (!RI.isSGPRClass(MRI.getRegClass(MO.getReg())))
> continue; // VGPRs are legal
>
> + assert(MO.getReg() != AMDGPU::SCC && "SCC operand to VOP3 instruction");
> +
> if (SGPRReg == AMDGPU::NoRegister || SGPRReg == MO.getReg()) {
> SGPRReg = MO.getReg();
> // We can use one SGPR in each VOP3 instruction.
> --
> 1.8.4.1
>
> From 9d95888414300adb50ed1f3b88e57c4f40cf3d9b Mon Sep 17 00:00:00 2001
> From: Matt Arsenault <Matthew.Arsenault at amd.com>
> Date: Thu, 17 Oct 2013 13:21:20 -0700
> Subject: [PATCH 10/10] R600/SI: Fix moveToVALU when the first operand is VSrc.
>
> Moving into a VSrc doesn't always work, since it could be
> replaced with an SGPR later.
> ---
> lib/Target/R600/SIInstrInfo.cpp | 36 ++++++++++++++++++++++++++++++++++--
> lib/Target/R600/SIInstrInfo.h | 6 ++++++
> test/CodeGen/R600/add_i64.ll | 21 ++++++++++-----------
> 3 files changed, 50 insertions(+), 13 deletions(-)
>
> diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp
> index 7f23ecf..d29f3ae 100644
> --- a/lib/Target/R600/SIInstrInfo.cpp
> +++ b/lib/Target/R600/SIInstrInfo.cpp
> @@ -386,6 +386,38 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) {
> }
> }
>
> +unsigned SIInstrInfo::getVRegClassID(unsigned ScalarRegClassID) {
I added a function called SIRegisterInfo::getEquivalentVGPRClass() which
does this exact same thing. I would prefer to use this function,
because it doesn't require such a large switch statement. You will
need to add a case for SCC/VCC to it though.
> + switch (ScalarRegClassID) {
> + case AMDGPU::SCCRegRegClassID:
> + return AMDGPU::VCCRegRegClassID;
> +
> + case AMDGPU::SReg_32RegClassID:
> + case AMDGPU::SSrc_32RegClassID:
> + case AMDGPU::SGPR_32RegClassID:
> + case AMDGPU::VReg_32RegClassID:
> + case AMDGPU::VSrc_32RegClassID:
> + case AMDGPU::VGPR_32RegClassID:
> + return AMDGPU::VReg_32RegClassID;
> +
> + case AMDGPU::SReg_64RegClassID:
> + case AMDGPU::SSrc_64RegClassID:
> + case AMDGPU::SGPR_64RegClassID:
> + case AMDGPU::VReg_64RegClassID:
> + case AMDGPU::VSrc_64RegClassID:
> + return AMDGPU::VReg_64RegClassID;
> +
> + case AMDGPU::SReg_128RegClassID:
> + case AMDGPU::VReg_128RegClassID:
> + return AMDGPU::VReg_128RegClassID;
> +
> + case AMDGPU::SReg_256RegClassID:
> + case AMDGPU::VReg_256RegClassID:
> + return AMDGPU::VReg_256RegClassID;
> + default:
> + llvm_unreachable("Bad SI register class ID");
> + }
> +}
> +
> bool SIInstrInfo::isSALUOpSupportedOnVALU(const MachineInstr &MI) const {
> return getVALUOp(MI) != AMDGPU::INSTRUCTION_LIST_END;
> }
> @@ -417,7 +449,6 @@ void SIInstrInfo::legalizeOpWithMove(MachineInstr *MI, unsigned OpIdx) const {
> MachineOperand &MO = MI->getOperand(OpIdx);
> MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
> unsigned RCID = get(MI->getOpcode()).OpInfo[OpIdx].RegClass;
> - // XXX - This shouldn't be VSrc
> const TargetRegisterClass *RC = RI.getRegClass(RCID);
> unsigned Opcode = AMDGPU::V_MOV_B32_e32;
> if (MO.isReg()) {
> @@ -426,7 +457,8 @@ void SIInstrInfo::legalizeOpWithMove(MachineInstr *MI, unsigned OpIdx) const {
> Opcode = AMDGPU::S_MOV_B32;
> }
>
> - unsigned Reg = MRI.createVirtualRegister(RI.getRegClass(RCID));
> + unsigned VRCID = getVRegClassID(RCID);
> + unsigned Reg = MRI.createVirtualRegister(RI.getRegClass(VRCID));
> BuildMI(*MI->getParent(), I, MI->getParent()->findDebugLoc(I), get(Opcode),
> Reg).addOperand(MO);
> MO.ChangeToRegister(Reg, false);
> diff --git a/lib/Target/R600/SIInstrInfo.h b/lib/Target/R600/SIInstrInfo.h
> index 4af6348..7cb1857 100644
> --- a/lib/Target/R600/SIInstrInfo.h
> +++ b/lib/Target/R600/SIInstrInfo.h
> @@ -69,6 +69,12 @@ public:
>
> bool isSALUInstr(const MachineInstr &MI) const;
> static unsigned getVALUOp(const MachineInstr &MI);
> +
> + /// \brief For a given register ID of scalar register operand, get the
> + /// appropriately sized vector register class to move it into. This is mostly
> + /// to replace VSrc operands with VReg. We don't want to move into a VSrc,
> + /// since that could later be replaced with an SGPR which we don't want.
> + static unsigned getVRegClassID(unsigned ScalarRCID);
> bool isSALUOpSupportedOnVALU(const MachineInstr &MI) const;
>
> /// \brief Return the correct register class for \p OpNo. For target-specific
> diff --git a/test/CodeGen/R600/add_i64.ll b/test/CodeGen/R600/add_i64.ll
> index c02bb52..3ef044a 100644
> --- a/test/CodeGen/R600/add_i64.ll
> +++ b/test/CodeGen/R600/add_i64.ll
> @@ -15,26 +15,25 @@ define void @test_i64_vreg(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noa
> ret void
> }
>
> -; SI-LABEL: @one_sgpr:
> -define void @one_sgpr(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i64 addrspace(1)* noalias %in_bar, i64 %a) {
> +; Check that the SGPR add operand is correctly moved to a VGPR.
> +; SI-LABEL: @sgpr_operand:
> +define void @sgpr_operand(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i64 addrspace(1)* noalias %in_bar, i64 %a) {
> %foo = load i64 addrspace(1)* %in, align 8
> %result = add i64 %foo, %a
> store i64 %result, i64 addrspace(1)* %out
> ret void
> }
>
> -; FIXME: This case is broken
> -;
> ; Swap the arguments. Check that the SGPR -> VGPR copy works with the
> ; SGPR as other operand.
> ;
> -; XXXSI-LABEL: @one_sgpr_reversed:
> -; define void @one_sgpr_reversed(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i64 %a) {
> -; %foo = load i64 addrspace(1)* %in, align 8
> -; %result = add i64 %a, %foo
> -; store i64 %result, i64 addrspace(1)* %out
> -; ret void
> -; }
> +; SI-LABEL: @sgpr_operand_reversed:
> +define void @sgpr_operand_reversed(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i64 %a) {
> + %foo = load i64 addrspace(1)* %in, align 8
> + %result = add i64 %a, %foo
> + store i64 %result, i64 addrspace(1)* %out
> + ret void
> +}
>
>
> ; SI-LABEL: @test_v2i64_sreg:
> --
> 1.8.4.1
>
More information about the llvm-commits
mailing list