[PATCH] R600/SI: Adjust buffer_load opcodes
Tom Stellard
tom at stellard.net
Wed Apr 17 20:40:56 PDT 2013
On Tue, Apr 16, 2013 at 10:55:24PM +0200, Christian König wrote:
> From: Christian König <christian.koenig at amd.com>
>
> Only try to load the used vector components.
>
Reviewed-by: Tom Stellard <thomas.stellard at amd.com>
> Signed-off-by: Christian König <christian.koenig at amd.com>
> ---
> lib/Target/R600/SIISelLowering.cpp | 72 ++++++++++++++++++++++++----
> lib/Target/R600/SIISelLowering.h | 1 +
> lib/Target/R600/SIInstructions.td | 6 +--
> test/CodeGen/R600/llvm.SI.vs.load.input.ll | 25 ++++++++++
> 4 files changed, 91 insertions(+), 13 deletions(-)
> create mode 100644 test/CodeGen/R600/llvm.SI.vs.load.input.ll
>
> diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
> index 0147464..859fcc6 100644
> --- a/lib/Target/R600/SIISelLowering.cpp
> +++ b/lib/Target/R600/SIISelLowering.cpp
> @@ -706,7 +706,7 @@ SDNode *SITargetLowering::foldOperands(MachineSDNode *Node,
> }
>
> /// \brief Helper function for adjustWritemask
> -unsigned SubIdx2Lane(unsigned Idx) {
> +static unsigned subIdx2Lane(unsigned Idx) {
> switch (Idx) {
> default: return 0;
> case AMDGPU::sub0: return 0;
> @@ -716,11 +716,11 @@ unsigned SubIdx2Lane(unsigned Idx) {
> }
> }
>
> -/// \brief Adjust the writemask of MIMG instructions
> -void SITargetLowering::adjustWritemask(MachineSDNode *&Node,
> - SelectionDAG &DAG) const {
> - SDNode *Users[4] = { };
> - unsigned Writemask = 0, Lane = 0;
> +/// \brief Get the sub components used in a node's result
> +static unsigned getUsedComponents(SDNode *Node, SDNode **Users,
> + unsigned &Lane) {
> +
> + unsigned Writemask = 0;
>
> // Try to figure out the used register components
> for (SDNode::use_iterator I = Node->use_begin(), E = Node->use_end();
> @@ -729,18 +729,29 @@ void SITargetLowering::adjustWritemask(MachineSDNode *&Node,
> // Abort if we can't understand the usage
> if (!I->isMachineOpcode() ||
> I->getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG)
> - return;
> + return 0xf;
>
> - Lane = SubIdx2Lane(I->getConstantOperandVal(1));
> + Lane = subIdx2Lane(I->getConstantOperandVal(1));
>
> // Abort if we have more than one user per component
> if (Users[Lane])
> - return;
> + return 0xf;
>
> Users[Lane] = *I;
> Writemask |= 1 << Lane;
> }
>
> + return Writemask;
> +}
> +
> +/// \brief Adjust the writemask of MIMG instructions
> +void SITargetLowering::adjustWritemask(MachineSDNode *&Node,
> + SelectionDAG &DAG) const {
> + unsigned Writemask = 0, Lane = 0;
> + SDNode *Users[4] = { };
> +
> + Writemask = getUsedComponents(Node, Users, Lane);
> +
> // Abort if all components are used
> if (Writemask == 0xf)
> return;
> @@ -754,6 +765,7 @@ void SITargetLowering::adjustWritemask(MachineSDNode *&Node,
>
> // If we only got one lane, replace it with a copy
> if (Writemask == (1U << Lane)) {
> +
> SDValue RC = DAG.getTargetConstant(AMDGPU::VReg_32RegClassID, MVT::i32);
> SDNode *Copy = DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
> DebugLoc(), MVT::f32,
> @@ -781,13 +793,53 @@ void SITargetLowering::adjustWritemask(MachineSDNode *&Node,
> }
> }
>
> +void SITargetLowering::adjustBufferLoad(MachineSDNode *&Node,
> + SelectionDAG &DAG) const {
> + unsigned Writemask = 0, Lane = 0;
> + SDNode *Users[4] = { };
> +
> + Writemask = getUsedComponents(Node, Users, Lane);
> +
> + unsigned Opcode;
> + if (Writemask >= 0x8)
> + return;
> + else if (Writemask >= 0x4)
> + Opcode = AMDGPU::BUFFER_LOAD_FORMAT_XYZ;
> + else if (Writemask >= 0x2)
> + Opcode = AMDGPU::BUFFER_LOAD_FORMAT_XY;
> + else if (Writemask == 0x1) {
> + Opcode = AMDGPU::BUFFER_LOAD_FORMAT_X;
> +
> + // If only X is used replace it with a copy
> + SDValue RC = DAG.getTargetConstant(AMDGPU::VReg_32RegClassID, MVT::i32);
> + SDNode *Copy = DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
> + DebugLoc(), MVT::f32,
> + SDValue(Node, 0), RC);
> + DAG.ReplaceAllUsesWith(Users[0], Copy);
> + } else
> + llvm_unreachable("Unused buffer load not optimized away!");
> +
> + // Create a node with the new opcode
> + std::vector<SDValue> Ops;
> + for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i)
> + Ops.push_back(Node->getOperand(i));
> +
> + Node = DAG.getMachineNode(Opcode, Node->getDebugLoc(),
> + Node->getValueType(0), Ops.data(),
> + Ops.size());
> +}
> +
> /// \brief Fold the instructions after slecting them
> SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
> SelectionDAG &DAG) const {
>
> - if (AMDGPU::isMIMG(Node->getMachineOpcode()) != -1)
> + unsigned Opcode = Node->getMachineOpcode();
> + if (AMDGPU::isMIMG(Opcode) != -1)
> adjustWritemask(Node, DAG);
>
> + if (AMDGPU::BUFFER_LOAD_FORMAT_XYZW == Opcode)
> + adjustBufferLoad(Node, DAG);
> +
> return foldOperands(Node, DAG);
> }
>
> diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h
> index de637be..80859d5 100644
> --- a/lib/Target/R600/SIISelLowering.h
> +++ b/lib/Target/R600/SIISelLowering.h
> @@ -36,6 +36,7 @@ class SITargetLowering : public AMDGPUTargetLowering {
>
> SDNode *foldOperands(MachineSDNode *N, SelectionDAG &DAG) const;
> void adjustWritemask(MachineSDNode *&N, SelectionDAG &DAG) const;
> + void adjustBufferLoad(MachineSDNode *&Node, SelectionDAG &DAG) const;
>
> public:
> SITargetLowering(TargetMachine &tm);
> diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
> index e481ef9..01a1ddd 100644
> --- a/lib/Target/R600/SIInstructions.td
> +++ b/lib/Target/R600/SIInstructions.td
> @@ -391,9 +391,9 @@ defm V_CMPX_CLASS_F64 : VOPC_64 <0x000000b8, "V_CMPX_CLASS_F64">;
>
> } // End isCompare = 1
>
> -//def BUFFER_LOAD_FORMAT_X : MUBUF_ <0x00000000, "BUFFER_LOAD_FORMAT_X", []>;
> -//def BUFFER_LOAD_FORMAT_XY : MUBUF_ <0x00000001, "BUFFER_LOAD_FORMAT_XY", []>;
> -//def BUFFER_LOAD_FORMAT_XYZ : MUBUF_ <0x00000002, "BUFFER_LOAD_FORMAT_XYZ", []>;
> +def BUFFER_LOAD_FORMAT_X : MUBUF_Load_Helper <0x00000000, "BUFFER_LOAD_FORMAT_X", VReg_32>;
> +def BUFFER_LOAD_FORMAT_XY : MUBUF_Load_Helper <0x00000001, "BUFFER_LOAD_FORMAT_XY", VReg_64>;
> +def BUFFER_LOAD_FORMAT_XYZ : MUBUF_Load_Helper <0x00000002, "BUFFER_LOAD_FORMAT_XYZ", VReg_96>;
> def BUFFER_LOAD_FORMAT_XYZW : MUBUF_Load_Helper <0x00000003, "BUFFER_LOAD_FORMAT_XYZW", VReg_128>;
> //def BUFFER_STORE_FORMAT_X : MUBUF_ <0x00000004, "BUFFER_STORE_FORMAT_X", []>;
> //def BUFFER_STORE_FORMAT_XY : MUBUF_ <0x00000005, "BUFFER_STORE_FORMAT_XY", []>;
> diff --git a/test/CodeGen/R600/llvm.SI.vs.load.input.ll b/test/CodeGen/R600/llvm.SI.vs.load.input.ll
> new file mode 100644
> index 0000000..905794d
> --- /dev/null
> +++ b/test/CodeGen/R600/llvm.SI.vs.load.input.ll
> @@ -0,0 +1,25 @@
> +;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s
> +
> +;CHECK: BUFFER_LOAD_FORMAT_XYZW {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}
> +;CHECK: BUFFER_LOAD_FORMAT_XYZ {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}
> +;CHECK: BUFFER_LOAD_FORMAT_XY {{VGPR[0-9]+_VGPR[0-9]+}}
> +;CHECK: BUFFER_LOAD_FORMAT_X {{VGPR[0-9]+}}
> +
> +define void @main(<16 x i8> addrspace(2)* inreg %rp, i32 %i0, i32 %i1, i32 %i2, i32 %i3) {
> +main_body:
> + %r = load <16 x i8> addrspace(2)* %rp
> + %v0 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %r, i32 0, i32 %i0)
> + %e0 = extractelement <4 x float> %v0, i32 0
> + %v1 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %r, i32 0, i32 %i1)
> + %e1 = extractelement <4 x float> %v1, i32 1
> + %v2 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %r, i32 0, i32 %i2)
> + %e2 = extractelement <4 x float> %v2, i32 2
> + %v3 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %r, i32 0, i32 %i3)
> + %e3 = extractelement <4 x float> %v3, i32 3
> + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %e0, float %e1, float %e2, float %e3)
> + ret void
> +}
> +
> +declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) nounwind readnone
> +
> +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
> --
> 1.7.10.4
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
More information about the llvm-commits
mailing list