[PATCH] R600/SI: Fix indirect addressing with a negative constant offset

Matt Arsenault Matthew.Arsenault at amd.com
Fri Mar 20 10:51:19 PDT 2015


On 03/19/2015 06:35 PM, Tom Stellard wrote:
> When the base register index of the vector plus the constant offset
> was less than zero, we were passing the wrong base register to the indirect
> addressing instruction.
>
> In this case, we need to set the base register to v0 and then add
> the computed (negative) index to m0.
> ---
>   lib/Target/R600/SILowerControlFlow.cpp      | 71 ++++++++++++++++++++++-------
>   test/CodeGen/R600/indirect-addressing-si.ll | 57 +++++++++++++++++++++++
>   2 files changed, 112 insertions(+), 16 deletions(-)
>
> diff --git a/lib/Target/R600/SILowerControlFlow.cpp b/lib/Target/R600/SILowerControlFlow.cpp
> index 2e08c9f..c319b32 100644
> --- a/lib/Target/R600/SILowerControlFlow.cpp
> +++ b/lib/Target/R600/SILowerControlFlow.cpp
> @@ -88,7 +88,8 @@ private:
>     void Kill(MachineInstr &MI);
>     void Branch(MachineInstr &MI);
>   
> -  void LoadM0(MachineInstr &MI, MachineInstr *MovRel);
> +  void LoadM0(MachineInstr &MI, MachineInstr *MovRel, int Offset = 0);
> +  void computeIndirectRegAndOffset(unsigned VecReg, unsigned &Reg, int &Offset);
>     void IndirectSrc(MachineInstr &MI);
>     void IndirectDst(MachineInstr &MI);
>   
> @@ -323,7 +324,7 @@ void SILowerControlFlowPass::Kill(MachineInstr &MI) {
>     MI.eraseFromParent();
>   }
>   
> -void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) {
> +void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel, int Offset) {
>   
>     MachineBasicBlock &MBB = *MI.getParent();
>     DebugLoc DL = MI.getDebugLoc();
> @@ -333,8 +334,14 @@ void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) {
>     unsigned Idx = MI.getOperand(3).getReg();
>   
>     if (AMDGPU::SReg_32RegClass.contains(Idx)) {
> -    BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
> -            .addReg(Idx);
> +    if (Offset) {
> +      BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_ADD_I32), AMDGPU::M0)
> +              .addReg(Idx)
> +              .addImm(Offset);
> +    } else {
> +      BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
> +              .addReg(Idx);
> +    }
>       MBB.insert(I, MovRel);
>     } else {
>   
> @@ -363,6 +370,11 @@ void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) {
>       BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_SAVEEXEC_B64), AMDGPU::VCC)
>               .addReg(AMDGPU::VCC);
>   
> +    if (Offset) {
> +      BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_ADD_I32), AMDGPU::M0)
> +              .addReg(AMDGPU::M0)
> +              .addImm(Offset);
> +    }
>       // Do the actual move
>       MBB.insert(I, MovRel);
>   
> @@ -384,6 +396,33 @@ void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) {
>     MI.eraseFromParent();
>   }
>   
> +/// \param @VecReg The register which holds element zero of the vector
> +///                 being addressed into.
> +/// \param[out] @Reg The base register to use in the indirect addressing instruction.
> +/// \param[in,out] @Offset As an input, this is the constant offset part of the
> +//                         indirect Index. e.g. v0 = v[VecReg + Offset]
> +//                         As an output, this is a constant value that needs
> +//                         to be added to the value stored in M0.
> +void SILowerControlFlowPass::computeIndirectRegAndOffset(unsigned VecReg,
> +                                                         unsigned &Reg,
> +                                                         int &Offset) {
> +  unsigned SubReg = TRI->getSubReg(VecReg, AMDGPU::sub0);
> +  if (!SubReg)
> +    SubReg = VecReg;
> +
> +  const TargetRegisterClass *RC = TRI->getPhysRegClass(SubReg);
> +  int RegIdx = TRI->getHWRegIndex(SubReg) + Offset;
> +
> +  if (RegIdx < 0) {
> +    Offset = RegIdx;
> +    RegIdx = 0;
> +  } else {
> +    Offset = 0;
> +  }
> +
> +  Reg = RC->getRegister(RegIdx);
> +}
> +
>   void SILowerControlFlowPass::IndirectSrc(MachineInstr &MI) {
>   
>     MachineBasicBlock &MBB = *MI.getParent();
> @@ -391,18 +430,18 @@ void SILowerControlFlowPass::IndirectSrc(MachineInstr &MI) {
>   
>     unsigned Dst = MI.getOperand(0).getReg();
>     unsigned Vec = MI.getOperand(2).getReg();
> -  unsigned Off = MI.getOperand(4).getImm();
> -  unsigned SubReg = TRI->getSubReg(Vec, AMDGPU::sub0);
> -  if (!SubReg)
> -    SubReg = Vec;
> +  int Off = MI.getOperand(4).getImm();
> +  unsigned Reg;
> +
> +  computeIndirectRegAndOffset(Vec, Reg, Off);
>   
>     MachineInstr *MovRel =
>       BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELS_B32_e32), Dst)
> -            .addReg(SubReg + Off)
> +            .addReg(Reg)
>               .addReg(AMDGPU::M0, RegState::Implicit)
>               .addReg(Vec, RegState::Implicit);
>   
> -  LoadM0(MI, MovRel);
> +  LoadM0(MI, MovRel, Off);
>   }
>   
>   void SILowerControlFlowPass::IndirectDst(MachineInstr &MI) {
> @@ -411,20 +450,20 @@ void SILowerControlFlowPass::IndirectDst(MachineInstr &MI) {
>     DebugLoc DL = MI.getDebugLoc();
>   
>     unsigned Dst = MI.getOperand(0).getReg();
> -  unsigned Off = MI.getOperand(4).getImm();
> +  int Off = MI.getOperand(4).getImm();
>     unsigned Val = MI.getOperand(5).getReg();
> -  unsigned SubReg = TRI->getSubReg(Dst, AMDGPU::sub0);
> -  if (!SubReg)
> -    SubReg = Dst;
> +  unsigned Reg;
> +
> +  computeIndirectRegAndOffset(Dst, Reg, Off);
>   
>     MachineInstr *MovRel =
>       BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELD_B32_e32))
> -            .addReg(SubReg + Off, RegState::Define)
> +            .addReg(Reg, RegState::Define)
>               .addReg(Val)
>               .addReg(AMDGPU::M0, RegState::Implicit)
>               .addReg(Dst, RegState::Implicit);
>   
> -  LoadM0(MI, MovRel);
> +  LoadM0(MI, MovRel, Off);
>   }
>   
>   bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
> diff --git a/test/CodeGen/R600/indirect-addressing-si.ll b/test/CodeGen/R600/indirect-addressing-si.ll
> index 319910f..d6624f2 100644
> --- a/test/CodeGen/R600/indirect-addressing-si.ll
> +++ b/test/CodeGen/R600/indirect-addressing-si.ll
> @@ -25,6 +25,33 @@ entry:
>     ret void
>   }
>   
> +; CHECK-LABEL: {{^}}extract_neg_offset_sgpr:
> +; The offset depends on the register that holds the first element of the vector.
> +; CHECK: s_add_i32 m0, s{{[0-9]+}}, 0xfffffe{{[0-9a-z]+}}
> +; CHECK: v_movrels_b32_e32 v{{[0-9]}}, v0
> +define void @extract_neg_offset_sgpr(i32 addrspace(1)* %out, i32 %offset) {
> +entry:
> +  %index = add i32 %offset, -512
> +  %value = extractelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 %index
> +  store i32 %value, i32 addrspace(1)* %out
> +  ret void
> +}
> +
> +; CHECK-LABEL: {{^}}extract_neg_offset_vgpr:
> +; The offset depends on the register that holds the first element of the vector.
> +; CHECK: v_readfirstlane_b32
> +; CHECK: s_add_i32 m0, m0, 0xfffffe{{[0-9a-z]+}}
> +; CHECK-NEXT: v_movrels_b32_e32 v{{[0-9]}}, v0
> +; CHECK: s_cbranch_execnz
> +define void @extract_neg_offset_vgpr(i32 addrspace(1)* %out) {
> +entry:
> +  %id = call i32 @llvm.r600.read.tidig.x() #1
> +  %index = add i32 %id, -512
> +  %value = extractelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 %index
> +  store i32 %value, i32 addrspace(1)* %out
> +  ret void
> +}
> +
>   ; CHECK-LABEL: {{^}}insert_w_offset:
>   ; CHECK: s_mov_b32 m0
>   ; CHECK-NEXT: v_movreld_b32_e32
> @@ -47,3 +74,33 @@ entry:
>     store float %1, float addrspace(1)* %out
>     ret void
>   }
> +
> +; CHECK-LABEL: {{^}}insert_neg_offset_sgpr:
> +; The offset depends on the register that holds the first element of the vector.
> +; CHECK: s_add_i32 m0, s{{[0-9]+}}, 0xfffffe{{[0-9a-z]+}}
> +; CHECK: v_movreld_b32_e32 v0, v{{[0-9]}}
> +define void @insert_neg_offset_sgpr(i32 addrspace(1)* %in, <4 x i32> addrspace(1)* %out, i32 %offset) {
> +entry:
> +  %index = add i32 %offset, -512
> +  %value = insertelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 5, i32 %index
> +  store <4 x i32> %value, <4 x i32> addrspace(1)* %out
> +  ret void
> +}
> +
> +; CHECK-LABEL: {{^}}insert_neg_offset_vgpr:
> +; The offset depends on the register that holds the first element of the vector.
> +; CHECK: v_readfirstlane_b32
> +; CHECK: s_add_i32 m0, m0, 0xfffffe{{[0-9a-z]+}}
> +; CHECK-NEXT: v_movreld_b32_e32 v0, v{{[0-9]}}
> +; CHECK: s_cbranch_execnz
> +define void @insert_neg_offset_vgpr(i32 addrspace(1)* %in, <4 x i32> addrspace(1)* %out) {
> +entry:
> +  %id = call i32 @llvm.r600.read.tidig.x() #1
> +  %index = add i32 %id, -512
> +  %value = insertelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 5, i32 %index
> +  store <4 x i32> %value, <4 x i32> addrspace(1)* %out
> +  ret void
> +}
> +
> +declare i32 @llvm.r600.read.tidig.x() #1
> +attributes #1 = { nounwind readnone }
> -- 2.0.4 _______________________________________________ llvm-commits 
> mailing list llvm-commits at cs.uiuc.edu 
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
LGTM. Tests with negative inline immediates might be useful
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20150320/0f2c5df4/attachment.html>


More information about the llvm-commits mailing list