[llvm] r324353 - AMDGPU: Fix S_BUFFER_LOAD_DWORD_SGPR moveToVALU

Hans Wennborg via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 19 05:57:26 PST 2018


Merged to the 6.0 release branch in r325497.

On Tue, Feb 6, 2018 at 4:17 PM, Marek Olsak via llvm-commits
<llvm-commits at lists.llvm.org> wrote:
> Author: mareko
> Date: Tue Feb  6 07:17:55 2018
> New Revision: 324353
>
> URL: http://llvm.org/viewvc/llvm-project?rev=324353&view=rev
> Log:
> AMDGPU: Fix S_BUFFER_LOAD_DWORD_SGPR moveToVALU
>
> Author: Bas Nieuwenhuizen
>
> https://reviews.llvm.org/D42881
>
> Modified:
>     llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
>     llvm/trunk/test/CodeGen/AMDGPU/smrd.ll
>
> Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp?rev=324353&r1=324352&r2=324353&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp (original)
> +++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp Tue Feb  6 07:17:55 2018
> @@ -3797,7 +3797,8 @@ void SIInstrInfo::moveToVALU(MachineInst
>          }
>        }
>
> -      BuildMI(*MBB, Inst, Inst.getDebugLoc(),
> +      MachineInstr *NewInstr =
> +        BuildMI(*MBB, Inst, Inst.getDebugLoc(),
>                get(AMDGPU::BUFFER_LOAD_DWORD_OFFEN), VDst)
>          .add(*VAddr) // vaddr
>          .add(*getNamedOperand(Inst, AMDGPU::OpName::sbase)) // srsrc
> @@ -3806,12 +3807,17 @@ void SIInstrInfo::moveToVALU(MachineInst
>          .addImm(getNamedOperand(Inst, AMDGPU::OpName::glc)->getImm())
>          .addImm(0) // slc
>          .addImm(0) // tfe
> -        .setMemRefs(Inst.memoperands_begin(), Inst.memoperands_end());
> +        .setMemRefs(Inst.memoperands_begin(), Inst.memoperands_end())
> +        .getInstr();
>
>        MRI.replaceRegWith(getNamedOperand(Inst, AMDGPU::OpName::sdst)->getReg(),
>                           VDst);
>        addUsersToMoveToVALUWorklist(VDst, MRI, Worklist);
>        Inst.eraseFromParent();
> +
> +      // Legalize all operands other than the offset. Notably, convert the srsrc
> +      // into SGPRs using v_readfirstlane if needed.
> +      legalizeOperands(*NewInstr);
>        continue;
>      }
>      }
>
> Modified: llvm/trunk/test/CodeGen/AMDGPU/smrd.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/smrd.ll?rev=324353&r1=324352&r2=324353&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/AMDGPU/smrd.ll (original)
> +++ llvm/trunk/test/CodeGen/AMDGPU/smrd.ll Tue Feb  6 07:17:55 2018
> @@ -261,8 +261,42 @@ main_body:
>    ret void
>  }
>
> +; GCN-LABEL: {{^}}smrd_sgpr_descriptor_promoted
> +; GCN: v_readfirstlane
> +define amdgpu_cs void @smrd_sgpr_descriptor_promoted([0 x i8] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), i32) #0 {
> +main_body:
> +  %descptr = bitcast [0 x i8] addrspace(2)* %0 to <4 x i32> addrspace(2)*, !amdgpu.uniform !0
> +  br label %.outer_loop_header
> +
> +ret_block:                                       ; preds = %.outer, %.label22, %main_body
> +  ret void
> +
> +.outer_loop_header:
> +  br label %.inner_loop_header
> +
> +.inner_loop_header:                                     ; preds = %.inner_loop_body, %.outer_loop_header
> +  %loopctr.1 = phi i32 [ 0, %.outer_loop_header ], [ %loopctr.2, %.inner_loop_body ]
> +  %loopctr.2 = add i32 %loopctr.1, 1
> +  %inner_br1 = icmp slt i32 %loopctr.2, 10
> +  br i1 %inner_br1, label %.inner_loop_body, label %ret_block
> +
> +.inner_loop_body:
> +  %descriptor = load <4 x i32>, <4 x i32> addrspace(2)* %descptr, align 16, !invariant.load !0
> +  %load1result = call float @llvm.SI.load.const.v4i32(<4 x i32> %descriptor, i32 0)
> +  %inner_br2 = icmp uge i32 %1, 10
> +  br i1 %inner_br2, label %.inner_loop_header, label %.outer_loop_body
> +
> +.outer_loop_body:
> +  %offset = shl i32 %loopctr.2, 6
> +  %load2result = call float @llvm.SI.load.const.v4i32(<4 x i32> %descriptor, i32 %offset)
> +  %outer_br = fcmp ueq float %load2result, 0x0
> +  br i1 %outer_br, label %.outer_loop_header, label %ret_block
> +}
> +
>  declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
>  declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #1
>
>  attributes #0 = { nounwind }
>  attributes #1 = { nounwind readnone }
> +
> +!0 = !{}
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits


More information about the llvm-commits mailing list