[Mesa-dev] [PATCH] R600/SI: fix MIMG writemask adjustment

Tue Oct 22 19:57:32 PDT 2013

Pushed, thanks.

-Tom

On Tue, Oct 22, 2013 at 02:15:01AM +0200, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak at amd.com>
> 
> This fixes piglit:
> - shaders/glsl-fs-texture2d-masked
> - shaders/glsl-fs-texture2d-masked-4
> 
> Signed-off-by: Marek Olšák <marek.olsak at amd.com>
> Reviewed-by: Tom Stellard <thomas.stellard at amd.com>
> ---
>  lib/Target/R600/SIISelLowering.cpp         | 27 +++++++--
>  test/CodeGen/R600/llvm.SI.sample-masked.ll | 93 ++++++++++++++++++++++++++++++
>  2 files changed, 114 insertions(+), 6 deletions(-)
>  create mode 100644 test/CodeGen/R600/llvm.SI.sample-masked.ll
> 
> diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
> index 2c9270e..bfc9e8d 100644
> --- a/lib/Target/R600/SIISelLowering.cpp
> +++ b/lib/Target/R600/SIISelLowering.cpp
> @@ -1065,7 +1065,9 @@ static unsigned SubIdx2Lane(unsigned Idx) {
>  void SITargetLowering::adjustWritemask(MachineSDNode *&Node,
>                                         SelectionDAG &DAG) const {
>    SDNode *Users[4] = { };
> -  unsigned Writemask = 0, Lane = 0;
> +  unsigned Lane = 0;
> +  unsigned OldDmask = Node->getConstantOperandVal(0);
> +  unsigned NewDmask = 0;
>  
>    // Try to figure out the used register components
>    for (SDNode::use_iterator I = Node->use_begin(), E = Node->use_end();
> @@ -1076,29 +1078,42 @@ void SITargetLowering::adjustWritemask(MachineSDNode *&Node,
>          I->getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG)
>        return;
>  
> +    // Lane means which subreg of %VGPRa_VGPRb_VGPRc_VGPRd is used.
> +    // Note that subregs are packed, i.e. Lane==0 is the first bit set
> +    // in OldDmask, so it can be any of X,Y,Z,W; Lane==1 is the second bit
> +    // set, etc.
>      Lane = SubIdx2Lane(I->getConstantOperandVal(1));
>  
> +    // Set which texture component corresponds to the lane.
> +    unsigned Comp;
> +    for (unsigned i = 0, Dmask = OldDmask; i <= Lane; i++) {
> +      assert(Dmask);
> +      Comp = ffs(Dmask)-1;
> +      Dmask &= ~(1 << Comp);
> +    }
> +
>      // Abort if we have more than one user per component
>      if (Users[Lane])
>        return;
>  
>      Users[Lane] = *I;
> -    Writemask |= 1 << Lane;
> +    NewDmask |= 1 << Comp;
>    }
>  
> -  // Abort if all components are used
> -  if (Writemask == 0xf)
> +  // Abort if there's no change
> +  if (NewDmask == OldDmask)
>      return;
>  
>    // Adjust the writemask in the node
>    std::vector<SDValue> Ops;
> -  Ops.push_back(DAG.getTargetConstant(Writemask, MVT::i32));
> +  Ops.push_back(DAG.getTargetConstant(NewDmask, MVT::i32));
>    for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i)
>      Ops.push_back(Node->getOperand(i));
>    Node = (MachineSDNode*)DAG.UpdateNodeOperands(Node, Ops.data(), Ops.size());
>  
>    // If we only got one lane, replace it with a copy
> -  if (Writemask == (1U << Lane)) {
> +  // (if NewDmask has only one bit set...)
> +  if (NewDmask && (NewDmask & (NewDmask-1)) == 0) {
>      SDValue RC = DAG.getTargetConstant(AMDGPU::VReg_32RegClassID, MVT::i32);
>      SDNode *Copy = DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
>                                        SDLoc(), Users[Lane]->getValueType(0),
> diff --git a/test/CodeGen/R600/llvm.SI.sample-masked.ll b/test/CodeGen/R600/llvm.SI.sample-masked.ll
> new file mode 100644
> index 0000000..454e48b
> --- /dev/null
> +++ b/test/CodeGen/R600/llvm.SI.sample-masked.ll
> @@ -0,0 +1,93 @@
> +;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s
> +
> +; CHECK-LABEL: @v1
> +; CHECK: IMAGE_SAMPLE VGPR{{[[0-9]}}_VGPR{{[0-9]}}_VGPR{{[0-9]}}, 13
> +define void @v1(i32 %a1) {
> +entry:
> +  %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
> +  %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> undef, <16 x i8> undef, i32 0)
> +  %2 = extractelement <4 x float> %1, i32 0
> +  %3 = extractelement <4 x float> %1, i32 2
> +  %4 = extractelement <4 x float> %1, i32 3
> +  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %4, float %4)
> +  ret void
> +}
> +
> +; CHECK-LABEL: @v2
> +; CHECK: IMAGE_SAMPLE VGPR{{[[0-9]}}_VGPR{{[0-9]}}_VGPR{{[0-9]}}, 11
> +define void @v2(i32 %a1) {
> +entry:
> +  %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
> +  %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> undef, <16 x i8> undef, i32 0)
> +  %2 = extractelement <4 x float> %1, i32 0
> +  %3 = extractelement <4 x float> %1, i32 1
> +  %4 = extractelement <4 x float> %1, i32 3
> +  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %4, float %4)
> +  ret void
> +}
> +
> +; CHECK-LABEL: @v3
> +; CHECK: IMAGE_SAMPLE VGPR{{[[0-9]}}_VGPR{{[0-9]}}_VGPR{{[0-9]}}, 14
> +define void @v3(i32 %a1) {
> +entry:
> +  %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
> +  %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> undef, <16 x i8> undef, i32 0)
> +  %2 = extractelement <4 x float> %1, i32 1
> +  %3 = extractelement <4 x float> %1, i32 2
> +  %4 = extractelement <4 x float> %1, i32 3
> +  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %4, float %4)
> +  ret void
> +}
> +
> +; CHECK-LABEL: @v4
> +; CHECK: IMAGE_SAMPLE VGPR{{[[0-9]}}_VGPR{{[0-9]}}_VGPR{{[0-9]}}, 7
> +define void @v4(i32 %a1) {
> +entry:
> +  %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
> +  %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> undef, <16 x i8> undef, i32 0)
> +  %2 = extractelement <4 x float> %1, i32 0
> +  %3 = extractelement <4 x float> %1, i32 1
> +  %4 = extractelement <4 x float> %1, i32 2
> +  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %4, float %4)
> +  ret void
> +}
> +
> +; CHECK-LABEL: @v5
> +; CHECK: IMAGE_SAMPLE VGPR{{[[0-9]}}_VGPR{{[0-9]}}, 10
> +define void @v5(i32 %a1) {
> +entry:
> +  %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
> +  %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> undef, <16 x i8> undef, i32 0)
> +  %2 = extractelement <4 x float> %1, i32 1
> +  %3 = extractelement <4 x float> %1, i32 3
> +  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %3, float %3)
> +  ret void
> +}
> +
> +; CHECK-LABEL: @v6
> +; CHECK: IMAGE_SAMPLE VGPR{{[[0-9]}}_VGPR{{[0-9]}}, 6
> +define void @v6(i32 %a1) {
> +entry:
> +  %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
> +  %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> undef, <16 x i8> undef, i32 0)
> +  %2 = extractelement <4 x float> %1, i32 1
> +  %3 = extractelement <4 x float> %1, i32 2
> +  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %3, float %3)
> +  ret void
> +}
> +
> +; CHECK-LABEL: @v7
> +; CHECK: IMAGE_SAMPLE VGPR{{[[0-9]}}_VGPR{{[0-9]}}, 9
> +define void @v7(i32 %a1) {
> +entry:
> +  %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
> +  %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> undef, <16 x i8> undef, i32 0)
> +  %2 = extractelement <4 x float> %1, i32 0
> +  %3 = extractelement <4 x float> %1, i32 3
> +  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %3, float %3)
> +  ret void
> +}
> +
> +declare <4 x float> @llvm.SI.sample.v1i32(<1 x i32>, <32 x i8>, <16 x i8>, i32) readnone
> +
> +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
> -- 
> 1.8.1.2
> 
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev