[PATCH] R600/SI: Also try to use 0 base for misaligned 8-byte DS loads.

Tom Stellard tom at stellard.net
Wed Oct 15 10:20:03 PDT 2014


On Wed, Oct 15, 2014 at 04:56:00PM +0000, Matt Arsenault wrote:
> http://reviews.llvm.org/D5803
> 
> Files:
>   lib/Target/R600/AMDGPUISelDAGToDAG.cpp
>   test/CodeGen/R600/ds_read2.ll
>   test/CodeGen/R600/ds_write2.ll

LGTM.

-Tom

> Index: lib/Target/R600/AMDGPUISelDAGToDAG.cpp
> ===================================================================
> --- lib/Target/R600/AMDGPUISelDAGToDAG.cpp
> +++ lib/Target/R600/AMDGPUISelDAGToDAG.cpp
> @@ -823,6 +823,23 @@
>      }
>    }
>  
> +  if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
> +    unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
> +    unsigned DWordOffset1 = DWordOffset0 + 1;
> +    assert(4 * DWordOffset0 == CAddr->getZExtValue());
> +
> +    if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
> +      SDValue Zero = CurDAG->getTargetConstant(0, MVT::i32);
> +      MachineSDNode *MovZero
> +        = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
> +                                 SDLoc(Addr), MVT::i32, Zero);
> +      Base = SDValue(MovZero, 0);
> +      Offset0 = CurDAG->getTargetConstant(DWordOffset0, MVT::i8);
> +      Offset1 = CurDAG->getTargetConstant(DWordOffset1, MVT::i8);
> +      return true;
> +    }
> +  }
> +
>    // default case
>    Base = Addr;
>    Offset0 = CurDAG->getTargetConstant(0, MVT::i8);
> Index: test/CodeGen/R600/ds_read2.ll
> ===================================================================
> --- test/CodeGen/R600/ds_read2.ll
> +++ test/CodeGen/R600/ds_read2.ll
> @@ -406,6 +406,36 @@
>    ret void
>  }
>  
> +@bar = addrspace(3) global [4 x i64] zeroinitializer, align 4
> +
> +; SI-LABEL: @load_misaligned64_constant_offsets
> +; SI: V_MOV_B32_e32 [[ZERO:v[0-9]+]], 0{{$}}
> +; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:0 offset1:1
> +; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:2 offset1:3
> +define void @load_misaligned64_constant_offsets(i64 addrspace(1)* %out) {
> +  %val0 = load i64 addrspace(3)* getelementptr inbounds ([4 x i64] addrspace(3)* @bar, i32 0, i32 0), align 4
> +  %val1 = load i64 addrspace(3)* getelementptr inbounds ([4 x i64] addrspace(3)* @bar, i32 0, i32 1), align 4
> +  %sum = add i64 %val0, %val1
> +  store i64 %sum, i64 addrspace(1)* %out, align 8
> +  ret void
> +}
> +
> +@bar.large = addrspace(3) global [4096 x i64] zeroinitializer, align 4
> +
> +; SI-LABEL: @load_misaligned64_constant_large_offsets
> +; SI-DAG: V_MOV_B32_e32 [[BASE0:v[0-9]+]], 0x7ff8{{$}}
> +; SI-DAG: V_MOV_B32_e32 [[BASE1:v[0-9]+]], 0x4000
> +; SI-DAG: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[BASE0]] offset0:0 offset1:1
> +; SI-DAG: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[BASE1]] offset0:0 offset1:1
> +; SI: S_ENDPGM
> +define void @load_misaligned64_constant_large_offsets(i64 addrspace(1)* %out) {
> +  %val0 = load i64 addrspace(3)* getelementptr inbounds ([4096 x i64] addrspace(3)* @bar.large, i32 0, i32 2048), align 4
> +  %val1 = load i64 addrspace(3)* getelementptr inbounds ([4096 x i64] addrspace(3)* @bar.large, i32 0, i32 4095), align 4
> +  %sum = add i64 %val0, %val1
> +  store i64 %sum, i64 addrspace(1)* %out, align 8
> +  ret void
> +}
> +
>  @sgemm.lA = internal unnamed_addr addrspace(3) global [264 x float] zeroinitializer, align 4
>  @sgemm.lB = internal unnamed_addr addrspace(3) global [776 x float] zeroinitializer, align 4
>  
> Index: test/CodeGen/R600/ds_write2.ll
> ===================================================================
> --- test/CodeGen/R600/ds_write2.ll
> +++ test/CodeGen/R600/ds_write2.ll
> @@ -341,6 +341,32 @@
>    ret void
>  }
>  
> +@bar = addrspace(3) global [4 x i64] zeroinitializer, align 4
> +
> +; SI-LABEL: @store_misaligned64_constant_offsets
> +; SI: V_MOV_B32_e32 [[ZERO:v[0-9]+]], 0{{$}}
> +; SI: DS_WRITE2_B32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
> +; SI: DS_WRITE2_B32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset0:2 offset1:3
> +define void @store_misaligned64_constant_offsets() {
> +  store i64 123, i64 addrspace(3)* getelementptr inbounds ([4 x i64] addrspace(3)* @bar, i32 0, i32 0), align 4
> +  store i64 123, i64 addrspace(3)* getelementptr inbounds ([4 x i64] addrspace(3)* @bar, i32 0, i32 1), align 4
> +  ret void
> +}
> +
> +@bar.large = addrspace(3) global [4096 x i64] zeroinitializer, align 4
> +
> +; SI-LABEL: @store_misaligned64_constant_large_offsets
> +; SI-DAG: V_MOV_B32_e32 [[BASE0:v[0-9]+]], 0x7ff8{{$}}
> +; SI-DAG: V_MOV_B32_e32 [[BASE1:v[0-9]+]], 0x4000{{$}}
> +; SI-DAG: DS_WRITE2_B32 [[BASE0]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
> +; SI-DAG: DS_WRITE2_B32 [[BASE1]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
> +; SI: S_ENDPGM
> +define void @store_misaligned64_constant_large_offsets() {
> +  store i64 123, i64 addrspace(3)* getelementptr inbounds ([4096 x i64] addrspace(3)* @bar.large, i32 0, i32 2048), align 4
> +  store i64 123, i64 addrspace(3)* getelementptr inbounds ([4096 x i64] addrspace(3)* @bar.large, i32 0, i32 4095), align 4
> +  ret void
> +}
> +
>  @sgemm.lA = internal unnamed_addr addrspace(3) global [264 x float] zeroinitializer, align 4
>  @sgemm.lB = internal unnamed_addr addrspace(3) global [776 x float] zeroinitializer, align 4
>  

> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits




More information about the llvm-commits mailing list