[PATCH] R600/SI: Use DS offsets for constant addresses

Tom Stellard tom at stellard.net
Tue Oct 14 09:42:10 PDT 2014


On Sat, Oct 11, 2014 at 12:38:47AM +0000, Matt Arsenault wrote:
> Use 0 as the base address for a constant address, so if
> we have a constant address we can save moves and form
> read2/write2s.
> 
> http://reviews.llvm.org/D5737
> 
> Files:
>   lib/Target/R600/AMDGPUISelDAGToDAG.cpp
>   test/CodeGen/R600/ds_read2.ll
>   test/CodeGen/R600/ds_write2.ll

> Index: lib/Target/R600/AMDGPUISelDAGToDAG.cpp
> ===================================================================
> --- lib/Target/R600/AMDGPUISelDAGToDAG.cpp
> +++ lib/Target/R600/AMDGPUISelDAGToDAG.cpp
> @@ -787,6 +787,18 @@
>      }
>    }
>  
> +  // If we have a constant address, prefer to put the constant into the
> +  // offset. This can save moves to load the constant address since multiple
> +  // operations can share the zero base address register, and enables merging
> +  // into read2 / write2 instructions.
> +  if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
> +    if (isUInt<16>(CAddr->getZExtValue())) {
> +      Base = CurDAG->getConstant(0, MVT::i32);
> +      Offset = Addr;
> +      return true;
> +    }
> +  }
> +

LGTM.

-Tom

>    // default case
>    Base = Addr;
>    Offset = CurDAG->getTargetConstant(0, MVT::i16);
> Index: test/CodeGen/R600/ds_read2.ll
> ===================================================================
> --- test/CodeGen/R600/ds_read2.ll
> +++ test/CodeGen/R600/ds_read2.ll
> @@ -382,6 +382,30 @@
>    ret void
>  }
>  
> +@foo = addrspace(3) global [4 x i32] zeroinitializer, align 4
> +
> +; SI-LABEL: @load_constant_adjacent_offsets
> +; SI: V_MOV_B32_e32 [[ZERO:v[0-9]+]], 0{{$}}
> +; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:0 offset1:1
> +define void @load_constant_adjacent_offsets(i32 addrspace(1)* %out) {
> +  %val0 = load i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
> +  %val1 = load i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 1), align 4
> +  %sum = add i32 %val0, %val1
> +  store i32 %sum, i32 addrspace(1)* %out, align 4
> +  ret void
> +}
> +
> +; SI-LABEL: @load_constant_disjoint_offsets
> +; SI: V_MOV_B32_e32 [[ZERO:v[0-9]+]], 0{{$}}
> +; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:0 offset1:2
> +define void @load_constant_disjoint_offsets(i32 addrspace(1)* %out) {
> +  %val0 = load i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
> +  %val1 = load i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 2), align 4
> +  %sum = add i32 %val0, %val1
> +  store i32 %sum, i32 addrspace(1)* %out, align 4
> +  ret void
> +}
> +
>  @sgemm.lA = internal unnamed_addr addrspace(3) global [264 x float] zeroinitializer, align 4
>  @sgemm.lB = internal unnamed_addr addrspace(3) global [776 x float] zeroinitializer, align 4
>  
> Index: test/CodeGen/R600/ds_write2.ll
> ===================================================================
> --- test/CodeGen/R600/ds_write2.ll
> +++ test/CodeGen/R600/ds_write2.ll
> @@ -320,6 +320,27 @@
>    ret void
>  }
>  
> +@foo = addrspace(3) global [4 x i32] zeroinitializer, align 4
> +
> +; SI-LABEL: @store_constant_adjacent_offsets
> +; SI: V_MOV_B32_e32 [[ZERO:v[0-9]+]], 0{{$}}
> +; SI: DS_WRITE2_B32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
> +define void @store_constant_adjacent_offsets() {
> +  store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
> +  store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 1), align 4
> +  ret void
> +}
> +
> +; SI-LABEL: @store_constant_disjoint_offsets
> +; SI-DAG: V_MOV_B32_e32 [[VAL:v[0-9]+]], 0x7b{{$}}
> +; SI-DAG: V_MOV_B32_e32 [[ZERO:v[0-9]+]], 0{{$}}
> +; SI: DS_WRITE2_B32 [[ZERO]], [[VAL]], [[VAL]] offset0:0 offset1:2
> +define void @store_constant_disjoint_offsets() {
> +  store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
> +  store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 2), align 4
> +  ret void
> +}
> +
>  @sgemm.lA = internal unnamed_addr addrspace(3) global [264 x float] zeroinitializer, align 4
>  @sgemm.lB = internal unnamed_addr addrspace(3) global [776 x float] zeroinitializer, align 4
>  

> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits




More information about the llvm-commits mailing list