[PATCH] R600/SI: Select BUILD_PAIR using SSrc instead of VSrc.

Tom Stellard tom at stellard.net
Tue Apr 1 06:40:14 PDT 2014


On Mon, Mar 31, 2014 at 05:49:50PM -0700, Matt Arsenault wrote:
> The test changes are somewhat unfortunate. Instead of moving the immediate 0 directly into the VGPR, it is now moved into an SGPR first, and that SGPR is then copied into the VGPR. This is more or less OK, since the 0 needs to be loaded into an SGPR anyway, but it would probably be better if the v_mov still just used the immediate 0.
> 

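To make that concrete, the codegen difference looks roughly like this
(a hand-written sketch based on the FileCheck updates below; register
numbers are illustrative):

    ; before: the immediate goes straight into the VGPR
    BUFFER_LOAD_UBYTE    v0, ...
    V_MOV_B32_e32        v1, 0
    BUFFER_STORE_DWORDX2 v[0:1], ...

    ; after: the zero is materialized in an SGPR, then copied
    S_MOV_B32            s2, 0
    BUFFER_LOAD_UBYTE    v0, ...
    V_MOV_B32_e32        v1, s2
    BUFFER_STORE_DWORDX2 v[0:1], ...

This is also why the checks below switch from SI to SI-DAG: the
S_MOV_B32 and the buffer load can be scheduled in either order.
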
I have a similar patch here:
http://lists.cs.uiuc.edu/pipermail/llvm-commits/Week-of-Mon-20140317/209824.html

But I think it makes more sense to use SSrc_64, as you've done here,
than the SGPR_64 my patch used.
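
For reference, the one-line difference between the two approaches is
roughly this (a sketch; the exact class name my patch used may differ):

    // This patch: allow any scalar source, including inline constants.
    RC = CurDAG->getTargetConstant(AMDGPU::SSrc_64RegClassID, MVT::i32);
    // My patch above: restrict to plain scalar registers.
    RC = CurDAG->getTargetConstant(AMDGPU::SGPR_64RegClassID, MVT::i32);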

There are a number of regressions related to these patches that I'm
working on fixing now.  I'll let you know when this patch is safe to
commit.
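
For anyone wanting to verify locally: the touched tests can be rerun
in isolation with llvm-lit. Exact paths depend on your tree layout,
but from a build directory something like this should work:

    bin/llvm-lit -v ../llvm/test/CodeGen/R600/extload.ll \
        ../llvm/test/CodeGen/R600/zero_extend.ll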

-Tom

> http://llvm-reviews.chandlerc.com/D3236
> 
> Files:
>   lib/Target/R600/AMDGPUISelDAGToDAG.cpp
>   test/CodeGen/R600/extload.ll
>   test/CodeGen/R600/zero_extend.ll
> 
> Index: lib/Target/R600/AMDGPUISelDAGToDAG.cpp
> ===================================================================
> --- lib/Target/R600/AMDGPUISelDAGToDAG.cpp
> +++ lib/Target/R600/AMDGPUISelDAGToDAG.cpp
> @@ -346,7 +346,7 @@
>        SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, MVT::i32);
>        SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, MVT::i32);
>      } else if (N->getValueType(0) == MVT::i64) {
> -      RC = CurDAG->getTargetConstant(AMDGPU::VSrc_64RegClassID, MVT::i32);
> +      RC = CurDAG->getTargetConstant(AMDGPU::SSrc_64RegClassID, MVT::i32);
>        SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32);
>        SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32);
>      } else {
> Index: test/CodeGen/R600/extload.ll
> ===================================================================
> --- test/CodeGen/R600/extload.ll
> +++ test/CodeGen/R600/extload.ll
> @@ -87,8 +87,9 @@
>  }
>  
>  ; FUNC-LABEL: @zextload_global_i8_to_i64
> -; SI: BUFFER_LOAD_UBYTE [[LOAD:v[0-9]+]],
> -; SI: V_MOV_B32_e32 {{v[0-9]+}}, 0
> +; SI-DAG: S_MOV_B32 [[SZERO:s[0-9]+]], 0
> +; SI-DAG: BUFFER_LOAD_UBYTE [[LOAD:v[0-9]+]],
> +; SI: V_MOV_B32_e32 {{v[0-9]+}}, [[SZERO]]
>  ; SI: BUFFER_STORE_DWORDX2
>  define void @zextload_global_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
>    %a = load i8 addrspace(1)* %in, align 8
> @@ -98,8 +99,9 @@
>  }
>  
>  ; FUNC-LABEL: @zextload_global_i16_to_i64
> -; SI: BUFFER_LOAD_USHORT [[LOAD:v[0-9]+]],
> -; SI: V_MOV_B32_e32 {{v[0-9]+}}, 0
> +; SI-DAG: S_MOV_B32 [[SZERO:s[0-9]+]], 0
> +; SI-DAG: BUFFER_LOAD_USHORT [[LOAD:v[0-9]+]],
> +; SI: V_MOV_B32_e32 {{v[0-9]+}}, [[SZERO]]
>  ; SI: BUFFER_STORE_DWORDX2
>  define void @zextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
>    %a = load i16 addrspace(1)* %in, align 8
> @@ -109,8 +111,9 @@
>  }
>  
>  ; FUNC-LABEL: @zextload_global_i32_to_i64
> -; SI: BUFFER_LOAD_DWORD [[LOAD:v[0-9]+]],
> -; SI: V_MOV_B32_e32 {{v[0-9]+}}, 0
> +; SI-DAG: S_MOV_B32 [[SZERO:s[0-9]+]], 0
> +; SI-DAG: BUFFER_LOAD_DWORD [[LOAD:v[0-9]+]],
> +; SI: V_MOV_B32_e32 {{v[0-9]+}}, [[SZERO]]
>  ; SI: BUFFER_STORE_DWORDX2
>  define void @zextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
>    %a = load i32 addrspace(1)* %in, align 8
> Index: test/CodeGen/R600/zero_extend.ll
> ===================================================================
> --- test/CodeGen/R600/zero_extend.ll
> +++ test/CodeGen/R600/zero_extend.ll
> @@ -6,7 +6,8 @@
>  ; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW
>  
>  ; SI-CHECK: @test
> -; SI-CHECK: V_MOV_B32_e32 v[[ZERO:[0-9]]], 0
> +; SI-CHECK: S_MOV_B32 [[SZERO:s[0-9]+]], 0
> +; SI-CHECK: V_MOV_B32_e32 v[[ZERO:[0-9]+]], [[SZERO]]
>  ; SI-CHECK: BUFFER_STORE_DWORDX2 v[0:[[ZERO]]{{\]}}
>  define void @test(i64 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
>  entry:
