[llvm] r248467 - AMDGPU: Reduce number of copies emitted

Thu Sep 24 00:16:38 PDT 2015

Author: arsenm
Date: Thu Sep 24 02:16:37 2015
New Revision: 248467

URL: http://llvm.org/viewvc/llvm-project?rev=248467&view=rev
Log:
AMDGPU: Reduce number of copies emitted

Instead of always inserting a copy in case
the super register is itself a subregister,
only extract to the super reg class if this is
actually the case.

This shouldn't really change codegen, but
makes looking at the output of SIFixSGPRCopies
easier to read.

Modified:
    llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
    llvm/trunk/test/CodeGen/AMDGPU/and.ll
    llvm/trunk/test/CodeGen/AMDGPU/ctpop64.ll
    llvm/trunk/test/CodeGen/AMDGPU/s_movk_i32.ll

Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp?rev=248467&r1=248466&r2=248467&view=diff
==============================================================================

--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp Thu Sep 24 02:16:37 2015
@@ -1556,17 +1556,21 @@ unsigned SIInstrInfo::buildExtractSubReg
                                          unsigned SubIdx,
                                          const TargetRegisterClass *SubRC)
                                          const {
-  assert(SuperReg.isReg());
-
-  unsigned NewSuperReg = MRI.createVirtualRegister(SuperRC);
+  MachineBasicBlock *MBB = MI->getParent();
+  DebugLoc DL = MI->getDebugLoc();
   unsigned SubReg = MRI.createVirtualRegister(SubRC);
 
+  if (SuperReg.getSubReg() == AMDGPU::NoSubRegister) {
+    BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), SubReg)
+      .addReg(SuperReg.getReg(), 0, SubIdx);
+    return SubReg;
+  }
+
   // Just in case the super register is itself a sub-register, copy it to a new
   // value so we don't need to worry about merging its subreg index with the
   // SubIdx passed to this function. The register coalescer should be able to
   // eliminate this extra copy.
-  MachineBasicBlock *MBB = MI->getParent();
-  DebugLoc DL = MI->getDebugLoc();
+  unsigned NewSuperReg = MRI.createVirtualRegister(SuperRC);
 
   BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), NewSuperReg)
     .addReg(SuperReg.getReg(), 0, SuperReg.getSubReg());

Modified: llvm/trunk/test/CodeGen/AMDGPU/and.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/and.ll?rev=248467&r1=248466&r2=248467&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/and.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/and.ll Thu Sep 24 02:16:37 2015
@@ -147,9 +147,10 @@ endif:
   ret void
 }
 
+; FIXME: and 0 should be replaced witht copy
 ; FUNC-LABEL: {{^}}v_and_constant_i64:
 ; SI: v_and_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
-; SI: v_and_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
+; SI: v_and_b32_e32 {{v[0-9]+}}, 0, {{v[0-9]+}}
 define void @v_and_constant_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
   %a = load i64, i64 addrspace(1)* %aptr, align 8
   %and = and i64 %a, 1234567

Modified: llvm/trunk/test/CodeGen/AMDGPU/ctpop64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/ctpop64.ll?rev=248467&r1=248466&r2=248467&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/ctpop64.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/ctpop64.ll Thu Sep 24 02:16:37 2015
@@ -36,15 +36,14 @@ define void @v_ctpop_i64(i32 addrspace(1
   ret void
 }
 
-; FIXME: We shouldn't emit the v_mov_b32 0
+; FIXME: or 0 should be replaxed with copy
 ; FUNC-LABEL: {{^}}v_ctpop_i64_user:
 ; GCN: buffer_load_dwordx2 v{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}},
 ; GCN: v_bcnt_u32_b32_e64 [[MIDRESULT:v[0-9]+]], v[[LOVAL]], 0
 ; SI-NEXT: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]]
 ; VI-NEXT: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]]
-; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
 ; GCN-DAG: v_or_b32_e32 v[[RESULT_LO:[0-9]+]], s{{[0-9]+}}, [[RESULT]]
-; GCN-DAG: v_or_b32_e32 v[[RESULT_HI:[0-9]+]], s{{[0-9]+}}, v[[ZERO]]
+; GCN-DAG: v_or_b32_e64 v[[RESULT_HI:[0-9]+]], 0, s{{[0-9]+}}
 ; GCN: buffer_store_dwordx2 v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}}
 ; GCN: s_endpgm
 define void @v_ctpop_i64_user(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i64 %s.val) nounwind {

Modified: llvm/trunk/test/CodeGen/AMDGPU/s_movk_i32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/s_movk_i32.ll?rev=248467&r1=248466&r2=248467&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/s_movk_i32.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/s_movk_i32.ll Thu Sep 24 02:16:37 2015
@@ -3,10 +3,9 @@
 
 ; SI-LABEL: {{^}}s_movk_i32_k0:
 ; SI-DAG: s_mov_b32 [[LO_S_IMM:s[0-9]+]], 0xffff{{$}}
-; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 1{{$}}
 ; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
 ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
-; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
+; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 1, v[[HI_VREG]]
 ; SI: s_endpgm
 define void @s_movk_i32_k0(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
   %loada = load i64, i64 addrspace(1)* %a, align 4
@@ -17,10 +16,9 @@ define void @s_movk_i32_k0(i64 addrspace
 
 ; SI-LABEL: {{^}}s_movk_i32_k1:
 ; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0x7fff{{$}}
-; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 1{{$}}
 ; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
 ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
-; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
+; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 1, v[[HI_VREG]]
 ; SI: s_endpgm
 define void @s_movk_i32_k1(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
   %loada = load i64, i64 addrspace(1)* %a, align 4
@@ -31,10 +29,9 @@ define void @s_movk_i32_k1(i64 addrspace
 
 ; SI-LABEL: {{^}}s_movk_i32_k2:
 ; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0x7fff{{$}}
-; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 64{{$}}
 ; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
 ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
-; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
+; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 64, v[[HI_VREG]]
 ; SI: s_endpgm
 define void @s_movk_i32_k2(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
   %loada = load i64, i64 addrspace(1)* %a, align 4
@@ -45,10 +42,9 @@ define void @s_movk_i32_k2(i64 addrspace
 
 ; SI-LABEL: {{^}}s_movk_i32_k3:
 ; SI-DAG: s_mov_b32 [[LO_S_IMM:s[0-9]+]], 0x8000{{$}}
-; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 1{{$}}
 ; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
 ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
-; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
+; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 1, v[[HI_VREG]]
 ; SI: s_endpgm
 define void @s_movk_i32_k3(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
   %loada = load i64, i64 addrspace(1)* %a, align 4
@@ -59,10 +55,9 @@ define void @s_movk_i32_k3(i64 addrspace
 
 ; SI-LABEL: {{^}}s_movk_i32_k4:
 ; SI-DAG: s_mov_b32 [[LO_S_IMM:s[0-9]+]], 0x20000{{$}}
-; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 1{{$}}
 ; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
 ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
-; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
+; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 1, v[[HI_VREG]]
 ; SI: s_endpgm
 define void @s_movk_i32_k4(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
   %loada = load i64, i64 addrspace(1)* %a, align 4
@@ -87,10 +82,9 @@ define void @s_movk_i32_k5(i64 addrspace
 
 ; SI-LABEL: {{^}}s_movk_i32_k6:
 ; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0x41{{$}}
-; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 63{{$}}
 ; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
 ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
-; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
+; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 63, v[[HI_VREG]]
 ; SI: s_endpgm
 define void @s_movk_i32_k6(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
   %loada = load i64, i64 addrspace(1)* %a, align 4