[PATCH] D22556: Replace subregister uses when processing tied operands

Fri Aug 12 17:03:13 PDT 2016

arsenm retitled this revision from "Replace other uses of register when processind tied operands" to "Replace subregister uses when processing tied operands".
arsenm updated the summary for this revision.
arsenm updated this revision to Diff 67942.
arsenm added a comment.

There was already code there trying to accomplish the same thing; it was just skipping subregister uses for some reason.


https://reviews.llvm.org/D22556

Files:
  lib/CodeGen/TwoAddressInstructionPass.cpp
  test/CodeGen/AMDGPU/indirect-addressing-si-noopt.ll
  test/CodeGen/AMDGPU/indirect-addressing-si.ll

Index: test/CodeGen/AMDGPU/indirect-addressing-si.ll
===================================================================

--- test/CodeGen/AMDGPU/indirect-addressing-si.ll
+++ test/CodeGen/AMDGPU/indirect-addressing-si.ll
@@ -125,27 +125,32 @@
 }
 
 ; CHECK-LABEL: {{^}}insert_w_offset:
-; CHECK: s_load_dword [[IN:s[0-9]+]]
-; CHECK: s_mov_b32 m0, [[IN]]
-; CHECK: v_movreld_b32_e32
-define void @insert_w_offset(float addrspace(1)* %out, i32 %in) {
+; CHECK-DAG: s_load_dword [[IN:s[0-9]+]]
+; CHECK-DAG: s_mov_b32 m0, [[IN]]
+; CHECK-DAG: v_mov_b32_e32 v[[ELT0:[0-9]+]], 1.0
+; CHECK-DAG: v_mov_b32_e32 v[[ELT1:[0-9]+]], 2.0
+; CHECK-DAG: v_mov_b32_e32 v[[ELT2:[0-9]+]], 0x40400000
+; CHECK-DAG: v_mov_b32_e32 v[[ELT3:[0-9]+]], 4.0
+; CHECK-DAG: v_mov_b32_e32 v[[INS:[0-9]+]], 0x40a00000
+; CHECK: v_movreld_b32_e32 v[[ELT1]], v[[INS]]
+; CHECK: buffer_store_dwordx4 v{{\[}}[[ELT0]]:[[ELT3]]{{\]}}
+define void @insert_w_offset(<4 x float> addrspace(1)* %out, i32 %in) {
 entry:
   %0 = add i32 %in, 1
   %1 = insertelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, float 5.0, i32 %0
-  %2 = extractelement <4 x float> %1, i32 2
-  store float %2, float addrspace(1)* %out
+  store <4 x float> %1, <4 x float> addrspace(1)* %out
   ret void
 }
 
 ; CHECK-LABEL: {{^}}insert_wo_offset:
 ; CHECK: s_load_dword [[IN:s[0-9]+]]
 ; CHECK: s_mov_b32 m0, [[IN]]
-; CHECK: v_movreld_b32_e32
-define void @insert_wo_offset(float addrspace(1)* %out, i32 %in) {
+; CHECK: v_movreld_b32_e32 v[[ELT0:[0-9]+]]
+; CHECK: buffer_store_dwordx4 v{{\[}}[[ELT0]]:
+define void @insert_wo_offset(<4 x float> addrspace(1)* %out, i32 %in) {
 entry:
   %0 = insertelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, float 5.0, i32 %in
-  %1 = extractelement <4 x float> %0, i32 2
-  store float %1, float addrspace(1)* %out
+  store <4 x float> %0, <4 x float> addrspace(1)* %out
   ret void
 }
 
Index: test/CodeGen/AMDGPU/indirect-addressing-si-noopt.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/indirect-addressing-si-noopt.ll
@@ -0,0 +1,19 @@
+; RUN: llc -O0 -march=amdgcn -verify-machineinstrs < %s | FileCheck %s
+
+; FIXME: Merge into indirect-addressing-si.ll
+
+; Make sure that TwoAddressInstructions keeps src0 as subregister sub0
+; of the tied implicit use and def of the super register.
+
+; CHECK-LABEL: {{^}}insert_wo_offset:
+; CHECK: s_load_dword [[IN:s[0-9]+]]
+; CHECK: s_mov_b32 m0, [[IN]]
+; CHECK: v_movreld_b32_e32 v[[ELT0:[0-9]+]]
+; CHECK-NEXT: buffer_store_dwordx4 v{{\[}}[[ELT0]]:
+define void @insert_wo_offset(<4 x float> addrspace(1)* %out, i32 %in) {
+entry:
+  %ins = insertelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, float 5.0, i32 %in
+  store <4 x float> %ins, <4 x float> addrspace(1)* %out
+  ret void
+}
+
Index: lib/CodeGen/TwoAddressInstructionPass.cpp
===================================================================
--- lib/CodeGen/TwoAddressInstructionPass.cpp
+++ lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -1567,14 +1567,14 @@
     if (!IsEarlyClobber) {
       // Replace other (un-tied) uses of regB with LastCopiedReg.
       for (MachineOperand &MO : MI->operands()) {
-        if (MO.isReg() && MO.getReg() == RegB && MO.getSubReg() == SubRegB &&
+        if (MO.isReg() && MO.getReg() == RegB &&
             MO.isUse()) {
           if (MO.isKill()) {
             MO.setIsKill(false);
             RemovedKillFlag = true;
           }
           MO.setReg(LastCopiedReg);
-          MO.setSubReg(0);
+          MO.setSubReg(MO.getSubReg());
         }
       }
     }


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D22556.67942.patch
Type: text/x-patch
Size: 3620 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20160813/7e3bb5ab/attachment.bin>