[llvm-branch-commits] [llvm-branch] r362634 - Merging r359891:

Wed Jun 5 12:06:41 PDT 2019

Author: arsenm
Date: Wed Jun  5 12:06:41 2019
New Revision: 362634

URL: http://llvm.org/viewvc/llvm-project?rev=362634&view=rev
Log:
Merging r359891:
------------------------------------------------------------------------
r359891 | arsenm | 2019-05-03 07:40:10 -0700 (Fri, 03 May 2019) | 9 lines

AMDGPU: Replace shrunk instruction with dummy implicit_def

This was broken if the original operand was killed. The kill flag
would appear on both instructions, and fail the verifier. Keep the
kill flag, but remove the operands from the old instruction. This has
an added benefit of really reducing the use count for future folds.

Ideally the pass would be structured more like PeepholeOptimizer, which
would make this hack (needed to avoid breaking instruction iterators) unnecessary.
------------------------------------------------------------------------

Modified:
    llvm/branches/release_80/   (props changed)
    llvm/branches/release_80/lib/Target/AMDGPU/SIFoldOperands.cpp
    llvm/branches/release_80/test/CodeGen/AMDGPU/fold-immediate-operand-shrink.mir

Propchange: llvm/branches/release_80/
------------------------------------------------------------------------------

--- svn:mergeinfo (original)
+++ svn:mergeinfo Wed Jun  5 12:06:41 2019
@@ -1,3 +1,3 @@
 /llvm/branches/Apple/Pertwee:110850,110961
 /llvm/branches/type-system-rewrite:133420-134817
-/llvm/trunk:155241,351322,351325,351344-351345,351349,351351,351370,351381,351387,351421,351426,351436,351475,351485,351753-351754,351765,351910,351930,351932,352034,352204,352246,352374,352465,352555,352607-352608,352707,352714,352770,352886,352889,352892,352895,352908,352917,352935,352945,353015,353061,353082,353138,353141,353155,353213,353218,353304,353308,353334,353367,353374,353383,353463,353480,353489,353551,353733,353758,353809,353907,354034,354117,354128,354131,354144,354207,354497,354505,354733,354756,354764,355116-355117,355136,355227-355228,359883
+/llvm/trunk:155241,351322,351325,351344-351345,351349,351351,351370,351381,351387,351421,351426,351436,351475,351485,351753-351754,351765,351910,351930,351932,352034,352204,352246,352374,352465,352555,352607-352608,352707,352714,352770,352886,352889,352892,352895,352908,352917,352935,352945,353015,353061,353082,353138,353141,353155,353213,353218,353304,353308,353334,353367,353374,353383,353463,353480,353489,353551,353733,353758,353809,353907,354034,354117,354128,354131,354144,354207,354497,354505,354733,354756,354764,355116-355117,355136,355227-355228,359883,359891

Modified: llvm/branches/release_80/lib/Target/AMDGPU/SIFoldOperands.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_80/lib/Target/AMDGPU/SIFoldOperands.cpp?rev=362634&r1=362633&r2=362634&view=diff
==============================================================================
--- llvm/branches/release_80/lib/Target/AMDGPU/SIFoldOperands.cpp (original)
+++ llvm/branches/release_80/lib/Target/AMDGPU/SIFoldOperands.cpp Wed Jun  5 12:06:41 2019
@@ -218,8 +218,6 @@ static bool updateOperand(FoldCandidate
 
       const TargetRegisterClass *Dst0RC = MRI.getRegClass(Dst0.getReg());
       unsigned NewReg0 = MRI.createVirtualRegister(Dst0RC);
-      const TargetRegisterClass *Dst1RC = MRI.getRegClass(Dst1.getReg());
-      unsigned NewReg1 = MRI.createVirtualRegister(Dst1RC);
 
       MachineInstr *Inst32 = TII.buildShrunkInst(*MI, Op32);
 
@@ -229,9 +227,15 @@ static bool updateOperand(FoldCandidate
       }
 
       // Keep the old instruction around to avoid breaking iterators, but
-      // replace the outputs with dummy registers.
+      // replace it with a dummy instruction to remove uses.
+      //
+      // FIXME: We should not invert how this pass looks at operands to avoid
+      // this. Should track set of foldable movs instead of looking for uses
+      // when looking at a use.
       Dst0.setReg(NewReg0);
-      Dst1.setReg(NewReg1);
+      for (unsigned I = MI->getNumOperands() - 1; I > 0; --I)
+        MI->RemoveOperand(I);
+      MI->setDesc(TII.get(AMDGPU::IMPLICIT_DEF));
 
       if (Fold.isCommuted())
         TII.commuteInstruction(*Inst32, false);

Modified: llvm/branches/release_80/test/CodeGen/AMDGPU/fold-immediate-operand-shrink.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_80/test/CodeGen/AMDGPU/fold-immediate-operand-shrink.mir?rev=362634&r1=362633&r2=362634&view=diff
==============================================================================
--- llvm/branches/release_80/test/CodeGen/AMDGPU/fold-immediate-operand-shrink.mir (original)
+++ llvm/branches/release_80/test/CodeGen/AMDGPU/fold-immediate-operand-shrink.mir Wed Jun  5 12:06:41 2019
@@ -590,3 +590,59 @@ body:             |
     S_ENDPGM implicit %2
 
 ...
+
+---
+name: shrink_add_kill_flags_src0
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0
+    ; GCN-LABEL: name: shrink_add_kill_flags_src0
+    ; GCN: liveins: $vgpr0
+    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 518144, implicit $exec
+    ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 killed [[V_MOV_B32_e32_]], [[COPY]], implicit-def $vcc, implicit $exec
+    ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e32_]]
+    %0:vgpr_32 = COPY $vgpr0
+    %1:vgpr_32 = V_MOV_B32_e32 518144, implicit $exec
+    %2:vgpr_32, %3:sreg_64_xexec = V_ADD_I32_e64 killed %1, %0, 0, implicit $exec
+   S_ENDPGM 0, implicit %2
+...
+
+---
+name: shrink_add_kill_flags_src1
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0
+    ; GCN-LABEL: name: shrink_add_kill_flags_src1
+    ; GCN: liveins: $vgpr0
+    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 518144, implicit $exec
+    ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[V_MOV_B32_e32_]], killed [[COPY]], implicit-def $vcc, implicit $exec
+    ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e32_]]
+    %0:vgpr_32 = COPY $vgpr0
+    %1:vgpr_32 = V_MOV_B32_e32 518144, implicit $exec
+    %2:vgpr_32, %3:sreg_64_xexec = V_ADD_I32_e64 %1, killed %0, 0, implicit $exec
+   S_ENDPGM 0, implicit %2
+...
+
+---
+name: shrink_addc_kill_flags_src2
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vcc
+    ; GCN-LABEL: name: shrink_addc_kill_flags_src2
+    ; GCN: liveins: $vgpr0, $vcc
+    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 518144, implicit $exec
+    ; GCN: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $vcc
+    ; GCN: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[V_MOV_B32_e32_]], [[COPY]], [[COPY1]], 0, implicit $exec
+    ; GCN: S_ENDPGM 0, implicit [[V_ADDC_U32_e64_]]
+    %0:vgpr_32 = COPY $vgpr0
+    %1:vgpr_32 = V_MOV_B32_e32 518144, implicit $exec
+    %2:sreg_64_xexec = COPY $vcc
+    %3:vgpr_32, %4:sreg_64_xexec = V_ADDC_U32_e64 %1, %0, %2, 0, implicit $exec
+   S_ENDPGM 0, implicit %3
+...