[llvm] 8d7fd73 - [AMDGPU] Fix merging m0 inits

Piotr Sobczak via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 23 00:14:58 PDT 2020


Author: Piotr Sobczak
Date: 2020-09-23T09:13:43+02:00
New Revision: 8d7fd73c3a8ce069cfe48dfcf949b4a59c05c673

URL: https://github.com/llvm/llvm-project/commit/8d7fd73c3a8ce069cfe48dfcf949b4a59c05c673
DIFF: https://github.com/llvm/llvm-project/commit/8d7fd73c3a8ce069cfe48dfcf949b4a59c05c673.diff

LOG: [AMDGPU] Fix merging m0 inits

Fix incorrect merges of m0 inits in loops.

It was assumed that if a clobbering instruction appears in
the same block as an init, and the clobbering instruction
does not dominate the init, then it does not interfere with
the init.

This does not work in the presence of loops: in this
scenario the clobbering instruction reaches the init again
through the back edge, so it does interfere with the init
in another iteration.

To fix this, do not check for block equality and defer the
decision to the predecessor check.

Differential Revision: https://reviews.llvm.org/D87882

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
    llvm/test/CodeGen/AMDGPU/merge-m0.mir

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index 775cec6904a4..a6df41f6beec 100644
--- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -386,17 +386,13 @@ static bool isReachable(const MachineInstr *From,
                         const MachineInstr *To,
                         const MachineBasicBlock *CutOff,
                         MachineDominatorTree &MDT) {
-  // If either From block dominates To block or instructions are in the same
-  // block and From is higher.
   if (MDT.dominates(From, To))
     return true;
 
   const MachineBasicBlock *MBBFrom = From->getParent();
   const MachineBasicBlock *MBBTo = To->getParent();
-  if (MBBFrom == MBBTo)
-    return false;
 
-  // Instructions are in different blocks, do predecessor search.
+  // Do predecessor search.
   // We should almost never get here since we do not usually produce M0 stores
   // other than -1.
   return searchPredecessors(MBBTo, CutOff, [MBBFrom]

diff --git a/llvm/test/CodeGen/AMDGPU/merge-m0.mir b/llvm/test/CodeGen/AMDGPU/merge-m0.mir
index 0afc5d1cb1a1..81d9491c68e1 100644
--- a/llvm/test/CodeGen/AMDGPU/merge-m0.mir
+++ b/llvm/test/CodeGen/AMDGPU/merge-m0.mir
@@ -295,3 +295,386 @@ body:             |
     SI_INIT_M0 -1, implicit-def $m0
     DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec
 ...
+
+
+# GCN-LABEL: name: m0-in-loop-0
+# GCN:    bb.0.entry:
+# GCN:      SI_INIT_M0 -1
+# GCN-NEXT: IMPLICIT_DEF
+# GCN-NEXT: IMPLICIT_DEF
+# GCN-NEXT: IMPLICIT_DEF
+# GCN-NEXT: DS_WRITE_B32
+
+# GCN:    bb.1:
+# GCN:      SI_INIT_M0 -1
+# GCN-NEXT: DS_WRITE_B32
+# GCN-NEXT: $m0 = COPY %2
+
+---
+name: m0-in-loop-0
+registers:
+  - { id: 0, class: vgpr_32 }
+  - { id: 1, class: vgpr_32 }
+  - { id: 2, class: sgpr_32 }
+body:             |
+  bb.0.entry:
+    successors: %bb.1
+
+    %0 = IMPLICIT_DEF
+    %1 = IMPLICIT_DEF
+    %2 = IMPLICIT_DEF
+    SI_INIT_M0 -1, implicit-def $m0
+    DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec
+    S_BRANCH %bb.1
+
+  bb.1:
+    successors: %bb.1, %bb.2
+
+    SI_INIT_M0 -1, implicit-def $m0
+    DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec
+    $m0 = COPY %2:sgpr_32
+    S_SENDMSG 34, implicit $exec, implicit $m0
+    S_CBRANCH_VCCZ %bb.1, implicit undef $vcc
+    S_BRANCH %bb.2
+
+  bb.2:
+    S_ENDPGM 0
+...
+
+# GCN-LABEL: name: m0-in-loop-1
+# GCN:    bb.0.entry:
+# GCN:      SI_INIT_M0 -1
+# GCN-NEXT: IMPLICIT_DEF
+# GCN-NEXT: IMPLICIT_DEF
+# GCN-NEXT: IMPLICIT_DEF
+# GCN-NEXT: DS_WRITE_B32
+
+# GCN:    bb.1:
+# GCN-NOT:      SI_INIT_M0 -1
+# GCN: DS_WRITE_B32
+
+---
+name: m0-in-loop-1
+registers:
+  - { id: 0, class: vgpr_32 }
+  - { id: 1, class: vgpr_32 }
+  - { id: 2, class: sgpr_32 }
+body:             |
+  bb.0.entry:
+    successors: %bb.1
+
+    %0 = IMPLICIT_DEF
+    %1 = IMPLICIT_DEF
+    %2 = IMPLICIT_DEF
+    SI_INIT_M0 -1, implicit-def $m0
+    DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec
+    S_BRANCH %bb.1
+
+  bb.1:
+    successors: %bb.1, %bb.2
+
+    SI_INIT_M0 -1, implicit-def $m0
+    DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec
+    S_CBRANCH_VCCZ %bb.1, implicit undef $vcc
+    S_BRANCH %bb.2
+
+  bb.2:
+    S_ENDPGM 0
+...
+
+# GCN-LABEL: name: m0-in-loop-2
+# GCN:    bb.0.entry:
+# GCN:      SI_INIT_M0 -1
+# GCN-NEXT: IMPLICIT_DEF
+# GCN-NEXT: IMPLICIT_DEF
+# GCN-NEXT: IMPLICIT_DEF
+# GCN-NEXT: DS_WRITE_B32
+
+# GCN:    bb.1:
+# GCN: $m0 = COPY %2
+# GCN-NEXT:      SENDMSG
+# GCN-NEXT:      SI_INIT_M0 -1
+# GCN-NEXT: DS_WRITE_B32
+
+---
+name: m0-in-loop-2
+registers:
+  - { id: 0, class: vgpr_32 }
+  - { id: 1, class: vgpr_32 }
+  - { id: 2, class: sgpr_32 }
+body:             |
+  bb.0.entry:
+    successors: %bb.1
+
+    %0 = IMPLICIT_DEF
+    %1 = IMPLICIT_DEF
+    %2 = IMPLICIT_DEF
+    SI_INIT_M0 -1, implicit-def $m0
+    DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec
+    S_BRANCH %bb.1
+
+  bb.1:
+    successors: %bb.1, %bb.2
+
+    $m0 = COPY %2:sgpr_32
+    S_SENDMSG 34, implicit $exec, implicit $m0
+    SI_INIT_M0 -1, implicit-def $m0
+    DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec
+    S_CBRANCH_VCCZ %bb.1, implicit undef $vcc
+    S_BRANCH %bb.2
+
+  bb.2:
+    S_ENDPGM 0
+...
+
+# GCN-LABEL: name: m0-in-loop-3
+# GCN:    bb.0.entry:
+# GCN:      SI_INIT_M0 -1
+# GCN-NEXT: IMPLICIT_DEF
+# GCN-NEXT: IMPLICIT_DEF
+# GCN-NEXT: IMPLICIT_DEF
+# GCN-NEXT: DS_WRITE_B32
+
+# GCN:    bb.1:
+# GCN: $m0 = COPY %2
+# GCN-NEXT:      SENDMSG
+# GCN-NEXT:      SI_INIT_M0 -1
+# GCN-NEXT: DS_WRITE_B32
+# GCN-NEXT: DS_WRITE_B32
+
+---
+name: m0-in-loop-3
+registers:
+  - { id: 0, class: vgpr_32 }
+  - { id: 1, class: vgpr_32 }
+  - { id: 2, class: sgpr_32 }
+body:             |
+  bb.0.entry:
+    successors: %bb.1
+
+    %0 = IMPLICIT_DEF
+    %1 = IMPLICIT_DEF
+    %2 = IMPLICIT_DEF
+    SI_INIT_M0 -1, implicit-def $m0
+    DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec
+    S_BRANCH %bb.1
+
+  bb.1:
+    successors: %bb.1, %bb.2
+
+    $m0 = COPY %2:sgpr_32
+    S_SENDMSG 34, implicit $exec, implicit $m0
+    SI_INIT_M0 -1, implicit-def $m0
+    DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec
+    SI_INIT_M0 -1, implicit-def $m0
+    DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec
+    S_CBRANCH_VCCZ %bb.1, implicit undef $vcc
+    S_BRANCH %bb.2
+
+  bb.2:
+    S_ENDPGM 0
+...
+
+# GCN-LABEL: name: m0-in-loop-4
+# GCN:    bb.0.entry:
+# GCN:      SI_INIT_M0 -1
+# GCN-NEXT: IMPLICIT_DEF
+# GCN-NEXT: IMPLICIT_DEF
+# GCN-NEXT: IMPLICIT_DEF
+# GCN-NEXT: DS_WRITE_B32
+
+# GCN:    bb.1:
+# GCN:  SI_INIT_M0 -1
+# GCN-NEXT: DS_WRITE_B32
+# GCN-NEXT: DS_WRITE_B32
+# GCN-NEXT: $m0 = COPY %2
+# GCN-NEXT:      SENDMSG
+
+---
+name: m0-in-loop-4
+registers:
+  - { id: 0, class: vgpr_32 }
+  - { id: 1, class: vgpr_32 }
+  - { id: 2, class: sgpr_32 }
+body:             |
+  bb.0.entry:
+    successors: %bb.1
+
+    %0 = IMPLICIT_DEF
+    %1 = IMPLICIT_DEF
+    %2 = IMPLICIT_DEF
+    SI_INIT_M0 -1, implicit-def $m0
+    DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec
+    S_BRANCH %bb.1
+
+  bb.1:
+    successors: %bb.1, %bb.2
+
+    SI_INIT_M0 -1, implicit-def $m0
+    DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec
+    SI_INIT_M0 -1, implicit-def $m0
+    DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec
+    $m0 = COPY %2:sgpr_32
+    S_SENDMSG 34, implicit $exec, implicit $m0
+    S_CBRANCH_VCCZ %bb.1, implicit undef $vcc
+    S_BRANCH %bb.2
+
+  bb.2:
+    S_ENDPGM 0
+...
+
+# GCN-LABEL: name: m0-in-loop-5
+# GCN:    bb.0.entry:
+# GCN:      SI_INIT_M0 -1
+# GCN-NEXT: IMPLICIT_DEF
+# GCN-NEXT: IMPLICIT_DEF
+# GCN-NEXT: IMPLICIT_DEF
+# GCN-NEXT: DS_WRITE_B32
+
+# GCN:    bb.1:
+# GCN:  SI_INIT_M0 65536
+# GCN-NEXT: DS_WRITE_B32
+# GCN-NEXT:  SI_INIT_M0 -1
+# GCN-NEXT: DS_WRITE_B32
+# GCN-NEXT: $m0 = COPY %2
+# GCN-NEXT:      SENDMSG
+
+---
+name: m0-in-loop-5
+registers:
+  - { id: 0, class: vgpr_32 }
+  - { id: 1, class: vgpr_32 }
+  - { id: 2, class: sgpr_32 }
+body:             |
+  bb.0.entry:
+    successors: %bb.1
+
+    %0 = IMPLICIT_DEF
+    %1 = IMPLICIT_DEF
+    %2 = IMPLICIT_DEF
+    SI_INIT_M0 -1, implicit-def $m0
+    DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec
+    S_BRANCH %bb.1
+
+  bb.1:
+    successors: %bb.1, %bb.2
+
+    SI_INIT_M0 65536, implicit-def $m0
+    DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec
+    SI_INIT_M0 -1, implicit-def $m0
+    DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec
+    $m0 = COPY %2:sgpr_32
+    S_SENDMSG 34, implicit $exec, implicit $m0
+    S_CBRANCH_VCCZ %bb.1, implicit undef $vcc
+    S_BRANCH %bb.2
+
+  bb.2:
+    S_ENDPGM 0
+...
+
+# GCN-LABEL: name: m0-in-loop-6
+# GCN:    bb.0.entry:
+# GCN:      SI_INIT_M0 -1
+# GCN-NEXT: IMPLICIT_DEF
+# GCN-NEXT: IMPLICIT_DEF
+# GCN-NEXT: IMPLICIT_DEF
+# GCN-NEXT: DS_WRITE_B32
+
+# GCN:    bb.1:
+# GCN:  SI_INIT_M0 -1
+# GCN-NEXT: DS_WRITE_B32
+# GCN-NEXT: DS_WRITE_B32
+# GCN-NEXT: $m0 = COPY %2
+# GCN-NEXT:      SENDMSG
+
+---
+name: m0-in-loop-6
+registers:
+  - { id: 0, class: vgpr_32 }
+  - { id: 1, class: vgpr_32 }
+  - { id: 2, class: sgpr_32 }
+body:             |
+  bb.0.entry:
+    successors: %bb.1
+
+    %0 = IMPLICIT_DEF
+    %1 = IMPLICIT_DEF
+    %2 = IMPLICIT_DEF
+    SI_INIT_M0 -1, implicit-def $m0
+    DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec
+    S_BRANCH %bb.1
+
+  bb.1:
+    successors: %bb.2
+
+    SI_INIT_M0 -1, implicit-def $m0
+    DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec
+    SI_INIT_M0 -1, implicit-def $m0
+    DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec
+    $m0 = COPY %2:sgpr_32
+    S_SENDMSG 34, implicit $exec, implicit $m0
+    S_BRANCH %bb.2
+
+  bb.2:
+    successors: %bb.3, %bb.1
+    S_CBRANCH_VCCZ %bb.1, implicit undef $vcc
+    S_BRANCH %bb.3
+
+
+  bb.3:
+    S_ENDPGM 0
+...
+
+# GCN-LABEL: name: m0-in-loop-7
+# GCN:    bb.0.entry:
+# GCN:      SI_INIT_M0 -1
+# GCN-NEXT: IMPLICIT_DEF
+# GCN-NEXT: IMPLICIT_DEF
+# GCN-NEXT: IMPLICIT_DEF
+# GCN-NEXT: DS_WRITE_B32
+
+# GCN:    bb.1:
+# GCN:  SI_INIT_M0 -1
+# GCN-NEXT: DS_WRITE_B32
+# GCN-NEXT: DS_WRITE_B32
+
+# GCN:    bb.2:
+# GCN: $m0 = COPY %2
+# GCN-NEXT:      SENDMSG
+
+---
+name: m0-in-loop-7
+registers:
+  - { id: 0, class: vgpr_32 }
+  - { id: 1, class: vgpr_32 }
+  - { id: 2, class: sgpr_32 }
+body:             |
+  bb.0.entry:
+    successors: %bb.1
+
+    %0 = IMPLICIT_DEF
+    %1 = IMPLICIT_DEF
+    %2 = IMPLICIT_DEF
+    SI_INIT_M0 -1, implicit-def $m0
+    DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec
+    S_BRANCH %bb.1
+
+  bb.1:
+    successors: %bb.2
+
+    SI_INIT_M0 -1, implicit-def $m0
+    DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec
+    SI_INIT_M0 -1, implicit-def $m0
+    DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec
+    S_BRANCH %bb.2
+
+  bb.2:
+    successors: %bb.3, %bb.1
+    $m0 = COPY %2:sgpr_32
+    S_SENDMSG 34, implicit $exec, implicit $m0
+    S_CBRANCH_VCCZ %bb.1, implicit undef $vcc
+    S_BRANCH %bb.3
+
+  bb.3:
+    S_ENDPGM 0
+...


        


More information about the llvm-commits mailing list