[llvm] r310987 - [AMDGPU] Eliminate no-effect instructions before s_endpgm

Stanislav Mekhanoshin via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 15 21:43:49 PDT 2017


Author: rampitec
Date: Tue Aug 15 21:43:49 2017
New Revision: 310987

URL: http://llvm.org/viewvc/llvm-project?rev=310987&view=rev
Log:
[AMDGPU] Eliminate no-effect instructions before s_endpgm

Differential Revision: https://reviews.llvm.org/D36585
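
The pass (SIOptimizeExecMaskingPreRA, changed below) now walks backwards
from blocks that end in S_ENDPGM and deletes instructions whose effects
cannot be observed once the program ends: anything that does not store, is
not a barrier or a call, and has no unmodeled side effects or ordered
memory references. If an entire block body is removed, the walk ascends
into predecessors whose only successor is that block. Registers touched by
the deleted instructions are collected so that their live intervals (or
register units, for physical registers) can be recomputed afterwards.

As a rough illustration, taken from the kill_all case of the new
endpgm-dce.mir test below, a block such as

    %vcc = IMPLICIT_DEF
    %0 = IMPLICIT_DEF
    %3 = IMPLICIT_DEF
    %sgpr0_sgpr1 = S_OR_B64 %exec, killed %vcc, implicit-def %scc
    %1 = FLAT_LOAD_DWORD %0, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 4)
    %2 = V_ADD_F32_e64 0, killed %1, 0, 1, 0, 0, implicit %exec
    %4 = S_ADD_U32 %3, 1, implicit-def %scc
    S_ENDPGM

is reduced to just S_ENDPGM.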

Added:
    llvm/trunk/test/CodeGen/AMDGPU/endpgm-dce.mir
Modified:
    llvm/trunk/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
    llvm/trunk/test/CodeGen/AMDGPU/branch-condition-and.ll
    llvm/trunk/test/CodeGen/AMDGPU/collapse-endcf.ll
    llvm/trunk/test/CodeGen/AMDGPU/loop_break.ll
    llvm/trunk/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll
    llvm/trunk/test/CodeGen/AMDGPU/shrink-carry.mir
    llvm/trunk/test/CodeGen/AMDGPU/si-annotate-cf-noloop.ll
    llvm/trunk/test/CodeGen/AMDGPU/si-lower-control-flow-unreachable-block.ll
    llvm/trunk/test/CodeGen/AMDGPU/skip-if-dead.ll
    llvm/trunk/test/CodeGen/AMDGPU/spill-empty-live-interval.mir
    llvm/trunk/test/CodeGen/AMDGPU/uniform-cfg.ll
    llvm/trunk/test/CodeGen/AMDGPU/valu-i1.ll
    llvm/trunk/test/CodeGen/AMDGPU/write-register-vgpr-into-sgpr.ll
    llvm/trunk/test/CodeGen/AMDGPU/write_register.ll

Modified: llvm/trunk/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp?rev=310987&r1=310986&r2=310987&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp Tue Aug 15 21:43:49 2017
@@ -111,9 +111,62 @@ bool SIOptimizeExecMaskingPreRA::runOnMa
   const SIInstrInfo *TII = ST.getInstrInfo();
   MachineRegisterInfo &MRI = MF.getRegInfo();
   LiveIntervals *LIS = &getAnalysis<LiveIntervals>();
+  DenseSet<unsigned> RecalcRegs({AMDGPU::EXEC_LO, AMDGPU::EXEC_HI});
   bool Changed = false;
 
   for (MachineBasicBlock &MBB : MF) {
+
+    // Try to remove unneeded instructions before s_endpgm.
+    if (MBB.succ_empty()) {
+      if (MBB.empty() || MBB.back().getOpcode() != AMDGPU::S_ENDPGM)
+        continue;
+
+      SmallVector<MachineBasicBlock*, 4> Blocks({&MBB});
+
+      while (!Blocks.empty()) {
+        auto CurBB = Blocks.pop_back_val();
+        auto I = CurBB->rbegin(), E = CurBB->rend();
+        if (I != E) {
+          if (I->isUnconditionalBranch() || I->getOpcode() == AMDGPU::S_ENDPGM)
+            ++I;
+          else if (I->isBranch())
+            continue;
+        }
+
+        while (I != E) {
+          if (I->isDebugValue())
+            continue;
+          if (I->mayStore() || I->isBarrier() || I->isCall() ||
+              I->hasUnmodeledSideEffects() || I->hasOrderedMemoryRef())
+            break;
+
+          DEBUG(dbgs() << "Removing no effect instruction: " << *I << '\n');
+
+          for (auto &Op : I->operands())
+            if (Op.isReg())
+              RecalcRegs.insert(Op.getReg());
+
+          auto Next = std::next(I);
+          LIS->RemoveMachineInstrFromMaps(*I);
+          I->eraseFromParent();
+          I = Next;
+
+          Changed = true;
+        }
+
+        if (I != E)
+          continue;
+
+        // Try to ascend predecessors.
+        for (auto *Pred : CurBB->predecessors()) {
+          if (Pred->succ_size() == 1)
+            Blocks.push_back(Pred);
+        }
+      }
+      continue;
+    }
+
+    // Try to collapse adjacent endifs.
     auto Lead = MBB.begin(), E = MBB.end();
     if (MBB.succ_size() != 1 || Lead == E || !isEndCF(*Lead, TRI))
       continue;
@@ -174,9 +227,16 @@ bool SIOptimizeExecMaskingPreRA::runOnMa
   }
 
   if (Changed) {
-    // Recompute liveness for both reg units of exec.
-    LIS->removeRegUnit(*MCRegUnitIterator(AMDGPU::EXEC_LO, TRI));
-    LIS->removeRegUnit(*MCRegUnitIterator(AMDGPU::EXEC_HI, TRI));
+    for (auto Reg : RecalcRegs) {
+      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+        LIS->removeInterval(Reg);
+        if (!MRI.reg_empty(Reg))
+          LIS->createAndComputeVirtRegInterval(Reg);
+      } else {
+        for (MCRegUnitIterator U(Reg, TRI); U.isValid(); ++U)
+          LIS->removeRegUnit(*U);
+      }
+    }
   }
 
   return Changed;

Modified: llvm/trunk/test/CodeGen/AMDGPU/branch-condition-and.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/branch-condition-and.ll?rev=310987&r1=310986&r2=310987&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/branch-condition-and.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/branch-condition-and.ll Tue Aug 15 21:43:49 2017
@@ -20,7 +20,6 @@
 ; GCN: ds_write_b32
 
 ; GCN: [[BB5]]
-; GCN: s_or_b64 exec, exec
 ; GCN-NEXT: s_endpgm
 ; GCN-NEXT: .Lfunc_end
 define amdgpu_ps void @ham(float %arg, float %arg1) #0 {

Modified: llvm/trunk/test/CodeGen/AMDGPU/collapse-endcf.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/collapse-endcf.ll?rev=310987&r1=310986&r2=310987&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/collapse-endcf.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/collapse-endcf.ll Tue Aug 15 21:43:49 2017
@@ -9,7 +9,6 @@
 ; GCN-NEXT: {{^BB[0-9_]+}}:
 ; GCN:      store_dword
 ; GCN-NEXT: {{^}}[[ENDIF]]:
-; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC]]
 ; GCN-NEXT: s_endpgm
 define amdgpu_kernel void @simple_nested_if(i32 addrspace(1)* nocapture %arg) {
 bb:
@@ -45,7 +44,6 @@ bb.outer.end:
 ; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_INNER]]
 ; GCN:      store_dword
 ; GCN-NEXT: {{^}}[[ENDIF_OUTER]]:
-; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_OUTER]]
 ; GCN-NEXT: s_endpgm
 define amdgpu_kernel void @uncollapsable_nested_if(i32 addrspace(1)* nocapture %arg) {
 bb:
@@ -90,7 +88,6 @@ bb.outer.end:
 ; GCN-NEXT: ; mask branch [[ENDIF_OUTER]]
 ; GCN:      store_dword
 ; GCN-NEXT: {{^}}[[ENDIF_OUTER]]:
-; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_OUTER]]
 ; GCN-NEXT: s_endpgm
 define amdgpu_kernel void @nested_if_if_else(i32 addrspace(1)* nocapture %arg) {
 bb:
@@ -141,13 +138,10 @@ bb.outer.end:
 ; GCN-NEXT: {{^BB[0-9_]+}}:
 ; GCN:      store_dword
 ; GCN-NEXT: s_and_saveexec_b64 [[SAVEEXEC_INNER_IF_OUTER_THEN:s\[[0-9:]+\]]]
-; GCN-NEXT: ; mask branch [[ENDIF_INNER_OUTER_THEN:BB[0-9_]+]]
+; GCN-NEXT: ; mask branch [[ENDIF_OUTER]]
 ; GCN-NEXT: {{^BB[0-9_]+}}:
 ; GCN:      store_dword
-; GCN-NEXT: {{^}}[[ENDIF_INNER_OUTER_THEN]]:
-; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_INNER_IF_OUTER_THEN]]
 ; GCN-NEXT: {{^}}[[ENDIF_OUTER]]:
-; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_OUTER3]]
 ; GCN-NEXT: s_endpgm
 define amdgpu_kernel void @nested_if_else_if(i32 addrspace(1)* nocapture %arg) {
 bb:
@@ -183,6 +177,33 @@ bb.outer.end:
   ret void
 }
 
+; GCN-LABEL: {{^}}s_endpgm_unsafe_barrier:
+; GCN:      s_and_saveexec_b64 [[SAVEEXEC:s\[[0-9:]+\]]]
+; GCN-NEXT: ; mask branch [[ENDIF:BB[0-9_]+]]
+; GCN-NEXT: {{^BB[0-9_]+}}:
+; GCN:      store_dword
+; GCN-NEXT: {{^}}[[ENDIF]]:
+; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC]]
+; GCN:      s_barrier
+; GCN-NEXT: s_endpgm
+define amdgpu_kernel void @s_endpgm_unsafe_barrier(i32 addrspace(1)* nocapture %arg) {
+bb:
+  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
+  %tmp1 = icmp ugt i32 %tmp, 1
+  br i1 %tmp1, label %bb.then, label %bb.end
+
+bb.then:                                          ; preds = %bb
+  %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp
+  store i32 0, i32 addrspace(1)* %tmp4, align 4
+  br label %bb.end
+
+bb.end:                                           ; preds = %bb.then, %bb
+  call void @llvm.amdgcn.s.barrier()
+  ret void
+}
+
 declare i32 @llvm.amdgcn.workitem.id.x() #0
+declare void @llvm.amdgcn.s.barrier() #1
 
 attributes #0 = { nounwind readnone speculatable }
+attributes #1 = { nounwind convergent }

Added: llvm/trunk/test/CodeGen/AMDGPU/endpgm-dce.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/endpgm-dce.mir?rev=310987&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/endpgm-dce.mir (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/endpgm-dce.mir Tue Aug 15 21:43:49 2017
@@ -0,0 +1,297 @@
+# RUN: llc -march=amdgcn -verify-machineinstrs -run-pass si-optimize-exec-masking-pre-ra %s -o - | FileCheck -check-prefix=GCN %s
+
+# GCN-LABEL: name: kill_all
+# GCN:      bb.0:
+# GCN-NEXT: S_ENDPGM
+name: kill_all
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: vreg_64 }
+  - { id: 1, class: vgpr_32 }
+  - { id: 2, class: vgpr_32 }
+  - { id: 3, class: sgpr_32 }
+  - { id: 4, class: sgpr_32 }
+body:             |
+  bb.0:
+    %vcc = IMPLICIT_DEF
+    %0 = IMPLICIT_DEF
+    %3 = IMPLICIT_DEF
+    %sgpr0_sgpr1 = S_OR_B64 %exec, killed %vcc, implicit-def %scc
+    %1 = FLAT_LOAD_DWORD %0, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 4)
+    %2 = V_ADD_F32_e64 0, killed %1, 0, 1, 0, 0, implicit %exec
+    %4 = S_ADD_U32 %3, 1, implicit-def %scc
+    S_ENDPGM
+...
+---
+# GCN-LABEL: name: load_without_memoperand
+# GCN:      %sgpr0_sgpr1 = S_OR_B64 %exec, killed %vcc, implicit-def %scc
+# GCN-NEXT: dead %1 = FLAT_LOAD_DWORD %0, 0, 0, 0, implicit %exec, implicit %flat_scr
+# GCN-NEXT: S_ENDPGM
+name: load_without_memoperand
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: vreg_64 }
+  - { id: 1, class: vgpr_32 }
+  - { id: 2, class: vgpr_32 }
+  - { id: 3, class: sgpr_32 }
+  - { id: 4, class: sgpr_32 }
+body:             |
+  bb.0:
+    %vcc = IMPLICIT_DEF
+    %0 = IMPLICIT_DEF
+    %3 = IMPLICIT_DEF
+    %sgpr0_sgpr1 = S_OR_B64 %exec, killed %vcc, implicit-def %scc
+    %1 = FLAT_LOAD_DWORD %0, 0, 0, 0, implicit %exec, implicit %flat_scr
+    %2 = V_ADD_F32_e64 0, killed %1, 0, 1, 0, 0, implicit %exec
+    %4 = S_ADD_U32 %3, 1, implicit-def %scc
+    S_ENDPGM
+...
+---
+# GCN-LABEL: name: load_volatile
+# GCN:      %sgpr0_sgpr1 = S_OR_B64 %exec, killed %vcc, implicit-def %scc
+# GCN-NEXT: dead %1 = FLAT_LOAD_DWORD %0, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile load 4)
+# GCN-NEXT: S_ENDPGM
+name: load_volatile
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: vreg_64 }
+  - { id: 1, class: vgpr_32 }
+  - { id: 2, class: vgpr_32 }
+  - { id: 3, class: sgpr_32 }
+  - { id: 4, class: sgpr_32 }
+body:             |
+  bb.0:
+    %vcc = IMPLICIT_DEF
+    %0 = IMPLICIT_DEF
+    %3 = IMPLICIT_DEF
+    %sgpr0_sgpr1 = S_OR_B64 %exec, killed %vcc, implicit-def %scc
+    %1 = FLAT_LOAD_DWORD %0, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile load 4)
+    %2 = V_ADD_F32_e64 0, killed %1, 0, 1, 0, 0, implicit %exec
+    %4 = S_ADD_U32 %3, 1, implicit-def %scc
+    S_ENDPGM
+...
+---
+# GCN-LABEL: name: store
+# GCN:      %sgpr0_sgpr1 = S_OR_B64 %exec, killed %vcc, implicit-def %scc
+# GCN-NEXT: FLAT_STORE_DWORD %0, %1, 0, 0, 0, implicit %exec, implicit %flat_scr :: (store 4)
+# GCN-NEXT: S_ENDPGM
+name: store
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: vreg_64 }
+  - { id: 1, class: vgpr_32 }
+body:             |
+  bb.0:
+    %vcc = IMPLICIT_DEF
+    %0 = IMPLICIT_DEF
+    %1 = IMPLICIT_DEF
+    %sgpr0_sgpr1 = S_OR_B64 %exec, killed %vcc, implicit-def %scc
+    FLAT_STORE_DWORD %0, %1, 0, 0, 0, implicit %exec, implicit %flat_scr :: (store 4)
+    S_ENDPGM
+...
+---
+# GCN-LABEL: name: barrier
+# GCN:      %sgpr0_sgpr1 = S_OR_B64 %exec, killed %vcc, implicit-def %scc
+# GCN-NEXT: S_BARRIER
+# GCN-NEXT: S_ENDPGM
+name: barrier
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    %vcc = IMPLICIT_DEF
+    %sgpr0_sgpr1 = S_OR_B64 %exec, killed %vcc, implicit-def %scc
+    S_BARRIER
+    S_ENDPGM
+...
+---
+# GCN-LABEL: name: call
+# GCN:      %sgpr0_sgpr1 = S_OR_B64 %exec, killed %vcc, implicit-def %scc
+# GCN-NEXT: %sgpr4_sgpr5 = S_SWAPPC_B64 %sgpr2_sgpr3
+# GCN-NEXT: S_ENDPGM
+name: call
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    %vcc = IMPLICIT_DEF
+    %sgpr0_sgpr1 = S_OR_B64 %exec, killed %vcc, implicit-def %scc
+    %sgpr4_sgpr5 = S_SWAPPC_B64 %sgpr2_sgpr3
+    S_ENDPGM
+...
+---
+# GCN-LABEL: name: exp
+# GCN:      %sgpr0_sgpr1 = S_OR_B64 %exec, killed %vcc, implicit-def %scc
+# GCN-NEXT: EXP 32, undef %0, undef %1, %2, undef %3, 0, 0, 15, implicit %exec
+# GCN-NEXT: S_ENDPGM
+name: exp
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: vgpr_32 }
+  - { id: 1, class: vgpr_32 }
+  - { id: 2, class: vgpr_32 }
+  - { id: 3, class: vgpr_32 }
+body:             |
+  bb.0:
+    %vcc = IMPLICIT_DEF
+    %2 = IMPLICIT_DEF
+    %sgpr0_sgpr1 = S_OR_B64 %exec, killed %vcc, implicit-def %scc
+    EXP 32, undef %0, undef %1, killed %2, undef %3, 0, 0, 15, implicit %exec
+    S_ENDPGM
+...
+---
+# GCN-LABEL: name: return_to_epilog
+# GCN:      %sgpr0_sgpr1 = S_OR_B64 %exec, killed %vcc, implicit-def %scc
+# GCN-NEXT: SI_RETURN_TO_EPILOG killed %vgpr0
+name: return_to_epilog
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    %vcc = IMPLICIT_DEF
+    %vgpr0 = IMPLICIT_DEF
+    %sgpr0_sgpr1 = S_OR_B64 %exec, killed %vcc, implicit-def %scc
+    SI_RETURN_TO_EPILOG killed %vgpr0
+...
+---
+# GCN-LABEL: name: split_block
+# GCN:      bb.0:
+# GCN-NEXT:   successors: %bb.1
+# GCN-NOT:  S_OR_B64
+# GCN:      bb.1:
+# GCN-NEXT:   S_ENDPGM
+name: split_block
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: vgpr_32 }
+  - { id: 1, class: vgpr_32 }
+  - { id: 2, class: sgpr_32 }
+  - { id: 3, class: sgpr_32 }
+body:             |
+  bb.0:
+    %vcc = IMPLICIT_DEF
+    %sgpr0_sgpr1 = S_OR_B64 %exec, killed %vcc, implicit-def %scc
+
+  bb.1:
+    %0 = IMPLICIT_DEF
+    %2 = IMPLICIT_DEF
+    %1 = V_ADD_F32_e64 0, killed %0, 0, 1, 0, 0, implicit %exec
+    %3 = S_ADD_U32 %2, 1, implicit-def %scc
+    S_ENDPGM
+...
+---
+# GCN-LABEL: name: split_block_empty_block
+# GCN:      bb.0:
+# GCN-NEXT:   successors: %bb.1
+# GCN-NOT:  S_OR_B64
+# GCN:      bb.1:
+# GCN:      bb.2:
+# GCN-NEXT:   S_ENDPGM
+name: split_block_empty_block
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    %vcc = IMPLICIT_DEF
+    %sgpr0_sgpr1 = S_OR_B64 %exec, killed %vcc, implicit-def %scc
+
+  bb.1:
+
+  bb.2:
+    S_ENDPGM
+...
+---
+# GCN-LABEL: name: split_block_uncond_branch
+# GCN:      bb.0:
+# GCN-NEXT:   successors: %bb.1
+# GCN:        S_BRANCH %bb.1
+# GCN-NOT:  S_OR_B64
+# GCN:      bb.1:
+# GCN-NEXT:   S_ENDPGM
+name: split_block_uncond_branch
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    %vcc = IMPLICIT_DEF
+    %sgpr0_sgpr1 = S_OR_B64 %exec, killed %vcc, implicit-def %scc
+    S_BRANCH %bb.1
+
+  bb.1:
+    S_ENDPGM
+...
+---
+# GCN-LABEL: name: split_block_cond_branch
+# GCN:      bb.0:
+# GCN-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+# GCN:        %sgpr0_sgpr1 = S_OR_B64 %exec, %vcc, implicit-def %scc
+# GCN:        S_CBRANCH_VCCNZ %bb.2, implicit undef %vcc
+# GCN:      bb.1:
+# GCN:      bb.2:
+# GCN-NEXT:   S_ENDPGM
+name: split_block_cond_branch
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    %vcc = IMPLICIT_DEF
+    %sgpr0_sgpr1 = S_OR_B64 %exec, %vcc, implicit-def %scc
+    S_CBRANCH_VCCNZ %bb.2, implicit undef %vcc
+
+  bb.1:
+
+  bb.2:
+    S_ENDPGM
+...
+---
+# GCN-LABEL: name: two_preds_both_dead
+# GCN:      bb.0:
+# GCN-NEXT:   successors: %bb.2
+# GCN-NOT:    S_OR
+# GCN:        S_BRANCH %bb.2
+# GCN:      bb.1:
+# GCN-NEXT:   successors: %bb.2
+# GCN-NOT:    S_AND
+# GCN:        S_BRANCH %bb.2
+# GCN:      bb.2:
+# GCN-NEXT:   S_ENDPGM
+name: two_preds_both_dead
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    %vcc = IMPLICIT_DEF
+    %sgpr0_sgpr1 = S_OR_B64 %exec, killed %vcc, implicit-def %scc
+    S_BRANCH %bb.2
+
+  bb.1:
+    %vcc = IMPLICIT_DEF
+    %sgpr0_sgpr1 = S_AND_B64 %exec, killed %vcc, implicit-def %scc
+    S_BRANCH %bb.2
+
+  bb.2:
+    S_ENDPGM
+...
+---
+# GCN-LABEL: name: two_preds_one_dead
+# GCN:      bb.0:
+# GCN-NEXT:   successors: %bb.2
+# GCN:        %sgpr0_sgpr1 = S_OR_B64 %exec, killed %vcc, implicit-def %scc
+# GCN-NEXT:   S_BARRIER
+# GCN-NEXT:   S_BRANCH %bb.2
+# GCN:      bb.1:
+# GCN-NEXT:   successors: %bb.2
+# GCN-NOT:    S_AND
+# GCN:        S_BRANCH %bb.2
+# GCN:      bb.2:
+# GCN-NEXT:   S_ENDPGM
+name: two_preds_one_dead
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    %vcc = IMPLICIT_DEF
+    %sgpr0_sgpr1 = S_OR_B64 %exec, killed %vcc, implicit-def %scc
+    S_BARRIER
+    S_BRANCH %bb.2
+
+  bb.1:
+    %vcc = IMPLICIT_DEF
+    %sgpr0_sgpr1 = S_AND_B64 %exec, killed %vcc, implicit-def %scc
+    S_BRANCH %bb.2
+
+  bb.2:
+    S_ENDPGM
+...

Modified: llvm/trunk/test/CodeGen/AMDGPU/loop_break.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/loop_break.ll?rev=310987&r1=310986&r2=310987&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/loop_break.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/loop_break.ll Tue Aug 15 21:43:49 2017
@@ -42,7 +42,6 @@
 ; GCN-NEXT: s_cbranch_execnz [[LOOP_ENTRY]]
 
 ; GCN: ; BB#4: ; %bb9
-; GCN-NEXT: s_or_b64 exec, exec, [[MASK]]
 ; GCN-NEXT: s_endpgm
 define amdgpu_kernel void @break_loop(i32 %arg) #0 {
 bb:

Modified: llvm/trunk/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll?rev=310987&r1=310986&r2=310987&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll Tue Aug 15 21:43:49 2017
@@ -86,7 +86,6 @@
 ; GCN: buffer_store_dword
 
 ; GCN: ; %UnifiedReturnBlock
-; GCN-NEXT: s_or_b64 exec, exec
 ; GCN-NEXT: s_endpgm
 define amdgpu_kernel void @multi_divergent_region_exit_ret_ret(i32 addrspace(1)* nocapture %arg0, i32 addrspace(1)* nocapture %arg1, i32 addrspace(1)* nocapture %arg2) #0 {
 entry:

Modified: llvm/trunk/test/CodeGen/AMDGPU/shrink-carry.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/shrink-carry.mir?rev=310987&r1=310986&r2=310987&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/shrink-carry.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/shrink-carry.mir Tue Aug 15 21:43:49 2017
@@ -21,7 +21,6 @@ body:             |
     %2 = IMPLICIT_DEF
     %3 = V_CMP_GT_U32_e64 %0, %1, implicit %exec
     %4, %5 = V_SUBBREV_U32_e64 0, %0, %3, implicit %exec
-    S_ENDPGM
 
 ...
 
@@ -46,7 +45,6 @@ body:             |
     %2 = IMPLICIT_DEF
     %3 = V_CMP_GT_U32_e64 %0, %1, implicit %exec
     %4, %5 = V_SUBB_U32_e64 %0, 0, %3, implicit %exec
-    S_ENDPGM
 
 ...
 
@@ -71,7 +69,6 @@ body:             |
     %2 = IMPLICIT_DEF
     %3 = V_CMP_GT_U32_e64 %0, %1, implicit %exec
     %4, %5 = V_ADDC_U32_e64 0, %0, %3, implicit %exec
-    S_ENDPGM
 
 ...
 
@@ -96,6 +93,5 @@ body:             |
     %2 = IMPLICIT_DEF
     %3 = V_CMP_GT_U32_e64 %0, %1, implicit %exec
     %4, %5 = V_ADDC_U32_e64 %0, 0, %3, implicit %exec
-    S_ENDPGM
 
 ...

Modified: llvm/trunk/test/CodeGen/AMDGPU/si-annotate-cf-noloop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/si-annotate-cf-noloop.ll?rev=310987&r1=310986&r2=310987&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/si-annotate-cf-noloop.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/si-annotate-cf-noloop.ll Tue Aug 15 21:43:49 2017
@@ -42,7 +42,6 @@ bb5:
 ; GCN: s_and_saveexec_b64
 ; GCN: ; mask branch [[UNIFIED_RET:BB[0-9]+_[0-9]+]]
 ; GCN-NEXT: [[UNIFIED_RET]]:
-; GCN-NEXT: s_or_b64 exec, exec
 ; GCN-NEXT: s_endpgm
 ; GCN: .Lfunc_end
 define amdgpu_kernel void @annotate_ret_noloop(<4 x float> addrspace(1)* noalias nocapture readonly %arg) #0 {

Modified: llvm/trunk/test/CodeGen/AMDGPU/si-lower-control-flow-unreachable-block.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/si-lower-control-flow-unreachable-block.ll?rev=310987&r1=310986&r2=310987&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/si-lower-control-flow-unreachable-block.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/si-lower-control-flow-unreachable-block.ll Tue Aug 15 21:43:49 2017
@@ -10,7 +10,6 @@
 ; GCN: ; divergent unreachable
 
 ; GCN-NEXT: [[RET]]: ; %UnifiedReturnBlock
-; GCN-NEXT: s_or_b64 exec, exec
 ; GCN: s_endpgm
 
 define amdgpu_kernel void @lower_control_flow_unreachable_terminator() #0 {
@@ -37,7 +36,6 @@ ret:
 ; GCN: ; divergent unreachable
 
 ; GCN: [[RETURN]]:
-; GCN-NEXT: s_or_b64 exec, exec
 ; GCN-NEXT: s_endpgm
 define amdgpu_kernel void @lower_control_flow_unreachable_terminator_swap_block_order() #0 {
 bb:

Modified: llvm/trunk/test/CodeGen/AMDGPU/skip-if-dead.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/skip-if-dead.ll?rev=310987&r1=310986&r2=310987&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/skip-if-dead.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/skip-if-dead.ll Tue Aug 15 21:43:49 2017
@@ -354,7 +354,6 @@ bb7:
 ; CHECK: buffer_store_dword
 
 ; CHECK: [[END]]:
-; CHECK: s_or_b64 exec, exec
 ; CHECK: s_endpgm
 define amdgpu_ps void @if_after_kill_block(float %arg, float %arg1, <4 x float> %arg2) #0 {
 bb:

Modified: llvm/trunk/test/CodeGen/AMDGPU/spill-empty-live-interval.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/spill-empty-live-interval.mir?rev=310987&r1=310986&r2=310987&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/spill-empty-live-interval.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/spill-empty-live-interval.mir Tue Aug 15 21:43:49 2017
@@ -35,7 +35,6 @@ body:             |
     S_NOP 0, implicit %3.sub1
     S_NOP 0, implicit %0.sub1
     S_NOP 0, implicit undef %0.sub0
-    S_ENDPGM
 
 ...
 

Modified: llvm/trunk/test/CodeGen/AMDGPU/uniform-cfg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/uniform-cfg.ll?rev=310987&r1=310986&r2=310987&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/uniform-cfg.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/uniform-cfg.ll Tue Aug 15 21:43:49 2017
@@ -330,12 +330,14 @@ endif:
 
 ; GCN-LABEL: {{^}}divergent_inside_uniform:
 ; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 0
-; GCN: s_cbranch_scc0 [[IF_LABEL:[0-9_A-Za-z]+]]
-; GCN: [[IF_LABEL]]:
+; GCN: s_cbranch_scc1 [[ENDIF_LABEL:[0-9_A-Za-z]+]]
 ; GCN: v_cmp_gt_u32_e32 vcc, 16, v{{[0-9]+}}
 ; GCN: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc
+; GCN: ; mask branch [[ENDIF_LABEL]]
 ; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
 ; GCN: buffer_store_dword [[ONE]]
+; GCN: [[ENDIF_LABEL]]:
+; GCN: s_endpgm
 define amdgpu_kernel void @divergent_inside_uniform(i32 addrspace(1)* %out, i32 %cond) {
 entry:
   %u_cmp = icmp eq i32 %cond, 0

Modified: llvm/trunk/test/CodeGen/AMDGPU/valu-i1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/valu-i1.ll?rev=310987&r1=310986&r2=310987&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/valu-i1.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/valu-i1.ll Tue Aug 15 21:43:49 2017
@@ -71,7 +71,6 @@ end:
 ; SI: buffer_store_dword
 
 ; SI-NEXT: {{^}}[[EXIT]]:
-; SI: s_or_b64 exec, exec, [[BR_SREG]]
 ; SI: s_endpgm
 define amdgpu_kernel void @simple_test_v_if(i32 addrspace(1)* %dst, i32 addrspace(1)* %src) #1 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
@@ -98,7 +97,6 @@ exit:
 ; SI: buffer_store_dword
 
 ; SI-NEXT: {{^}}[[EXIT]]:
-; SI: s_or_b64 exec, exec, [[BR_SREG]]
 ; SI: s_endpgm
 define amdgpu_kernel void @simple_test_v_if_ret_else_ret(i32 addrspace(1)* %dst, i32 addrspace(1)* %src) #1 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -137,7 +135,6 @@ exit:
 ; SI-NEXT: buffer_store_dword
 
 ; SI-NEXT: {{^}}[[UNIFIED_RETURN]]: ; %UnifiedReturnBlock
-; SI: s_or_b64 exec, exec
 ; SI: s_endpgm
 define amdgpu_kernel void @simple_test_v_if_ret_else_code_ret(i32 addrspace(1)* %dst, i32 addrspace(1)* %src) #1 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -230,9 +227,6 @@ exit:
 ; SI-NEXT: s_andn2_b64 exec, exec, [[COND_STATE]]
 ; SI-NEXT: s_cbranch_execnz [[LABEL_LOOP]]
 
-; SI: BB#5
-; SI: s_or_b64 exec, exec, [[COND_STATE]]
-
 ; SI: [[LABEL_EXIT]]:
 ; SI-NOT: [[COND_STATE]]
 ; SI: s_endpgm

Modified: llvm/trunk/test/CodeGen/AMDGPU/write-register-vgpr-into-sgpr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/write-register-vgpr-into-sgpr.ll?rev=310987&r1=310986&r2=310987&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/write-register-vgpr-into-sgpr.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/write-register-vgpr-into-sgpr.ll Tue Aug 15 21:43:49 2017
@@ -8,15 +8,17 @@
 
 declare void @llvm.write_register.i32(metadata, i32) #0
 declare i32 @llvm.amdgcn.workitem.id.x() #0
-
+declare void @llvm.amdgcn.wave.barrier() #2
 
 define amdgpu_kernel void @write_vgpr_into_sgpr() {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   call void @llvm.write_register.i32(metadata !0, i32 %tid)
+  call void @llvm.amdgcn.wave.barrier() #2
   ret void
 }
 
 attributes #0 = { nounwind readnone }
 attributes #1 = { nounwind }
+attributes #2 = { convergent nounwind }
 
 !0 = !{!"exec_lo"}

Modified: llvm/trunk/test/CodeGen/AMDGPU/write_register.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/write_register.ll?rev=310987&r1=310986&r2=310987&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/write_register.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/write_register.ll Tue Aug 15 21:43:49 2017
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -enable-misched=0 -verify-machineinstrs < %s | FileCheck %s
 
 declare void @llvm.write_register.i32(metadata, i32) #0
 declare void @llvm.write_register.i64(metadata, i64) #0
@@ -8,6 +8,7 @@ define amdgpu_kernel void @test_write_m0
   call void @llvm.write_register.i32(metadata !0, i32 0)
   call void @llvm.write_register.i32(metadata !0, i32 -1)
   call void @llvm.write_register.i32(metadata !0, i32 %val)
+  call void @llvm.amdgcn.wave.barrier() #1
   ret void
 }
 
@@ -19,6 +20,7 @@ define amdgpu_kernel void @test_write_ex
   call void @llvm.write_register.i64(metadata !1, i64 0)
   call void @llvm.write_register.i64(metadata !1, i64 -1)
   call void @llvm.write_register.i64(metadata !1, i64 %val)
+  call void @llvm.amdgcn.wave.barrier() #1
   ret void
 }
 
@@ -30,6 +32,7 @@ define amdgpu_kernel void @test_write_fl
   call void @llvm.write_register.i64(metadata !2, i64 0)
   call void @llvm.write_register.i64(metadata !2, i64 -1)
   call void @llvm.write_register.i64(metadata !2, i64 %val)
+  call void @llvm.amdgcn.wave.barrier() #1
   ret void
 }
 
@@ -39,6 +42,7 @@ define amdgpu_kernel void @test_write_fl
 define amdgpu_kernel void @test_write_flat_scratch_lo(i32 %val) #0 {
   call void @llvm.write_register.i32(metadata !3, i32 0)
   call void @llvm.write_register.i32(metadata !3, i32 %val)
+  call void @llvm.amdgcn.wave.barrier() #1
   ret void
 }
 
@@ -48,6 +52,7 @@ define amdgpu_kernel void @test_write_fl
 define amdgpu_kernel void @test_write_flat_scratch_hi(i32 %val) #0 {
   call void @llvm.write_register.i32(metadata !4, i32 0)
   call void @llvm.write_register.i32(metadata !4, i32 %val)
+  call void @llvm.amdgcn.wave.barrier() #1
   ret void
 }
 
@@ -57,6 +62,7 @@ define amdgpu_kernel void @test_write_fl
 define amdgpu_kernel void @test_write_exec_lo(i32 %val) #0 {
   call void @llvm.write_register.i32(metadata !5, i32 0)
   call void @llvm.write_register.i32(metadata !5, i32 %val)
+  call void @llvm.amdgcn.wave.barrier() #1
   ret void
 }
 
@@ -66,10 +72,14 @@ define amdgpu_kernel void @test_write_ex
 define amdgpu_kernel void @test_write_exec_hi(i32 %val) #0 {
   call void @llvm.write_register.i32(metadata !6, i32 0)
   call void @llvm.write_register.i32(metadata !6, i32 %val)
+  call void @llvm.amdgcn.wave.barrier() #1
   ret void
 }
 
+declare void @llvm.amdgcn.wave.barrier() #1
+
 attributes #0 = { nounwind }
+attributes #1 = { convergent nounwind }
 
 !0 = !{!"m0"}
 !1 = !{!"exec"}



