[llvm] 66fe602 - AMDGPU/GlobalISel: Fix masked control flow with fallthrough blocks

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Fri May 22 07:44:50 PDT 2020


Author: Matt Arsenault
Date: 2020-05-22T10:31:44-04:00
New Revision: 66fe60220ca2b1932e06093294c72b246be54ec8

URL: https://github.com/llvm/llvm-project/commit/66fe60220ca2b1932e06093294c72b246be54ec8
DIFF: https://github.com/llvm/llvm-project/commit/66fe60220ca2b1932e06093294c72b246be54ec8.diff

LOG: AMDGPU/GlobalISel: Fix masked control flow with fallthrough blocks

Unlike SelectionDAGBuilder, IRTranslator omits the unconditional
branch in fallthrough cases. Confusingly, the control flow pseudos
function in the opposite way from how the intrinsics are used, and the
branch targets always need to be swapped. Since we're inverting the
target blocks, we need to figure out the old fallthrough block and
insert a branch to the original unconditional branch target.

Added: 
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-amdgcn.if.xfail.mir

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
    llvm/lib/Target/AMDGPU/SIISelLowering.cpp
    llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-brcond.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 554075ac8ab4..63106df9e2a2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -2278,22 +2278,30 @@ bool AMDGPULegalizerInfo::legalizeBuildVector(
 // Return the use branch instruction, otherwise null if the usage is invalid.
 static MachineInstr *verifyCFIntrinsic(MachineInstr &MI,
                                        MachineRegisterInfo &MRI,
-                                       MachineInstr *&Br) {
+                                       MachineInstr *&Br,
+                                       MachineBasicBlock *&UncondBrTarget) {
   Register CondDef = MI.getOperand(0).getReg();
   if (!MRI.hasOneNonDBGUse(CondDef))
     return nullptr;
 
+  MachineBasicBlock *Parent = MI.getParent();
   MachineInstr &UseMI = *MRI.use_instr_nodbg_begin(CondDef);
-  if (UseMI.getParent() != MI.getParent() ||
+  if (UseMI.getParent() != Parent ||
       UseMI.getOpcode() != AMDGPU::G_BRCOND)
     return nullptr;
 
-  // Make sure the cond br is followed by a G_BR
+  // Make sure the cond br is followed by a G_BR, or is the last instruction.
   MachineBasicBlock::iterator Next = std::next(UseMI.getIterator());
-  if (Next != MI.getParent()->end()) {
+  if (Next == Parent->end()) {
+    MachineFunction::iterator NextMBB = std::next(Parent->getIterator());
+    if (NextMBB == Parent->getParent()->end()) // Illegal intrinsic use.
+      return nullptr;
+    UncondBrTarget = &*NextMBB;
+  } else {
     if (Next->getOpcode() != AMDGPU::G_BR)
       return nullptr;
     Br = &*Next;
+    UncondBrTarget = Br->getOperand(0).getMBB();
   }
 
   return &UseMI;
@@ -4110,7 +4118,8 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
   case Intrinsic::amdgcn_if:
   case Intrinsic::amdgcn_else: {
     MachineInstr *Br = nullptr;
-    if (MachineInstr *BrCond = verifyCFIntrinsic(MI, MRI, Br)) {
+    MachineBasicBlock *UncondBrTarget = nullptr;
+    if (MachineInstr *BrCond = verifyCFIntrinsic(MI, MRI, Br, UncondBrTarget)) {
       const SIRegisterInfo *TRI
         = static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
 
@@ -4118,25 +4127,28 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
       Register Def = MI.getOperand(1).getReg();
       Register Use = MI.getOperand(3).getReg();
 
-      MachineBasicBlock *BrTarget = BrCond->getOperand(1).getMBB();
-      if (Br)
-        BrTarget = Br->getOperand(0).getMBB();
-
+      MachineBasicBlock *CondBrTarget = BrCond->getOperand(1).getMBB();
       if (IntrID == Intrinsic::amdgcn_if) {
         B.buildInstr(AMDGPU::SI_IF)
           .addDef(Def)
           .addUse(Use)
-          .addMBB(BrTarget);
+          .addMBB(UncondBrTarget);
       } else {
         B.buildInstr(AMDGPU::SI_ELSE)
           .addDef(Def)
           .addUse(Use)
-          .addMBB(BrTarget)
+          .addMBB(UncondBrTarget)
           .addImm(0);
       }
 
-      if (Br)
-        Br->getOperand(0).setMBB(BrCond->getOperand(1).getMBB());
+      if (Br) {
+        Br->getOperand(0).setMBB(CondBrTarget);
+      } else {
+        // The IRTranslator skips inserting the G_BR for fallthrough cases, but
+        // since we're swapping branch targets it needs to be reinserted.
+        // FIXME: IRTranslator should probably not do this
+        B.buildBr(*CondBrTarget);
+      }
 
       MRI.setRegClass(Def, TRI->getWaveMaskRegClass());
       MRI.setRegClass(Use, TRI->getWaveMaskRegClass());
@@ -4149,23 +4161,23 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
   }
   case Intrinsic::amdgcn_loop: {
     MachineInstr *Br = nullptr;
-    if (MachineInstr *BrCond = verifyCFIntrinsic(MI, MRI, Br)) {
+    MachineBasicBlock *UncondBrTarget = nullptr;
+    if (MachineInstr *BrCond = verifyCFIntrinsic(MI, MRI, Br, UncondBrTarget)) {
       const SIRegisterInfo *TRI
         = static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
 
       B.setInstr(*BrCond);
 
-      MachineBasicBlock *BrTarget = BrCond->getOperand(1).getMBB();
-      if (Br)
-        BrTarget = Br->getOperand(0).getMBB();
-
+      MachineBasicBlock *CondBrTarget = BrCond->getOperand(1).getMBB();
       Register Reg = MI.getOperand(2).getReg();
       B.buildInstr(AMDGPU::SI_LOOP)
         .addUse(Reg)
-        .addMBB(BrTarget);
+        .addMBB(UncondBrTarget);
 
       if (Br)
-        Br->getOperand(0).setMBB(BrCond->getOperand(1).getMBB());
+        Br->getOperand(0).setMBB(CondBrTarget);
+      else
+        B.buildBr(*CondBrTarget);
 
       MI.eraseFromParent();
       BrCond->eraseFromParent();

diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index e3197e5ca3d3..3ddf4ae70397 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -4714,6 +4714,7 @@ SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,
   } else {
     // Get the target from BR if we don't negate the condition
     BR = findUser(BRCOND, ISD::BR);
+    assert(BR && "brcond missing unconditional branch user");
     Target = BR->getOperand(1);
   }
 

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll
index 0be830b3965a..bd313de000df 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll
@@ -37,7 +37,7 @@ define i32 @divergent_if_swap_brtarget_order1(i32 %value) {
 ; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
 ; CHECK-NEXT:    ; implicit-def: $vgpr0
 ; CHECK-NEXT:    s_and_saveexec_b64 s[4:5], vcc
-; CHECK-NEXT:    s_cbranch_execnz BB1_2
+; CHECK-NEXT:    s_cbranch_execz BB1_2
 ; CHECK-NEXT:  ; %bb.1: ; %if.true
 ; CHECK-NEXT:    global_load_dword v0, v[0:1], off
 ; CHECK-NEXT:  BB1_2: ; %endif

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-amdgcn.if.xfail.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-amdgcn.if.xfail.mir
new file mode 100644
index 000000000000..9716bb31db3f
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-amdgcn.if.xfail.mir
@@ -0,0 +1,21 @@
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=2 -pass-remarks-missed='gisel*' %s -o /dev/null 2>&1 | FileCheck -check-prefix=ERR %s
+
+# Make sure there's no crash if there is somehow no successor block.
+
+# ERR: remark: <unknown>:0:0: unable to legalize instruction: %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(s1) (in function: brcond_si_if_no_succ_block)
+
+---
+name: brcond_si_if_no_succ_block
+body:             |
+  bb.0:
+    S_NOP 0
+
+  bb.1:
+    successors: %bb.1
+    liveins: $vgpr0, $vgpr1
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s1) = G_ICMP intpred(ne), %0, %1
+    %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
+    G_BRCOND %3, %bb.1
+...

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-brcond.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-brcond.mir
index 7a32dad92168..52d44d2d08fb 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-brcond.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-brcond.mir
@@ -108,6 +108,7 @@ body:             |
   ; WAVE64:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
   ; WAVE64:   [[ICMP:%[0-9]+]]:sreg_64_xexec(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]]
   ; WAVE64:   [[SI_IF:%[0-9]+]]:sreg_64_xexec(s64) = SI_IF [[ICMP]](s1), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
+  ; WAVE64:   G_BR %bb.1
   ; WAVE64: bb.1:
   ; WAVE32-LABEL: name: brcond_si_if
   ; WAVE32: bb.0:
@@ -116,6 +117,7 @@ body:             |
   ; WAVE32:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
   ; WAVE32:   [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]]
   ; WAVE32:   [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s64) = SI_IF [[ICMP]](s1), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
+  ; WAVE32:   G_BR %bb.1
   ; WAVE32: bb.1:
   bb.0:
     successors: %bb.1
@@ -139,6 +141,7 @@ body:             |
   ; WAVE64:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
   ; WAVE64:   [[ICMP:%[0-9]+]]:sreg_64_xexec(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]]
   ; WAVE64:   [[SI_ELSE:%[0-9]+]]:sreg_64_xexec(s64) = SI_ELSE [[ICMP]](s1), %bb.1, 0, implicit-def $exec, implicit-def $scc, implicit $exec
+  ; WAVE64:   G_BR %bb.1
   ; WAVE64: bb.1:
   ; WAVE32-LABEL: name: brcond_si_else
   ; WAVE32: bb.0:
@@ -147,6 +150,7 @@ body:             |
   ; WAVE32:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
   ; WAVE32:   [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]]
   ; WAVE32:   [[SI_ELSE:%[0-9]+]]:sreg_32_xm0_xexec(s64) = SI_ELSE [[ICMP]](s1), %bb.1, 0, implicit-def $exec, implicit-def $scc, implicit $exec
+  ; WAVE32:   G_BR %bb.1
   ; WAVE32: bb.1:
   bb.0:
     successors: %bb.1
@@ -161,32 +165,148 @@ body:             |
 ...
 
 ---
-name: brcond_si_loop
+name: brcond_si_loop_brcond
+tracksRegLiveness: true
 body:             |
-  ; WAVE64-LABEL: name: brcond_si_loop
+  ; WAVE64-LABEL: name: brcond_si_loop_brcond
   ; WAVE64: bb.0:
   ; WAVE64:   successors: %bb.1(0x80000000)
+  ; WAVE64:   liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1
   ; WAVE64:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
   ; WAVE64:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
   ; WAVE64:   [[COPY2:%[0-9]+]]:sreg_64_xexec(s64) = COPY $sgpr0_sgpr1
-  ; WAVE64:   SI_LOOP [[COPY2]](s64), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
   ; WAVE64: bb.1:
-  ; WAVE32-LABEL: name: brcond_si_loop
+  ; WAVE64:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; WAVE64:   S_NOP 0
+  ; WAVE64:   SI_LOOP [[COPY2]](s64), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
+  ; WAVE64:   G_BR %bb.2
+  ; WAVE64: bb.2:
+  ; WAVE64:   S_NOP 0
+  ; WAVE32-LABEL: name: brcond_si_loop_brcond
   ; WAVE32: bb.0:
   ; WAVE32:   successors: %bb.1(0x80000000)
+  ; WAVE32:   liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1
   ; WAVE32:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
   ; WAVE32:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
   ; WAVE32:   [[COPY2:%[0-9]+]]:sreg_32_xm0_xexec(s64) = COPY $sgpr0_sgpr1
+  ; WAVE32: bb.1:
+  ; WAVE32:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; WAVE32:   S_NOP 0
   ; WAVE32:   SI_LOOP [[COPY2]](s64), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
+  ; WAVE32:   G_BR %bb.2
+  ; WAVE32: bb.2:
+  ; WAVE32:   S_NOP 0
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s64) = COPY $sgpr0_sgpr1
+
+  bb.1:
+    successors: %bb.1, %bb.2
+    S_NOP 0
+    %3:_(s1) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), %2
+    G_BRCOND %3, %bb.2
+    G_BR %bb.1
+
+  bb.2:
+    S_NOP 0
+...
+
+# This usage is backwards from how the intrinsic is supposed to be
+# used.
+---
+name: brcond_si_loop_brcond_back
+tracksRegLiveness: true
+body:             |
+  ; WAVE64-LABEL: name: brcond_si_loop_brcond_back
+  ; WAVE64: bb.0:
+  ; WAVE64:   successors: %bb.1(0x80000000)
+  ; WAVE64:   liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1
+  ; WAVE64:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; WAVE64:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; WAVE64:   [[COPY2:%[0-9]+]]:sreg_64_xexec(s64) = COPY $sgpr0_sgpr1
+  ; WAVE64: bb.1:
+  ; WAVE64:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; WAVE64:   S_NOP 0
+  ; WAVE64:   SI_LOOP [[COPY2]](s64), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec
+  ; WAVE64:   G_BR %bb.1
+  ; WAVE64: bb.2:
+  ; WAVE64:   S_NOP 0
+  ; WAVE32-LABEL: name: brcond_si_loop_brcond_back
+  ; WAVE32: bb.0:
+  ; WAVE32:   successors: %bb.1(0x80000000)
+  ; WAVE32:   liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1
+  ; WAVE32:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; WAVE32:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; WAVE32:   [[COPY2:%[0-9]+]]:sreg_32_xm0_xexec(s64) = COPY $sgpr0_sgpr1
   ; WAVE32: bb.1:
+  ; WAVE32:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; WAVE32:   S_NOP 0
+  ; WAVE32:   SI_LOOP [[COPY2]](s64), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec
+  ; WAVE32:   G_BR %bb.1
+  ; WAVE32: bb.2:
+  ; WAVE32:   S_NOP 0
   bb.0:
-    successors: %bb.1
     liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s64) = COPY $sgpr0_sgpr1
+
+  bb.1:
+    successors: %bb.1, %bb.2
+    S_NOP 0
     %3:_(s1) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), %2
     G_BRCOND %3, %bb.1
+    G_BR %bb.2
+
+  bb.2:
+    S_NOP 0
+...
+
+# This usage is backwards from how the intrinsic is supposed to be
+# used.
+---
+name: brcond_si_loop_brcond_back_fallthrough
+tracksRegLiveness: true
+body:             |
+  ; WAVE64-LABEL: name: brcond_si_loop_brcond_back_fallthrough
+  ; WAVE64: bb.0:
+  ; WAVE64:   successors: %bb.1(0x80000000)
+  ; WAVE64:   liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1
+  ; WAVE64:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; WAVE64:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; WAVE64:   [[COPY2:%[0-9]+]]:sreg_64_xexec(s64) = COPY $sgpr0_sgpr1
+  ; WAVE64: bb.1:
+  ; WAVE64:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; WAVE64:   S_NOP 0
+  ; WAVE64:   SI_LOOP [[COPY2]](s64), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec
+  ; WAVE64:   G_BR %bb.1
+  ; WAVE64: bb.2:
+  ; WAVE32-LABEL: name: brcond_si_loop_brcond_back_fallthrough
+  ; WAVE32: bb.0:
+  ; WAVE32:   successors: %bb.1(0x80000000)
+  ; WAVE32:   liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1
+  ; WAVE32:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; WAVE32:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; WAVE32:   [[COPY2:%[0-9]+]]:sreg_32_xm0_xexec(s64) = COPY $sgpr0_sgpr1
+  ; WAVE32: bb.1:
+  ; WAVE32:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; WAVE32:   S_NOP 0
+  ; WAVE32:   SI_LOOP [[COPY2]](s64), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec
+  ; WAVE32:   G_BR %bb.1
+  ; WAVE32: bb.2:
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s64) = COPY $sgpr0_sgpr1
 
   bb.1:
+    successors: %bb.1, %bb.2
+    S_NOP 0
+    %3:_(s1) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), %2
+    G_BRCOND %3, %bb.1
+
+  bb.2:
 ...

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll
index 19471deb887f..d2e06fb44989 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll
@@ -164,7 +164,7 @@ define void @localize_internal_globals(i1 %cond) {
 ; GFX9-NEXT:    s_xor_b64 s[4:5], vcc, s[4:5]
 ; GFX9-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
 ; GFX9-NEXT:    s_xor_b64 s[4:5], exec, s[6:7]
-; GFX9-NEXT:    s_cbranch_execnz BB2_2
+; GFX9-NEXT:    s_cbranch_execz BB2_2
 ; GFX9-NEXT:  ; %bb.1: ; %bb1
 ; GFX9-NEXT:    s_getpc_b64 s[6:7]
; GFX9-NEXT:    s_add_u32 s6, s6, static.gv2@rel32@lo+4

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
index 5e619cb4f47f..656aa864d4c7 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
@@ -16,7 +16,7 @@ define i64 @v_udiv_i64(i64 %num, i64 %den) {
 ; CHECK-NEXT:    ; implicit-def: $vgpr4_vgpr5
 ; CHECK-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
 ; CHECK-NEXT:    s_xor_b64 s[6:7], exec, s[6:7]
-; CHECK-NEXT:    s_cbranch_execnz BB0_2
+; CHECK-NEXT:    s_cbranch_execz BB0_2
 ; CHECK-NEXT:  ; %bb.1:
 ; CHECK-NEXT:    v_cvt_f32_u32_e32 v4, v2
 ; CHECK-NEXT:    v_cvt_f32_u32_e32 v5, v3
@@ -646,7 +646,7 @@ define <2 x i64> @v_udiv_v2i64(<2 x i64> %num, <2 x i64> %den) {
 ; CGP-NEXT:    ; implicit-def: $vgpr0_vgpr1
 ; CGP-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
 ; CGP-NEXT:    s_xor_b64 s[6:7], exec, s[6:7]
-; CGP-NEXT:    s_cbranch_execnz BB2_2
+; CGP-NEXT:    s_cbranch_execz BB2_2
 ; CGP-NEXT:  ; %bb.1:
 ; CGP-NEXT:    v_cvt_f32_u32_e32 v0, v4
 ; CGP-NEXT:    v_cvt_f32_u32_e32 v1, v5
@@ -814,7 +814,7 @@ define <2 x i64> @v_udiv_v2i64(<2 x i64> %num, <2 x i64> %den) {
 ; CGP-NEXT:    ; implicit-def: $vgpr4_vgpr5
 ; CGP-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
 ; CGP-NEXT:    s_xor_b64 s[6:7], exec, s[6:7]
-; CGP-NEXT:    s_cbranch_execnz BB2_6
+; CGP-NEXT:    s_cbranch_execz BB2_6
 ; CGP-NEXT:  ; %bb.5:
 ; CGP-NEXT:    v_cvt_f32_u32_e32 v4, v6
 ; CGP-NEXT:    v_cvt_f32_u32_e32 v5, v7
@@ -2318,7 +2318,7 @@ define i64 @v_udiv_i64_pow2_shl_denom(i64 %x, i64 %y) {
 ; CHECK-NEXT:    ; implicit-def: $vgpr2_vgpr3
 ; CHECK-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
 ; CHECK-NEXT:    s_xor_b64 s[6:7], exec, s[6:7]
-; CHECK-NEXT:    s_cbranch_execnz BB7_2
+; CHECK-NEXT:    s_cbranch_execz BB7_2
 ; CHECK-NEXT:  ; %bb.1:
 ; CHECK-NEXT:    v_cvt_f32_u32_e32 v2, v4
 ; CHECK-NEXT:    v_cvt_f32_u32_e32 v3, v5
@@ -2767,7 +2767,7 @@ define <2 x i64> @v_udiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
 ; CGP-NEXT:    ; implicit-def: $vgpr0_vgpr1
 ; CGP-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
 ; CGP-NEXT:    s_xor_b64 s[6:7], exec, s[6:7]
-; CGP-NEXT:    s_cbranch_execnz BB8_2
+; CGP-NEXT:    s_cbranch_execz BB8_2
 ; CGP-NEXT:  ; %bb.1:
 ; CGP-NEXT:    v_cvt_f32_u32_e32 v0, v10
 ; CGP-NEXT:    v_cvt_f32_u32_e32 v1, v11
@@ -2935,7 +2935,7 @@ define <2 x i64> @v_udiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
 ; CGP-NEXT:    ; implicit-def: $vgpr4_vgpr5
 ; CGP-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
 ; CGP-NEXT:    s_xor_b64 s[6:7], exec, s[6:7]
-; CGP-NEXT:    s_cbranch_execnz BB8_6
+; CGP-NEXT:    s_cbranch_execz BB8_6
 ; CGP-NEXT:  ; %bb.5:
 ; CGP-NEXT:    v_cvt_f32_u32_e32 v4, v8
 ; CGP-NEXT:    v_cvt_f32_u32_e32 v5, v9

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
index 4253067492eb..928c59239957 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
@@ -16,7 +16,7 @@ define i64 @v_urem_i64(i64 %num, i64 %den) {
 ; CHECK-NEXT:    ; implicit-def: $vgpr4_vgpr5
 ; CHECK-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
 ; CHECK-NEXT:    s_xor_b64 s[6:7], exec, s[6:7]
-; CHECK-NEXT:    s_cbranch_execnz BB0_2
+; CHECK-NEXT:    s_cbranch_execz BB0_2
 ; CHECK-NEXT:  ; %bb.1:
 ; CHECK-NEXT:    v_cvt_f32_u32_e32 v4, v2
 ; CHECK-NEXT:    v_cvt_f32_u32_e32 v5, v3
@@ -642,7 +642,7 @@ define <2 x i64> @v_urem_v2i64(<2 x i64> %num, <2 x i64> %den) {
 ; CGP-NEXT:    ; implicit-def: $vgpr0_vgpr1
 ; CGP-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
 ; CGP-NEXT:    s_xor_b64 s[6:7], exec, s[6:7]
-; CGP-NEXT:    s_cbranch_execnz BB2_2
+; CGP-NEXT:    s_cbranch_execz BB2_2
 ; CGP-NEXT:  ; %bb.1:
 ; CGP-NEXT:    v_cvt_f32_u32_e32 v0, v4
 ; CGP-NEXT:    v_cvt_f32_u32_e32 v1, v5
@@ -809,7 +809,7 @@ define <2 x i64> @v_urem_v2i64(<2 x i64> %num, <2 x i64> %den) {
 ; CGP-NEXT:    ; implicit-def: $vgpr4_vgpr5
 ; CGP-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
 ; CGP-NEXT:    s_xor_b64 s[6:7], exec, s[6:7]
-; CGP-NEXT:    s_cbranch_execnz BB2_6
+; CGP-NEXT:    s_cbranch_execz BB2_6
 ; CGP-NEXT:  ; %bb.5:
 ; CGP-NEXT:    v_cvt_f32_u32_e32 v4, v6
 ; CGP-NEXT:    v_cvt_f32_u32_e32 v5, v7
@@ -2292,7 +2292,7 @@ define i64 @v_urem_i64_pow2_shl_denom(i64 %x, i64 %y) {
 ; CHECK-NEXT:    ; implicit-def: $vgpr2_vgpr3
 ; CHECK-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
 ; CHECK-NEXT:    s_xor_b64 s[6:7], exec, s[6:7]
-; CHECK-NEXT:    s_cbranch_execnz BB7_2
+; CHECK-NEXT:    s_cbranch_execz BB7_2
 ; CHECK-NEXT:  ; %bb.1:
 ; CHECK-NEXT:    v_cvt_f32_u32_e32 v2, v4
 ; CHECK-NEXT:    v_cvt_f32_u32_e32 v3, v5
@@ -2738,7 +2738,7 @@ define <2 x i64> @v_urem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
 ; CGP-NEXT:    ; implicit-def: $vgpr0_vgpr1
 ; CGP-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
 ; CGP-NEXT:    s_xor_b64 s[6:7], exec, s[6:7]
-; CGP-NEXT:    s_cbranch_execnz BB8_2
+; CGP-NEXT:    s_cbranch_execz BB8_2
 ; CGP-NEXT:  ; %bb.1:
 ; CGP-NEXT:    v_cvt_f32_u32_e32 v0, v10
 ; CGP-NEXT:    v_cvt_f32_u32_e32 v1, v11
@@ -2905,7 +2905,7 @@ define <2 x i64> @v_urem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
 ; CGP-NEXT:    ; implicit-def: $vgpr4_vgpr5
 ; CGP-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
 ; CGP-NEXT:    s_xor_b64 s[6:7], exec, s[6:7]
-; CGP-NEXT:    s_cbranch_execnz BB8_6
+; CGP-NEXT:    s_cbranch_execz BB8_6
 ; CGP-NEXT:  ; %bb.5:
 ; CGP-NEXT:    v_cvt_f32_u32_e32 v4, v8
 ; CGP-NEXT:    v_cvt_f32_u32_e32 v5, v9


        


More information about the llvm-commits mailing list