[llvm] [AMDGPU] Simplify cond branch if condition is known (PR #180081)

via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 16 10:35:05 PST 2026


https://github.com/LU-JOHN updated https://github.com/llvm/llvm-project/pull/180081

>From 93a2356420a3feb0e1ef15a513b7921ecde6a6b8 Mon Sep 17 00:00:00 2001
From: John Lu <John.Lu at amd.com>
Date: Thu, 5 Feb 2026 17:43:25 -0600
Subject: [PATCH 1/2] Simplify cond branch if condition is known

Signed-off-by: John Lu <John.Lu at amd.com>
---
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp        | 118 +++
 llvm/lib/Target/AMDGPU/SIInstrInfo.h          |   2 +
 .../CodeGen/AMDGPU/copy-to-reg-frameindex.ll  |   9 +-
 .../CodeGen/AMDGPU/optimize-condbranch.mir    | 776 ++++++++++++++++++
 4 files changed, 897 insertions(+), 8 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/optimize-condbranch.mir

diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 9e3b683d10c45..3df8d1c897b09 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -11236,6 +11236,124 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
   return false;
 }
 
+/// Try to constant-fold the scalar compare \p CmpInstr.
+///
+/// Each source operand must be known: either an immediate operand, or a
+/// register without a subregister index that getFoldableImm() resolves to a
+/// constant.
+///
+/// The *_U32/*_I32 opcodes are evaluated on the low 32 bits only. Immediates
+/// are carried as int64_t, so one 32-bit pattern can show up as either -1 or
+/// 0xFFFFFFFF; comparing the raw 64-bit values would treat those as
+/// different and would get the sign of e.g. 0x80000000 wrong.
+///
+/// \param CmpInstr  Compare instruction to evaluate.
+/// \param SCCIsSet  Receives the folded SCC value; written only on success.
+/// \returns true if the outcome is known, false for unknown operands or an
+///          unhandled opcode.
+static bool evaluateCompare(const MachineInstr *CmpInstr, bool &SCCIsSet) {
+  const MachineRegisterInfo &MRI =
+      CmpInstr->getParent()->getParent()->getRegInfo();
+
+  // Resolve one source operand to a constant, if it is known.
+  auto GetKnownValue = [&MRI](const MachineOperand &Op, int64_t &Value) {
+    if (Op.isImm()) {
+      Value = Op.getImm();
+      return true;
+    }
+    return Op.isReg() && !Op.getSubReg() &&
+           getFoldableImm(Op.getReg(), MRI, Value);
+  };
+
+  int64_t Cmp0Value, Cmp1Value;
+  if (!GetKnownValue(CmpInstr->getOperand(0), Cmp0Value) ||
+      !GetKnownValue(CmpInstr->getOperand(1), Cmp1Value))
+    return false;
+
+  // The operands as the 32-bit compares see them: low 32 bits, interpreted
+  // as unsigned (U*) or signed (S*).
+  const uint32_t U0 = static_cast<uint32_t>(Cmp0Value);
+  const uint32_t U1 = static_cast<uint32_t>(Cmp1Value);
+  const int32_t S0 = static_cast<int32_t>(U0);
+  const int32_t S1 = static_cast<int32_t>(U1);
+
+  switch (CmpInstr->getOpcode()) {
+  default:
+    return false;
+  case AMDGPU::S_CMP_EQ_U32:
+  case AMDGPU::S_CMP_EQ_I32:
+    SCCIsSet = U0 == U1;
+    break;
+  case AMDGPU::S_CMP_LG_U32:
+  case AMDGPU::S_CMP_LG_I32:
+    SCCIsSet = U0 != U1;
+    break;
+  // The 64-bit compares use the full recorded value.
+  case AMDGPU::S_CMP_EQ_U64:
+    SCCIsSet = Cmp0Value == Cmp1Value;
+    break;
+  case AMDGPU::S_CMP_LG_U64:
+    SCCIsSet = Cmp0Value != Cmp1Value;
+    break;
+  case AMDGPU::S_CMP_LT_U32:
+    SCCIsSet = U0 < U1;
+    break;
+  case AMDGPU::S_CMP_LE_U32:
+    SCCIsSet = U0 <= U1;
+    break;
+  case AMDGPU::S_CMP_GT_U32:
+    SCCIsSet = U0 > U1;
+    break;
+  case AMDGPU::S_CMP_GE_U32:
+    SCCIsSet = U0 >= U1;
+    break;
+  case AMDGPU::S_CMP_LT_I32:
+    SCCIsSet = S0 < S1;
+    break;
+  case AMDGPU::S_CMP_LE_I32:
+    SCCIsSet = S0 <= S1;
+    break;
+  case AMDGPU::S_CMP_GT_I32:
+    SCCIsSet = S0 > S1;
+    break;
+  case AMDGPU::S_CMP_GE_I32:
+    SCCIsSet = S0 >= S1;
+    break;
+  }
+  return true;
+}
+
+/// Fold an S_CBRANCH_SCC0/S_CBRANCH_SCC1 whose condition is a known constant.
+///
+/// Looks backward in the branch's block (at most ScanLimit non-debug
+/// instructions) for the compare feeding SCC and asks evaluateCompare() for
+/// its result. A never-taken branch is erased. An always-taken branch is
+/// folded only when it is immediately followed by the block's final S_BRANCH,
+/// which is then retargeted. A CFG edge is removed only when nothing else in
+/// the block still reaches that successor, and the successor's PHI inputs for
+/// this block are removed with it.
+///
+/// \param CBI  Conditional branch to simplify; erased on success.
+/// \returns true if \p CBI was erased.
+bool SIInstrInfo::optimizeCondBranch(MachineInstr &CBI) const {
+  const unsigned Opc = CBI.getOpcode();
+  if (Opc != AMDGPU::S_CBRANCH_SCC0 && Opc != AMDGPU::S_CBRANCH_SCC1)
+    return false;
+
+  // Search backward for compare.
+  MachineBasicBlock *Parent = CBI.getParent();
+  MachineInstr *CompareInst = nullptr;
+  constexpr unsigned ScanLimit = 8;
+  unsigned Count = 0;
+  for (MachineInstr &MI :
+       make_range(std::next(MachineBasicBlock::reverse_iterator(CBI)),
+                  Parent->rend())) {
+    // Debug instructions must not change the generated code, so they do not
+    // count towards the scan limit.
+    if (MI.isDebugInstr())
+      continue;
+    if (MI.isCompare()) {
+      CompareInst = &MI;
+      break;
+    }
+    if (++Count > ScanLimit || MI.definesRegister(AMDGPU::SCC, &RI))
+      return false;
+  }
+
+  // If compare can be evaluated simplify the conditional branch.
+  bool SCCIsSet;
+  if (!CompareInst || !evaluateCompare(CompareInst, SCCIsSet))
+    return false;
+
+  // True if a terminator other than CBI still names MBB as a destination.
+  auto IsBranchedToElsewhere = [&CBI, Parent](const MachineBasicBlock *MBB) {
+    for (const MachineInstr &Term : Parent->terminators()) {
+      if (&Term == &CBI)
+        continue;
+      for (const MachineOperand &MO : Term.operands())
+        if (MO.isMBB() && MO.getMBB() == MBB)
+          return true;
+    }
+    return false;
+  };
+
+  // Drop the edge Parent -> Succ together with the PHI inputs that flowed
+  // along it, so the successor's PHIs keep matching its predecessors.
+  auto RemoveEdgeTo = [Parent](MachineBasicBlock *Succ) {
+    Succ->removePHIsIncomingValuesForPredecessor(*Parent);
+    Parent->removeSuccessor(Succ);
+  };
+
+  MachineInstr &LastInst = Parent->back();
+  MachineBasicBlock *TargetBB = CBI.getOperand(0).getMBB();
+  if (SCCIsSet == (Opc == AMDGPU::S_CBRANCH_SCC1)) {
+    // S_CBRANCH_SCC? will always branch to target.
+
+    // Only handle the case where the block ends with:
+    //
+    //   S_CBRANCH_SCC? <target>
+    //   S_BRANCH       <alt>
+    //
+    // Convert this to:
+    //
+    //   <erased>
+    //   S_BRANCH       <target>
+    //
+    // This is by far the most common case and is easy to convert.
+    if (std::next(CBI.getIterator()) != LastInst.getIterator() ||
+        LastInst.getOpcode() != AMDGPU::S_BRANCH)
+      return false;
+    MachineBasicBlock *AltBB = LastInst.getOperand(0).getMBB();
+    LastInst.getOperand(0).setMBB(TargetBB);
+    // With <alt> == <target>, or another terminator still branching to
+    // <alt>, the edge is still in use and must stay.
+    if (!IsBranchedToElsewhere(AltBB))
+      RemoveEdgeTo(AltBB);
+  } else {
+    // S_CBRANCH_SCC? will never branch to target.
+    // Instruction will be erased.
+    //
+    // Keep the edge if the block may run off its end into TargetBB or if
+    // another terminator still branches there.
+    // NOTE(review): relies on isBarrier() being set on every terminator that
+    // prevents fall-through; a missing flag only keeps an edge needlessly.
+    const bool MayFallInto =
+        !LastInst.isBarrier() && Parent->isLayoutSuccessor(TargetBB);
+    if (!MayFallInto && !IsBranchedToElsewhere(TargetBB))
+      RemoveEdgeTo(TargetBB);
+  }
+  CBI.eraseFromParent();
+  return true;
+}
+
 void SIInstrInfo::enforceOperandRCAlignment(MachineInstr &MI,
                                             AMDGPU::OpName OpName) const {
   if (!ST.needsAlignedVGPRs())
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index 05cf804d08ffc..27096c5ed4ed1 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -424,6 +424,8 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
                             Register SrcReg2, int64_t CmpMask, int64_t CmpValue,
                             const MachineRegisterInfo *MRI) const override;
 
+  /// Simplify the conditional branch \p CBI when its condition is known.
+  bool optimizeCondBranch(MachineInstr &CBI) const override;
+
   bool
   areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
                                   const MachineInstr &MIb) const override;
diff --git a/llvm/test/CodeGen/AMDGPU/copy-to-reg-frameindex.ll b/llvm/test/CodeGen/AMDGPU/copy-to-reg-frameindex.ll
index a13f3513c660e..e0a3bff5e8d2a 100644
--- a/llvm/test/CodeGen/AMDGPU/copy-to-reg-frameindex.ll
+++ b/llvm/test/CodeGen/AMDGPU/copy-to-reg-frameindex.ll
@@ -4,20 +4,13 @@
 define amdgpu_kernel void @copy_to_reg_frameindex(ptr addrspace(1) %out, i32 %a, i32 %b, i32 %c) {
 ; CHECK-LABEL: copy_to_reg_frameindex:
 ; CHECK:       ; %bb.0: ; %entry
-; CHECK-NEXT:    s_cmp_lt_u32 0, 16
 ; CHECK-NEXT:    ; implicit-def: $vgpr0
 ; CHECK-NEXT:  .LBB0_1: ; %loop
 ; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    s_set_gpr_idx_on 0, gpr_idx(DST)
 ; CHECK-NEXT:    v_mov_b32_e32 v0, 0
 ; CHECK-NEXT:    s_set_gpr_idx_off
-; CHECK-NEXT:    s_cbranch_scc1 .LBB0_1
-; CHECK-NEXT:  ; %bb.2: ; %done
-; CHECK-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; CHECK-NEXT:    v_mov_b32_e32 v1, 0
-; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
-; CHECK-NEXT:    global_store_dword v1, v0, s[0:1]
-; CHECK-NEXT:    s_endpgm
+; CHECK-NEXT:    s_branch .LBB0_1
 entry:
   %B = srem i32 %c, -1
   br label %loop
diff --git a/llvm/test/CodeGen/AMDGPU/optimize-condbranch.mir b/llvm/test/CodeGen/AMDGPU/optimize-condbranch.mir
new file mode 100644
index 0000000000000..d24ca057f2ad1
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/optimize-condbranch.mir
@@ -0,0 +1,776 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=peephole-opt --verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
+# Simplify S_CBRANCH_SCC? if comparison can be evaluated.
+
+####################################################################################
+# Test all comparison opcodes that can be evaluated if operands have a known value.
+####################################################################################
+---
+name:            CMP_EQ_U32_1_0
+body:             |
+  ; GCN-LABEL: name: CMP_EQ_U32_1_0
+  ; GCN: bb.0:
+  ; GCN-NEXT:   successors: %bb.1(0x40000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+  ; GCN-NEXT:   S_CMP_EQ_U32 killed [[S_MOV_B32_]], 0, implicit-def $scc
+  ; GCN-NEXT:   S_BRANCH %bb.1
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.1:
+  ; GCN-NEXT:   successors: %bb.2(0x80000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.2:
+  ; GCN-NEXT:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1, %bb.2
+
+    %0:sreg_32 = S_MOV_B32 1
+    S_CMP_EQ_U32 killed %0, 0, implicit-def $scc
+    S_CBRANCH_SCC1 %bb.2, implicit $scc
+    S_BRANCH %bb.1
+  bb.1:
+    successors: %bb.2
+
+  bb.2:
+    S_ENDPGM 0
+...
+---
+name:            CMP_EQ_I32_1_0
+body:             |
+  ; GCN-LABEL: name: CMP_EQ_I32_1_0
+  ; GCN: bb.0:
+  ; GCN-NEXT:   successors: %bb.1(0x40000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+  ; GCN-NEXT:   S_CMP_EQ_I32 killed [[S_MOV_B32_]], 0, implicit-def $scc
+  ; GCN-NEXT:   S_BRANCH %bb.1
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.1:
+  ; GCN-NEXT:   successors: %bb.2(0x80000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.2:
+  ; GCN-NEXT:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1, %bb.2
+
+    %0:sreg_32 = S_MOV_B32 1
+    S_CMP_EQ_I32 killed %0, 0, implicit-def $scc
+    S_CBRANCH_SCC1 %bb.2, implicit $scc
+    S_BRANCH %bb.1
+  bb.1:
+    successors: %bb.2
+
+  bb.2:
+    S_ENDPGM 0
+...
+---
+name:            CMP_LG_U32_1_0
+body:             |
+  ; GCN-LABEL: name: CMP_LG_U32_1_0
+  ; GCN: bb.0:
+  ; GCN-NEXT:   successors: %bb.2(0x40000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+  ; GCN-NEXT:   S_CMP_LG_U32 killed [[S_MOV_B32_]], 0, implicit-def $scc
+  ; GCN-NEXT:   S_BRANCH %bb.2
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.1:
+  ; GCN-NEXT:   successors: %bb.2(0x80000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.2:
+  ; GCN-NEXT:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1, %bb.2
+
+    %0:sreg_32 = S_MOV_B32 1
+    S_CMP_LG_U32 killed %0, 0, implicit-def $scc
+    S_CBRANCH_SCC1 %bb.2, implicit $scc
+    S_BRANCH %bb.1
+  bb.1:
+    successors: %bb.2
+
+  bb.2:
+    S_ENDPGM 0
+...
+---
+name:            CMP_LG_I32_1_0
+body:             |
+  ; GCN-LABEL: name: CMP_LG_I32_1_0
+  ; GCN: bb.0:
+  ; GCN-NEXT:   successors: %bb.2(0x40000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+  ; GCN-NEXT:   S_CMP_LG_I32 killed [[S_MOV_B32_]], 0, implicit-def $scc
+  ; GCN-NEXT:   S_BRANCH %bb.2
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.1:
+  ; GCN-NEXT:   successors: %bb.2(0x80000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.2:
+  ; GCN-NEXT:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1, %bb.2
+
+    %0:sreg_32 = S_MOV_B32 1
+    S_CMP_LG_I32 killed %0, 0, implicit-def $scc
+    S_CBRANCH_SCC1 %bb.2, implicit $scc
+    S_BRANCH %bb.1
+  bb.1:
+    successors: %bb.2
+
+  bb.2:
+    S_ENDPGM 0
+...
+---
+name:            CMP_LT_U32_1_0
+body:             |
+  ; GCN-LABEL: name: CMP_LT_U32_1_0
+  ; GCN: bb.0:
+  ; GCN-NEXT:   successors: %bb.1(0x40000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+  ; GCN-NEXT:   S_CMP_LT_U32 killed [[S_MOV_B32_]], 0, implicit-def $scc
+  ; GCN-NEXT:   S_BRANCH %bb.1
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.1:
+  ; GCN-NEXT:   successors: %bb.2(0x80000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.2:
+  ; GCN-NEXT:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1, %bb.2
+
+    %0:sreg_32 = S_MOV_B32 1
+    S_CMP_LT_U32 killed %0, 0, implicit-def $scc
+    S_CBRANCH_SCC1 %bb.2, implicit $scc
+    S_BRANCH %bb.1
+  bb.1:
+    successors: %bb.2
+
+  bb.2:
+    S_ENDPGM 0
+...
+---
+name:            CMP_LE_U32_1_0
+body:             |
+  ; GCN-LABEL: name: CMP_LE_U32_1_0
+  ; GCN: bb.0:
+  ; GCN-NEXT:   successors: %bb.1(0x40000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+  ; GCN-NEXT:   S_CMP_LE_U32 killed [[S_MOV_B32_]], 0, implicit-def $scc
+  ; GCN-NEXT:   S_BRANCH %bb.1
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.1:
+  ; GCN-NEXT:   successors: %bb.2(0x80000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.2:
+  ; GCN-NEXT:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1, %bb.2
+
+    %0:sreg_32 = S_MOV_B32 1
+    S_CMP_LE_U32 killed %0, 0, implicit-def $scc
+    S_CBRANCH_SCC1 %bb.2, implicit $scc
+    S_BRANCH %bb.1
+  bb.1:
+    successors: %bb.2
+
+  bb.2:
+    S_ENDPGM 0
+...
+---
+name:            CMP_GT_U32_1_0
+body:             |
+  ; GCN-LABEL: name: CMP_GT_U32_1_0
+  ; GCN: bb.0:
+  ; GCN-NEXT:   successors: %bb.2(0x40000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+  ; GCN-NEXT:   S_CMP_GT_U32 killed [[S_MOV_B32_]], 0, implicit-def $scc
+  ; GCN-NEXT:   S_BRANCH %bb.2
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.1:
+  ; GCN-NEXT:   successors: %bb.2(0x80000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.2:
+  ; GCN-NEXT:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1, %bb.2
+
+    %0:sreg_32 = S_MOV_B32 1
+    S_CMP_GT_U32 killed %0, 0, implicit-def $scc
+    S_CBRANCH_SCC1 %bb.2, implicit $scc
+    S_BRANCH %bb.1
+  bb.1:
+    successors: %bb.2
+
+  bb.2:
+    S_ENDPGM 0
+...
+---
+name:            CMP_GE_U32_1_0
+body:             |
+  ; GCN-LABEL: name: CMP_GE_U32_1_0
+  ; GCN: bb.0:
+  ; GCN-NEXT:   successors: %bb.2(0x40000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+  ; GCN-NEXT:   S_CMP_GE_U32 killed [[S_MOV_B32_]], 0, implicit-def $scc
+  ; GCN-NEXT:   S_BRANCH %bb.2
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.1:
+  ; GCN-NEXT:   successors: %bb.2(0x80000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.2:
+  ; GCN-NEXT:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1, %bb.2
+
+    %0:sreg_32 = S_MOV_B32 1
+    S_CMP_GE_U32 killed %0, 0, implicit-def $scc
+    S_CBRANCH_SCC1 %bb.2, implicit $scc
+    S_BRANCH %bb.1
+  bb.1:
+    successors: %bb.2
+
+  bb.2:
+    S_ENDPGM 0
+...
+---
+name:            CMP_LT_I32_1_0
+body:             |
+  ; GCN-LABEL: name: CMP_LT_I32_1_0
+  ; GCN: bb.0:
+  ; GCN-NEXT:   successors: %bb.1(0x40000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+  ; GCN-NEXT:   S_CMP_LT_I32 killed [[S_MOV_B32_]], 0, implicit-def $scc
+  ; GCN-NEXT:   S_BRANCH %bb.1
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.1:
+  ; GCN-NEXT:   successors: %bb.2(0x80000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.2:
+  ; GCN-NEXT:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1, %bb.2
+
+    %0:sreg_32 = S_MOV_B32 1
+    S_CMP_LT_I32 killed %0, 0, implicit-def $scc
+    S_CBRANCH_SCC1 %bb.2, implicit $scc
+    S_BRANCH %bb.1
+  bb.1:
+    successors: %bb.2
+
+  bb.2:
+    S_ENDPGM 0
+...
+---
+name:            CMP_LE_I32_1_0
+body:             |
+  ; GCN-LABEL: name: CMP_LE_I32_1_0
+  ; GCN: bb.0:
+  ; GCN-NEXT:   successors: %bb.1(0x40000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+  ; GCN-NEXT:   S_CMP_LE_I32 killed [[S_MOV_B32_]], 0, implicit-def $scc
+  ; GCN-NEXT:   S_BRANCH %bb.1
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.1:
+  ; GCN-NEXT:   successors: %bb.2(0x80000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.2:
+  ; GCN-NEXT:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1, %bb.2
+
+    %0:sreg_32 = S_MOV_B32 1
+    S_CMP_LE_I32 killed %0, 0, implicit-def $scc
+    S_CBRANCH_SCC1 %bb.2, implicit $scc
+    S_BRANCH %bb.1
+  bb.1:
+    successors: %bb.2
+
+  bb.2:
+    S_ENDPGM 0
+...
+---
+name:            CMP_GT_I32_1_0
+body:             |
+  ; GCN-LABEL: name: CMP_GT_I32_1_0
+  ; GCN: bb.0:
+  ; GCN-NEXT:   successors: %bb.2(0x40000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+  ; GCN-NEXT:   S_CMP_GT_I32 killed [[S_MOV_B32_]], 0, implicit-def $scc
+  ; GCN-NEXT:   S_BRANCH %bb.2
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.1:
+  ; GCN-NEXT:   successors: %bb.2(0x80000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.2:
+  ; GCN-NEXT:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1, %bb.2
+
+    %0:sreg_32 = S_MOV_B32 1
+    S_CMP_GT_I32 killed %0, 0, implicit-def $scc
+    S_CBRANCH_SCC1 %bb.2, implicit $scc
+    S_BRANCH %bb.1
+  bb.1:
+    successors: %bb.2
+
+  bb.2:
+    S_ENDPGM 0
+...
+---
+name:            CMP_GE_I32_1_0
+body:             |
+  ; GCN-LABEL: name: CMP_GE_I32_1_0
+  ; GCN: bb.0:
+  ; GCN-NEXT:   successors: %bb.2(0x40000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+  ; GCN-NEXT:   S_CMP_GE_I32 killed [[S_MOV_B32_]], 0, implicit-def $scc
+  ; GCN-NEXT:   S_BRANCH %bb.2
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.1:
+  ; GCN-NEXT:   successors: %bb.2(0x80000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.2:
+  ; GCN-NEXT:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1, %bb.2
+
+    %0:sreg_32 = S_MOV_B32 1
+    S_CMP_GE_I32 killed %0, 0, implicit-def $scc
+    S_CBRANCH_SCC1 %bb.2, implicit $scc
+    S_BRANCH %bb.1
+  bb.1:
+    successors: %bb.2
+
+  bb.2:
+    S_ENDPGM 0
+...
+---
+name:            CMP_EQ_U64_1_0
+body:             |
+  ; GCN-LABEL: name: CMP_EQ_U64_1_0
+  ; GCN: bb.0:
+  ; GCN-NEXT:   successors: %bb.1(0x40000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 1
+  ; GCN-NEXT:   S_CMP_EQ_U64 killed [[S_MOV_B64_]], 0, implicit-def $scc
+  ; GCN-NEXT:   S_BRANCH %bb.1
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.1:
+  ; GCN-NEXT:   successors: %bb.2(0x80000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.2:
+  ; GCN-NEXT:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1, %bb.2
+
+    %0:sreg_64 = S_MOV_B64 1
+    S_CMP_EQ_U64 killed %0, 0, implicit-def $scc
+    S_CBRANCH_SCC1 %bb.2, implicit $scc
+    S_BRANCH %bb.1
+  bb.1:
+    successors: %bb.2
+
+  bb.2:
+    S_ENDPGM 0
+...
+---
+name:            CMP_LG_U64_1_0
+body:             |
+  ; GCN-LABEL: name: CMP_LG_U64_1_0
+  ; GCN: bb.0:
+  ; GCN-NEXT:   successors: %bb.2(0x40000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 1
+  ; GCN-NEXT:   S_CMP_LG_U64 killed [[S_MOV_B64_]], 0, implicit-def $scc
+  ; GCN-NEXT:   S_BRANCH %bb.2
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.1:
+  ; GCN-NEXT:   successors: %bb.2(0x80000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.2:
+  ; GCN-NEXT:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1, %bb.2
+
+    %0:sreg_64 = S_MOV_B64 1
+    S_CMP_LG_U64 killed %0, 0, implicit-def $scc
+    S_CBRANCH_SCC1 %bb.2, implicit $scc
+    S_BRANCH %bb.1
+  bb.1:
+    successors: %bb.2
+
+  bb.2:
+    S_ENDPGM 0
+...
+
+####################################################################################
+# Test when second operand has a constant value in a register.
+####################################################################################
+---
+name:            CMP_EQ_U32_1_0_reg
+body:             |
+  ; GCN-LABEL: name: CMP_EQ_U32_1_0_reg
+  ; GCN: bb.0:
+  ; GCN-NEXT:   successors: %bb.1(0x40000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+  ; GCN-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+  ; GCN-NEXT:   S_CMP_EQ_U32 killed [[S_MOV_B32_]], killed [[S_MOV_B32_1]], implicit-def $scc
+  ; GCN-NEXT:   S_BRANCH %bb.1
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.1:
+  ; GCN-NEXT:   successors: %bb.2(0x80000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.2:
+  ; GCN-NEXT:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1, %bb.2
+
+    %0:sreg_32 = S_MOV_B32 1
+    %1:sreg_32 = S_MOV_B32 0
+    S_CMP_EQ_U32 killed %0, killed %1, implicit-def $scc
+    S_CBRANCH_SCC1 %bb.2, implicit $scc
+    S_BRANCH %bb.1
+  bb.1:
+    successors: %bb.2
+
+  bb.2:
+    S_ENDPGM 0
+...
+---
+name:            CMP_LG_I32_1_0_reg
+body:             |
+  ; GCN-LABEL: name: CMP_LG_I32_1_0_reg
+  ; GCN: bb.0:
+  ; GCN-NEXT:   successors: %bb.2(0x40000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+  ; GCN-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+  ; GCN-NEXT:   S_CMP_LG_I32 killed [[S_MOV_B32_]], killed [[S_MOV_B32_1]], implicit-def $scc
+  ; GCN-NEXT:   S_BRANCH %bb.2
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.1:
+  ; GCN-NEXT:   successors: %bb.2(0x80000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.2:
+  ; GCN-NEXT:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1, %bb.2
+
+    %0:sreg_32 = S_MOV_B32 1
+    %1:sreg_32 = S_MOV_B32 0
+    S_CMP_LG_I32 killed %0, killed %1, implicit-def $scc
+    S_CBRANCH_SCC1 %bb.2, implicit $scc
+    S_BRANCH %bb.1
+  bb.1:
+    successors: %bb.2
+
+  bb.2:
+    S_ENDPGM 0
+...
+
+####################################################################################
+# Negative test.  Register has unknown value.
+####################################################################################
+---
+name:            NegativeTest_CMP_LT_I32_X_0
+body:             |
+  ; GCN-LABEL: name: NegativeTest_CMP_LT_I32_X_0
+  ; GCN: bb.0:
+  ; GCN-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GCN-NEXT:   liveins: $sgpr0
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+  ; GCN-NEXT:   S_CMP_LT_I32 killed [[COPY]], 0, implicit-def $scc
+  ; GCN-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
+  ; GCN-NEXT:   S_BRANCH %bb.1
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.1:
+  ; GCN-NEXT:   successors: %bb.2(0x80000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.2:
+  ; GCN-NEXT:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1, %bb.2
+    liveins: $sgpr0
+
+    %0:sreg_32 = COPY $sgpr0
+    S_CMP_LT_I32 killed %0, 0, implicit-def $scc
+    S_CBRANCH_SCC1 %bb.2, implicit $scc
+    S_BRANCH %bb.1
+  bb.1:
+    successors: %bb.2
+
+  bb.2:
+    S_ENDPGM 0
+...
+---
+name:            NegativeTest_CMP_LE_I32_X_0
+body:             |
+  ; GCN-LABEL: name: NegativeTest_CMP_LE_I32_X_0
+  ; GCN: bb.0:
+  ; GCN-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GCN-NEXT:   liveins: $sgpr0
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+  ; GCN-NEXT:   S_CMP_LE_I32 killed [[COPY]], 0, implicit-def $scc
+  ; GCN-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
+  ; GCN-NEXT:   S_BRANCH %bb.1
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.1:
+  ; GCN-NEXT:   successors: %bb.2(0x80000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.2:
+  ; GCN-NEXT:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1, %bb.2
+    liveins: $sgpr0
+
+    %0:sreg_32 = COPY $sgpr0
+    S_CMP_LE_I32 killed %0, 0, implicit-def $scc
+    S_CBRANCH_SCC1 %bb.2, implicit $scc
+    S_BRANCH %bb.1
+  bb.1:
+    successors: %bb.2
+
+  bb.2:
+    S_ENDPGM 0
+...
+
+####################################################################################
+# Ensure signed/unsigned comparisons are evaluated correctly.
+####################################################################################
+---
+name:            CMP_GT_I32_n1_0_reg
+body:             |
+  ; GCN-LABEL: name: CMP_GT_I32_n1_0_reg
+  ; GCN: bb.0:
+  ; GCN-NEXT:   successors: %bb.1(0x40000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
+  ; GCN-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+  ; GCN-NEXT:   S_CMP_GT_I32 killed [[S_MOV_B32_]], killed [[S_MOV_B32_1]], implicit-def $scc
+  ; GCN-NEXT:   S_BRANCH %bb.1
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.1:
+  ; GCN-NEXT:   successors: %bb.2(0x80000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.2:
+  ; GCN-NEXT:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1, %bb.2
+
+    %0:sreg_32 = S_MOV_B32 -1
+    %1:sreg_32 = S_MOV_B32 0
+    S_CMP_GT_I32 killed %0, killed %1, implicit-def $scc
+    S_CBRANCH_SCC1 %bb.2, implicit $scc
+    S_BRANCH %bb.1
+  bb.1:
+    successors: %bb.2
+
+  bb.2:
+    S_ENDPGM 0
+...
+---
+name:            CMP_GT_U32_n1_0_reg
+body:             |
+  ; GCN-LABEL: name: CMP_GT_U32_n1_0_reg
+  ; GCN: bb.0:
+  ; GCN-NEXT:   successors: %bb.2(0x40000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
+  ; GCN-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+  ; GCN-NEXT:   S_CMP_GT_U32 killed [[S_MOV_B32_]], killed [[S_MOV_B32_1]], implicit-def $scc
+  ; GCN-NEXT:   S_BRANCH %bb.2
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.1:
+  ; GCN-NEXT:   successors: %bb.2(0x80000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.2:
+  ; GCN-NEXT:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1, %bb.2
+
+    %0:sreg_32 = S_MOV_B32 -1
+    %1:sreg_32 = S_MOV_B32 0
+    S_CMP_GT_U32 killed %0, killed %1, implicit-def $scc
+    S_CBRANCH_SCC1 %bb.2, implicit $scc
+    S_BRANCH %bb.1
+  bb.1:
+    successors: %bb.2
+
+  bb.2:
+    S_ENDPGM 0
+...
+
+####################################################################################
+# Test S_CBRANCH_SCC0
+####################################################################################
+---
+name:            SCC0_AlwaysTaken
+body:             |
+  ; GCN-LABEL: name: SCC0_AlwaysTaken
+  ; GCN: bb.0:
+  ; GCN-NEXT:   successors: %bb.2(0x40000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 1
+  ; GCN-NEXT:   S_CMP_EQ_U64 killed [[S_MOV_B64_]], 0, implicit-def $scc
+  ; GCN-NEXT:   S_BRANCH %bb.2
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.1:
+  ; GCN-NEXT:   successors: %bb.2(0x80000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.2:
+  ; GCN-NEXT:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1, %bb.2
+
+    %0:sreg_64 = S_MOV_B64 1
+    S_CMP_EQ_U64 killed %0, 0, implicit-def $scc
+    S_CBRANCH_SCC0 %bb.2, implicit $scc
+    S_BRANCH %bb.1
+  bb.1:
+    successors: %bb.2
+
+  bb.2:
+    S_ENDPGM 0
+...
+---
+name:            SCC0_NeverTaken
+body:             |
+  ; GCN-LABEL: name: SCC0_NeverTaken
+  ; GCN: bb.0:
+  ; GCN-NEXT:   successors: %bb.1(0x40000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 1
+  ; GCN-NEXT:   S_CMP_LG_U64 killed [[S_MOV_B64_]], 0, implicit-def $scc
+  ; GCN-NEXT:   S_BRANCH %bb.1
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.1:
+  ; GCN-NEXT:   successors: %bb.2(0x80000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.2:
+  ; GCN-NEXT:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1, %bb.2
+
+    %0:sreg_64 = S_MOV_B64 1
+    S_CMP_LG_U64 killed %0, 0, implicit-def $scc
+    S_CBRANCH_SCC0 %bb.2, implicit $scc
+    S_BRANCH %bb.1
+  bb.1:
+    successors: %bb.2
+
+  bb.2:
+    S_ENDPGM 0
+...
+
+####################################################################################
+# Negative test.  Scan limit for compare exceeded.
+####################################################################################
+---
+name:            NegativeTest_ScanLimit
+body:             |
+  ; GCN-LABEL: name: NegativeTest_ScanLimit
+  ; GCN: bb.0:
+  ; GCN-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
+  ; GCN-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+  ; GCN-NEXT:   S_CMP_GT_I32 killed [[S_MOV_B32_]], killed [[S_MOV_B32_1]], implicit-def $scc
+  ; GCN-NEXT:   S_NOP 0
+  ; GCN-NEXT:   S_NOP 0
+  ; GCN-NEXT:   S_NOP 0
+  ; GCN-NEXT:   S_NOP 0
+  ; GCN-NEXT:   S_NOP 0
+  ; GCN-NEXT:   S_NOP 0
+  ; GCN-NEXT:   S_NOP 0
+  ; GCN-NEXT:   S_NOP 0
+  ; GCN-NEXT:   S_NOP 0
+  ; GCN-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
+  ; GCN-NEXT:   S_BRANCH %bb.1
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.1:
+  ; GCN-NEXT:   successors: %bb.2(0x80000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.2:
+  ; GCN-NEXT:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1, %bb.2
+
+    %0:sreg_32 = S_MOV_B32 -1
+    %1:sreg_32 = S_MOV_B32 0
+    S_CMP_GT_I32 killed %0, killed %1, implicit-def $scc
+    S_NOP 0
+    S_NOP 0
+    S_NOP 0
+    S_NOP 0
+    S_NOP 0
+    S_NOP 0
+    S_NOP 0
+    S_NOP 0
+    S_NOP 0
+    S_CBRANCH_SCC1 %bb.2, implicit $scc
+    S_BRANCH %bb.1
+  bb.1:
+    successors: %bb.2
+
+  bb.2:
+    S_ENDPGM 0
+...
+
+
+####################################################################################
+# Test blocks that do not end with S_CBRANCH_SCC?/S_BRANCH.
+####################################################################################
+
+# Test can be optimized because S_CBRANCH_SCC? can be removed.
+---
+name:            Test_BranchNeverTaken
+body:             |
+  bb.0:
+    successors: %bb.0
+
+    ; GCN-LABEL: name: Test_BranchNeverTaken
+    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
+    ; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+    ; GCN-NEXT: S_CMP_GT_I32 killed [[S_MOV_B32_]], killed [[S_MOV_B32_1]], implicit-def $scc
+    ; GCN-NEXT: S_ENDPGM 0
+    %0:sreg_32 = S_MOV_B32 -1
+    %1:sreg_32 = S_MOV_B32 0
+    S_CMP_GT_I32 killed %0, killed %1, implicit-def $scc
+    S_CBRANCH_SCC1 %bb.0, implicit $scc
+    S_ENDPGM 0
+...
+
+# Test is not optimized because this case is rare and
+# not worth the added complexity.
+---
+name:            NegativeTest_BranchAlwaysTaken
+body:             |
+  bb.0:
+    successors: %bb.0
+
+    ; GCN-LABEL: name: NegativeTest_BranchAlwaysTaken
+    ; GCN: successors: %bb.0(0x80000000)
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+    ; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+    ; GCN-NEXT: S_CMP_GT_I32 killed [[S_MOV_B32_]], killed [[S_MOV_B32_1]], implicit-def $scc
+    ; GCN-NEXT: S_CBRANCH_SCC1 %bb.0, implicit $scc
+    ; GCN-NEXT: S_ENDPGM 0
+    %0:sreg_32 = S_MOV_B32 1
+    %1:sreg_32 = S_MOV_B32 0
+    S_CMP_GT_I32 killed %0, killed %1, implicit-def $scc
+    S_CBRANCH_SCC1 %bb.0, implicit $scc
+    S_ENDPGM 0
+...

>From f8f0c40770f5c338d0283c0afbac0579915d49f2 Mon Sep 17 00:00:00 2001
From: John Lu <John.Lu at amd.com>
Date: Thu, 12 Feb 2026 09:57:46 -0600
Subject: [PATCH 2/2] Add memcpy testcase that creates a constant compare.

Signed-off-by: John Lu <John.Lu at amd.com>
---
 llvm/test/CodeGen/AMDGPU/const_compare.ll | 61 +++++++++++++++++++++++
 1 file changed, 61 insertions(+)
 create mode 100644 llvm/test/CodeGen/AMDGPU/const_compare.ll

diff --git a/llvm/test/CodeGen/AMDGPU/const_compare.ll b/llvm/test/CodeGen/AMDGPU/const_compare.ll
new file mode 100644
index 0000000000000..75a4384973600
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/const_compare.ll
@@ -0,0 +1,61 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck %s
+
+define amdgpu_kernel void @_start(ptr %0) {
+; CHECK-LABEL: _start:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
+; CHECK-NEXT:    s_add_u32 flat_scratch_lo, s12, s17
+; CHECK-NEXT:    s_addc_u32 flat_scratch_hi, s13, 0
+; CHECK-NEXT:    s_mov_b64 s[2:3], 0
+; CHECK-NEXT:  ; %bb.1: ; %dynamic-memcpy-expansion-main-body.preheader
+; CHECK-NEXT:    v_mov_b32_e32 v0, 0
+; CHECK-NEXT:  .LBB0_2: ; %dynamic-memcpy-expansion-main-body
+; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    global_load_dwordx4 v[4:7], v0, s[2:3]
+; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
+; CHECK-NEXT:    s_add_u32 s4, s0, s2
+; CHECK-NEXT:    s_addc_u32 s5, s1, s3
+; CHECK-NEXT:    s_add_u32 s2, s2, 16
+; CHECK-NEXT:    s_addc_u32 s3, s3, 0
+; CHECK-NEXT:    v_pk_mov_b32 v[2:3], s[4:5], s[4:5] op_sel:[0,1]
+; CHECK-NEXT:    v_cmp_lt_u64_e64 s[4:5], s[2:3], 16
+; CHECK-NEXT:    s_and_b64 vcc, exec, s[4:5]
+; CHECK-NEXT:    s_waitcnt vmcnt(0)
+; CHECK-NEXT:    flat_store_dwordx4 v[2:3], v[4:7]
+; CHECK-NEXT:    s_cbranch_vccnz .LBB0_2
+; CHECK-NEXT:  ; %bb.3: ; %dynamic-memcpy-expansion-residual-body.preheader
+; CHECK-NEXT:    s_sub_u32 s2, 29, 13
+; CHECK-NEXT:    s_subb_u32 s3, 0, 0
+; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
+; CHECK-NEXT:    s_add_u32 s2, s0, s2
+; CHECK-NEXT:    s_addc_u32 s3, s1, s3
+; CHECK-NEXT:    s_sub_u32 s4, 0, 13
+; CHECK-NEXT:    s_mov_b64 s[0:1], 0
+; CHECK-NEXT:    s_subb_u32 s5, 0, 0
+; CHECK-NEXT:    v_mov_b32_e32 v0, 0
+; CHECK-NEXT:  .LBB0_4: ; %dynamic-memcpy-expansion-residual-body
+; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    s_add_u32 s6, s4, s0
+; CHECK-NEXT:    s_addc_u32 s7, s5, s1
+; CHECK-NEXT:    global_load_ubyte v1, v0, s[6:7] offset:29
+; CHECK-NEXT:    s_add_u32 s6, s2, s0
+; CHECK-NEXT:    s_addc_u32 s7, s3, s1
+; CHECK-NEXT:    s_add_u32 s0, s0, 1
+; CHECK-NEXT:    s_addc_u32 s1, s1, 0
+; CHECK-NEXT:    v_pk_mov_b32 v[2:3], s[6:7], s[6:7] op_sel:[0,1]
+; CHECK-NEXT:    v_cmp_lt_u64_e64 s[6:7], s[0:1], 13
+; CHECK-NEXT:    s_and_b64 vcc, exec, s[6:7]
+; CHECK-NEXT:    s_waitcnt vmcnt(0)
+; CHECK-NEXT:    flat_store_byte v[2:3], v1
+; CHECK-NEXT:    s_cbranch_vccnz .LBB0_4
+; CHECK-NEXT:  ; %bb.5: ; %dynamic-memcpy-post-expansion
+; CHECK-NEXT:    s_endpgm
+  call void @llvm.memcpy.p0.p4.i64(ptr %0, ptr addrspace(4) null, i64 add (i64 sub (i64 16, i64 ptrtoint (ptr addrspacecast (ptr addrspace(4) null to ptr) to i64)), i64 13), i1 false)
+  ret void
+}
+
+; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.memcpy.p0.p4.i64(ptr noalias writeonly captures(none), ptr addrspace(4) noalias readonly captures(none), i64, i1 immarg) #0
+
+attributes #0 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }



More information about the llvm-commits mailing list