[llvm] [AMDGPU] Simplify cond branch if condition is known (PR #180081)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 16 10:35:05 PST 2026
https://github.com/LU-JOHN updated https://github.com/llvm/llvm-project/pull/180081
>From 93a2356420a3feb0e1ef15a513b7921ecde6a6b8 Mon Sep 17 00:00:00 2001
From: John Lu <John.Lu at amd.com>
Date: Thu, 5 Feb 2026 17:43:25 -0600
Subject: [PATCH 1/2] Simplify cond branch if condition is known
Signed-off-by: John Lu <John.Lu at amd.com>
---
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 118 +++
llvm/lib/Target/AMDGPU/SIInstrInfo.h | 2 +
.../CodeGen/AMDGPU/copy-to-reg-frameindex.ll | 9 +-
.../CodeGen/AMDGPU/optimize-condbranch.mir | 776 ++++++++++++++++++
4 files changed, 897 insertions(+), 8 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/optimize-condbranch.mir
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 9e3b683d10c45..3df8d1c897b09 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -11236,6 +11236,124 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
return false;
}
+/// Try to evaluate the scalar compare \p CmpInstr at compile time.
+///
+/// Operand 0 must be a register (without a subregister index) for which
+/// getFoldableImm reports a constant; operand 1 may be an immediate or
+/// another such register. Only the S_CMP_* opcodes listed in the switch
+/// below are handled.
+///
+/// \param CmpInstr The compare instruction to evaluate.
+/// \param [out] SCCIsSet Receives the outcome of the comparison. It is only
+/// written when the function returns true.
+/// \returns true if the comparison result is known, false otherwise.
+static bool evaluateCompare(const MachineInstr *CmpInstr, bool &SCCIsSet) {
+  const MachineRegisterInfo &MRI =
+      CmpInstr->getParent()->getParent()->getRegInfo();
+  const MachineOperand &Op0 = CmpInstr->getOperand(0);
+  const MachineOperand &Op1 = CmpInstr->getOperand(1);
+  int64_t Cmp0Value, Cmp1Value;
+
+  if (!Op0.isReg() || Op0.getSubReg() ||
+      !getFoldableImm(Op0.getReg(), MRI, Cmp0Value))
+    return false;
+  if (Op1.isImm())
+    Cmp1Value = Op1.getImm();
+  else if (!Op1.isReg() || Op1.getSubReg() ||
+           !getFoldableImm(Op1.getReg(), MRI, Cmp1Value))
+    return false;
+
+  // The 32-bit compares only look at the low 32 bits of each operand. The
+  // constants are carried as int64_t, and the same 32-bit pattern may show up
+  // either sign- or zero-extended (e.g. -1 vs. 0xFFFFFFFF), so narrow both
+  // sides to the compared width before evaluating a 32-bit opcode.
+  const uint32_t U0 = static_cast<uint32_t>(Cmp0Value);
+  const uint32_t U1 = static_cast<uint32_t>(Cmp1Value);
+  const int32_t S0 = static_cast<int32_t>(U0);
+  const int32_t S1 = static_cast<int32_t>(U1);
+
+  switch (CmpInstr->getOpcode()) {
+  default:
+    return false;
+  case AMDGPU::S_CMP_EQ_U32:
+  case AMDGPU::S_CMP_EQ_I32:
+    SCCIsSet = U0 == U1;
+    break;
+  case AMDGPU::S_CMP_EQ_U64:
+    SCCIsSet = Cmp0Value == Cmp1Value;
+    break;
+  case AMDGPU::S_CMP_LG_U32:
+  case AMDGPU::S_CMP_LG_I32:
+    SCCIsSet = U0 != U1;
+    break;
+  case AMDGPU::S_CMP_LG_U64:
+    SCCIsSet = Cmp0Value != Cmp1Value;
+    break;
+  case AMDGPU::S_CMP_LT_U32:
+    SCCIsSet = U0 < U1;
+    break;
+  case AMDGPU::S_CMP_LE_U32:
+    SCCIsSet = U0 <= U1;
+    break;
+  case AMDGPU::S_CMP_GT_U32:
+    SCCIsSet = U0 > U1;
+    break;
+  case AMDGPU::S_CMP_GE_U32:
+    SCCIsSet = U0 >= U1;
+    break;
+  case AMDGPU::S_CMP_LT_I32:
+    SCCIsSet = S0 < S1;
+    break;
+  case AMDGPU::S_CMP_LE_I32:
+    SCCIsSet = S0 <= S1;
+    break;
+  case AMDGPU::S_CMP_GT_I32:
+    SCCIsSet = S0 > S1;
+    break;
+  case AMDGPU::S_CMP_GE_I32:
+    SCCIsSet = S0 >= S1;
+    break;
+  }
+  return true;
+}
+
+/// Simplify the conditional branch \p CBI when its condition is a known
+/// constant.
+///
+/// Only S_CBRANCH_SCC0 / S_CBRANCH_SCC1 are handled. The block is scanned
+/// backward from \p CBI (at most ScanLimit non-compare instructions) for the
+/// nearest compare; the scan gives up if anything else defines SCC first. If
+/// evaluateCompare can resolve that compare, the branch is either always or
+/// never taken:
+///  - always taken: only handled when \p CBI is immediately followed by the
+///    block's final S_BRANCH, which is then retargeted to the branch target;
+///  - never taken: \p CBI is simply erased.
+/// The block's successor list (and PHIs in a successor that lost its edge) are
+/// updated to match.
+///
+/// \returns true if \p CBI was erased, false if nothing was changed.
+bool SIInstrInfo::optimizeCondBranch(MachineInstr &CBI) const {
+  const unsigned BranchOpc = CBI.getOpcode();
+  if (BranchOpc != AMDGPU::S_CBRANCH_SCC0 &&
+      BranchOpc != AMDGPU::S_CBRANCH_SCC1)
+    return false;
+
+  // Search backward for compare.
+  MachineBasicBlock *Parent = CBI.getParent();
+  MachineInstr *CompareInst = nullptr;
+  constexpr unsigned ScanLimit = 8;
+  unsigned Count = 0;
+  for (MachineInstr &MI :
+       make_range(std::next(MachineBasicBlock::reverse_iterator(CBI)),
+                  Parent->rend())) {
+    if (MI.isCompare()) {
+      CompareInst = &MI;
+      break;
+    }
+    if (++Count > ScanLimit || MI.definesRegister(AMDGPU::SCC, &RI))
+      return false;
+  }
+
+  // If compare can be evaluated simplify the conditional branch.
+  bool SCCIsSet;
+  if (!CompareInst || !evaluateCompare(CompareInst, SCCIsSet))
+    return false;
+
+  MachineBasicBlock *TargetBB = CBI.getOperand(0).getMBB();
+  // Successor whose CFG edge from Parent disappears, if any.
+  MachineBasicBlock *DeadSucc = nullptr;
+
+  if (SCCIsSet == (BranchOpc == AMDGPU::S_CBRANCH_SCC1)) {
+    // S_CBRANCH_SCC? will always branch to target.
+
+    // Only handle the case where the block ends with:
+    //
+    //   S_CBRANCH_SCC? <target>
+    //   S_BRANCH <alt>
+    //
+    // Convert this to:
+    //
+    //   <erased>
+    //   S_BRANCH <target>
+    //
+    // This is by far the most common case and is easy to convert.
+    MachineInstr &LastInst = Parent->back();
+    if (std::next(CBI.getIterator()) != LastInst.getIterator() ||
+        LastInst.getOpcode() != AMDGPU::S_BRANCH)
+      return false;
+    MachineBasicBlock *AltBB = LastInst.getOperand(0).getMBB();
+    LastInst.getOperand(0).setMBB(TargetBB);
+    // If both branches already went to the same block the edge must stay.
+    if (AltBB != TargetBB)
+      DeadSucc = AltBB;
+  } else {
+    // S_CBRANCH_SCC? will never branch to target.
+    // Instruction will be erased.
+
+    // The edge to the target may only be dropped if nothing else in the
+    // block still reaches it: neither a later instruction that names it
+    // explicitly nor fall-through into the layout successor.
+    bool TargetStillReached = false;
+    for (const MachineInstr &MI :
+         make_range(std::next(CBI.getIterator()), Parent->instr_end())) {
+      for (const MachineOperand &MO : MI.operands()) {
+        if (MO.isMBB() && MO.getMBB() == TargetBB) {
+          TargetStillReached = true;
+          break;
+        }
+      }
+      if (TargetStillReached)
+        break;
+    }
+    // Parent->back() is CBI itself when CBI is the last instruction; a
+    // conditional branch is not a barrier, so that case counts as
+    // fall-through as well.
+    if (!TargetStillReached && Parent->isLayoutSuccessor(TargetBB) &&
+        !Parent->back().isBarrier())
+      TargetStillReached = true;
+    if (!TargetStillReached)
+      DeadSucc = TargetBB;
+  }
+
+  if (DeadSucc) {
+    // NOTE(review): assumes this hook may run while PHIs are still present
+    // (the tests drive it from peephole-opt); drop the incoming values for
+    // the removed edge so no PHI refers to a non-predecessor.
+    DeadSucc->removePHIsIncomingValuesForPredecessor(*Parent);
+    Parent->removeSuccessor(DeadSucc);
+  }
+  CBI.eraseFromParent();
+  return true;
+}
+
void SIInstrInfo::enforceOperandRCAlignment(MachineInstr &MI,
AMDGPU::OpName OpName) const {
if (!ST.needsAlignedVGPRs())
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index 05cf804d08ffc..27096c5ed4ed1 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -424,6 +424,8 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
Register SrcReg2, int64_t CmpMask, int64_t CmpValue,
const MachineRegisterInfo *MRI) const override;
+  /// Try to simplify the conditional branch \p CBI when the condition it
+  /// tests can be evaluated at compile time. Returns true if \p CBI was
+  /// changed.
+  bool optimizeCondBranch(MachineInstr &CBI) const override;
+
bool
areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
const MachineInstr &MIb) const override;
diff --git a/llvm/test/CodeGen/AMDGPU/copy-to-reg-frameindex.ll b/llvm/test/CodeGen/AMDGPU/copy-to-reg-frameindex.ll
index a13f3513c660e..e0a3bff5e8d2a 100644
--- a/llvm/test/CodeGen/AMDGPU/copy-to-reg-frameindex.ll
+++ b/llvm/test/CodeGen/AMDGPU/copy-to-reg-frameindex.ll
@@ -4,20 +4,13 @@
define amdgpu_kernel void @copy_to_reg_frameindex(ptr addrspace(1) %out, i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: copy_to_reg_frameindex:
; CHECK: ; %bb.0: ; %entry
-; CHECK-NEXT: s_cmp_lt_u32 0, 16
; CHECK-NEXT: ; implicit-def: $vgpr0
; CHECK-NEXT: .LBB0_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: s_set_gpr_idx_on 0, gpr_idx(DST)
; CHECK-NEXT: v_mov_b32_e32 v0, 0
; CHECK-NEXT: s_set_gpr_idx_off
-; CHECK-NEXT: s_cbranch_scc1 .LBB0_1
-; CHECK-NEXT: ; %bb.2: ; %done
-; CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; CHECK-NEXT: v_mov_b32_e32 v1, 0
-; CHECK-NEXT: s_waitcnt lgkmcnt(0)
-; CHECK-NEXT: global_store_dword v1, v0, s[0:1]
-; CHECK-NEXT: s_endpgm
+; CHECK-NEXT: s_branch .LBB0_1
entry:
%B = srem i32 %c, -1
br label %loop
diff --git a/llvm/test/CodeGen/AMDGPU/optimize-condbranch.mir b/llvm/test/CodeGen/AMDGPU/optimize-condbranch.mir
new file mode 100644
index 0000000000000..d24ca057f2ad1
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/optimize-condbranch.mir
@@ -0,0 +1,776 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=peephole-opt --verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
+# Simplify S_CBRANCH_SCC? if comparison can be evaluated.
+
+####################################################################################
+# Test all comparison opcodes that can be evaluated if operands have a known value.
+####################################################################################
+---
+name: CMP_EQ_U32_1_0
+body: |
+ ; GCN-LABEL: name: CMP_EQ_U32_1_0
+ ; GCN: bb.0:
+ ; GCN-NEXT: successors: %bb.1(0x40000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+ ; GCN-NEXT: S_CMP_EQ_U32 killed [[S_MOV_B32_]], 0, implicit-def $scc
+ ; GCN-NEXT: S_BRANCH %bb.1
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.1:
+ ; GCN-NEXT: successors: %bb.2(0x80000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.2:
+ ; GCN-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+
+ %0:sreg_32 = S_MOV_B32 1
+ S_CMP_EQ_U32 killed %0, 0, implicit-def $scc
+ S_CBRANCH_SCC1 %bb.2, implicit $scc
+ S_BRANCH %bb.1
+ bb.1:
+ successors: %bb.2
+
+ bb.2:
+ S_ENDPGM 0
+...
+---
+name: CMP_EQ_I32_1_0
+body: |
+ ; GCN-LABEL: name: CMP_EQ_I32_1_0
+ ; GCN: bb.0:
+ ; GCN-NEXT: successors: %bb.1(0x40000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+ ; GCN-NEXT: S_CMP_EQ_I32 killed [[S_MOV_B32_]], 0, implicit-def $scc
+ ; GCN-NEXT: S_BRANCH %bb.1
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.1:
+ ; GCN-NEXT: successors: %bb.2(0x80000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.2:
+ ; GCN-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+
+ %0:sreg_32 = S_MOV_B32 1
+ S_CMP_EQ_I32 killed %0, 0, implicit-def $scc
+ S_CBRANCH_SCC1 %bb.2, implicit $scc
+ S_BRANCH %bb.1
+ bb.1:
+ successors: %bb.2
+
+ bb.2:
+ S_ENDPGM 0
+...
+---
+name: CMP_LG_U32_1_0
+body: |
+ ; GCN-LABEL: name: CMP_LG_U32_1_0
+ ; GCN: bb.0:
+ ; GCN-NEXT: successors: %bb.2(0x40000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+ ; GCN-NEXT: S_CMP_LG_U32 killed [[S_MOV_B32_]], 0, implicit-def $scc
+ ; GCN-NEXT: S_BRANCH %bb.2
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.1:
+ ; GCN-NEXT: successors: %bb.2(0x80000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.2:
+ ; GCN-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+
+ %0:sreg_32 = S_MOV_B32 1
+ S_CMP_LG_U32 killed %0, 0, implicit-def $scc
+ S_CBRANCH_SCC1 %bb.2, implicit $scc
+ S_BRANCH %bb.1
+ bb.1:
+ successors: %bb.2
+
+ bb.2:
+ S_ENDPGM 0
+...
+---
+name: CMP_LG_I32_1_0
+body: |
+ ; GCN-LABEL: name: CMP_LG_I32_1_0
+ ; GCN: bb.0:
+ ; GCN-NEXT: successors: %bb.2(0x40000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+ ; GCN-NEXT: S_CMP_LG_I32 killed [[S_MOV_B32_]], 0, implicit-def $scc
+ ; GCN-NEXT: S_BRANCH %bb.2
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.1:
+ ; GCN-NEXT: successors: %bb.2(0x80000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.2:
+ ; GCN-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+
+ %0:sreg_32 = S_MOV_B32 1
+ S_CMP_LG_I32 killed %0, 0, implicit-def $scc
+ S_CBRANCH_SCC1 %bb.2, implicit $scc
+ S_BRANCH %bb.1
+ bb.1:
+ successors: %bb.2
+
+ bb.2:
+ S_ENDPGM 0
+...
+---
+name: CMP_LT_U32_1_0
+body: |
+ ; GCN-LABEL: name: CMP_LT_U32_1_0
+ ; GCN: bb.0:
+ ; GCN-NEXT: successors: %bb.1(0x40000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+ ; GCN-NEXT: S_CMP_LT_U32 killed [[S_MOV_B32_]], 0, implicit-def $scc
+ ; GCN-NEXT: S_BRANCH %bb.1
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.1:
+ ; GCN-NEXT: successors: %bb.2(0x80000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.2:
+ ; GCN-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+
+ %0:sreg_32 = S_MOV_B32 1
+ S_CMP_LT_U32 killed %0, 0, implicit-def $scc
+ S_CBRANCH_SCC1 %bb.2, implicit $scc
+ S_BRANCH %bb.1
+ bb.1:
+ successors: %bb.2
+
+ bb.2:
+ S_ENDPGM 0
+...
+---
+name: CMP_LE_U32_1_0
+body: |
+ ; GCN-LABEL: name: CMP_LE_U32_1_0
+ ; GCN: bb.0:
+ ; GCN-NEXT: successors: %bb.1(0x40000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+ ; GCN-NEXT: S_CMP_LE_U32 killed [[S_MOV_B32_]], 0, implicit-def $scc
+ ; GCN-NEXT: S_BRANCH %bb.1
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.1:
+ ; GCN-NEXT: successors: %bb.2(0x80000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.2:
+ ; GCN-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+
+ %0:sreg_32 = S_MOV_B32 1
+ S_CMP_LE_U32 killed %0, 0, implicit-def $scc
+ S_CBRANCH_SCC1 %bb.2, implicit $scc
+ S_BRANCH %bb.1
+ bb.1:
+ successors: %bb.2
+
+ bb.2:
+ S_ENDPGM 0
+...
+---
+name: CMP_GT_U32_1_0
+body: |
+ ; GCN-LABEL: name: CMP_GT_U32_1_0
+ ; GCN: bb.0:
+ ; GCN-NEXT: successors: %bb.2(0x40000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+ ; GCN-NEXT: S_CMP_GT_U32 killed [[S_MOV_B32_]], 0, implicit-def $scc
+ ; GCN-NEXT: S_BRANCH %bb.2
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.1:
+ ; GCN-NEXT: successors: %bb.2(0x80000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.2:
+ ; GCN-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+
+ %0:sreg_32 = S_MOV_B32 1
+ S_CMP_GT_U32 killed %0, 0, implicit-def $scc
+ S_CBRANCH_SCC1 %bb.2, implicit $scc
+ S_BRANCH %bb.1
+ bb.1:
+ successors: %bb.2
+
+ bb.2:
+ S_ENDPGM 0
+...
+---
+name: CMP_GE_U32_1_0
+body: |
+ ; GCN-LABEL: name: CMP_GE_U32_1_0
+ ; GCN: bb.0:
+ ; GCN-NEXT: successors: %bb.2(0x40000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+ ; GCN-NEXT: S_CMP_GE_U32 killed [[S_MOV_B32_]], 0, implicit-def $scc
+ ; GCN-NEXT: S_BRANCH %bb.2
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.1:
+ ; GCN-NEXT: successors: %bb.2(0x80000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.2:
+ ; GCN-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+
+ %0:sreg_32 = S_MOV_B32 1
+ S_CMP_GE_U32 killed %0, 0, implicit-def $scc
+ S_CBRANCH_SCC1 %bb.2, implicit $scc
+ S_BRANCH %bb.1
+ bb.1:
+ successors: %bb.2
+
+ bb.2:
+ S_ENDPGM 0
+...
+---
+name: CMP_LT_I32_1_0
+body: |
+ ; GCN-LABEL: name: CMP_LT_I32_1_0
+ ; GCN: bb.0:
+ ; GCN-NEXT: successors: %bb.1(0x40000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+ ; GCN-NEXT: S_CMP_LT_I32 killed [[S_MOV_B32_]], 0, implicit-def $scc
+ ; GCN-NEXT: S_BRANCH %bb.1
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.1:
+ ; GCN-NEXT: successors: %bb.2(0x80000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.2:
+ ; GCN-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+
+ %0:sreg_32 = S_MOV_B32 1
+ S_CMP_LT_I32 killed %0, 0, implicit-def $scc
+ S_CBRANCH_SCC1 %bb.2, implicit $scc
+ S_BRANCH %bb.1
+ bb.1:
+ successors: %bb.2
+
+ bb.2:
+ S_ENDPGM 0
+...
+---
+name: CMP_LE_I32_1_0
+body: |
+ ; GCN-LABEL: name: CMP_LE_I32_1_0
+ ; GCN: bb.0:
+ ; GCN-NEXT: successors: %bb.1(0x40000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+ ; GCN-NEXT: S_CMP_LE_I32 killed [[S_MOV_B32_]], 0, implicit-def $scc
+ ; GCN-NEXT: S_BRANCH %bb.1
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.1:
+ ; GCN-NEXT: successors: %bb.2(0x80000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.2:
+ ; GCN-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+
+ %0:sreg_32 = S_MOV_B32 1
+ S_CMP_LE_I32 killed %0, 0, implicit-def $scc
+ S_CBRANCH_SCC1 %bb.2, implicit $scc
+ S_BRANCH %bb.1
+ bb.1:
+ successors: %bb.2
+
+ bb.2:
+ S_ENDPGM 0
+...
+---
+name: CMP_GT_I32_1_0
+body: |
+ ; GCN-LABEL: name: CMP_GT_I32_1_0
+ ; GCN: bb.0:
+ ; GCN-NEXT: successors: %bb.2(0x40000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+ ; GCN-NEXT: S_CMP_GT_I32 killed [[S_MOV_B32_]], 0, implicit-def $scc
+ ; GCN-NEXT: S_BRANCH %bb.2
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.1:
+ ; GCN-NEXT: successors: %bb.2(0x80000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.2:
+ ; GCN-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+
+ %0:sreg_32 = S_MOV_B32 1
+ S_CMP_GT_I32 killed %0, 0, implicit-def $scc
+ S_CBRANCH_SCC1 %bb.2, implicit $scc
+ S_BRANCH %bb.1
+ bb.1:
+ successors: %bb.2
+
+ bb.2:
+ S_ENDPGM 0
+...
+---
+name: CMP_GE_I32_1_0
+body: |
+ ; GCN-LABEL: name: CMP_GE_I32_1_0
+ ; GCN: bb.0:
+ ; GCN-NEXT: successors: %bb.2(0x40000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+ ; GCN-NEXT: S_CMP_GE_I32 killed [[S_MOV_B32_]], 0, implicit-def $scc
+ ; GCN-NEXT: S_BRANCH %bb.2
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.1:
+ ; GCN-NEXT: successors: %bb.2(0x80000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.2:
+ ; GCN-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+
+ %0:sreg_32 = S_MOV_B32 1
+ S_CMP_GE_I32 killed %0, 0, implicit-def $scc
+ S_CBRANCH_SCC1 %bb.2, implicit $scc
+ S_BRANCH %bb.1
+ bb.1:
+ successors: %bb.2
+
+ bb.2:
+ S_ENDPGM 0
+...
+---
+name: CMP_EQ_U64_1_0
+body: |
+ ; GCN-LABEL: name: CMP_EQ_U64_1_0
+ ; GCN: bb.0:
+ ; GCN-NEXT: successors: %bb.1(0x40000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 1
+ ; GCN-NEXT: S_CMP_EQ_U64 killed [[S_MOV_B64_]], 0, implicit-def $scc
+ ; GCN-NEXT: S_BRANCH %bb.1
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.1:
+ ; GCN-NEXT: successors: %bb.2(0x80000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.2:
+ ; GCN-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+
+ %0:sreg_64 = S_MOV_B64 1
+ S_CMP_EQ_U64 killed %0, 0, implicit-def $scc
+ S_CBRANCH_SCC1 %bb.2, implicit $scc
+ S_BRANCH %bb.1
+ bb.1:
+ successors: %bb.2
+
+ bb.2:
+ S_ENDPGM 0
+...
+---
+name: CMP_LG_U64_1_0
+body: |
+ ; GCN-LABEL: name: CMP_LG_U64_1_0
+ ; GCN: bb.0:
+ ; GCN-NEXT: successors: %bb.2(0x40000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 1
+ ; GCN-NEXT: S_CMP_LG_U64 killed [[S_MOV_B64_]], 0, implicit-def $scc
+ ; GCN-NEXT: S_BRANCH %bb.2
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.1:
+ ; GCN-NEXT: successors: %bb.2(0x80000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.2:
+ ; GCN-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+
+ %0:sreg_64 = S_MOV_B64 1
+ S_CMP_LG_U64 killed %0, 0, implicit-def $scc
+ S_CBRANCH_SCC1 %bb.2, implicit $scc
+ S_BRANCH %bb.1
+ bb.1:
+ successors: %bb.2
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+####################################################################################
+# Test when second operand has a constant value in a register.
+####################################################################################
+---
+name: CMP_EQ_U32_1_0_reg
+body: |
+ ; GCN-LABEL: name: CMP_EQ_U32_1_0_reg
+ ; GCN: bb.0:
+ ; GCN-NEXT: successors: %bb.1(0x40000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+ ; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GCN-NEXT: S_CMP_EQ_U32 killed [[S_MOV_B32_]], killed [[S_MOV_B32_1]], implicit-def $scc
+ ; GCN-NEXT: S_BRANCH %bb.1
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.1:
+ ; GCN-NEXT: successors: %bb.2(0x80000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.2:
+ ; GCN-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+
+ %0:sreg_32 = S_MOV_B32 1
+ %1:sreg_32 = S_MOV_B32 0
+ S_CMP_EQ_U32 killed %0, killed %1, implicit-def $scc
+ S_CBRANCH_SCC1 %bb.2, implicit $scc
+ S_BRANCH %bb.1
+ bb.1:
+ successors: %bb.2
+
+ bb.2:
+ S_ENDPGM 0
+...
+---
+name: CMP_LG_I32_1_0_reg
+body: |
+ ; GCN-LABEL: name: CMP_LG_I32_1_0_reg
+ ; GCN: bb.0:
+ ; GCN-NEXT: successors: %bb.2(0x40000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+ ; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GCN-NEXT: S_CMP_LG_I32 killed [[S_MOV_B32_]], killed [[S_MOV_B32_1]], implicit-def $scc
+ ; GCN-NEXT: S_BRANCH %bb.2
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.1:
+ ; GCN-NEXT: successors: %bb.2(0x80000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.2:
+ ; GCN-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+
+ %0:sreg_32 = S_MOV_B32 1
+ %1:sreg_32 = S_MOV_B32 0
+ S_CMP_LG_I32 killed %0, killed %1, implicit-def $scc
+ S_CBRANCH_SCC1 %bb.2, implicit $scc
+ S_BRANCH %bb.1
+ bb.1:
+ successors: %bb.2
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+####################################################################################
+# Negative test. Register has unknown value.
+####################################################################################
+---
+name: NegativeTest_CMP_LT_I32_X_0
+body: |
+ ; GCN-LABEL: name: NegativeTest_CMP_LT_I32_X_0
+ ; GCN: bb.0:
+ ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; GCN-NEXT: liveins: $sgpr0
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GCN-NEXT: S_CMP_LT_I32 killed [[COPY]], 0, implicit-def $scc
+ ; GCN-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc
+ ; GCN-NEXT: S_BRANCH %bb.1
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.1:
+ ; GCN-NEXT: successors: %bb.2(0x80000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.2:
+ ; GCN-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+ liveins: $sgpr0
+
+ %0:sreg_32 = COPY $sgpr0
+ S_CMP_LT_I32 killed %0, 0, implicit-def $scc
+ S_CBRANCH_SCC1 %bb.2, implicit $scc
+ S_BRANCH %bb.1
+ bb.1:
+ successors: %bb.2
+
+ bb.2:
+ S_ENDPGM 0
+...
+---
+name: NegativeTest_CMP_LE_I32_X_0
+body: |
+ ; GCN-LABEL: name: NegativeTest_CMP_LE_I32_X_0
+ ; GCN: bb.0:
+ ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; GCN-NEXT: liveins: $sgpr0
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GCN-NEXT: S_CMP_LE_I32 killed [[COPY]], 0, implicit-def $scc
+ ; GCN-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc
+ ; GCN-NEXT: S_BRANCH %bb.1
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.1:
+ ; GCN-NEXT: successors: %bb.2(0x80000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.2:
+ ; GCN-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+ liveins: $sgpr0
+
+ %0:sreg_32 = COPY $sgpr0
+ S_CMP_LE_I32 killed %0, 0, implicit-def $scc
+ S_CBRANCH_SCC1 %bb.2, implicit $scc
+ S_BRANCH %bb.1
+ bb.1:
+ successors: %bb.2
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+####################################################################################
+# Ensure signed/unsigned comparisons are evaluated correctly.
+####################################################################################
+---
+name: CMP_GT_I32_n1_0_reg
+body: |
+ ; GCN-LABEL: name: CMP_GT_I32_n1_0_reg
+ ; GCN: bb.0:
+ ; GCN-NEXT: successors: %bb.1(0x40000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
+ ; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GCN-NEXT: S_CMP_GT_I32 killed [[S_MOV_B32_]], killed [[S_MOV_B32_1]], implicit-def $scc
+ ; GCN-NEXT: S_BRANCH %bb.1
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.1:
+ ; GCN-NEXT: successors: %bb.2(0x80000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.2:
+ ; GCN-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+
+ %0:sreg_32 = S_MOV_B32 -1
+ %1:sreg_32 = S_MOV_B32 0
+ S_CMP_GT_I32 killed %0, killed %1, implicit-def $scc
+ S_CBRANCH_SCC1 %bb.2, implicit $scc
+ S_BRANCH %bb.1
+ bb.1:
+ successors: %bb.2
+
+ bb.2:
+ S_ENDPGM 0
+...
+---
+name: CMP_GT_U32_n1_0_reg
+body: |
+ ; GCN-LABEL: name: CMP_GT_U32_n1_0_reg
+ ; GCN: bb.0:
+ ; GCN-NEXT: successors: %bb.2(0x40000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
+ ; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GCN-NEXT: S_CMP_GT_U32 killed [[S_MOV_B32_]], killed [[S_MOV_B32_1]], implicit-def $scc
+ ; GCN-NEXT: S_BRANCH %bb.2
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.1:
+ ; GCN-NEXT: successors: %bb.2(0x80000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.2:
+ ; GCN-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+
+ %0:sreg_32 = S_MOV_B32 -1
+ %1:sreg_32 = S_MOV_B32 0
+ S_CMP_GT_U32 killed %0, killed %1, implicit-def $scc
+ S_CBRANCH_SCC1 %bb.2, implicit $scc
+ S_BRANCH %bb.1
+ bb.1:
+ successors: %bb.2
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+####################################################################################
+# Test S_CBRANCH_SCC0
+####################################################################################
+---
+name: SCC0_AlwaysTaken
+body: |
+ ; GCN-LABEL: name: SCC0_AlwaysTaken
+ ; GCN: bb.0:
+ ; GCN-NEXT: successors: %bb.2(0x40000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 1
+ ; GCN-NEXT: S_CMP_EQ_U64 killed [[S_MOV_B64_]], 0, implicit-def $scc
+ ; GCN-NEXT: S_BRANCH %bb.2
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.1:
+ ; GCN-NEXT: successors: %bb.2(0x80000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.2:
+ ; GCN-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+
+ %0:sreg_64 = S_MOV_B64 1
+ S_CMP_EQ_U64 killed %0, 0, implicit-def $scc
+ S_CBRANCH_SCC0 %bb.2, implicit $scc
+ S_BRANCH %bb.1
+ bb.1:
+ successors: %bb.2
+
+ bb.2:
+ S_ENDPGM 0
+...
+---
+name: SCC0_NeverTaken
+body: |
+ ; GCN-LABEL: name: SCC0_NeverTaken
+ ; GCN: bb.0:
+ ; GCN-NEXT: successors: %bb.1(0x40000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 1
+ ; GCN-NEXT: S_CMP_LG_U64 killed [[S_MOV_B64_]], 0, implicit-def $scc
+ ; GCN-NEXT: S_BRANCH %bb.1
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.1:
+ ; GCN-NEXT: successors: %bb.2(0x80000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.2:
+ ; GCN-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+
+ %0:sreg_64 = S_MOV_B64 1
+ S_CMP_LG_U64 killed %0, 0, implicit-def $scc
+ S_CBRANCH_SCC0 %bb.2, implicit $scc
+ S_BRANCH %bb.1
+ bb.1:
+ successors: %bb.2
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+####################################################################################
+# Negative test. Scan limit for compare exceeded.
+####################################################################################
+---
+name: NegativeTest_ScanLimit
+body: |
+ ; GCN-LABEL: name: NegativeTest_ScanLimit
+ ; GCN: bb.0:
+ ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
+ ; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GCN-NEXT: S_CMP_GT_I32 killed [[S_MOV_B32_]], killed [[S_MOV_B32_1]], implicit-def $scc
+ ; GCN-NEXT: S_NOP 0
+ ; GCN-NEXT: S_NOP 0
+ ; GCN-NEXT: S_NOP 0
+ ; GCN-NEXT: S_NOP 0
+ ; GCN-NEXT: S_NOP 0
+ ; GCN-NEXT: S_NOP 0
+ ; GCN-NEXT: S_NOP 0
+ ; GCN-NEXT: S_NOP 0
+ ; GCN-NEXT: S_NOP 0
+ ; GCN-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc
+ ; GCN-NEXT: S_BRANCH %bb.1
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.1:
+ ; GCN-NEXT: successors: %bb.2(0x80000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.2:
+ ; GCN-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+
+ %0:sreg_32 = S_MOV_B32 -1
+ %1:sreg_32 = S_MOV_B32 0
+ S_CMP_GT_I32 killed %0, killed %1, implicit-def $scc
+ S_NOP 0
+ S_NOP 0
+ S_NOP 0
+ S_NOP 0
+ S_NOP 0
+ S_NOP 0
+ S_NOP 0
+ S_NOP 0
+ S_NOP 0
+ S_CBRANCH_SCC1 %bb.2, implicit $scc
+ S_BRANCH %bb.1
+ bb.1:
+ successors: %bb.2
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+
+####################################################################################
+# Test blocks that do not end with S_CBRANCH_SCC?/S_BRANCH.
+####################################################################################
+
+# Test can be optimized because S_CBRANCH_SCC? can be removed.
+---
+name: Test_BranchNeverTaken
+body: |
+ bb.0:
+ successors: %bb.0
+
+ ; GCN-LABEL: name: Test_BranchNeverTaken
+ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
+ ; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GCN-NEXT: S_CMP_GT_I32 killed [[S_MOV_B32_]], killed [[S_MOV_B32_1]], implicit-def $scc
+ ; GCN-NEXT: S_ENDPGM 0
+ %0:sreg_32 = S_MOV_B32 -1
+ %1:sreg_32 = S_MOV_B32 0
+ S_CMP_GT_I32 killed %0, killed %1, implicit-def $scc
+ S_CBRANCH_SCC1 %bb.0, implicit $scc
+ S_ENDPGM 0
+...
+
+# Test is not optimized because this case is rare and
+# not worth the added complexity.
+---
+name: NegativeTest_BranchAlwaysTaken
+body: |
+ bb.0:
+ successors: %bb.0
+
+ ; GCN-LABEL: name: NegativeTest_BranchAlwaysTaken
+ ; GCN: successors: %bb.0(0x80000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+ ; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GCN-NEXT: S_CMP_GT_I32 killed [[S_MOV_B32_]], killed [[S_MOV_B32_1]], implicit-def $scc
+ ; GCN-NEXT: S_CBRANCH_SCC1 %bb.0, implicit $scc
+ ; GCN-NEXT: S_ENDPGM 0
+ %0:sreg_32 = S_MOV_B32 1
+ %1:sreg_32 = S_MOV_B32 0
+ S_CMP_GT_I32 killed %0, killed %1, implicit-def $scc
+ S_CBRANCH_SCC1 %bb.0, implicit $scc
+ S_ENDPGM 0
+...
>From f8f0c40770f5c338d0283c0afbac0579915d49f2 Mon Sep 17 00:00:00 2001
From: John Lu <John.Lu at amd.com>
Date: Thu, 12 Feb 2026 09:57:46 -0600
Subject: [PATCH 2/2] Add memcpy testcase that creates a constant compare.
Signed-off-by: John Lu <John.Lu at amd.com>
---
llvm/test/CodeGen/AMDGPU/const_compare.ll | 61 +++++++++++++++++++++++
1 file changed, 61 insertions(+)
create mode 100644 llvm/test/CodeGen/AMDGPU/const_compare.ll
diff --git a/llvm/test/CodeGen/AMDGPU/const_compare.ll b/llvm/test/CodeGen/AMDGPU/const_compare.ll
new file mode 100644
index 0000000000000..75a4384973600
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/const_compare.ll
@@ -0,0 +1,61 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck %s
+
+define amdgpu_kernel void @_start(ptr %0) {
+; CHECK-LABEL: _start:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
+; CHECK-NEXT: s_add_u32 flat_scratch_lo, s12, s17
+; CHECK-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
+; CHECK-NEXT: s_mov_b64 s[2:3], 0
+; CHECK-NEXT: ; %bb.1: ; %dynamic-memcpy-expansion-main-body.preheader
+; CHECK-NEXT: v_mov_b32_e32 v0, 0
+; CHECK-NEXT: .LBB0_2: ; %dynamic-memcpy-expansion-main-body
+; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: global_load_dwordx4 v[4:7], v0, s[2:3]
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: s_add_u32 s4, s0, s2
+; CHECK-NEXT: s_addc_u32 s5, s1, s3
+; CHECK-NEXT: s_add_u32 s2, s2, 16
+; CHECK-NEXT: s_addc_u32 s3, s3, 0
+; CHECK-NEXT: v_pk_mov_b32 v[2:3], s[4:5], s[4:5] op_sel:[0,1]
+; CHECK-NEXT: v_cmp_lt_u64_e64 s[4:5], s[2:3], 16
+; CHECK-NEXT: s_and_b64 vcc, exec, s[4:5]
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: flat_store_dwordx4 v[2:3], v[4:7]
+; CHECK-NEXT: s_cbranch_vccnz .LBB0_2
+; CHECK-NEXT: ; %bb.3: ; %dynamic-memcpy-expansion-residual-body.preheader
+; CHECK-NEXT: s_sub_u32 s2, 29, 13
+; CHECK-NEXT: s_subb_u32 s3, 0, 0
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: s_add_u32 s2, s0, s2
+; CHECK-NEXT: s_addc_u32 s3, s1, s3
+; CHECK-NEXT: s_sub_u32 s4, 0, 13
+; CHECK-NEXT: s_mov_b64 s[0:1], 0
+; CHECK-NEXT: s_subb_u32 s5, 0, 0
+; CHECK-NEXT: v_mov_b32_e32 v0, 0
+; CHECK-NEXT: .LBB0_4: ; %dynamic-memcpy-expansion-residual-body
+; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: s_add_u32 s6, s4, s0
+; CHECK-NEXT: s_addc_u32 s7, s5, s1
+; CHECK-NEXT: global_load_ubyte v1, v0, s[6:7] offset:29
+; CHECK-NEXT: s_add_u32 s6, s2, s0
+; CHECK-NEXT: s_addc_u32 s7, s3, s1
+; CHECK-NEXT: s_add_u32 s0, s0, 1
+; CHECK-NEXT: s_addc_u32 s1, s1, 0
+; CHECK-NEXT: v_pk_mov_b32 v[2:3], s[6:7], s[6:7] op_sel:[0,1]
+; CHECK-NEXT: v_cmp_lt_u64_e64 s[6:7], s[0:1], 13
+; CHECK-NEXT: s_and_b64 vcc, exec, s[6:7]
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: flat_store_byte v[2:3], v1
+; CHECK-NEXT: s_cbranch_vccnz .LBB0_4
+; CHECK-NEXT: ; %bb.5: ; %dynamic-memcpy-post-expansion
+; CHECK-NEXT: s_endpgm
+ call void @llvm.memcpy.p0.p4.i64(ptr %0, ptr addrspace(4) null, i64 add (i64 sub (i64 16, i64 ptrtoint (ptr addrspacecast (ptr addrspace(4) null to ptr) to i64)), i64 13), i1 false)
+ ret void
+}
+
+; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.memcpy.p0.p4.i64(ptr noalias writeonly captures(none), ptr addrspace(4) noalias readonly captures(none), i64, i1 immarg) #0
+
+attributes #0 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
More information about the llvm-commits
mailing list