[llvm] a174f0d - AMDGPU/GlobalISel: Add pre-legalize combiner pass
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 22 07:17:11 PST 2020
Author: Matt Arsenault
Date: 2020-01-22T10:16:39-05:00
New Revision: a174f0da62f1cad36d21c040bf37bfdd291b28cf
URL: https://github.com/llvm/llvm-project/commit/a174f0da62f1cad36d21c040bf37bfdd291b28cf
DIFF: https://github.com/llvm/llvm-project/commit/a174f0da62f1cad36d21c040bf37bfdd291b28cf.diff
LOG: AMDGPU/GlobalISel: Add pre-legalize combiner pass
Just copy the AArch64 pass as-is for now, except for removing the
memcpy handling.
Added:
llvm/lib/Target/AMDGPU/AMDGPUCombine.td
llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
Modified:
llvm/lib/Target/AMDGPU/AMDGPU.h
llvm/lib/Target/AMDGPU/AMDGPUGISel.td
llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
llvm/lib/Target/AMDGPU/CMakeLists.txt
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.dec.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.inc.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i32.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i64.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index fbed51de0ea4..621a93d45879 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -27,6 +27,10 @@ class TargetOptions;
class PassRegistry;
class Module;
+// GlobalISel passes
+void initializeAMDGPUPreLegalizerCombinerPass(PassRegistry &);
+FunctionPass *createAMDGPUPreLegalizeCombiner(bool IsOptNone);
+
// R600 Passes
FunctionPass *createR600VectorRegMerger();
FunctionPass *createR600ExpandSpecialInstrsPass();
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
new file mode 100644
index 000000000000..d8b3b89605e9
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
@@ -0,0 +1,15 @@
+//=- AMDGPUCombine.td - Define AMDGPU Combine Rules ----------*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/GlobalISel/Combine.td"
+
+def AMDGPUPreLegalizerCombinerHelper: GICombinerHelper< // Pre-legalize rule set.
+ "AMDGPUGenPreLegalizerCombinerHelper", [all_combines, // Name the C++ side uses.
+ elide_br_by_inverting_cond]> {
+ let DisableRuleOption = "amdgpuprelegalizercombiner-disable-rule"; // presumably the cl option name for disabling rules; confirm in Combine.td
+}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index afb08a23f2cf..1a8134788f8c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
include "AMDGPU.td"
+include "AMDGPUCombine.td"
def sd_vsrc0 : ComplexPattern<i32, 1, "">;
def gi_vsrc0 :
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
new file mode 100644
index 000000000000..7298fb2317ac
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
@@ -0,0 +1,149 @@
+//=== lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp --------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass does combining of machine instructions at the generic MI level,
+// before the legalizer.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUTargetMachine.h"
+#include "llvm/CodeGen/GlobalISel/Combiner.h"
+#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
+#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
+#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
+#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "amdgpu-prelegalizer-combiner"
+
+using namespace llvm;
+using namespace MIPatternMatch;
+
+#define AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
+#include "AMDGPUGenGICombiner.inc"
+#undef AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
+
+namespace {
+#define AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
+#include "AMDGPUGenGICombiner.inc"
+#undef AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
+
+class AMDGPUPreLegalizerCombinerInfo : public CombinerInfo { // Settings + rules.
+ GISelKnownBits *KB; // Handed to the CombinerHelper built in combine().
+ MachineDominatorTree *MDT; // Null when the pass is created with IsOptNone.
+
+public:
+ AMDGPUGenPreLegalizerCombinerHelper Generated; // TableGen-generated matcher.
+
+ AMDGPUPreLegalizerCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize,
+ GISelKnownBits *KB, MachineDominatorTree *MDT)
+ : CombinerInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
+ /*LegalizerInfo*/ nullptr, EnableOpt, OptSize, MinSize),
+ KB(KB), MDT(MDT) { // Per the labels above: illegal ops allowed, no LegalizerInfo.
+ if (!Generated.parseCommandLineOption()) // Fatal error if this returns false.
+ report_fatal_error("Invalid rule identifier");
+ }
+
+ virtual bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
+ MachineIRBuilder &B) const override; // Defined out of line in this file.
+};
+
+bool AMDGPUPreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
+ MachineInstr &MI,
+ MachineIRBuilder &B) const { // Returns true as soon as one attempt succeeds.
+ CombinerHelper Helper(Observer, B, KB, MDT); // MDT may be null (see the pass).
+
+ if (Generated.tryCombineAll(Observer, MI, B, Helper)) // Generated rules first.
+ return true;
+
+ switch (MI.getOpcode()) { // Then the C++ helper combines for two vector opcodes.
+ case TargetOpcode::G_CONCAT_VECTORS:
+ return Helper.tryCombineConcatVectors(MI);
+ case TargetOpcode::G_SHUFFLE_VECTOR:
+ return Helper.tryCombineShuffleVector(MI);
+ }
+
+ return false; // Neither the generated rules nor the switch handled MI.
+}
+
+#define AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
+#include "AMDGPUGenGICombiner.inc"
+#undef AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
+
+// Pass boilerplate
+// ================
+
+class AMDGPUPreLegalizerCombiner : public MachineFunctionPass { // The pass itself.
+public:
+ static char ID; // Passed to the MachineFunctionPass constructor.
+
+ AMDGPUPreLegalizerCombiner(bool IsOptNone = false);
+
+ StringRef getPassName() const override { return "AMDGPUPreLegalizerCombiner"; }
+
+ bool runOnMachineFunction(MachineFunction &MF) override; // Runs the Combiner.
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+private:
+ bool IsOptNone; // When true, MachineDominatorTree is neither required nor used.
+};
+} // end anonymous namespace
+
+void AMDGPUPreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetPassConfig>(); // Fetched in runOnMachineFunction.
+ AU.setPreservesCFG();
+ getSelectionDAGFallbackAnalysisUsage(AU);
+ AU.addRequired<GISelKnownBitsAnalysis>(); // Source of the KB pointer.
+ AU.addPreserved<GISelKnownBitsAnalysis>();
+ if (!IsOptNone) { // With IsOptNone set, runOnMachineFunction uses a null MDT.
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ }
+ MachineFunctionPass::getAnalysisUsage(AU); // Let the base class add its own.
+}
+
+AMDGPUPreLegalizerCombiner::AMDGPUPreLegalizerCombiner(bool IsOptNone)
+ : MachineFunctionPass(ID), IsOptNone(IsOptNone) { // Stores the -O0 flag.
+ initializeAMDGPUPreLegalizerCombinerPass(*PassRegistry::getPassRegistry()); // Uses the global pass registry.
+}
+
+bool AMDGPUPreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
+ if (MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::FailedISel))
+ return false; // Leave functions already marked FailedISel untouched.
+ auto *TPC = &getAnalysis<TargetPassConfig>();
+ const Function &F = MF.getFunction();
+ bool EnableOpt = // False at CodeGenOpt::None or when skipFunction(F) is true.
+ MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F);
+ GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
+ MachineDominatorTree *MDT = // Only required, and so only fetched, if !IsOptNone.
+ IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();
+ AMDGPUPreLegalizerCombinerInfo PCInfo(EnableOpt, F.hasOptSize(),
+ F.hasMinSize(), KB, MDT);
+ Combiner C(PCInfo, TPC);
+ return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr); // No CSE info supplied.
+}
+
+char AMDGPUPreLegalizerCombiner::ID = 0; // Definition of the static member.
+INITIALIZE_PASS_BEGIN(AMDGPUPreLegalizerCombiner, DEBUG_TYPE,
+ "Combine AMDGPU machine instrs before legalization",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis) // NOTE(review): MachineDominatorTree is required when !IsOptNone but is not listed here; confirm this is intended.
+INITIALIZE_PASS_END(AMDGPUPreLegalizerCombiner, DEBUG_TYPE,
+ "Combine AMDGPU machine instrs before legalization", false,
+ false)
+
+namespace llvm {
+FunctionPass *createAMDGPUPreLegalizeCombiner(bool IsOptNone) { // Declared in AMDGPU.h.
+ return new AMDGPUPreLegalizerCombiner(IsOptNone); // Newly allocated pass instance.
+}
+} // end namespace llvm
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index eb30d659bf0b..0b0d16967324 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -217,6 +217,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeAMDGPULowerKernelAttributesPass(*PR);
initializeAMDGPULowerIntrinsicsPass(*PR);
initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(*PR);
+ initializeAMDGPUPreLegalizerCombinerPass(*PR);
initializeAMDGPUPromoteAllocaPass(*PR);
initializeAMDGPUCodeGenPreparePass(*PR);
initializeAMDGPUPropagateAttributesEarlyPass(*PR);
@@ -617,6 +618,7 @@ class GCNPassConfig final : public AMDGPUPassConfig {
bool addILPOpts() override;
bool addInstSelector() override;
bool addIRTranslator() override;
+ void addPreLegalizeMachineIR() override;
bool addLegalizeMachineIR() override;
bool addRegBankSelect() override;
bool addGlobalInstructionSelect() override;
@@ -895,6 +897,11 @@ bool GCNPassConfig::addIRTranslator() {
return false;
}
+void GCNPassConfig::addPreLegalizeMachineIR() { // Schedules the new combiner pass.
+ bool IsOptNone = getOptLevel() == CodeGenOpt::None; // True only at CodeGenOpt::None.
+ addPass(createAMDGPUPreLegalizeCombiner(IsOptNone)); // Flag disables dominator-tree use in the pass.
+}
+
bool GCNPassConfig::addLegalizeMachineIR() {
addPass(new Legalizer());
return false;
diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt
index 0b8eb4b25ae4..3bbf2c346954 100644
--- a/llvm/lib/Target/AMDGPU/CMakeLists.txt
+++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt
@@ -15,6 +15,8 @@ tablegen(LLVM AMDGPUGenSubtargetInfo.inc -gen-subtarget)
set(LLVM_TARGET_DEFINITIONS AMDGPUGISel.td)
tablegen(LLVM AMDGPUGenGlobalISel.inc -gen-global-isel)
+tablegen(LLVM AMDGPUGenGICombiner.inc -gen-global-isel-combiner
+ -combiners="AMDGPUPreLegalizerCombinerHelper")
set(LLVM_TARGET_DEFINITIONS R600.td)
tablegen(LLVM R600GenAsmWriter.inc -gen-asm-writer)
@@ -58,6 +60,7 @@ add_llvm_target(AMDGPUCodeGen
AMDGPUMacroFusion.cpp
AMDGPUMCInstLower.cpp
AMDGPUOpenCLEnqueuedBlockLowering.cpp
+ AMDGPUPreLegalizerCombiner.cpp
AMDGPUPromoteAlloca.cpp
AMDGPUPropagateAttributes.cpp
AMDGPURegisterBankInfo.cpp
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.dec.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.dec.ll
index d441571f48cc..d0b43b93d431 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.dec.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.dec.ll
@@ -46,7 +46,6 @@ define amdgpu_kernel void @lds_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 ad
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: flat_store_dword v[0:1], v2
; VI-NEXT: s_endpgm
-;
; GFX9-LABEL: lds_atomic_dec_ret_i32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
@@ -97,7 +96,6 @@ define amdgpu_kernel void @lds_atomic_dec_ret_i32_offset(i32 addrspace(1)* %out,
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: flat_store_dword v[0:1], v2
; VI-NEXT: s_endpgm
-;
; GFX9-LABEL: lds_atomic_dec_ret_i32_offset:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
@@ -138,7 +136,6 @@ define amdgpu_kernel void @lds_atomic_dec_noret_i32(i32 addrspace(3)* %ptr) noun
; VI-NEXT: v_mov_b32_e32 v1, s0
; VI-NEXT: ds_dec_rtn_u32 v0, v1, v0
; VI-NEXT: s_endpgm
-;
; GFX9-LABEL: lds_atomic_dec_noret_i32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dword s0, s[0:1], 0x24
@@ -173,7 +170,6 @@ define amdgpu_kernel void @lds_atomic_dec_noret_i32_offset(i32 addrspace(3)* %pt
; VI-NEXT: v_mov_b32_e32 v1, s0
; VI-NEXT: ds_dec_rtn_u32 v0, v1, v0
; VI-NEXT: s_endpgm
-;
; GFX9-LABEL: lds_atomic_dec_noret_i32_offset:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dword s0, s[0:1], 0x24
@@ -216,7 +212,6 @@ define amdgpu_kernel void @global_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32
; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; VI-NEXT: flat_store_dword v[0:1], v2
; VI-NEXT: s_endpgm
-;
; GFX9-LABEL: global_atomic_dec_ret_i32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -267,7 +262,6 @@ define amdgpu_kernel void @global_atomic_dec_ret_i32_offset(i32 addrspace(1)* %o
; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; VI-NEXT: flat_store_dword v[0:1], v2
; VI-NEXT: s_endpgm
-;
; GFX9-LABEL: global_atomic_dec_ret_i32_offset:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -309,7 +303,6 @@ define amdgpu_kernel void @global_atomic_dec_noret_i32(i32 addrspace(1)* %ptr) n
; VI-NEXT: v_mov_b32_e32 v1, s1
; VI-NEXT: flat_atomic_dec v0, v[0:1], v2 glc
; VI-NEXT: s_endpgm
-;
; GFX9-LABEL: global_atomic_dec_noret_i32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -347,7 +340,6 @@ define amdgpu_kernel void @global_atomic_dec_noret_i32_offset(i32 addrspace(1)*
; VI-NEXT: v_mov_b32_e32 v1, s1
; VI-NEXT: flat_atomic_dec v0, v[0:1], v2 glc
; VI-NEXT: s_endpgm
-;
; GFX9-LABEL: global_atomic_dec_noret_i32_offset:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -414,7 +406,6 @@ define amdgpu_kernel void @global_atomic_dec_ret_i32_offset_addr64(i32 addrspace
; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; VI-NEXT: flat_store_dword v[2:3], v0
; VI-NEXT: s_endpgm
-;
; GFX9-LABEL: global_atomic_dec_ret_i32_offset_addr64:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_ashrrev_i32_e32 v1, 31, v0
@@ -488,7 +479,6 @@ define amdgpu_kernel void @global_atomic_dec_noret_i32_offset_addr64(i32 addrspa
; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-NEXT: flat_atomic_dec v0, v[0:1], v2 glc
; VI-NEXT: s_endpgm
-;
; GFX9-LABEL: global_atomic_dec_noret_i32_offset_addr64:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_ashrrev_i32_e32 v1, 31, v0
@@ -543,7 +533,6 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i32(i32* %out, i32* %ptr) #0 {
; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; VI-NEXT: flat_store_dword v[0:1], v2
; VI-NEXT: s_endpgm
-;
; GFX9-LABEL: flat_atomic_dec_ret_i32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -594,7 +583,6 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset(i32* %out, i32* %ptr)
; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; VI-NEXT: flat_store_dword v[0:1], v2
; VI-NEXT: s_endpgm
-;
; GFX9-LABEL: flat_atomic_dec_ret_i32_offset:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -636,7 +624,6 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i32(i32* %ptr) nounwind {
; VI-NEXT: v_mov_b32_e32 v1, s1
; VI-NEXT: flat_atomic_dec v0, v[0:1], v2 glc
; VI-NEXT: s_endpgm
-;
; GFX9-LABEL: flat_atomic_dec_noret_i32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -674,7 +661,6 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset(i32* %ptr) nounwind
; VI-NEXT: v_mov_b32_e32 v1, s1
; VI-NEXT: flat_atomic_dec v0, v[0:1], v2 glc
; VI-NEXT: s_endpgm
-;
; GFX9-LABEL: flat_atomic_dec_noret_i32_offset:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -741,7 +727,6 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset_addr64(i32* %out, i32*
; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; VI-NEXT: flat_store_dword v[2:3], v0
; VI-NEXT: s_endpgm
-;
; GFX9-LABEL: flat_atomic_dec_ret_i32_offset_addr64:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_ashrrev_i32_e32 v1, 31, v0
@@ -815,7 +800,6 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset_addr64(i32* %ptr) #0
; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-NEXT: flat_atomic_dec v0, v[0:1], v2 glc
; VI-NEXT: s_endpgm
-;
; GFX9-LABEL: flat_atomic_dec_noret_i32_offset_addr64:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_ashrrev_i32_e32 v1, 31, v0
@@ -872,7 +856,6 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i64(i64* %out, i64* %ptr) #0 {
; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT: s_endpgm
-;
; GFX9-LABEL: flat_atomic_dec_ret_i64:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -926,7 +909,6 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset(i64* %out, i64* %ptr)
; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT: s_endpgm
-;
; GFX9-LABEL: flat_atomic_dec_ret_i64_offset:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -971,7 +953,6 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i64(i64* %ptr) nounwind {
; VI-NEXT: v_mov_b32_e32 v1, s1
; VI-NEXT: flat_atomic_dec_x2 v[0:1], v[0:1], v[2:3] glc
; VI-NEXT: s_endpgm
-;
; GFX9-LABEL: flat_atomic_dec_noret_i64:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1012,7 +993,6 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset(i64* %ptr) nounwind
; VI-NEXT: v_mov_b32_e32 v1, s1
; VI-NEXT: flat_atomic_dec_x2 v[0:1], v[0:1], v[2:3] glc
; VI-NEXT: s_endpgm
-;
; GFX9-LABEL: flat_atomic_dec_noret_i64_offset:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1082,7 +1062,6 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset_addr64(i64* %out, i64*
; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT: s_endpgm
-;
; GFX9-LABEL: flat_atomic_dec_ret_i64_offset_addr64:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_ashrrev_i32_e32 v1, 31, v0
@@ -1159,7 +1138,6 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset_addr64(i64* %ptr) #0
; VI-NEXT: v_mov_b32_e32 v3, 0
; VI-NEXT: flat_atomic_dec_x2 v[0:1], v[0:1], v[2:3] glc
; VI-NEXT: s_endpgm
-;
; GFX9-LABEL: flat_atomic_dec_noret_i64_offset_addr64:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_ashrrev_i32_e32 v1, 31, v0
@@ -1192,42 +1170,41 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset_addr64(i64* %ptr) #0
define amdgpu_kernel void @atomic_dec_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
; CI-LABEL: atomic_dec_shl_base_lds_0:
; CI: ; %bb.0:
-; CI-NEXT: v_mul_lo_u32 v1, 4, v0
+; CI-NEXT: v_mul_lo_u32 v5, 4, v0
; CI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
-; CI-NEXT: v_add_i32_e32 v2, vcc, 2, v0
+; CI-NEXT: v_mov_b32_e32 v6, 9
; CI-NEXT: s_mov_b32 m0, -1
-; CI-NEXT: v_add_i32_e32 v0, vcc, 0, v1
-; CI-NEXT: v_add_i32_e32 v0, vcc, 8, v0
-; CI-NEXT: v_mov_b32_e32 v1, 9
-; CI-NEXT: ds_dec_rtn_u32 v3, v0, v1
+; CI-NEXT: v_add_i32_e32 v5, vcc, 0, v5
+; CI-NEXT: v_add_i32_e32 v5, vcc, 8, v5
+; CI-NEXT: ds_dec_rtn_u32 v5, v5, v6
+; CI-NEXT: v_add_i32_e32 v4, vcc, 2, v0
; CI-NEXT: s_waitcnt lgkmcnt(0)
; CI-NEXT: v_mov_b32_e32 v0, s2
+; CI-NEXT: v_mov_b32_e32 v3, s1
; CI-NEXT: v_mov_b32_e32 v1, s3
-; CI-NEXT: flat_store_dword v[0:1], v2
-; CI-NEXT: v_mov_b32_e32 v0, s0
-; CI-NEXT: v_mov_b32_e32 v1, s1
-; CI-NEXT: flat_store_dword v[0:1], v3
+; CI-NEXT: v_mov_b32_e32 v2, s0
+; CI-NEXT: flat_store_dword v[0:1], v4
+; CI-NEXT: flat_store_dword v[2:3], v5
; CI-NEXT: s_endpgm
;
; VI-LABEL: atomic_dec_shl_base_lds_0:
; VI: ; %bb.0:
-; VI-NEXT: v_mul_lo_u32 v1, 4, v0
+; VI-NEXT: v_mul_lo_u32 v5, 4, v0
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; VI-NEXT: v_add_u32_e32 v2, vcc, 2, v0
+; VI-NEXT: v_mov_b32_e32 v6, 9
; VI-NEXT: s_mov_b32 m0, -1
-; VI-NEXT: v_add_u32_e32 v0, vcc, 0, v1
-; VI-NEXT: v_add_u32_e32 v0, vcc, 8, v0
-; VI-NEXT: v_mov_b32_e32 v1, 9
-; VI-NEXT: ds_dec_rtn_u32 v3, v0, v1
+; VI-NEXT: v_add_u32_e32 v5, vcc, 0, v5
+; VI-NEXT: v_add_u32_e32 v5, vcc, 8, v5
+; VI-NEXT: ds_dec_rtn_u32 v5, v5, v6
+; VI-NEXT: v_add_u32_e32 v4, vcc, 2, v0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_mov_b32_e32 v0, s2
+; VI-NEXT: v_mov_b32_e32 v3, s1
; VI-NEXT: v_mov_b32_e32 v1, s3
-; VI-NEXT: flat_store_dword v[0:1], v2
-; VI-NEXT: v_mov_b32_e32 v0, s0
-; VI-NEXT: v_mov_b32_e32 v1, s1
-; VI-NEXT: flat_store_dword v[0:1], v3
+; VI-NEXT: v_mov_b32_e32 v2, s0
+; VI-NEXT: flat_store_dword v[0:1], v4
+; VI-NEXT: flat_store_dword v[2:3], v5
; VI-NEXT: s_endpgm
-;
; GFX9-LABEL: atomic_dec_shl_base_lds_0:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_mul_lo_u32 v1, 4, v0
@@ -1286,7 +1263,6 @@ define amdgpu_kernel void @lds_atomic_dec_ret_i64(i64 addrspace(1)* %out, i64 ad
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT: s_endpgm
-;
; GFX9-LABEL: lds_atomic_dec_ret_i64:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
@@ -1340,7 +1316,6 @@ define amdgpu_kernel void @lds_atomic_dec_ret_i64_offset(i64 addrspace(1)* %out,
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT: s_endpgm
-;
; GFX9-LABEL: lds_atomic_dec_ret_i64_offset:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
@@ -1384,7 +1359,6 @@ define amdgpu_kernel void @lds_atomic_dec_noret_i64(i64 addrspace(3)* %ptr) noun
; VI-NEXT: v_mov_b32_e32 v2, s0
; VI-NEXT: ds_dec_rtn_u64 v[0:1], v2, v[0:1]
; VI-NEXT: s_endpgm
-;
; GFX9-LABEL: lds_atomic_dec_noret_i64:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dword s0, s[0:1], 0x24
@@ -1422,7 +1396,6 @@ define amdgpu_kernel void @lds_atomic_dec_noret_i64_offset(i64 addrspace(3)* %pt
; VI-NEXT: v_mov_b32_e32 v2, s0
; VI-NEXT: ds_dec_rtn_u64 v[0:1], v2, v[0:1]
; VI-NEXT: s_endpgm
-;
; GFX9-LABEL: lds_atomic_dec_noret_i64_offset:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dword s0, s[0:1], 0x24
@@ -1468,7 +1441,6 @@ define amdgpu_kernel void @global_atomic_dec_ret_i64(i64 addrspace(1)* %out, i64
; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT: s_endpgm
-;
; GFX9-LABEL: global_atomic_dec_ret_i64:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -1522,7 +1494,6 @@ define amdgpu_kernel void @global_atomic_dec_ret_i64_offset(i64 addrspace(1)* %o
; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT: s_endpgm
-;
; GFX9-LABEL: global_atomic_dec_ret_i64_offset:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -1567,7 +1538,6 @@ define amdgpu_kernel void @global_atomic_dec_noret_i64(i64 addrspace(1)* %ptr) n
; VI-NEXT: v_mov_b32_e32 v1, s1
; VI-NEXT: flat_atomic_dec_x2 v[0:1], v[0:1], v[2:3] glc
; VI-NEXT: s_endpgm
-;
; GFX9-LABEL: global_atomic_dec_noret_i64:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1608,7 +1578,6 @@ define amdgpu_kernel void @global_atomic_dec_noret_i64_offset(i64 addrspace(1)*
; VI-NEXT: v_mov_b32_e32 v1, s1
; VI-NEXT: flat_atomic_dec_x2 v[0:1], v[0:1], v[2:3] glc
; VI-NEXT: s_endpgm
-;
; GFX9-LABEL: global_atomic_dec_noret_i64_offset:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1678,7 +1647,6 @@ define amdgpu_kernel void @global_atomic_dec_ret_i64_offset_addr64(i64 addrspace
; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT: s_endpgm
-;
; GFX9-LABEL: global_atomic_dec_ret_i64_offset_addr64:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_ashrrev_i32_e32 v1, 31, v0
@@ -1755,7 +1723,6 @@ define amdgpu_kernel void @global_atomic_dec_noret_i64_offset_addr64(i64 addrspa
; VI-NEXT: v_mov_b32_e32 v3, 0
; VI-NEXT: flat_atomic_dec_x2 v[0:1], v[0:1], v[2:3] glc
; VI-NEXT: s_endpgm
-;
; GFX9-LABEL: global_atomic_dec_noret_i64_offset_addr64:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_ashrrev_i32_e32 v1, 31, v0
@@ -1788,44 +1755,43 @@ define amdgpu_kernel void @global_atomic_dec_noret_i64_offset_addr64(i64 addrspa
define amdgpu_kernel void @atomic_dec_shl_base_lds_0_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
; CI-LABEL: atomic_dec_shl_base_lds_0_i64:
; CI: ; %bb.0:
-; CI-NEXT: v_mul_lo_u32 v1, 8, v0
+; CI-NEXT: v_mul_lo_u32 v7, 8, v0
; CI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
-; CI-NEXT: v_add_i32_e32 v4, vcc, 2, v0
-; CI-NEXT: s_mov_b32 m0, -1
-; CI-NEXT: v_add_i32_e32 v0, vcc, 0, v1
-; CI-NEXT: v_add_i32_e32 v2, vcc, 16, v0
+; CI-NEXT: v_add_i32_e32 v6, vcc, 2, v0
; CI-NEXT: v_mov_b32_e32 v0, 9
+; CI-NEXT: v_add_i32_e32 v7, vcc, 0, v7
; CI-NEXT: v_mov_b32_e32 v1, 0
-; CI-NEXT: ds_dec_rtn_u64 v[0:1], v2, v[0:1]
+; CI-NEXT: v_add_i32_e32 v7, vcc, 16, v7
+; CI-NEXT: s_mov_b32 m0, -1
+; CI-NEXT: ds_dec_rtn_u64 v[0:1], v7, v[0:1]
; CI-NEXT: s_waitcnt lgkmcnt(0)
; CI-NEXT: v_mov_b32_e32 v2, s2
+; CI-NEXT: v_mov_b32_e32 v5, s1
; CI-NEXT: v_mov_b32_e32 v3, s3
-; CI-NEXT: flat_store_dword v[2:3], v4
-; CI-NEXT: v_mov_b32_e32 v3, s1
-; CI-NEXT: v_mov_b32_e32 v2, s0
-; CI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; CI-NEXT: v_mov_b32_e32 v4, s0
+; CI-NEXT: flat_store_dword v[2:3], v6
+; CI-NEXT: flat_store_dwordx2 v[4:5], v[0:1]
; CI-NEXT: s_endpgm
;
; VI-LABEL: atomic_dec_shl_base_lds_0_i64:
; VI: ; %bb.0:
-; VI-NEXT: v_mul_lo_u32 v1, 8, v0
+; VI-NEXT: v_mul_lo_u32 v7, 8, v0
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; VI-NEXT: v_add_u32_e32 v4, vcc, 2, v0
-; VI-NEXT: s_mov_b32 m0, -1
-; VI-NEXT: v_add_u32_e32 v0, vcc, 0, v1
-; VI-NEXT: v_add_u32_e32 v2, vcc, 16, v0
+; VI-NEXT: v_add_u32_e32 v6, vcc, 2, v0
; VI-NEXT: v_mov_b32_e32 v0, 9
+; VI-NEXT: v_add_u32_e32 v7, vcc, 0, v7
; VI-NEXT: v_mov_b32_e32 v1, 0
-; VI-NEXT: ds_dec_rtn_u64 v[0:1], v2, v[0:1]
+; VI-NEXT: v_add_u32_e32 v7, vcc, 16, v7
+; VI-NEXT: s_mov_b32 m0, -1
+; VI-NEXT: ds_dec_rtn_u64 v[0:1], v7, v[0:1]
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_mov_b32_e32 v2, s2
+; VI-NEXT: v_mov_b32_e32 v5, s1
; VI-NEXT: v_mov_b32_e32 v3, s3
-; VI-NEXT: flat_store_dword v[2:3], v4
-; VI-NEXT: v_mov_b32_e32 v3, s1
-; VI-NEXT: v_mov_b32_e32 v2, s0
-; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; VI-NEXT: v_mov_b32_e32 v4, s0
+; VI-NEXT: flat_store_dword v[2:3], v6
+; VI-NEXT: flat_store_dwordx2 v[4:5], v[0:1]
; VI-NEXT: s_endpgm
-;
; GFX9-LABEL: atomic_dec_shl_base_lds_0_i64:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_mul_lo_u32 v3, 8, v0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.inc.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.inc.ll
index 176c7f195025..64dc688cae49 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.inc.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.inc.ll
@@ -523,40 +523,40 @@ define amdgpu_kernel void @global_atomic_inc_noret_i32_offset_addr64(i32 addrspa
define amdgpu_kernel void @atomic_inc_shl_base_lds_0_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
; CI-LABEL: atomic_inc_shl_base_lds_0_i32:
; CI: ; %bb.0:
-; CI-NEXT: v_mul_lo_u32 v1, 4, v0
+; CI-NEXT: v_mul_lo_u32 v5, 4, v0
; CI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
-; CI-NEXT: v_add_i32_e32 v2, vcc, 2, v0
+; CI-NEXT: v_mov_b32_e32 v6, 9
; CI-NEXT: s_mov_b32 m0, -1
-; CI-NEXT: v_add_i32_e32 v0, vcc, 0, v1
-; CI-NEXT: v_add_i32_e32 v0, vcc, 8, v0
-; CI-NEXT: v_mov_b32_e32 v1, 9
-; CI-NEXT: ds_inc_rtn_u32 v3, v0, v1
+; CI-NEXT: v_add_i32_e32 v5, vcc, 0, v5
+; CI-NEXT: v_add_i32_e32 v5, vcc, 8, v5
+; CI-NEXT: ds_inc_rtn_u32 v5, v5, v6
+; CI-NEXT: v_add_i32_e32 v4, vcc, 2, v0
; CI-NEXT: s_waitcnt lgkmcnt(0)
; CI-NEXT: v_mov_b32_e32 v0, s2
+; CI-NEXT: v_mov_b32_e32 v3, s1
; CI-NEXT: v_mov_b32_e32 v1, s3
-; CI-NEXT: flat_store_dword v[0:1], v2
-; CI-NEXT: v_mov_b32_e32 v0, s0
-; CI-NEXT: v_mov_b32_e32 v1, s1
-; CI-NEXT: flat_store_dword v[0:1], v3
+; CI-NEXT: v_mov_b32_e32 v2, s0
+; CI-NEXT: flat_store_dword v[0:1], v4
+; CI-NEXT: flat_store_dword v[2:3], v5
; CI-NEXT: s_endpgm
;
; VI-LABEL: atomic_inc_shl_base_lds_0_i32:
; VI: ; %bb.0:
-; VI-NEXT: v_mul_lo_u32 v1, 4, v0
+; VI-NEXT: v_mul_lo_u32 v5, 4, v0
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; VI-NEXT: v_add_u32_e32 v2, vcc, 2, v0
+; VI-NEXT: v_mov_b32_e32 v6, 9
; VI-NEXT: s_mov_b32 m0, -1
-; VI-NEXT: v_add_u32_e32 v0, vcc, 0, v1
-; VI-NEXT: v_add_u32_e32 v0, vcc, 8, v0
-; VI-NEXT: v_mov_b32_e32 v1, 9
-; VI-NEXT: ds_inc_rtn_u32 v3, v0, v1
+; VI-NEXT: v_add_u32_e32 v5, vcc, 0, v5
+; VI-NEXT: v_add_u32_e32 v5, vcc, 8, v5
+; VI-NEXT: ds_inc_rtn_u32 v5, v5, v6
+; VI-NEXT: v_add_u32_e32 v4, vcc, 2, v0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_mov_b32_e32 v0, s2
+; VI-NEXT: v_mov_b32_e32 v3, s1
; VI-NEXT: v_mov_b32_e32 v1, s3
-; VI-NEXT: flat_store_dword v[0:1], v2
-; VI-NEXT: v_mov_b32_e32 v0, s0
-; VI-NEXT: v_mov_b32_e32 v1, s1
-; VI-NEXT: flat_store_dword v[0:1], v3
+; VI-NEXT: v_mov_b32_e32 v2, s0
+; VI-NEXT: flat_store_dword v[0:1], v4
+; VI-NEXT: flat_store_dword v[2:3], v5
; VI-NEXT: s_endpgm
;
; GFX9-LABEL: atomic_inc_shl_base_lds_0_i32:
@@ -1446,42 +1446,42 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset_addr64(i32* %ptr) #0
define amdgpu_kernel void @atomic_inc_shl_base_lds_0_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
; CI-LABEL: atomic_inc_shl_base_lds_0_i64:
; CI: ; %bb.0:
-; CI-NEXT: v_mul_lo_u32 v1, 8, v0
+; CI-NEXT: v_mul_lo_u32 v7, 8, v0
; CI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
-; CI-NEXT: v_add_i32_e32 v4, vcc, 2, v0
-; CI-NEXT: s_mov_b32 m0, -1
-; CI-NEXT: v_add_i32_e32 v0, vcc, 0, v1
-; CI-NEXT: v_add_i32_e32 v2, vcc, 16, v0
+; CI-NEXT: v_add_i32_e32 v6, vcc, 2, v0
; CI-NEXT: v_mov_b32_e32 v0, 9
+; CI-NEXT: v_add_i32_e32 v7, vcc, 0, v7
; CI-NEXT: v_mov_b32_e32 v1, 0
-; CI-NEXT: ds_inc_rtn_u64 v[0:1], v2, v[0:1]
+; CI-NEXT: v_add_i32_e32 v7, vcc, 16, v7
+; CI-NEXT: s_mov_b32 m0, -1
+; CI-NEXT: ds_inc_rtn_u64 v[0:1], v7, v[0:1]
; CI-NEXT: s_waitcnt lgkmcnt(0)
; CI-NEXT: v_mov_b32_e32 v2, s2
+; CI-NEXT: v_mov_b32_e32 v5, s1
; CI-NEXT: v_mov_b32_e32 v3, s3
-; CI-NEXT: flat_store_dword v[2:3], v4
-; CI-NEXT: v_mov_b32_e32 v3, s1
-; CI-NEXT: v_mov_b32_e32 v2, s0
-; CI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; CI-NEXT: v_mov_b32_e32 v4, s0
+; CI-NEXT: flat_store_dword v[2:3], v6
+; CI-NEXT: flat_store_dwordx2 v[4:5], v[0:1]
; CI-NEXT: s_endpgm
;
; VI-LABEL: atomic_inc_shl_base_lds_0_i64:
; VI: ; %bb.0:
-; VI-NEXT: v_mul_lo_u32 v1, 8, v0
+; VI-NEXT: v_mul_lo_u32 v7, 8, v0
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; VI-NEXT: v_add_u32_e32 v4, vcc, 2, v0
-; VI-NEXT: s_mov_b32 m0, -1
-; VI-NEXT: v_add_u32_e32 v0, vcc, 0, v1
-; VI-NEXT: v_add_u32_e32 v2, vcc, 16, v0
+; VI-NEXT: v_add_u32_e32 v6, vcc, 2, v0
; VI-NEXT: v_mov_b32_e32 v0, 9
+; VI-NEXT: v_add_u32_e32 v7, vcc, 0, v7
; VI-NEXT: v_mov_b32_e32 v1, 0
-; VI-NEXT: ds_inc_rtn_u64 v[0:1], v2, v[0:1]
+; VI-NEXT: v_add_u32_e32 v7, vcc, 16, v7
+; VI-NEXT: s_mov_b32 m0, -1
+; VI-NEXT: ds_inc_rtn_u64 v[0:1], v7, v[0:1]
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_mov_b32_e32 v2, s2
+; VI-NEXT: v_mov_b32_e32 v5, s1
; VI-NEXT: v_mov_b32_e32 v3, s3
-; VI-NEXT: flat_store_dword v[2:3], v4
-; VI-NEXT: v_mov_b32_e32 v3, s1
-; VI-NEXT: v_mov_b32_e32 v2, s0
-; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; VI-NEXT: v_mov_b32_e32 v4, s0
+; VI-NEXT: flat_store_dword v[2:3], v6
+; VI-NEXT: flat_store_dwordx2 v[4:5], v[0:1]
; VI-NEXT: s_endpgm
;
; GFX9-LABEL: atomic_inc_shl_base_lds_0_i64:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i32.ll
index 29c9729d2e34..0164678bbf8c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i32.ll
@@ -8,11 +8,11 @@ define amdgpu_kernel void @test_wave32(i32 %arg0, [8 x i32], i32 %saved) {
; GCN-NEXT: s_load_dword s0, s[4:5], 0x24
; GCN-NEXT: ; implicit-def: $vcc_hi
; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: s_cmp_eq_u32 s1, 0
+; GCN-NEXT: s_cmp_lg_u32 s1, 0
; GCN-NEXT: s_cselect_b32 s1, 1, 0
; GCN-NEXT: s_and_b32 s1, s1, 1
; GCN-NEXT: s_cmp_lg_u32 s1, 0
-; GCN-NEXT: s_cbranch_scc0 BB0_2
+; GCN-NEXT: s_cbranch_scc1 BB0_2
; GCN-NEXT: ; %bb.1: ; %mid
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: global_store_dword v[0:1], v0, off
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i64.ll
index c4ae325ba8ed..b02a296e2bdf 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i64.ll
@@ -7,11 +7,11 @@ define amdgpu_kernel void @test_wave64(i32 %arg0, i64 %saved) {
; GCN-NEXT: s_load_dword s2, s[4:5], 0x0
; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: s_cmp_eq_u32 s2, 0
+; GCN-NEXT: s_cmp_lg_u32 s2, 0
; GCN-NEXT: s_cselect_b32 s2, 1, 0
; GCN-NEXT: s_and_b32 s2, s2, 1
; GCN-NEXT: s_cmp_lg_u32 s2, 0
-; GCN-NEXT: s_cbranch_scc0 BB0_2
+; GCN-NEXT: s_cbranch_scc1 BB0_2
; GCN-NEXT: ; %bb.1: ; %mid
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: global_store_dword v[0:1], v0, off
More information about the llvm-commits
mailing list