[llvm] [AMDGPU] Add target hook to isGlobalMemoryObject (PR #112781)
Austin Kerbow via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 6 21:19:28 PST 2025
https://github.com/kerbowa updated https://github.com/llvm/llvm-project/pull/112781
From ee4e158b2564f94e095504cf7a41ad33bf30193e Mon Sep 17 00:00:00 2001
From: Austin Kerbow <Austin.Kerbow at amd.com>
Date: Mon, 22 Apr 2024 08:49:23 -0700
Subject: [PATCH 1/2] [AMDGPU] Add target hook to isGlobalMemoryObject
We want special handling for IGLP instructions in the scheduler, but they
should still be treated as having side effects by other passes. Add a
target hook to the ScheduleDAGInstrs DAG builder so that we have more
control over this.
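For context, here is a condensed sketch of the pattern this patch introduces
(the full changes are in the diff below; this is not a complete compilable
unit). The base query keeps the conservative barrier test, and the AMDGPU
scheduler DAG overrides it to exempt the IGLP scheduling pseudos:

  // Base implementation: the conservative default, unchanged in behavior.
  bool ScheduleDAGInstrs::isGlobalMemoryObject(MachineInstr *MI) {
    return MI->isCall() || MI->hasUnmodeledSideEffects() ||
           (MI->hasOrderedMemoryRef() && !MI->isDereferenceableInvariantLoad());
  }

  // Target override: IGLP pseudos (SCHED_BARRIER, SCHED_GROUP_BARRIER,
  // IGLP_OPT) no longer become barrier-chain nodes in the scheduler DAG,
  // while other passes still see their unmodeled side effects.
  bool GCNScheduleDAGMILive::isGlobalMemoryObject(MachineInstr *MI) {
    if (isIGLPInstr(MI))
      return false;
    return ScheduleDAGInstrs::isGlobalMemoryObject(MI);
  }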
---
llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h | 4 +++
llvm/lib/CodeGen/ScheduleDAGInstrs.cpp | 2 +-
llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp | 21 -----------
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 26 ++++++++++++++
llvm/lib/Target/AMDGPU/GCNSchedStrategy.h | 4 +++
.../AMDGPU/llvm.amdgcn.iglp.opt.exp.small.mir | 6 ++--
.../CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.ll | 35 +++++++++++++++++++
.../CodeGen/AMDGPU/sched-barrier-pre-RA.mir | 22 ++++++------
8 files changed, 84 insertions(+), 36 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h b/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h
index 822b06f080fa64..d9737ff007a85b 100644
--- a/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h
+++ b/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h
@@ -374,6 +374,10 @@ namespace llvm {
void addVRegDefDeps(SUnit *SU, unsigned OperIdx);
void addVRegUseDeps(SUnit *SU, unsigned OperIdx);
+ /// Returns true if MI is an instruction we are unable to reason about
+ /// (like a call or something with unmodeled side effects).
+ virtual bool isGlobalMemoryObject(MachineInstr *MI);
+
/// Returns a mask for which lanes get read/written by the given (register)
/// machine operand.
LaneBitmask getLaneMaskForMO(const MachineOperand &MO) const;
diff --git a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
index 6d3b3f34e8cabc..f5b3308cb36a63 100644
--- a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -549,7 +549,7 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
/// Returns true if MI is an instruction we are unable to reason about
/// (like a call or something with unmodeled side effects).
-static inline bool isGlobalMemoryObject(MachineInstr *MI) {
+bool ScheduleDAGInstrs::isGlobalMemoryObject(MachineInstr *MI) {
return MI->isCall() || MI->hasUnmodeledSideEffects() ||
(MI->hasOrderedMemoryRef() && !MI->isDereferenceableInvariantLoad());
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp b/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp
index ac01562e457f78..b5dd0d8b863315 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp
@@ -239,23 +239,6 @@ class SchedGroup {
}
};
-// Remove all existing edges from a SCHED_BARRIER or SCHED_GROUP_BARRIER.
-static void resetEdges(SUnit &SU, ScheduleDAGInstrs *DAG) {
- assert(SU.getInstr()->getOpcode() == AMDGPU::SCHED_BARRIER ||
- SU.getInstr()->getOpcode() == AMDGPU::SCHED_GROUP_BARRIER ||
- SU.getInstr()->getOpcode() == AMDGPU::IGLP_OPT);
-
- while (!SU.Preds.empty())
- for (auto &P : SU.Preds)
- SU.removePred(P);
-
- while (!SU.Succs.empty())
- for (auto &S : SU.Succs)
- for (auto &SP : S.getSUnit()->Preds)
- if (SP.getSUnit() == &SU)
- S.getSUnit()->removePred(SP);
-}
-
using SUToCandSGsPair = std::pair<SUnit *, SmallVector<int, 4>>;
using SUsToCandSGsVec = SmallVector<SUToCandSGsPair, 4>;
@@ -459,7 +442,6 @@ void PipelineSolver::makePipeline() {
// Command line requested IGroupLP doesn't have SGBarr
if (!SGBarr)
continue;
- resetEdges(*SGBarr, DAG);
SG.link(*SGBarr, false);
}
}
@@ -2611,7 +2593,6 @@ void IGroupLPDAGMutation::apply(ScheduleDAGInstrs *DAGInstrs) {
initSchedGroupBarrierPipelineStage(R);
FoundSB = true;
} else if (Opc == AMDGPU::IGLP_OPT) {
- resetEdges(*R, DAG);
if (!FoundSB && !FoundIGLP) {
FoundIGLP = true;
ShouldApplyIGLP = initIGLPOpt(*R);
@@ -2633,7 +2614,6 @@ void IGroupLPDAGMutation::addSchedBarrierEdges(SUnit &SchedBarrier) {
assert(MI.getOpcode() == AMDGPU::SCHED_BARRIER);
// Remove all existing edges from the SCHED_BARRIER that were added due to the
// instruction having side effects.
- resetEdges(SchedBarrier, DAG);
LLVM_DEBUG(dbgs() << "Building SchedGroup for SchedBarrier with Mask: "
<< MI.getOperand(0).getImm() << "\n");
auto InvertedMask =
@@ -2691,7 +2671,6 @@ void IGroupLPDAGMutation::initSchedGroupBarrierPipelineStage(
std::vector<SUnit>::reverse_iterator RIter) {
// Remove all existing edges from the SCHED_GROUP_BARRIER that were added due
// to the instruction having side effects.
- resetEdges(*RIter, DAG);
MachineInstr &SGB = *RIter->getInstr();
assert(SGB.getOpcode() == AMDGPU::SCHED_GROUP_BARRIER);
int32_t SGMask = SGB.getOperand(0).getImm();
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 1c23b237eaf4be..f5e0d7d5f9a73d 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -27,6 +27,7 @@
#include "AMDGPUIGroupLP.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#define DEBUG_TYPE "machine-scheduler"
@@ -1892,6 +1893,17 @@ void GCNScheduleDAGMILive::updateRegionBoundaries(
}
}
+static bool isIGLPInstr(MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ case AMDGPU::IGLP_OPT:
+ case AMDGPU::SCHED_BARRIER:
+ case AMDGPU::SCHED_GROUP_BARRIER:
+ return true;
+ default:
+ return false;
+ }
+}
+
static bool hasIGLPInstrs(ScheduleDAGInstrs *DAG) {
return any_of(*DAG, [](MachineBasicBlock::iterator MI) {
unsigned Opc = MI->getOpcode();
@@ -1899,11 +1911,25 @@ static bool hasIGLPInstrs(ScheduleDAGInstrs *DAG) {
});
}
+bool GCNScheduleDAGMILive::isGlobalMemoryObject(MachineInstr *MI) {
+ if (isIGLPInstr(MI))
+ return false;
+
+ return ScheduleDAGInstrs::isGlobalMemoryObject(MI);
+}
+
GCNPostScheduleDAGMILive::GCNPostScheduleDAGMILive(
MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S,
bool RemoveKillFlags)
: ScheduleDAGMI(C, std::move(S), RemoveKillFlags) {}
+bool GCNPostScheduleDAGMILive::isGlobalMemoryObject(MachineInstr *MI) {
+ if (isIGLPInstr(MI))
+ return false;
+
+ return ScheduleDAGInstrs::isGlobalMemoryObject(MI);
+}
+
void GCNPostScheduleDAGMILive::schedule() {
HasIGLPInstrs = hasIGLPInstrs(this);
if (HasIGLPInstrs) {
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
index 44db834a41f828..977edfd2b9b603 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -297,6 +297,8 @@ class GCNScheduleDAGMILive final : public ScheduleDAGMILive {
std::unique_ptr<GCNSchedStage> createSchedStage(GCNSchedStageID SchedStageID);
+ bool isGlobalMemoryObject(MachineInstr *MI) override;
+
public:
GCNScheduleDAGMILive(MachineSchedContext *C,
std::unique_ptr<MachineSchedStrategy> S);
@@ -490,6 +492,8 @@ class GCNPostScheduleDAGMILive final : public ScheduleDAGMI {
bool HasIGLPInstrs = false;
+ bool isGlobalMemoryObject(MachineInstr *MI) override;
+
public:
void schedule() override;
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.exp.small.mir b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.exp.small.mir
index bc4d35f5a1f9a8..2707c2209e7c92 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.exp.small.mir
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.exp.small.mir
@@ -25,9 +25,6 @@
; GCN-NEXT: ; implicit-def: $vgpr79
; GCN-NEXT: ; implicit-def: $vgpr80
; GCN-NEXT: ; implicit-def: $vgpr91
- ; GCN-NEXT: ;;#ASMSTART
- ; GCN-NEXT: s_waitcnt vmcnt(8)
- ; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: ; kill: killed $sgpr16_sgpr17_sgpr18_sgpr19
; GCN-NEXT: ; iglp_opt mask(0x00000002)
; GCN-NEXT: s_nop 1
@@ -477,6 +474,9 @@
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: buffer_inv sc0 sc1
; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[32:47], v[4:5], v[8:9], v[32:47]
+ ; GCN-NEXT: ;;#ASMSTART
+ ; GCN-NEXT: s_waitcnt vmcnt(8)
+ ; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: v_mov_b32_e32 v4, 0
; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[32:47], v[6:7], v[0:1], v[32:47]
; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[48:63], v[2:3], v[0:1], v[48:63]
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.ll
index b532aa9cd7e86a..ae949b1efef001 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.ll
@@ -285,6 +285,41 @@ entry:
ret void
}
+define amdgpu_kernel void @test_iglp_opt_asm_sideeffect(ptr addrspace(3) noalias %in, ptr addrspace(3) noalias %out) #0 {
+; GCN-LABEL: test_iglp_opt_asm_sideeffect:
+; GCN: ; %bb.0: ; %entry
+; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
+; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GCN-NEXT: v_and_b32_e32 v0, 0xffc, v0
+; GCN-NEXT: ; iglp_opt mask(0x00000000)
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: v_add_u32_e32 v1, s0, v0
+; GCN-NEXT: ds_read_b32 v1, v1
+; GCN-NEXT: v_add_u32_e32 v0, s1, v0
+; GCN-NEXT: v_mov_b32_e32 v2, s0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: ds_write_b32 v0, v1
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: ds_read_b32 v0, v2 offset:256
+; GCN-NEXT: v_mov_b32_e32 v1, s1
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: ds_write_b32 v1, v0 offset:256
+; GCN-NEXT: s_endpgm
+entry:
+ %idx = call i32 @llvm.amdgcn.workitem.id.x()
+ %load.0.addr = getelementptr float, ptr addrspace(3) %in, i32 %idx
+ %load.0 = load float, ptr addrspace(3) %load.0.addr
+ %store.0.addr = getelementptr float, ptr addrspace(3) %out, i32 %idx
+ store float %load.0, ptr addrspace(3) %store.0.addr
+ call void asm sideeffect "", ""() #1
+ call void @llvm.amdgcn.iglp.opt(i32 0) #1
+ %load.1.addr = getelementptr float, ptr addrspace(3) %in, i32 64
+ %load.1 = load float, ptr addrspace(3) %load.1.addr
+ %store.1.addr = getelementptr float, ptr addrspace(3) %out, i32 64
+ store float %load.1, ptr addrspace(3) %store.1.addr
+ ret void
+}
declare void @llvm.amdgcn.iglp.opt(i32) #1
declare i32 @llvm.amdgcn.workitem.id.x() #1
diff --git a/llvm/test/CodeGen/AMDGPU/sched-barrier-pre-RA.mir b/llvm/test/CodeGen/AMDGPU/sched-barrier-pre-RA.mir
index bdfc8227fdccb1..7295506213c4b2 100644
--- a/llvm/test/CodeGen/AMDGPU/sched-barrier-pre-RA.mir
+++ b/llvm/test/CodeGen/AMDGPU/sched-barrier-pre-RA.mir
@@ -96,10 +96,10 @@ body: |
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
; CHECK-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_]], [[DEF]], 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
+ ; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: SCHED_BARRIER 1
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
; CHECK-NEXT: [[V_MUL_LO_U32_e64_1:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR1]], [[GLOBAL_LOAD_DWORD_SADDR1]], implicit $exec
- ; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_1]], [[DEF]], 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
; CHECK-NEXT: S_ENDPGM 0
%0:sreg_64_xexec_xnull = IMPLICIT_DEF
@@ -163,19 +163,19 @@ body: |
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
; CHECK-NEXT: [[DEF2:%[0-9]+]]:areg_128 = IMPLICIT_DEF
- ; CHECK-NEXT: [[V_MFMA_F32_4X4X1F32_e64_:%[0-9]+]]:areg_128 = V_MFMA_F32_4X4X1F32_e64 [[DEF1]], [[GLOBAL_LOAD_DWORD_SADDR]], [[DEF2]], 0, 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec
- ; CHECK-NEXT: [[V_MFMA_F32_4X4X1F32_e64_1:%[0-9]+]]:areg_128 = V_MFMA_F32_4X4X1F32_e64 [[DEF1]], [[GLOBAL_LOAD_DWORD_SADDR]], [[V_MFMA_F32_4X4X1F32_e64_]], 0, 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_F32_4X4X1F32_e64_:%[0-9]+]]:areg_128 = V_MFMA_F32_4X4X1F32_e64 [[DEF1]], [[GLOBAL_LOAD_DWORD_SADDR]], [[DEF2]], 0, 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MUL_LO_U32_e64_1:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[DEF1]], implicit $exec
- ; CHECK-NEXT: [[V_MFMA_F32_4X4X1F32_e64_2:%[0-9]+]]:areg_128 = V_MFMA_F32_4X4X1F32_e64 [[DEF1]], [[GLOBAL_LOAD_DWORD_SADDR]], [[V_MFMA_F32_4X4X1F32_e64_1]], 0, 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_F32_4X4X1F32_e64_1:%[0-9]+]]:areg_128 = V_MFMA_F32_4X4X1F32_e64 [[DEF1]], [[GLOBAL_LOAD_DWORD_SADDR]], [[V_MFMA_F32_4X4X1F32_e64_]], 0, 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_MUL_LO_U32_e64_2:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[DEF1]], implicit $exec
- ; CHECK-NEXT: [[V_MFMA_F32_4X4X1F32_e64_3:%[0-9]+]]:areg_128 = V_MFMA_F32_4X4X1F32_e64 [[DEF1]], [[GLOBAL_LOAD_DWORD_SADDR]], [[V_MFMA_F32_4X4X1F32_e64_2]], 0, 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_F32_4X4X1F32_e64_2:%[0-9]+]]:areg_128 = V_MFMA_F32_4X4X1F32_e64 [[DEF1]], [[GLOBAL_LOAD_DWORD_SADDR]], [[V_MFMA_F32_4X4X1F32_e64_1]], 0, 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_]], [[DEF]], 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
+ ; CHECK-NEXT: [[V_MFMA_F32_4X4X1F32_e64_3:%[0-9]+]]:areg_128 = V_MFMA_F32_4X4X1F32_e64 [[DEF1]], [[GLOBAL_LOAD_DWORD_SADDR]], [[V_MFMA_F32_4X4X1F32_e64_2]], 0, 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: [[V_MFMA_F32_4X4X1F32_e64_4:%[0-9]+]]:areg_128 = V_MFMA_F32_4X4X1F32_e64 [[DEF1]], [[GLOBAL_LOAD_DWORD_SADDR]], [[V_MFMA_F32_4X4X1F32_e64_3]], 0, 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: SCHED_BARRIER 4
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
; CHECK-NEXT: [[V_MUL_LO_U32_e64_3:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR1]], [[GLOBAL_LOAD_DWORD_SADDR1]], implicit $exec
- ; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_3]], [[DEF]], 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MUL_LO_U32_e64_1]], implicit [[V_MUL_LO_U32_e64_2]], implicit [[V_MFMA_F32_4X4X1F32_e64_4]]
%0:sreg_64_xexec_xnull = IMPLICIT_DEF
@@ -258,10 +258,10 @@ body: |
; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
- ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
; CHECK-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_]], [[DEF]], 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
; CHECK-NEXT: S_NOP 0
+ ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
; CHECK-NEXT: SCHED_BARRIER 16
; CHECK-NEXT: [[V_MUL_LO_U32_e64_1:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR1]], [[GLOBAL_LOAD_DWORD_SADDR1]], implicit $exec
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_1]], [[DEF]], 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
@@ -290,10 +290,10 @@ body: |
; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
- ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
; CHECK-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_]], [[DEF]], 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
; CHECK-NEXT: S_NOP 0
+ ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
; CHECK-NEXT: SCHED_BARRIER 32
; CHECK-NEXT: [[V_MUL_LO_U32_e64_1:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR1]], [[GLOBAL_LOAD_DWORD_SADDR1]], implicit $exec
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_1]], [[DEF]], 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
@@ -354,9 +354,9 @@ body: |
; CHECK: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DS_READ_U16_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U16_gfx9 [[DEF]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 3)
; CHECK-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[DS_READ_U16_gfx9_]], [[DS_READ_U16_gfx9_]], implicit $exec
- ; CHECK-NEXT: [[DS_READ_U16_gfx9_1:%[0-9]+]]:vgpr_32 = DS_READ_U16_gfx9 [[DEF]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 3)
; CHECK-NEXT: DS_WRITE_B32 [[V_MUL_LO_U32_e64_]], [[DEF]], 0, 16, implicit $m0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 3)
; CHECK-NEXT: S_NOP 0
+ ; CHECK-NEXT: [[DS_READ_U16_gfx9_1:%[0-9]+]]:vgpr_32 = DS_READ_U16_gfx9 [[DEF]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 3)
; CHECK-NEXT: SCHED_BARRIER 128
; CHECK-NEXT: [[V_MUL_LO_U32_e64_1:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[DS_READ_U16_gfx9_1]], [[DS_READ_U16_gfx9_1]], implicit $exec
; CHECK-NEXT: dead [[DEF1:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
@@ -386,9 +386,9 @@ body: |
; CHECK: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DS_READ_U16_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U16_gfx9 [[DEF]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 3)
; CHECK-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[DS_READ_U16_gfx9_]], [[DS_READ_U16_gfx9_]], implicit $exec
- ; CHECK-NEXT: [[DS_READ_U16_gfx9_1:%[0-9]+]]:vgpr_32 = DS_READ_U16_gfx9 [[DEF]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 3)
; CHECK-NEXT: DS_WRITE_B32 [[V_MUL_LO_U32_e64_]], [[DEF]], 0, 16, implicit $m0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 3)
; CHECK-NEXT: S_NOP 0
+ ; CHECK-NEXT: [[DS_READ_U16_gfx9_1:%[0-9]+]]:vgpr_32 = DS_READ_U16_gfx9 [[DEF]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 3)
; CHECK-NEXT: SCHED_BARRIER 256
; CHECK-NEXT: [[V_MUL_LO_U32_e64_1:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[DS_READ_U16_gfx9_1]], [[DS_READ_U16_gfx9_1]], implicit $exec
; CHECK-NEXT: dead [[DEF1:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
@@ -453,7 +453,6 @@ body: |
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
; CHECK-NEXT: [[DEF2:%[0-9]+]]:areg_128 = IMPLICIT_DEF
- ; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec
; CHECK-NEXT: [[V_MUL_LO_U32_e64_1:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[DEF1]], implicit $exec
; CHECK-NEXT: [[V_MFMA_F32_4X4X1F32_e64_:%[0-9]+]]:areg_128 = V_MFMA_F32_4X4X1F32_e64 [[DEF1]], [[GLOBAL_LOAD_DWORD_SADDR]], [[DEF2]], 0, 0, 0, implicit $mode, implicit $exec
@@ -462,6 +461,7 @@ body: |
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_]], [[DEF]], 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
; CHECK-NEXT: SCHED_BARRIER 12
; CHECK-NEXT: [[V_MFMA_F32_4X4X1F32_e64_2:%[0-9]+]]:areg_128 = V_MFMA_F32_4X4X1F32_e64 [[DEF1]], [[GLOBAL_LOAD_DWORD_SADDR]], [[V_MFMA_F32_4X4X1F32_e64_1]], 0, 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: SCHED_BARRIER 8
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
From 37416aab303accdcc129c98e942eb93c35eaf3c1 Mon Sep 17 00:00:00 2001
From: Austin Kerbow <Austin.Kerbow at amd.com>
Date: Mon, 6 Jan 2025 20:43:03 -0800
Subject: [PATCH 2/2] Based on offline discussion, move the function to TII.
 Fix ambiguity in IGLP instr utility functions.
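A short sketch of the disambiguation this revision makes, using the names from
the diff below (illustrative only; the diff is authoritative).
SIInstrInfo::isIGLP() covers all three scheduling pseudos, so none of them are
treated as global memory objects, while the scheduler-local
isIGLPMutationOnly() matches only the two opcodes whose presence requires all
non-IGLP DAG mutations to be disabled:

  // All IGLP-related pseudos -- never barrier-chain nodes in the DAG.
  bool SIInstrInfo::isIGLP(unsigned Opcode) const {
    return Opcode == AMDGPU::SCHED_BARRIER ||
           Opcode == AMDGPU::SCHED_GROUP_BARRIER || Opcode == AMDGPU::IGLP_OPT;
  }

  // Only the opcodes that are mutually exclusive with non-IGLP mutations.
  static bool isIGLPMutationOnly(unsigned Opcode) {
    return Opcode == AMDGPU::SCHED_GROUP_BARRIER || Opcode == AMDGPU::IGLP_OPT;
  }

The DAG builder itself now consults the hook through the subtarget's
TargetInstrInfo (TII->isGlobalMemoryObject(&MI)), as shown in the
ScheduleDAGInstrs.cpp hunk below.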
---
llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h | 4 --
llvm/include/llvm/CodeGen/TargetInstrInfo.h | 4 ++
llvm/lib/CodeGen/ScheduleDAGInstrs.cpp | 10 ++---
llvm/lib/CodeGen/TargetInstrInfo.cpp | 5 +++
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 38 ++++---------------
llvm/lib/Target/AMDGPU/GCNSchedStrategy.h | 4 --
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 7 ++++
llvm/lib/Target/AMDGPU/SIInstrInfo.h | 9 +++++
.../CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.ll | 2 +-
9 files changed, 37 insertions(+), 46 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h b/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h
index d9737ff007a85b..822b06f080fa64 100644
--- a/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h
+++ b/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h
@@ -374,10 +374,6 @@ namespace llvm {
void addVRegDefDeps(SUnit *SU, unsigned OperIdx);
void addVRegUseDeps(SUnit *SU, unsigned OperIdx);
- /// Returns true if MI is an instruction we are unable to reason about
- /// (like a call or something with unmodeled side effects).
- virtual bool isGlobalMemoryObject(MachineInstr *MI);
-
/// Returns a mask for which lanes get read/written by the given (register)
/// machine operand.
LaneBitmask getLaneMaskForMO(const MachineOperand &MO) const;
diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index 408adcd330b846..165af902e42d08 100644
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -136,6 +136,10 @@ class TargetInstrInfo : public MCInstrInfo {
const TargetRegisterInfo *TRI,
const MachineFunction &MF) const;
+ /// Returns true if MI is an instruction we are unable to reason about
+ /// (like a call or something with unmodeled side effects).
+ virtual bool isGlobalMemoryObject(const MachineInstr *MI) const;
+
/// Return true if the instruction is trivially rematerializable, meaning it
/// has no side effects and requires no operands that aren't always available.
/// This means the only allowed uses are constants and unallocatable physical
diff --git a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
index f5b3308cb36a63..8e3e06bf57153e 100644
--- a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -35,6 +35,7 @@
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleDFS.h"
#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Config/llvm-config.h"
@@ -547,12 +548,6 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
}
}
-/// Returns true if MI is an instruction we are unable to reason about
-/// (like a call or something with unmodeled side effects).
-bool ScheduleDAGInstrs::isGlobalMemoryObject(MachineInstr *MI) {
- return MI->isCall() || MI->hasUnmodeledSideEffects() ||
- (MI->hasOrderedMemoryRef() && !MI->isDereferenceableInvariantLoad());
-}
void ScheduleDAGInstrs::addChainDependency (SUnit *SUa, SUnit *SUb,
unsigned Latency) {
@@ -899,8 +894,9 @@ void ScheduleDAGInstrs::buildSchedGraph(AAResults *AA,
// isLoadFromStackSLot are not usable after stack slots are lowered to
// actual addresses).
+ const TargetInstrInfo *TII = ST.getInstrInfo();
// This is a barrier event that acts as a pivotal node in the DAG.
- if (isGlobalMemoryObject(&MI)) {
+ if (TII->isGlobalMemoryObject(&MI)) {
// Become the barrier chain.
if (BarrierChain)
diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp
index 38bd0b0ba4114c..770b851f3607a0 100644
--- a/llvm/lib/CodeGen/TargetInstrInfo.cpp
+++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp
@@ -1917,3 +1917,8 @@ bool TargetInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
}
return true;
}
+
+bool TargetInstrInfo::isGlobalMemoryObject(const MachineInstr *MI) const {
+ return MI->isCall() || MI->hasUnmodeledSideEffects() ||
+ (MI->hasOrderedMemoryRef() && !MI->isDereferenceableInvariantLoad());
+}
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index f5e0d7d5f9a73d..f5bbc5482d347c 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -27,7 +27,6 @@
#include "AMDGPUIGroupLP.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
-#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#define DEBUG_TYPE "machine-scheduler"
@@ -189,6 +188,12 @@ static void getRegisterPressures(
Pressure[AMDGPU::RegisterPressureSets::AGPR_32] = NewPressure.getAGPRNum();
}
+// Return true if the instruction is mutually exclusive with all non-IGLP DAG
+// mutations, requiring all other mutations to be disabled.
+static bool isIGLPMutationOnly(unsigned Opcode) {
+ return Opcode == AMDGPU::SCHED_GROUP_BARRIER || Opcode == AMDGPU::IGLP_OPT;
+}
+
void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
bool AtTop,
const RegPressureTracker &RPTracker,
@@ -1153,8 +1158,7 @@ bool GCNSchedStage::initGCNRegion() {
StageID == GCNSchedStageID::ILPInitialSchedule) {
for (auto &I : DAG) {
Unsched.push_back(&I);
- if (I.getOpcode() == AMDGPU::SCHED_GROUP_BARRIER ||
- I.getOpcode() == AMDGPU::IGLP_OPT)
+ if (isIGLPMutationOnly(I.getOpcode()))
DAG.RegionsWithIGLPInstrs[RegionIdx] = true;
}
} else {
@@ -1893,43 +1897,17 @@ void GCNScheduleDAGMILive::updateRegionBoundaries(
}
}
-static bool isIGLPInstr(MachineInstr *MI) {
- switch (MI->getOpcode()) {
- case AMDGPU::IGLP_OPT:
- case AMDGPU::SCHED_BARRIER:
- case AMDGPU::SCHED_GROUP_BARRIER:
- return true;
- default:
- return false;
- }
-}
-
static bool hasIGLPInstrs(ScheduleDAGInstrs *DAG) {
return any_of(*DAG, [](MachineBasicBlock::iterator MI) {
- unsigned Opc = MI->getOpcode();
- return Opc == AMDGPU::SCHED_GROUP_BARRIER || Opc == AMDGPU::IGLP_OPT;
+ return isIGLPMutationOnly(MI->getOpcode());
});
}
-bool GCNScheduleDAGMILive::isGlobalMemoryObject(MachineInstr *MI) {
- if (isIGLPInstr(MI))
- return false;
-
- return ScheduleDAGInstrs::isGlobalMemoryObject(MI);
-}
-
GCNPostScheduleDAGMILive::GCNPostScheduleDAGMILive(
MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S,
bool RemoveKillFlags)
: ScheduleDAGMI(C, std::move(S), RemoveKillFlags) {}
-bool GCNPostScheduleDAGMILive::isGlobalMemoryObject(MachineInstr *MI) {
- if (isIGLPInstr(MI))
- return false;
-
- return ScheduleDAGInstrs::isGlobalMemoryObject(MI);
-}
-
void GCNPostScheduleDAGMILive::schedule() {
HasIGLPInstrs = hasIGLPInstrs(this);
if (HasIGLPInstrs) {
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
index 977edfd2b9b603..44db834a41f828 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -297,8 +297,6 @@ class GCNScheduleDAGMILive final : public ScheduleDAGMILive {
std::unique_ptr<GCNSchedStage> createSchedStage(GCNSchedStageID SchedStageID);
- bool isGlobalMemoryObject(MachineInstr *MI) override;
-
public:
GCNScheduleDAGMILive(MachineSchedContext *C,
std::unique_ptr<MachineSchedStrategy> S);
@@ -492,8 +490,6 @@ class GCNPostScheduleDAGMILive final : public ScheduleDAGMI {
bool HasIGLPInstrs = false;
- bool isGlobalMemoryObject(MachineInstr *MI) override;
-
public:
void schedule() override;
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index e6f333fbb87843..8fc32d9e60bf20 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -10051,3 +10051,10 @@ void SIInstrInfo::enforceOperandRCAlignment(MachineInstr &MI,
Op.setSubReg(AMDGPU::sub0);
MI.addOperand(MachineOperand::CreateReg(NewVR, false, true));
}
+
+bool SIInstrInfo::isGlobalMemoryObject(const MachineInstr *MI) const {
+ if (isIGLP(*MI))
+ return false;
+
+ return TargetInstrInfo::isGlobalMemoryObject(MI);
+}
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index 8f9ca6141816d4..d49939bf81b106 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -242,6 +242,8 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
bool areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, int64_t &Offset0,
int64_t &Offset1) const override;
+ bool isGlobalMemoryObject(const MachineInstr *MI) const override;
+
bool getMemOperandsWithOffsetWidth(
const MachineInstr &LdSt,
SmallVectorImpl<const MachineOperand *> &BaseOps, int64_t &Offset,
@@ -968,6 +970,13 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
return get(Opcode).TSFlags & SIInstrFlags::TiedSourceNotRead;
}
+ bool isIGLP(unsigned Opcode) const {
+ return Opcode == AMDGPU::SCHED_BARRIER ||
+ Opcode == AMDGPU::SCHED_GROUP_BARRIER || Opcode == AMDGPU::IGLP_OPT;
+ }
+
+ bool isIGLP(const MachineInstr &MI) const { return isIGLP(MI.getOpcode()); }
+
static unsigned getNonSoftWaitcntOpcode(unsigned Opcode) {
switch (Opcode) {
case AMDGPU::S_WAITCNT_soft:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.ll
index ae949b1efef001..08c0d154329155 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.ll
@@ -288,7 +288,7 @@ entry:
define amdgpu_kernel void @test_iglp_opt_asm_sideeffect(ptr addrspace(3) noalias %in, ptr addrspace(3) noalias %out) #0 {
; GCN-LABEL: test_iglp_opt_asm_sideeffect:
; GCN: ; %bb.0: ; %entry
-; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
+; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT: v_and_b32_e32 v0, 0xffc, v0
; GCN-NEXT: ; iglp_opt mask(0x00000000)