[llvm] ddd3807 - [AMDGPU] Use new target MMO flag MONoClobber
Jay Foad via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 2 09:12:46 PST 2022
Author: Jay Foad
Date: 2022-02-02T17:12:36Z
New Revision: ddd3807e6952dedfd842b3b10076a614f75c1979
URL: https://github.com/llvm/llvm-project/commit/ddd3807e6952dedfd842b3b10076a614f75c1979
DIFF: https://github.com/llvm/llvm-project/commit/ddd3807e6952dedfd842b3b10076a614f75c1979.diff
LOG: [AMDGPU] Use new target MMO flag MONoClobber
This allows us to set the noclobber flag on the MMO (MachineMemOperand) of a
load instruction instead of on the pointer. This fixes a bug where noclobber
was being applied to all loads from the same pointer, even if some of
them were clobbered.
Differential Revision: https://reviews.llvm.org/D118775
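To illustrate the bug (a minimal, hypothetical kernel; the in-tree case is
@uniform_load_store_load in global_smrd.ll below): with the old scheme the
!amdgpu.noclobber metadata was attached to the pointer, so every load of that
pointer inherited it, including a load that follows an aliasing store. With
this change the flag lives on each load's own MMO, so only the first load is
marked noclobber.

; Hypothetical sketch, not the actual test. Previously !amdgpu.noclobber on
; %ptr (or its GEP) let both loads be selected as scalar (SMEM) loads, even
; though the store clobbers the memory read by %second. With MONoClobber on
; the MMO, only %first is marked.
define amdgpu_kernel void @load_store_load(i32 addrspace(1)* %ptr,
                                           i32 addrspace(1)* %out) {
entry:
  %first = load i32, i32 addrspace(1)* %ptr, align 4   ; no clobber on any path: safe
  store i32 %first, i32 addrspace(1)* %ptr, align 4    ; clobbers %ptr
  %second = load i32, i32 addrspace(1)* %ptr, align 4  ; must not be marked noclobber
  %sum = add i32 %first, %second
  store i32 %sum, i32 addrspace(1)* %out, align 4
  ret void
}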
Added:
llvm/test/CodeGen/MIR/AMDGPU/target-memoperands.mir
Modified:
llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/lib/Target/AMDGPU/SIISelLowering.h
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
llvm/lib/Target/AMDGPU/SIInstrInfo.h
llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
llvm/test/CodeGen/AMDGPU/annotate-noclobber.ll
llvm/test/CodeGen/AMDGPU/global_smrd.ll
llvm/test/CodeGen/AMDGPU/noclobber-barrier.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
index bebf032b5535a..b7506f2d1baa2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
@@ -195,11 +195,11 @@ void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) {
}
}
- if (PtrI) {
+ if (PtrI)
setUniformMetadata(PtrI);
- if (NotClobbered)
- setNoClobberMetadata(PtrI);
- }
+
+ if (NotClobbered)
+ setNoClobberMetadata(&I);
}
bool AMDGPUAnnotateUniformValues::doInitialization(Module &M) {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index de2dccef804ac..31097a4fc3094 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -428,11 +428,6 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappingsIntrinsicWSideEffects(
}
}
-static bool memOpHasNoClobbered(const MachineMemOperand *MMO) {
- const Instruction *I = dyn_cast_or_null<Instruction>(MMO->getValue());
- return I && I->getMetadata("amdgpu.noclobber");
-}
-
// FIXME: Returns uniform if there's no source value information. This is
// probably wrong.
static bool isScalarLoadLegal(const MachineInstr &MI) {
@@ -451,7 +446,7 @@ static bool isScalarLoadLegal(const MachineInstr &MI) {
// spaces.
(IsConst || !MMO->isVolatile()) &&
// Memory must be known constant, or not written before this load.
- (IsConst || MMO->isInvariant() || memOpHasNoClobbered(MMO)) &&
+ (IsConst || MMO->isInvariant() || (MMO->getFlags() & MONoClobber)) &&
AMDGPUInstrInfo::isUniformMMO(MMO);
}
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index e2f4a0896bc32..dc89a3f2554be 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1639,9 +1639,7 @@ EVT SITargetLowering::getOptimalMemOpType(
bool SITargetLowering::isMemOpHasNoClobberedMemOperand(const SDNode *N) const {
const MemSDNode *MemNode = cast<MemSDNode>(N);
- const Value *Ptr = MemNode->getMemOperand()->getValue();
- const Instruction *I = dyn_cast_or_null<Instruction>(Ptr);
- return I && I->getMetadata("amdgpu.noclobber");
+ return MemNode->getMemOperand()->getFlags() & MONoClobber;
}
bool SITargetLowering::isNonGlobalAddrSpace(unsigned AS) {
@@ -12612,3 +12610,11 @@ bool SITargetLowering::isReassocProfitable(SelectionDAG &DAG, SDValue N0,
return (DAG.isBaseWithConstantOffset(N0) &&
hasMemSDNodeUser(*N0->use_begin()));
}
+
+MachineMemOperand::Flags
+SITargetLowering::getTargetMMOFlags(const Instruction &I) const {
+ // Propagate metadata set by AMDGPUAnnotateUniformValues to the MMO of a load.
+ if (I.getMetadata("amdgpu.noclobber"))
+ return MONoClobber;
+ return MachineMemOperand::MONone;
+}
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index bf81e082b478e..98e6b9bbc2ebf 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -505,6 +505,9 @@ class SITargetLowering final : public AMDGPUTargetLowering {
std::pair<InstructionCost, MVT> getTypeLegalizationCost(const DataLayout &DL,
Type *Ty) const;
+
+ MachineMemOperand::Flags
+ getTargetMMOFlags(const Instruction &I) const override;
};
} // End namespace llvm
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 0a2f9381e71fd..b422750f8156b 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -7513,6 +7513,16 @@ SIInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
return makeArrayRef(TargetFlags);
}
+ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
+SIInstrInfo::getSerializableMachineMemOperandTargetFlags() const {
+ static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
+ {
+ {MONoClobber, "amdgpu-noclobber"},
+ };
+
+ return makeArrayRef(TargetFlags);
+}
+
bool SIInstrInfo::isBasicBlockPrologue(const MachineInstr &MI) const {
return !MI.isTerminator() && MI.getOpcode() != AMDGPU::COPY &&
MI.modifiesRegister(AMDGPU::EXEC, &RI);
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index e551d6c7223fd..239980e1e9f78 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -35,6 +35,11 @@ class RegScavenger;
class TargetRegisterClass;
class ScheduleHazardRecognizer;
+/// Mark the MMO of a uniform load if there are no potentially clobbering stores
+/// on any path from the start of an entry function to this load.
+static const MachineMemOperand::Flags MONoClobber =
+ MachineMemOperand::MOTargetFlag1;
+
class SIInstrInfo final : public AMDGPUGenInstrInfo {
private:
const SIRegisterInfo RI;
@@ -1036,6 +1041,9 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
ArrayRef<std::pair<unsigned, const char *>>
getSerializableDirectMachineOperandTargetFlags() const override;
+ ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
+ getSerializableMachineMemOperandTargetFlags() const override;
+
ScheduleHazardRecognizer *
CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
const ScheduleDAG *DAG) const override;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel.ll
index 8992621229a84..edcc3033a97dc 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel.ll
@@ -1678,7 +1678,7 @@ define amdgpu_kernel void @byref_global_i32_arg(i32 addrspace(1)* nocapture %out
; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
; HSA-VI-NEXT: [[ADDRSPACE_CAST:%[0-9]+]]:_(p1) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4)
; HSA-VI-NEXT: [[COPY1:%[0-9]+]]:_(p1) = COPY [[ADDRSPACE_CAST]](p1)
- ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p1) :: (dereferenceable load (s32) from %ir.1, addrspace 1)
+ ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p1) :: (dereferenceable "amdgpu-noclobber" load (s32) from %ir.1, addrspace 1)
; HSA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
; HSA-VI-NEXT: S_ENDPGM 0
; LEGACY-MESA-VI-LABEL: name: byref_global_i32_arg
@@ -1693,7 +1693,7 @@ define amdgpu_kernel void @byref_global_i32_arg(i32 addrspace(1)* nocapture %out
; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
; LEGACY-MESA-VI-NEXT: [[ADDRSPACE_CAST:%[0-9]+]]:_(p1) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4)
; LEGACY-MESA-VI-NEXT: [[COPY1:%[0-9]+]]:_(p1) = COPY [[ADDRSPACE_CAST]](p1)
- ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p1) :: (dereferenceable load (s32) from %ir.1, addrspace 1)
+ ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p1) :: (dereferenceable "amdgpu-noclobber" load (s32) from %ir.1, addrspace 1)
; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
; LEGACY-MESA-VI-NEXT: S_ENDPGM 0
%in = load i32, i32 addrspace(1)* %in.byref
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
index baf0b4d29d819..f15d00cd6881c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
@@ -3213,7 +3213,7 @@ define amdgpu_kernel void @test_call_external_void_func_v8i32() #0 {
; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `<8 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4)
- ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<8 x s32>) from %ir.ptr, addrspace 1)
+ ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<8 x s32>) from %ir.ptr, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v8i32
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
@@ -3349,7 +3349,7 @@ define amdgpu_kernel void @test_call_external_void_func_v16i32() #0 {
; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `<16 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4)
- ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s32>) from %ir.ptr, addrspace 1)
+ ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<16 x s32>) from %ir.ptr, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v16i32
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
@@ -3423,7 +3423,7 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 {
; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `<32 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4)
- ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<32 x s32>) from %ir.ptr, addrspace 1)
+ ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<32 x s32>) from %ir.ptr, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
@@ -3518,7 +3518,7 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 {
; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `<32 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4)
- ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<32 x s32>) from %ir.ptr0, addrspace 1)
+ ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<32 x s32>) from %ir.ptr0, addrspace 1)
; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[DEF1]](p1) :: (load (s32) from `i32 addrspace(1)* undef`, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32_i32
@@ -3618,7 +3618,7 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i8_i8_i16() #0 {
; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p1) = COPY [[DEF1]](p1)
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `<32 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4)
- ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<32 x s32>) from %ir.ptr0, addrspace 1)
+ ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<32 x s32>) from %ir.ptr0, addrspace 1)
; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[DEF1]](p1) :: (load (s8) from `i8 addrspace(1)* undef`, addrspace 1)
; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s16) = G_LOAD [[COPY10]](p1) :: (load (s16) from `i16 addrspace(1)* undef`, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
@@ -3729,7 +3729,7 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_p3_p5() #0 {
; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p1) = COPY [[DEF1]](p1)
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `<32 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4)
- ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<32 x s32>) from %ir.ptr0, addrspace 1)
+ ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<32 x s32>) from %ir.ptr0, addrspace 1)
; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(p3) = G_LOAD [[DEF1]](p1) :: (load (p3) from `i8 addrspace(3)* addrspace(1)* undef`, addrspace 1)
; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(p5) = G_LOAD [[COPY10]](p1) :: (load (p5) from `i8 addrspace(5)* addrspace(1)* undef`, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
@@ -3832,10 +3832,10 @@ define amdgpu_kernel void @test_call_external_void_func_struct_i8_i32() #0 {
; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `{ i8, i32 } addrspace(1)* addrspace(4)* undef`, addrspace 4)
- ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p1) :: (load (s8) from %ir.ptr0, align 4, addrspace 1)
+ ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (s8) from %ir.ptr0, align 4, addrspace 1)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64)
- ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from %ir.ptr0 + 4, addrspace 1)
+ ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: ("amdgpu-noclobber" load (s32) from %ir.ptr0 + 4, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_struct_i8_i32
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
@@ -4134,7 +4134,7 @@ define amdgpu_kernel void @test_call_external_void_func_v2i8() #0 {
; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `<2 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4)
- ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[LOAD]](p1) :: (load (<2 x s8>) from %ir.ptr, addrspace 1)
+ ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<2 x s8>) from %ir.ptr, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2i8
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
@@ -4198,7 +4198,7 @@ define amdgpu_kernel void @test_call_external_void_func_v3i8() #0 {
; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `<3 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4)
- ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[LOAD]](p1) :: (load (<3 x s8>) from %ir.ptr, align 4, addrspace 1)
+ ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<3 x s8>) from %ir.ptr, align 4, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v3i8
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
@@ -4265,7 +4265,7 @@ define amdgpu_kernel void @test_call_external_void_func_v4i8() #0 {
; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `<4 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4)
- ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[LOAD]](p1) :: (load (<4 x s8>) from %ir.ptr, addrspace 1)
+ ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<4 x s8>) from %ir.ptr, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v4i8
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
@@ -4335,7 +4335,7 @@ define amdgpu_kernel void @test_call_external_void_func_v8i8() #0 {
; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `<8 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4)
- ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[LOAD]](p1) :: (load (<8 x s8>) from %ir.ptr, addrspace 1)
+ ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<8 x s8>) from %ir.ptr, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v8i8
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
@@ -4417,7 +4417,7 @@ define amdgpu_kernel void @test_call_external_void_func_v16i8() #0 {
; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `<16 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4)
- ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s8>) from %ir.ptr, addrspace 1)
+ ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<16 x s8>) from %ir.ptr, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v16i8
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
diff --git a/llvm/test/CodeGen/AMDGPU/annotate-noclobber.ll b/llvm/test/CodeGen/AMDGPU/annotate-noclobber.ll
index 580ea202addd6..48e266ae32301 100644
--- a/llvm/test/CodeGen/AMDGPU/annotate-noclobber.ll
+++ b/llvm/test/CodeGen/AMDGPU/annotate-noclobber.ll
@@ -3,8 +3,8 @@ target datalayout = "A5"
; OPT-LABEL: @amdgpu_noclobber_global(
-; OPT: %addr = getelementptr i32, i32 addrspace(1)* %in, i64 0, !amdgpu.uniform !0, !amdgpu.noclobber !0
-; OPT-NEXT: %load = load i32, i32 addrspace(1)* %addr, align 4
+; OPT: %addr = getelementptr i32, i32 addrspace(1)* %in, i64 0, !amdgpu.uniform !0
+; OPT-NEXT: %load = load i32, i32 addrspace(1)* %addr, align 4, !amdgpu.noclobber !0
define amdgpu_kernel void @amdgpu_noclobber_global( i32 addrspace(1)* %in, i32 addrspace(1)* %out) {
entry:
%addr = getelementptr i32, i32 addrspace(1)* %in, i64 0
diff --git a/llvm/test/CodeGen/AMDGPU/global_smrd.ll b/llvm/test/CodeGen/AMDGPU/global_smrd.ll
index 56d4b0faa4d30..533a1be4e5f08 100644
--- a/llvm/test/CodeGen/AMDGPU/global_smrd.ll
+++ b/llvm/test/CodeGen/AMDGPU/global_smrd.ll
@@ -24,12 +24,11 @@ bb:
}
; uniform loads before and after an aliasing store
-; FIXME: The second load should not be converted to an SMEM load!
; CHECK-LABEL: @uniform_load_store_load
; CHECK: s_load_dwordx4
; CHECK: s_load_dword
; CHECK: flat_store_dword
-; CHECK: s_load_dword
+; CHECK: flat_load_dword
; CHECK: flat_store_dword
define amdgpu_kernel void @uniform_load_store_load(float addrspace(1)* %arg0, float addrspace(1)* %arg1) {
diff --git a/llvm/test/CodeGen/AMDGPU/noclobber-barrier.ll b/llvm/test/CodeGen/AMDGPU/noclobber-barrier.ll
index 8858bca2934a3..691d699707c86 100644
--- a/llvm/test/CodeGen/AMDGPU/noclobber-barrier.ll
+++ b/llvm/test/CodeGen/AMDGPU/noclobber-barrier.ll
@@ -20,14 +20,14 @@
define amdgpu_kernel void @simple_barrier(i32 addrspace(1)* %arg) {
; CHECK-LABEL: @simple_barrier(
; CHECK-NEXT: bb:
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, i32 addrspace(1)* [[ARG:%.*]], i64 0, !amdgpu.uniform !0, !amdgpu.noclobber !0
-; CHECK-NEXT: [[I:%.*]] = load i32, i32 addrspace(1)* [[TMP0]], align 4
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, i32 addrspace(1)* [[ARG:%.*]], i64 0, !amdgpu.uniform !0
+; CHECK-NEXT: [[I:%.*]] = load i32, i32 addrspace(1)* [[TMP0]], align 4, !amdgpu.noclobber !0
; CHECK-NEXT: fence syncscope("workgroup") release
; CHECK-NEXT: tail call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT: fence syncscope("workgroup") acquire
; CHECK-NEXT: tail call void @llvm.amdgcn.wave.barrier()
-; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[ARG]], i64 1, !amdgpu.uniform !0, !amdgpu.noclobber !0
-; CHECK-NEXT: [[I2:%.*]] = load i32, i32 addrspace(1)* [[I1]], align 4
+; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[ARG]], i64 1, !amdgpu.uniform !0
+; CHECK-NEXT: [[I2:%.*]] = load i32, i32 addrspace(1)* [[I1]], align 4, !amdgpu.noclobber !0
; CHECK-NEXT: [[I3:%.*]] = add i32 [[I2]], [[I]]
; CHECK-NEXT: [[I4:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[ARG]], i64 2
; CHECK-NEXT: store i32 [[I3]], i32 addrspace(1)* [[I4]], align 4
@@ -59,8 +59,8 @@ bb:
define amdgpu_kernel void @memory_phi_no_clobber(i32 addrspace(1)* %arg) {
; CHECK-LABEL: @memory_phi_no_clobber(
; CHECK-NEXT: bb:
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, i32 addrspace(1)* [[ARG:%.*]], i64 0, !amdgpu.uniform !0, !amdgpu.noclobber !0
-; CHECK-NEXT: [[I:%.*]] = load i32, i32 addrspace(1)* [[TMP0]], align 4
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, i32 addrspace(1)* [[ARG:%.*]], i64 0, !amdgpu.uniform !0
+; CHECK-NEXT: [[I:%.*]] = load i32, i32 addrspace(1)* [[TMP0]], align 4, !amdgpu.noclobber !0
; CHECK-NEXT: br i1 undef, label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]], !amdgpu.uniform !0
; CHECK: if.then:
; CHECK-NEXT: tail call void @llvm.amdgcn.s.barrier()
@@ -69,8 +69,8 @@ define amdgpu_kernel void @memory_phi_no_clobber(i32 addrspace(1)* %arg) {
; CHECK-NEXT: fence syncscope("workgroup") release
; CHECK-NEXT: br label [[IF_END]], !amdgpu.uniform !0
; CHECK: if.end:
-; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[ARG]], i64 1, !amdgpu.uniform !0, !amdgpu.noclobber !0
-; CHECK-NEXT: [[I2:%.*]] = load i32, i32 addrspace(1)* [[I1]], align 4
+; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[ARG]], i64 1, !amdgpu.uniform !0
+; CHECK-NEXT: [[I2:%.*]] = load i32, i32 addrspace(1)* [[I1]], align 4, !amdgpu.noclobber !0
; CHECK-NEXT: [[I3:%.*]] = add i32 [[I2]], [[I]]
; CHECK-NEXT: [[I4:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[ARG]], i64 2
; CHECK-NEXT: store i32 [[I3]], i32 addrspace(1)* [[I4]], align 4
@@ -106,8 +106,8 @@ if.end:
define amdgpu_kernel void @memory_phi_clobber1(i32 addrspace(1)* %arg) {
; CHECK-LABEL: @memory_phi_clobber1(
; CHECK-NEXT: bb:
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, i32 addrspace(1)* [[ARG:%.*]], i64 0, !amdgpu.uniform !0, !amdgpu.noclobber !0
-; CHECK-NEXT: [[I:%.*]] = load i32, i32 addrspace(1)* [[TMP0]], align 4
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, i32 addrspace(1)* [[ARG:%.*]], i64 0, !amdgpu.uniform !0
+; CHECK-NEXT: [[I:%.*]] = load i32, i32 addrspace(1)* [[TMP0]], align 4, !amdgpu.noclobber !0
; CHECK-NEXT: br i1 undef, label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]], !amdgpu.uniform !0
; CHECK: if.then:
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[ARG]], i64 3
@@ -155,8 +155,8 @@ if.end:
define amdgpu_kernel void @memory_phi_clobber2(i32 addrspace(1)* %arg) {
; CHECK-LABEL: @memory_phi_clobber2(
; CHECK-NEXT: bb:
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, i32 addrspace(1)* [[ARG:%.*]], i64 0, !amdgpu.uniform !0, !amdgpu.noclobber !0
-; CHECK-NEXT: [[I:%.*]] = load i32, i32 addrspace(1)* [[TMP0]], align 4
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, i32 addrspace(1)* [[ARG:%.*]], i64 0, !amdgpu.uniform !0
+; CHECK-NEXT: [[I:%.*]] = load i32, i32 addrspace(1)* [[TMP0]], align 4, !amdgpu.noclobber !0
; CHECK-NEXT: br i1 undef, label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]], !amdgpu.uniform !0
; CHECK: if.then:
; CHECK-NEXT: tail call void @llvm.amdgcn.s.barrier()
@@ -203,12 +203,12 @@ if.end:
define amdgpu_kernel void @no_clobbering_loop1(i32 addrspace(1)* %arg, i1 %cc) {
; CHECK-LABEL: @no_clobbering_loop1(
; CHECK-NEXT: bb:
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, i32 addrspace(1)* [[ARG:%.*]], i64 0, !amdgpu.uniform !0, !amdgpu.noclobber !0
-; CHECK-NEXT: [[I:%.*]] = load i32, i32 addrspace(1)* [[TMP0]], align 4
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, i32 addrspace(1)* [[ARG:%.*]], i64 0, !amdgpu.uniform !0
+; CHECK-NEXT: [[I:%.*]] = load i32, i32 addrspace(1)* [[TMP0]], align 4, !amdgpu.noclobber !0
; CHECK-NEXT: br label [[WHILE_COND:%.*]], !amdgpu.uniform !0
; CHECK: while.cond:
-; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[ARG]], i64 1, !amdgpu.uniform !0, !amdgpu.noclobber !0
-; CHECK-NEXT: [[I2:%.*]] = load i32, i32 addrspace(1)* [[I1]], align 4
+; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[ARG]], i64 1, !amdgpu.uniform !0
+; CHECK-NEXT: [[I2:%.*]] = load i32, i32 addrspace(1)* [[I1]], align 4, !amdgpu.noclobber !0
; CHECK-NEXT: [[I3:%.*]] = add i32 [[I2]], [[I]]
; CHECK-NEXT: [[I4:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[ARG]], i64 2
; CHECK-NEXT: store i32 [[I3]], i32 addrspace(1)* [[I4]], align 4
@@ -242,14 +242,14 @@ end:
define amdgpu_kernel void @no_clobbering_loop2(i32 addrspace(1)* noalias %arg, i32 addrspace(1)* noalias %out, i32 %n) {
; CHECK-LABEL: @no_clobbering_loop2(
; CHECK-NEXT: bb:
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, i32 addrspace(1)* [[ARG:%.*]], i64 0, !amdgpu.uniform !0, !amdgpu.noclobber !0
-; CHECK-NEXT: [[I:%.*]] = load i32, i32 addrspace(1)* [[TMP0]], align 4
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, i32 addrspace(1)* [[ARG:%.*]], i64 0, !amdgpu.uniform !0
+; CHECK-NEXT: [[I:%.*]] = load i32, i32 addrspace(1)* [[TMP0]], align 4, !amdgpu.noclobber !0
; CHECK-NEXT: br label [[WHILE_COND:%.*]], !amdgpu.uniform !0
; CHECK: while.cond:
; CHECK-NEXT: [[C:%.*]] = phi i32 [ 0, [[BB:%.*]] ], [ [[INC:%.*]], [[WHILE_COND]] ]
; CHECK-NEXT: [[ACC:%.*]] = phi i32 [ [[I]], [[BB]] ], [ [[I3:%.*]], [[WHILE_COND]] ]
-; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[ARG]], i32 [[C]], !amdgpu.uniform !0, !amdgpu.noclobber !0
-; CHECK-NEXT: [[I2:%.*]] = load i32, i32 addrspace(1)* [[I1]], align 4
+; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[ARG]], i32 [[C]], !amdgpu.uniform !0
+; CHECK-NEXT: [[I2:%.*]] = load i32, i32 addrspace(1)* [[I1]], align 4, !amdgpu.noclobber !0
; CHECK-NEXT: [[I3]] = add i32 [[I2]], [[ACC]]
; CHECK-NEXT: tail call void @llvm.amdgcn.wave.barrier()
; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[C]], 1
@@ -286,8 +286,8 @@ end:
define amdgpu_kernel void @clobbering_loop(i32 addrspace(1)* %arg, i32 addrspace(1)* %out, i1 %cc) {
; CHECK-LABEL: @clobbering_loop(
; CHECK-NEXT: bb:
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, i32 addrspace(1)* [[ARG:%.*]], i64 0, !amdgpu.uniform !0, !amdgpu.noclobber !0
-; CHECK-NEXT: [[I:%.*]] = load i32, i32 addrspace(1)* [[TMP0]], align 4
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, i32 addrspace(1)* [[ARG:%.*]], i64 0, !amdgpu.uniform !0
+; CHECK-NEXT: [[I:%.*]] = load i32, i32 addrspace(1)* [[TMP0]], align 4, !amdgpu.noclobber !0
; CHECK-NEXT: br label [[WHILE_COND:%.*]], !amdgpu.uniform !0
; CHECK: while.cond:
; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[ARG]], i64 1, !amdgpu.uniform !0
@@ -325,10 +325,10 @@ end:
define amdgpu_kernel void @clobber_by_atomic_load(i32 addrspace(1)* %arg) {
; CHECK-LABEL: @clobber_by_atomic_load(
; CHECK-NEXT: bb:
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, i32 addrspace(1)* [[ARG:%.*]], i64 0, !amdgpu.uniform !0, !amdgpu.noclobber !0
-; CHECK-NEXT: [[I:%.*]] = load i32, i32 addrspace(1)* [[TMP0]], align 4
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[ARG]], i64 2, !amdgpu.uniform !0, !amdgpu.noclobber !0
-; CHECK-NEXT: [[VAL:%.*]] = load atomic i32, i32 addrspace(1)* [[GEP]] seq_cst, align 4
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, i32 addrspace(1)* [[ARG:%.*]], i64 0, !amdgpu.uniform !0
+; CHECK-NEXT: [[I:%.*]] = load i32, i32 addrspace(1)* [[TMP0]], align 4, !amdgpu.noclobber !0
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[ARG]], i64 2, !amdgpu.uniform !0
+; CHECK-NEXT: [[VAL:%.*]] = load atomic i32, i32 addrspace(1)* [[GEP]] seq_cst, align 4, !amdgpu.noclobber !0
; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[ARG]], i64 3, !amdgpu.uniform !0
; CHECK-NEXT: [[I2:%.*]] = load i32, i32 addrspace(1)* [[I1]], align 4
; CHECK-NEXT: [[I3:%.*]] = add i32 [[I2]], [[I]]
@@ -361,8 +361,8 @@ define protected amdgpu_kernel void @no_alias_store(i32 addrspace(1)* %in, i32 a
; CHECK-NEXT: fence syncscope("workgroup") release
; CHECK-NEXT: tail call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT: fence syncscope("workgroup") acquire
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[IN:%.*]], i64 0, !amdgpu.uniform !0, !amdgpu.noclobber !0
-; CHECK-NEXT: [[LD:%.*]] = load i32, i32 addrspace(1)* [[GEP]], align 4
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[IN:%.*]], i64 0, !amdgpu.uniform !0
+; CHECK-NEXT: [[LD:%.*]] = load i32, i32 addrspace(1)* [[GEP]], align 4, !amdgpu.noclobber !0
; CHECK-NEXT: store i32 [[LD]], i32 addrspace(1)* [[OUT:%.*]], align 4
; CHECK-NEXT: ret void
;
@@ -418,8 +418,8 @@ define protected amdgpu_kernel void @no_alias_volatile_store(i32 addrspace(1)* %
; CHECK-NEXT: fence syncscope("workgroup") release
; CHECK-NEXT: tail call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT: fence syncscope("workgroup") acquire
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[IN:%.*]], i64 0, !amdgpu.uniform !0, !amdgpu.noclobber !0
-; CHECK-NEXT: [[LD:%.*]] = load i32, i32 addrspace(1)* [[GEP]], align 4
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[IN:%.*]], i64 0, !amdgpu.uniform !0
+; CHECK-NEXT: [[LD:%.*]] = load i32, i32 addrspace(1)* [[GEP]], align 4, !amdgpu.noclobber !0
; CHECK-NEXT: store i32 [[LD]], i32 addrspace(1)* [[OUT:%.*]], align 4
; CHECK-NEXT: ret void
;
@@ -443,8 +443,8 @@ define protected amdgpu_kernel void @no_alias_atomic_rmw_relaxed(i32 addrspace(1
; CHECK-LABEL: @no_alias_atomic_rmw_relaxed(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[UNUSED:%.*]] = atomicrmw add i32 addrspace(3)* @LDS, i32 5 monotonic, align 4
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[IN:%.*]], i64 0, !amdgpu.uniform !0, !amdgpu.noclobber !0
-; CHECK-NEXT: [[LD:%.*]] = load i32, i32 addrspace(1)* [[GEP]], align 4
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[IN:%.*]], i64 0, !amdgpu.uniform !0
+; CHECK-NEXT: [[LD:%.*]] = load i32, i32 addrspace(1)* [[GEP]], align 4, !amdgpu.noclobber !0
; CHECK-NEXT: store i32 [[LD]], i32 addrspace(1)* [[OUT:%.*]], align 4
; CHECK-NEXT: ret void
;
@@ -468,8 +468,8 @@ define protected amdgpu_kernel void @no_alias_atomic_cmpxchg(i32 addrspace(1)* %
; CHECK-NEXT: fence syncscope("workgroup") release
; CHECK-NEXT: tail call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT: fence syncscope("workgroup") acquire
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[IN:%.*]], i64 0, !amdgpu.uniform !0, !amdgpu.noclobber !0
-; CHECK-NEXT: [[LD:%.*]] = load i32, i32 addrspace(1)* [[GEP]], align 4
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[IN:%.*]], i64 0, !amdgpu.uniform !0
+; CHECK-NEXT: [[LD:%.*]] = load i32, i32 addrspace(1)* [[GEP]], align 4, !amdgpu.noclobber !0
; CHECK-NEXT: store i32 [[LD]], i32 addrspace(1)* [[OUT:%.*]], align 4
; CHECK-NEXT: ret void
;
@@ -496,8 +496,8 @@ define protected amdgpu_kernel void @no_alias_atomic_rmw(i32 addrspace(1)* %in,
; CHECK-NEXT: fence syncscope("workgroup") release
; CHECK-NEXT: tail call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT: fence syncscope("workgroup") acquire
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[IN:%.*]], i64 0, !amdgpu.uniform !0, !amdgpu.noclobber !0
-; CHECK-NEXT: [[LD:%.*]] = load i32, i32 addrspace(1)* [[GEP]], align 4
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[IN:%.*]], i64 0, !amdgpu.uniform !0
+; CHECK-NEXT: [[LD:%.*]] = load i32, i32 addrspace(1)* [[GEP]], align 4, !amdgpu.noclobber !0
; CHECK-NEXT: store i32 [[LD]], i32 addrspace(1)* [[OUT:%.*]], align 4
; CHECK-NEXT: ret void
;
@@ -613,8 +613,8 @@ define protected amdgpu_kernel void @no_alias_atomic_rmw_then_no_alias_store(i32
; CHECK-NEXT: fence syncscope("workgroup") release
; CHECK-NEXT: tail call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT: fence syncscope("workgroup") acquire
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[IN:%.*]], i64 0, !amdgpu.uniform !0, !amdgpu.noclobber !0
-; CHECK-NEXT: [[LD:%.*]] = load i32, i32 addrspace(1)* [[GEP]], align 4
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[IN:%.*]], i64 0, !amdgpu.uniform !0
+; CHECK-NEXT: [[LD:%.*]] = load i32, i32 addrspace(1)* [[GEP]], align 4, !amdgpu.noclobber !0
; CHECK-NEXT: store i32 [[LD]], i32 addrspace(1)* [[OUT:%.*]], align 4
; CHECK-NEXT: ret void
;
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/target-memoperands.mir b/llvm/test/CodeGen/MIR/AMDGPU/target-memoperands.mir
new file mode 100644
index 0000000000000..e91c015d95682
--- /dev/null
+++ b/llvm/test/CodeGen/MIR/AMDGPU/target-memoperands.mir
@@ -0,0 +1,14 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -run-pass none -o - %s | FileCheck %s
+
+---
+name: target_memoperands
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1
+ ; CHECK-LABEL: name: target_memoperands
+ ; CHECK: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
+ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: ("amdgpu-noclobber" load (s32))
+ %0:_(p4) = COPY $sgpr0_sgpr1
+ %1:_(s32) = G_LOAD %0 :: ("amdgpu-noclobber" load (s32))
+...