[llvm] [NFC][AMDGPU] Construct MMO inside getTgtMemIntrinsic. (PR #83554)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 1 02:53:45 PST 2024
llvmbot wrote:
@llvm/pr-subscribers-backend-amdgpu
Author: None (sstipanovic)
Changes:
In #73613 it was suggested to split the PR into two parts. This is the first part: constructing the MMO inside getTgtMemIntrinsic.
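
In sketch form, condensed from the IRTranslator and SelectionDAGBuilder hunks below, both ISel paths now prefer a target-built memory operand and keep the old field-by-field construction as the fallback:

```cpp
// Condensed from the IRTranslator.cpp hunk below (a sketch, not the full
// context): attach the target-built MMO when the hook provided one,
// otherwise assemble it from the individual IntrinsicInfo fields as before.
if (Info.MMO)
  MIB.addMemOperand(Info.MMO);
else
  MIB.addMemOperand(MF->getMachineMemOperand(MPI, Info.flags, MemTy, Alignment,
                                             CI.getAAMetadata()));
```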
---
Patch is 946.89 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/83554.diff
63 Files Affected:
- (modified) llvm/include/llvm/CodeGen/TargetLowering.h (+4)
- (modified) llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp (+6-2)
- (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (+7-3)
- (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+102-59)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-no-rtn.ll (+16-16)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-rtn.ll (+8-8)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.v2f16.ll (+2-2)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-no-rtn.ll (+8-8)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-rtn.ll (+4-4)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.dim.a16.ll (+120-120)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll (+24-24)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.ll (+12-12)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2darraymsaa.ll (+4-4)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.a16.ll (+184-184)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.d.ll (+12-12)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.g16.a16.ll (+9-9)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.g16.ll (+54-54)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.store.2d.d16.ll (+15-15)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd.ll (+4-4)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.f16.ll (+9-9)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.ll (+8-8)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.ll (+10-10)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f16.ll (+27-27)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f32.ll (+20-20)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll (+22-22)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.atomic.fadd.ll (+4-4)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.load.format.f16.ll (+6-6)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.load.format.ll (+4-4)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.load.ll (+5-5)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.store.format.f16.ll (+18-18)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.store.format.f32.ll (+10-10)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.store.ll (+11-11)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.tbuffer.load.f16.ll (+4-4)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.tbuffer.load.ll (+3-3)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.tbuffer.store.f16.ll (+4-4)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.tbuffer.store.ll (+3-3)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.f16.ll (+6-6)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.ll (+6-6)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.f16.ll (+6-6)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.ll (+6-6)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd.ll (+4-4)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.f16.ll (+9-9)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.ll (+12-12)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.ll (+10-10)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f16.ll (+6-6)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f32.ll (+6-6)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.ll (+12-12)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.atomic.fadd.ll (+4-4)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.load.format.f16.ll (+6-6)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.load.format.ll (+6-6)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.load.ll (+5-5)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.store.format.f16.ll (+4-4)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.store.format.f32.ll (+3-3)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.store.ll (+6-6)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.tbuffer.load.f16.ll (+6-6)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.tbuffer.load.ll (+4-4)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.f16.ll (+9-9)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.ll (+8-8)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll (+8-8)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll (+10-10)
- (modified) llvm/test/CodeGen/AMDGPU/force-store-sc0-sc1.ll (+4-9)
- (modified) llvm/test/CodeGen/AMDGPU/unsupported-image-a16.ll (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/unsupported-image-g16.ll (+1-1)
``````````diff
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index f2e00aab8d5da27..f58ae3acf03bc18 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -30,6 +30,7 @@
#include "llvm/CodeGen/DAGCombine.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/LowLevelTypeUtils.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
@@ -1160,6 +1161,9 @@ class TargetLoweringBase {
MaybeAlign align = Align(1); // alignment
MachineMemOperand::Flags flags = MachineMemOperand::MONone;
+
+ MachineMemOperand *MMO = nullptr;
+
IntrinsicInfo() = default;
};
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 7c986dbbc2c7c88..d9a3aab5dcc28b1 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -2643,8 +2643,12 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
MPI = MachinePointerInfo(Info.ptrVal, Info.offset);
else if (Info.fallbackAddressSpace)
MPI = MachinePointerInfo(*Info.fallbackAddressSpace);
- MIB.addMemOperand(
- MF->getMachineMemOperand(MPI, Info.flags, MemTy, Alignment, CI.getAAMetadata()));
+
+ if (Info.MMO)
+ MIB.addMemOperand(Info.MMO);
+ else
+ MIB.addMemOperand(MF->getMachineMemOperand(
+ MPI, Info.flags, MemTy, Alignment, CI.getAAMetadata()));
}
return true;
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index ab2f42d2024ccc2..459987604a4191a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -5090,9 +5090,13 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
MPI = MachinePointerInfo(Info.ptrVal, Info.offset);
else if (Info.fallbackAddressSpace)
MPI = MachinePointerInfo(*Info.fallbackAddressSpace);
- Result = DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs, Ops,
- Info.memVT, MPI, Info.align, Info.flags,
- Info.size, I.getAAMetadata());
+ if (Info.MMO)
+ Result = DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs, Ops,
+ Info.memVT, Info.MMO);
+ else
+ Result = DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs, Ops,
+ Info.memVT, MPI, Info.align, Info.flags,
+ Info.size, I.getAAMetadata());
} else if (!HasChain) {
Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops);
} else if (!I.getType()->isVoidTy()) {
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 84ef9679ab95635..3dceab87fb8c931 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -17,11 +17,14 @@
#include "AMDGPUTargetMachine.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIDefines.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
+#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/FloatingPointMode.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/UniformityAnalysis.h"
#include "llvm/BinaryFormat/ELF.h"
@@ -34,6 +37,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -1158,9 +1162,36 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
const CallInst &CI,
MachineFunction &MF,
unsigned IntrID) const {
- Info.flags = MachineMemOperand::MONone;
+ MachineMemOperand::Flags Flags = MachineMemOperand::MONone;
+ std::optional<unsigned> FallbackAddressSpace;
+ MaybeAlign BaseAlignment = Align(1);
+ PointerUnion<const Value *, const PseudoSourceValue *> PtrVal;
+ uint64_t Size = 0;
+ int Offset = 0;
+
+ auto CreateMMO = [&]() {
+ MachinePointerInfo PtrInfo;
+ if (PtrVal)
+ PtrInfo = MachinePointerInfo(PtrVal, Offset);
+ else if (FallbackAddressSpace)
+ PtrInfo = MachinePointerInfo(*FallbackAddressSpace);
+
+ if (!Size && Info.memVT.isScalableVector())
+ Size = MemoryLocation::UnknownSize;
+ else if (!Size)
+ Size = Info.memVT.getStoreSize();
+
+ Type *Ty = Info.memVT == MVT::iPTR
+ ? PointerType::get(CI.getContext(), 0)
+ : Info.memVT.getTypeForEVT(CI.getContext());
+ Align Alignment = MF.getDataLayout().getABITypeAlign(Ty);
+ return MF.getMachineMemOperand(PtrInfo, Flags, Size,
+ BaseAlignment.value_or(Alignment),
+ CI.getAAMetadata());
+ };
+
if (CI.hasMetadata(LLVMContext::MD_invariant_load))
- Info.flags |= MachineMemOperand::MOInvariant;
+ Flags |= MachineMemOperand::MOInvariant;
if (const AMDGPU::RsrcIntrinsic *RsrcIntr =
AMDGPU::lookupRsrcIntrinsic(IntrID)) {
@@ -1171,10 +1202,10 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
return false;
// TODO: Should images get their own address space?
- Info.fallbackAddressSpace = AMDGPUAS::BUFFER_RESOURCE;
+ FallbackAddressSpace = AMDGPUAS::BUFFER_RESOURCE;
if (RsrcIntr->IsImage)
- Info.align.reset();
+ BaseAlignment.reset();
Value *RsrcArg = CI.getArgOperand(RsrcIntr->RsrcArg);
if (auto *RsrcPtrTy = dyn_cast<PointerType>(RsrcArg->getType())) {
@@ -1184,13 +1215,13 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
// those pointers. Cases like "this points at the same value
// but with a different offset" are handled in
// areMemAccessesTriviallyDisjoint.
- Info.ptrVal = RsrcArg;
+ PtrVal = RsrcArg;
}
auto *Aux = cast<ConstantInt>(CI.getArgOperand(CI.arg_size() - 1));
if (Aux->getZExtValue() & AMDGPU::CPol::VOLATILE)
- Info.flags |= MachineMemOperand::MOVolatile;
- Info.flags |= MachineMemOperand::MODereferenceable;
+ Flags |= MachineMemOperand::MOVolatile;
+ Flags |= MachineMemOperand::MODereferenceable;
if (ME.onlyReadsMemory()) {
unsigned MaxNumLanes = 4;
@@ -1213,7 +1244,7 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
// FIXME: What does alignment mean for an image?
Info.opc = ISD::INTRINSIC_W_CHAIN;
- Info.flags |= MachineMemOperand::MOLoad;
+ Flags |= MachineMemOperand::MOLoad;
} else if (ME.onlyWritesMemory()) {
Info.opc = ISD::INTRINSIC_VOID;
@@ -1225,15 +1256,17 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
} else
Info.memVT = EVT::getEVT(DataTy);
- Info.flags |= MachineMemOperand::MOStore;
+ Flags |= MachineMemOperand::MOStore;
} else {
// Atomic
Info.opc = CI.getType()->isVoidTy() ? ISD::INTRINSIC_VOID :
ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(CI.getArgOperand(0)->getType());
- Info.flags |= MachineMemOperand::MOLoad |
- MachineMemOperand::MOStore |
- MachineMemOperand::MODereferenceable;
+ Flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
+ MachineMemOperand::MODereferenceable;
+
+ // XXX - Should this be volatile without known ordering?
+ Flags |= MachineMemOperand::MOVolatile;
switch (IntrID) {
default:
@@ -1246,11 +1279,13 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
case Intrinsic::amdgcn_struct_ptr_buffer_load_lds: {
unsigned Width = cast<ConstantInt>(CI.getArgOperand(2))->getZExtValue();
Info.memVT = EVT::getIntegerVT(CI.getContext(), Width * 8);
- Info.ptrVal = CI.getArgOperand(1);
+ PtrVal = CI.getArgOperand(1);
+ Info.MMO = CreateMMO();
return true;
}
}
}
+ Info.MMO = CreateMMO();
return true;
}
@@ -1262,70 +1297,72 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
case Intrinsic::amdgcn_ds_fmax: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(CI.getType());
- Info.ptrVal = CI.getOperand(0);
- Info.align.reset();
- Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
+ PtrVal = CI.getOperand(0);
+ BaseAlignment.reset();
+ Flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
const ConstantInt *Vol = cast<ConstantInt>(CI.getOperand(4));
if (!Vol->isZero())
- Info.flags |= MachineMemOperand::MOVolatile;
+ Flags |= MachineMemOperand::MOVolatile;
+ Info.MMO = CreateMMO();
return true;
}
case Intrinsic::amdgcn_buffer_atomic_fadd: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(CI.getOperand(0)->getType());
- Info.fallbackAddressSpace = AMDGPUAS::BUFFER_RESOURCE;
- Info.align.reset();
- Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
+ FallbackAddressSpace = AMDGPUAS::BUFFER_RESOURCE;
+ BaseAlignment.reset();
+ Flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
const ConstantInt *Vol = dyn_cast<ConstantInt>(CI.getOperand(4));
if (!Vol || !Vol->isZero())
- Info.flags |= MachineMemOperand::MOVolatile;
-
+ Flags |= MachineMemOperand::MOVolatile;
+ Info.MMO = CreateMMO();
return true;
}
case Intrinsic::amdgcn_ds_add_gs_reg_rtn:
case Intrinsic::amdgcn_ds_sub_gs_reg_rtn: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(CI.getOperand(0)->getType());
- Info.ptrVal = nullptr;
- Info.fallbackAddressSpace = AMDGPUAS::STREAMOUT_REGISTER;
- Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
+ PtrVal = nullptr;
+ FallbackAddressSpace = AMDGPUAS::STREAMOUT_REGISTER;
+ Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
+ Info.MMO = CreateMMO();
return true;
}
case Intrinsic::amdgcn_ds_append:
case Intrinsic::amdgcn_ds_consume: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(CI.getType());
- Info.ptrVal = CI.getOperand(0);
- Info.align.reset();
- Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
+ PtrVal = CI.getOperand(0);
+ BaseAlignment.reset();
+ Flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
const ConstantInt *Vol = cast<ConstantInt>(CI.getOperand(1));
if (!Vol->isZero())
- Info.flags |= MachineMemOperand::MOVolatile;
-
+ Flags |= MachineMemOperand::MOVolatile;
+ Info.MMO = CreateMMO();
return true;
}
case Intrinsic::amdgcn_global_atomic_csub: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(CI.getType());
- Info.ptrVal = CI.getOperand(0);
- Info.align.reset();
- Info.flags |= MachineMemOperand::MOLoad |
- MachineMemOperand::MOStore |
- MachineMemOperand::MOVolatile;
+ PtrVal = CI.getOperand(0);
+ BaseAlignment.reset();
+ Flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
+ MachineMemOperand::MOVolatile;
+ Info.MMO = CreateMMO();
return true;
}
case Intrinsic::amdgcn_image_bvh_intersect_ray: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(CI.getType()); // XXX: what is correct VT?
- Info.fallbackAddressSpace = AMDGPUAS::BUFFER_RESOURCE;
- Info.align.reset();
- Info.flags |= MachineMemOperand::MOLoad |
- MachineMemOperand::MODereferenceable;
+ FallbackAddressSpace = AMDGPUAS::BUFFER_RESOURCE;
+ BaseAlignment.reset();
+ Flags |= MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable;
+ Info.MMO = CreateMMO();
return true;
}
case Intrinsic::amdgcn_global_atomic_fadd:
@@ -1344,20 +1381,21 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
case Intrinsic::amdgcn_flat_atomic_fadd_v2bf16: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(CI.getType());
- Info.ptrVal = CI.getOperand(0);
- Info.align.reset();
- Info.flags |= MachineMemOperand::MOLoad |
- MachineMemOperand::MOStore |
- MachineMemOperand::MODereferenceable |
- MachineMemOperand::MOVolatile;
+ PtrVal = CI.getOperand(0);
+ BaseAlignment.reset();
+ Flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
+ MachineMemOperand::MODereferenceable |
+ MachineMemOperand::MOVolatile;
+ Info.MMO = CreateMMO();
return true;
}
case Intrinsic::amdgcn_global_load_tr: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(CI.getType());
- Info.ptrVal = CI.getOperand(0);
- Info.align.reset();
- Info.flags |= MachineMemOperand::MOLoad;
+ PtrVal = CI.getOperand(0);
+ BaseAlignment.reset();
+ Flags |= MachineMemOperand::MOLoad;
+ Info.MMO = CreateMMO();
return true;
}
case Intrinsic::amdgcn_ds_gws_init:
@@ -1372,25 +1410,29 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
static_cast<const GCNTargetMachine &>(getTargetMachine());
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
- Info.ptrVal = MFI->getGWSPSV(TM);
+ PtrVal = MFI->getGWSPSV(TM);
// This is an abstract access, but we need to specify a type and size.
Info.memVT = MVT::i32;
- Info.size = 4;
- Info.align = Align(4);
+ Size = 4;
+ BaseAlignment = Align(4);
if (IntrID == Intrinsic::amdgcn_ds_gws_barrier)
- Info.flags |= MachineMemOperand::MOLoad;
+ Flags |= MachineMemOperand::MOLoad;
else
- Info.flags |= MachineMemOperand::MOStore;
+ Flags |= MachineMemOperand::MOStore;
+ Info.MMO = CreateMMO();
return true;
}
case Intrinsic::amdgcn_global_load_lds: {
Info.opc = ISD::INTRINSIC_VOID;
unsigned Width = cast<ConstantInt>(CI.getArgOperand(2))->getZExtValue();
Info.memVT = EVT::getIntegerVT(CI.getContext(), Width * 8);
- Info.ptrVal = CI.getArgOperand(1);
- Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
+
+ Flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
+ MachineMemOperand::MOVolatile;
+ PtrVal = CI.getArgOperand(1);
+ Info.MMO = CreateMMO();
return true;
}
case Intrinsic::amdgcn_ds_bvh_stack_rtn: {
@@ -1400,14 +1442,15 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
static_cast<const GCNTargetMachine &>(getTargetMachine());
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
- Info.ptrVal = MFI->getGWSPSV(TM);
+ PtrVal = MFI->getGWSPSV(TM);
// This is an abstract access, but we need to specify a type and size.
Info.memVT = MVT::i32;
- Info.size = 4;
- Info.align = Align(4);
+ Size = 4;
+ BaseAlignment = Align(4);
- Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
+ Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
+ Info.MMO = CreateMMO();
return true;
}
default:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-no-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-no-rtn.ll
index 9514bea86e4d171..22dccf39cff0593 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-no-rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-no-rtn.ll
@@ -15,7 +15,7 @@ define amdgpu_ps void @buffer_atomic_fadd_v2f16_offset_no_rtn(<2 x half> %val, <
; GFX908-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; GFX908-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8)
+ ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
; GFX908-NEXT: S_ENDPGM 0
;
; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_v2f16_offset_no_rtn
@@ -29,7 +29,7 @@ define amdgpu_ps void @buffer_atomic_fadd_v2f16_offset_no_rtn(<2 x half> %val, <
; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8)
+ ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
; GFX90A_GFX940-NEXT: S_ENDPGM 0
%ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0)
ret void
@@ -48,7 +48,7 @@ define amdgpu_ps void @buffer_atomic_fadd_v2f16_offen_no_rtn(<2 x half> %val, <4
; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX908-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8)
+ ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
; GFX908-NEXT: S_ENDPGM 0
;
; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_v2f16_offen_no_rtn
@@ -63,7 +63,7 @@ define amdgpu_ps void @buffer_atomic_fadd_v2f16_offen_no_rtn(<2 x half> %val, <4
; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8)
+ ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
; GFX90A_GFX940-NEXT: S_ENDPGM 0
%ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
@@ -82,7 +82,7 @@ define amdgpu_ps void @buffer_atomic_fadd_v2f16_idxen_no_rtn(<2 x half> %val, <4
; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX908-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
- ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>)...
[truncated]
``````````
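The core of the AMDGPU change is the `CreateMMO` lambda in `SIISelLowering.cpp`: each intrinsic case fills in local `Flags`/`PtrVal`/`Size`/`BaseAlignment` state and then materializes the operand once at its `return`. A condensed sketch of that lambda (the scalable-vector and `MVT::iPTR` special cases from the patch are omitted here):

```cpp
// Condensed from the SIISelLowering.cpp hunk above: one lambda captures the
// locals that each intrinsic case mutates, so every early return can build
// the MachineMemOperand the same way.
auto CreateMMO = [&]() {
  MachinePointerInfo PtrInfo;
  if (PtrVal)
    PtrInfo = MachinePointerInfo(PtrVal, Offset);
  else if (FallbackAddressSpace)
    PtrInfo = MachinePointerInfo(*FallbackAddressSpace);

  // When a case did not set an explicit Size, fall back to the store size
  // of the memory VT it chose.
  if (!Size)
    Size = Info.memVT.getStoreSize();

  Type *Ty = Info.memVT.getTypeForEVT(CI.getContext());
  Align ABIAlign = MF.getDataLayout().getABITypeAlign(Ty);
  return MF.getMachineMemOperand(PtrInfo, Flags, Size,
                                 BaseAlignment.value_or(ABIAlign),
                                 CI.getAAMetadata());
};
```

One visible consequence in the MIR tests above: the operand is now created from a byte size rather than from the vector `memVT`, so a memory type like `(<2 x s16>)` prints as the equivalently sized `(s32)`.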
https://github.com/llvm/llvm-project/pull/83554