[llvm] [NFC][AMDGPU] Construct MMO inside getTgtMemIntrinsic. (PR #83554)

via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 1 02:53:45 PST 2024


llvmbot wrote:


@llvm/pr-subscribers-backend-amdgpu

Author: None (sstipanovic)

Changes:

In #73613 it was suggested to split the PR into two parts. This is the first part: constructing the MMO inside getTgtMemIntrinsic.
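
A minimal sketch of the new contract (condensed from the SIISelLowering and SelectionDAGBuilder hunks below; the identifiers mirror those hunks rather than forming standalone code): the target's getTgtMemIntrinsic may now build the MachineMemOperand itself and return it through the new `Info.MMO` field, and both instruction selectors prefer that operand over synthesizing one from the legacy fields:

```cpp
// Target side (SIISelLowering.cpp, condensed): build the MMO while the
// intrinsic-specific facts (pointer info, flags, size, alignment) are
// still in scope, instead of exporting them piecemeal through IntrinsicInfo.
Info.MMO = MF.getMachineMemOperand(PtrInfo, Flags, Size, Alignment,
                                   CI.getAAMetadata());

// Selector side (SelectionDAGBuilder.cpp, condensed; IRTranslator.cpp gains
// the equivalent branch): use the target-provided MMO if present, otherwise
// fall back to the old field-by-field construction.
if (Info.MMO)
  Result = DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs, Ops,
                                   Info.memVT, Info.MMO);
else
  Result = DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs, Ops,
                                   Info.memVT, MPI, Info.align, Info.flags,
                                   Info.size, I.getAAMetadata());
```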

---

Patch is 946.89 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/83554.diff


63 Files Affected:

- (modified) llvm/include/llvm/CodeGen/TargetLowering.h (+4) 
- (modified) llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp (+6-2) 
- (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (+7-3) 
- (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+102-59) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-no-rtn.ll (+16-16) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-rtn.ll (+8-8) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.v2f16.ll (+2-2) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-no-rtn.ll (+8-8) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-rtn.ll (+4-4) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.dim.a16.ll (+120-120) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll (+24-24) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.ll (+12-12) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2darraymsaa.ll (+4-4) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.a16.ll (+184-184) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.d.ll (+12-12) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.g16.a16.ll (+9-9) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.g16.ll (+54-54) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.store.2d.d16.ll (+15-15) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd.ll (+4-4) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.f16.ll (+9-9) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.ll (+8-8) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.ll (+10-10) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f16.ll (+27-27) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f32.ll (+20-20) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll (+22-22) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.atomic.fadd.ll (+4-4) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.load.format.f16.ll (+6-6) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.load.format.ll (+4-4) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.load.ll (+5-5) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.store.format.f16.ll (+18-18) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.store.format.f32.ll (+10-10) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.store.ll (+11-11) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.tbuffer.load.f16.ll (+4-4) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.tbuffer.load.ll (+3-3) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.tbuffer.store.f16.ll (+4-4) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.tbuffer.store.ll (+3-3) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.f16.ll (+6-6) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.ll (+6-6) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.f16.ll (+6-6) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.ll (+6-6) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd.ll (+4-4) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.f16.ll (+9-9) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.ll (+12-12) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.ll (+10-10) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f16.ll (+6-6) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f32.ll (+6-6) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.ll (+12-12) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.atomic.fadd.ll (+4-4) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.load.format.f16.ll (+6-6) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.load.format.ll (+6-6) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.load.ll (+5-5) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.store.format.f16.ll (+4-4) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.store.format.f32.ll (+3-3) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.store.ll (+6-6) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.tbuffer.load.f16.ll (+6-6) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.tbuffer.load.ll (+4-4) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.f16.ll (+9-9) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.ll (+8-8) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll (+8-8) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll (+10-10) 
- (modified) llvm/test/CodeGen/AMDGPU/force-store-sc0-sc1.ll (+4-9) 
- (modified) llvm/test/CodeGen/AMDGPU/unsupported-image-a16.ll (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/unsupported-image-g16.ll (+1-1) 


``````````diff
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index f2e00aab8d5da27..f58ae3acf03bc18 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -30,6 +30,7 @@
 #include "llvm/CodeGen/DAGCombine.h"
 #include "llvm/CodeGen/ISDOpcodes.h"
 #include "llvm/CodeGen/LowLevelTypeUtils.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/RuntimeLibcalls.h"
 #include "llvm/CodeGen/SelectionDAG.h"
@@ -1160,6 +1161,9 @@ class TargetLoweringBase {
     MaybeAlign align = Align(1);   // alignment
 
     MachineMemOperand::Flags flags = MachineMemOperand::MONone;
+
+    MachineMemOperand *MMO = nullptr;
+
     IntrinsicInfo() = default;
   };
 
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 7c986dbbc2c7c88..d9a3aab5dcc28b1 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -2643,8 +2643,12 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
       MPI = MachinePointerInfo(Info.ptrVal, Info.offset);
     else if (Info.fallbackAddressSpace)
       MPI = MachinePointerInfo(*Info.fallbackAddressSpace);
-    MIB.addMemOperand(
-        MF->getMachineMemOperand(MPI, Info.flags, MemTy, Alignment, CI.getAAMetadata()));
+
+    if (Info.MMO)
+      MIB.addMemOperand(Info.MMO);
+    else
+      MIB.addMemOperand(MF->getMachineMemOperand(
+          MPI, Info.flags, MemTy, Alignment, CI.getAAMetadata()));
   }
 
   return true;
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index ab2f42d2024ccc2..459987604a4191a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -5090,9 +5090,13 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
       MPI = MachinePointerInfo(Info.ptrVal, Info.offset);
     else if (Info.fallbackAddressSpace)
       MPI = MachinePointerInfo(*Info.fallbackAddressSpace);
-    Result = DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs, Ops,
-                                     Info.memVT, MPI, Info.align, Info.flags,
-                                     Info.size, I.getAAMetadata());
+    if (Info.MMO)
+      Result = DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs, Ops,
+                                       Info.memVT, Info.MMO);
+    else
+      Result = DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs, Ops,
+                                       Info.memVT, MPI, Info.align, Info.flags,
+                                       Info.size, I.getAAMetadata());
   } else if (!HasChain) {
     Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops);
   } else if (!I.getType()->isVoidTy()) {
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 84ef9679ab95635..3dceab87fb8c931 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -17,11 +17,14 @@
 #include "AMDGPUTargetMachine.h"
 #include "GCNSubtarget.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIDefines.h"
 #include "SIMachineFunctionInfo.h"
 #include "SIRegisterInfo.h"
+#include "Utils/AMDGPUBaseInfo.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/FloatingPointMode.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/MemoryLocation.h"
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/Analysis/UniformityAnalysis.h"
 #include "llvm/BinaryFormat/ELF.h"
@@ -34,6 +37,7 @@
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/IntrinsicInst.h"
@@ -1158,9 +1162,36 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                           const CallInst &CI,
                                           MachineFunction &MF,
                                           unsigned IntrID) const {
-  Info.flags = MachineMemOperand::MONone;
+  MachineMemOperand::Flags Flags = MachineMemOperand::MONone;
+  std::optional<unsigned> FallbackAddressSpace;
+  MaybeAlign BaseAlignment = Align(1);
+  PointerUnion<const Value *, const PseudoSourceValue *> PtrVal;
+  uint64_t Size = 0;
+  int Offset = 0;
+
+  auto CreateMMO = [&]() {
+    MachinePointerInfo PtrInfo;
+    if (PtrVal)
+      PtrInfo = MachinePointerInfo(PtrVal, Offset);
+    else if (FallbackAddressSpace)
+      PtrInfo = MachinePointerInfo(*FallbackAddressSpace);
+
+    if (!Size && Info.memVT.isScalableVector())
+      Size = MemoryLocation::UnknownSize;
+    else if (!Size)
+      Size = Info.memVT.getStoreSize();
+
+    Type *Ty = Info.memVT == MVT::iPTR
+                   ? PointerType::get(CI.getContext(), 0)
+                   : Info.memVT.getTypeForEVT(CI.getContext());
+    Align Alignment = MF.getDataLayout().getABITypeAlign(Ty);
+    return MF.getMachineMemOperand(PtrInfo, Flags, Size,
+                                   BaseAlignment.value_or(Alignment),
+                                   CI.getAAMetadata());
+  };
+
   if (CI.hasMetadata(LLVMContext::MD_invariant_load))
-    Info.flags |= MachineMemOperand::MOInvariant;
+    Flags |= MachineMemOperand::MOInvariant;
 
   if (const AMDGPU::RsrcIntrinsic *RsrcIntr =
           AMDGPU::lookupRsrcIntrinsic(IntrID)) {
@@ -1171,10 +1202,10 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
       return false;
 
     // TODO: Should images get their own address space?
-    Info.fallbackAddressSpace = AMDGPUAS::BUFFER_RESOURCE;
+    FallbackAddressSpace = AMDGPUAS::BUFFER_RESOURCE;
 
     if (RsrcIntr->IsImage)
-      Info.align.reset();
+      BaseAlignment.reset();
 
     Value *RsrcArg = CI.getArgOperand(RsrcIntr->RsrcArg);
     if (auto *RsrcPtrTy = dyn_cast<PointerType>(RsrcArg->getType())) {
@@ -1184,13 +1215,13 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
         // those pointers. Cases like "this points at the same value
         // but with a different offset" are handled in
         // areMemAccessesTriviallyDisjoint.
-        Info.ptrVal = RsrcArg;
+        PtrVal = RsrcArg;
     }
 
     auto *Aux = cast<ConstantInt>(CI.getArgOperand(CI.arg_size() - 1));
     if (Aux->getZExtValue() & AMDGPU::CPol::VOLATILE)
-      Info.flags |= MachineMemOperand::MOVolatile;
-    Info.flags |= MachineMemOperand::MODereferenceable;
+      Flags |= MachineMemOperand::MOVolatile;
+    Flags |= MachineMemOperand::MODereferenceable;
     if (ME.onlyReadsMemory()) {
       unsigned MaxNumLanes = 4;
 
@@ -1213,7 +1244,7 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
 
       // FIXME: What does alignment mean for an image?
       Info.opc = ISD::INTRINSIC_W_CHAIN;
-      Info.flags |= MachineMemOperand::MOLoad;
+      Flags |= MachineMemOperand::MOLoad;
     } else if (ME.onlyWritesMemory()) {
       Info.opc = ISD::INTRINSIC_VOID;
 
@@ -1225,15 +1256,17 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
       } else
         Info.memVT = EVT::getEVT(DataTy);
 
-      Info.flags |= MachineMemOperand::MOStore;
+      Flags |= MachineMemOperand::MOStore;
     } else {
       // Atomic
       Info.opc = CI.getType()->isVoidTy() ? ISD::INTRINSIC_VOID :
                                             ISD::INTRINSIC_W_CHAIN;
       Info.memVT = MVT::getVT(CI.getArgOperand(0)->getType());
-      Info.flags |= MachineMemOperand::MOLoad |
-                    MachineMemOperand::MOStore |
-                    MachineMemOperand::MODereferenceable;
+      Flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
+               MachineMemOperand::MODereferenceable;
+
+      // XXX - Should this be volatile without known ordering?
+      Flags |= MachineMemOperand::MOVolatile;
 
       switch (IntrID) {
       default:
@@ -1246,11 +1279,13 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
       case Intrinsic::amdgcn_struct_ptr_buffer_load_lds: {
         unsigned Width = cast<ConstantInt>(CI.getArgOperand(2))->getZExtValue();
         Info.memVT = EVT::getIntegerVT(CI.getContext(), Width * 8);
-        Info.ptrVal = CI.getArgOperand(1);
+        PtrVal = CI.getArgOperand(1);
+        Info.MMO = CreateMMO();
         return true;
       }
       }
     }
+    Info.MMO = CreateMMO();
     return true;
   }
 
@@ -1262,70 +1297,72 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
   case Intrinsic::amdgcn_ds_fmax: {
     Info.opc = ISD::INTRINSIC_W_CHAIN;
     Info.memVT = MVT::getVT(CI.getType());
-    Info.ptrVal = CI.getOperand(0);
-    Info.align.reset();
-    Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
+    PtrVal = CI.getOperand(0);
+    BaseAlignment.reset();
+    Flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
 
     const ConstantInt *Vol = cast<ConstantInt>(CI.getOperand(4));
     if (!Vol->isZero())
-      Info.flags |= MachineMemOperand::MOVolatile;
+      Flags |= MachineMemOperand::MOVolatile;
 
+    Info.MMO = CreateMMO();
     return true;
   }
   case Intrinsic::amdgcn_buffer_atomic_fadd: {
     Info.opc = ISD::INTRINSIC_W_CHAIN;
     Info.memVT = MVT::getVT(CI.getOperand(0)->getType());
-    Info.fallbackAddressSpace = AMDGPUAS::BUFFER_RESOURCE;
-    Info.align.reset();
-    Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
+    FallbackAddressSpace = AMDGPUAS::BUFFER_RESOURCE;
+    BaseAlignment.reset();
+    Flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
 
     const ConstantInt *Vol = dyn_cast<ConstantInt>(CI.getOperand(4));
     if (!Vol || !Vol->isZero())
-      Info.flags |= MachineMemOperand::MOVolatile;
-
+      Flags |= MachineMemOperand::MOVolatile;
+    Info.MMO = CreateMMO();
     return true;
   }
   case Intrinsic::amdgcn_ds_add_gs_reg_rtn:
   case Intrinsic::amdgcn_ds_sub_gs_reg_rtn: {
     Info.opc = ISD::INTRINSIC_W_CHAIN;
     Info.memVT = MVT::getVT(CI.getOperand(0)->getType());
-    Info.ptrVal = nullptr;
-    Info.fallbackAddressSpace = AMDGPUAS::STREAMOUT_REGISTER;
-    Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
+    PtrVal = nullptr;
+    FallbackAddressSpace = AMDGPUAS::STREAMOUT_REGISTER;
+    Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
+    Info.MMO = CreateMMO();
     return true;
   }
   case Intrinsic::amdgcn_ds_append:
   case Intrinsic::amdgcn_ds_consume: {
     Info.opc = ISD::INTRINSIC_W_CHAIN;
     Info.memVT = MVT::getVT(CI.getType());
-    Info.ptrVal = CI.getOperand(0);
-    Info.align.reset();
-    Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
+    PtrVal = CI.getOperand(0);
+    BaseAlignment.reset();
+    Flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
 
     const ConstantInt *Vol = cast<ConstantInt>(CI.getOperand(1));
     if (!Vol->isZero())
-      Info.flags |= MachineMemOperand::MOVolatile;
-
+      Flags |= MachineMemOperand::MOVolatile;
+    Info.MMO = CreateMMO();
     return true;
   }
   case Intrinsic::amdgcn_global_atomic_csub: {
     Info.opc = ISD::INTRINSIC_W_CHAIN;
     Info.memVT = MVT::getVT(CI.getType());
-    Info.ptrVal = CI.getOperand(0);
-    Info.align.reset();
-    Info.flags |= MachineMemOperand::MOLoad |
-                  MachineMemOperand::MOStore |
-                  MachineMemOperand::MOVolatile;
+    PtrVal = CI.getOperand(0);
+    BaseAlignment.reset();
+    Flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
+             MachineMemOperand::MOVolatile;
+    Info.MMO = CreateMMO();
     return true;
   }
   case Intrinsic::amdgcn_image_bvh_intersect_ray: {
     Info.opc = ISD::INTRINSIC_W_CHAIN;
     Info.memVT = MVT::getVT(CI.getType()); // XXX: what is correct VT?
 
-    Info.fallbackAddressSpace = AMDGPUAS::BUFFER_RESOURCE;
-    Info.align.reset();
-    Info.flags |= MachineMemOperand::MOLoad |
-                  MachineMemOperand::MODereferenceable;
+    FallbackAddressSpace = AMDGPUAS::BUFFER_RESOURCE;
+    BaseAlignment.reset();
+    Flags |= MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable;
+    Info.MMO = CreateMMO();
     return true;
   }
   case Intrinsic::amdgcn_global_atomic_fadd:
@@ -1344,20 +1381,21 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
   case Intrinsic::amdgcn_flat_atomic_fadd_v2bf16: {
     Info.opc = ISD::INTRINSIC_W_CHAIN;
     Info.memVT = MVT::getVT(CI.getType());
-    Info.ptrVal = CI.getOperand(0);
-    Info.align.reset();
-    Info.flags |= MachineMemOperand::MOLoad |
-                  MachineMemOperand::MOStore |
-                  MachineMemOperand::MODereferenceable |
-                  MachineMemOperand::MOVolatile;
+    PtrVal = CI.getOperand(0);
+    BaseAlignment.reset();
+    Flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
+             MachineMemOperand::MODereferenceable |
+             MachineMemOperand::MOVolatile;
+    Info.MMO = CreateMMO();
     return true;
   }
   case Intrinsic::amdgcn_global_load_tr: {
     Info.opc = ISD::INTRINSIC_W_CHAIN;
     Info.memVT = MVT::getVT(CI.getType());
-    Info.ptrVal = CI.getOperand(0);
-    Info.align.reset();
-    Info.flags |= MachineMemOperand::MOLoad;
+    PtrVal = CI.getOperand(0);
+    BaseAlignment.reset();
+    Flags |= MachineMemOperand::MOLoad;
+    Info.MMO = CreateMMO();
     return true;
   }
   case Intrinsic::amdgcn_ds_gws_init:
@@ -1372,25 +1410,29 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
         static_cast<const GCNTargetMachine &>(getTargetMachine());
 
     SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
-    Info.ptrVal = MFI->getGWSPSV(TM);
+    PtrVal = MFI->getGWSPSV(TM);
 
     // This is an abstract access, but we need to specify a type and size.
     Info.memVT = MVT::i32;
-    Info.size = 4;
-    Info.align = Align(4);
+    Size = 4;
+    BaseAlignment = Align(4);
 
     if (IntrID == Intrinsic::amdgcn_ds_gws_barrier)
-      Info.flags |= MachineMemOperand::MOLoad;
+      Flags |= MachineMemOperand::MOLoad;
     else
-      Info.flags |= MachineMemOperand::MOStore;
+      Flags |= MachineMemOperand::MOStore;
+    Info.MMO = CreateMMO();
     return true;
   }
   case Intrinsic::amdgcn_global_load_lds: {
     Info.opc = ISD::INTRINSIC_VOID;
     unsigned Width = cast<ConstantInt>(CI.getArgOperand(2))->getZExtValue();
     Info.memVT = EVT::getIntegerVT(CI.getContext(), Width * 8);
-    Info.ptrVal = CI.getArgOperand(1);
-    Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
+
+    Flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
+             MachineMemOperand::MOVolatile;
+    PtrVal = CI.getArgOperand(1);
+    Info.MMO = CreateMMO();
     return true;
   }
   case Intrinsic::amdgcn_ds_bvh_stack_rtn: {
@@ -1400,14 +1442,15 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
         static_cast<const GCNTargetMachine &>(getTargetMachine());
 
     SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
-    Info.ptrVal = MFI->getGWSPSV(TM);
+    PtrVal = MFI->getGWSPSV(TM);
 
     // This is an abstract access, but we need to specify a type and size.
     Info.memVT = MVT::i32;
-    Info.size = 4;
-    Info.align = Align(4);
+    Size = 4;
+    BaseAlignment = Align(4);
 
-    Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
+    Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
+    Info.MMO = CreateMMO();
     return true;
   }
   default:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-no-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-no-rtn.ll
index 9514bea86e4d171..22dccf39cff0593 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-no-rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-no-rtn.ll
@@ -15,7 +15,7 @@ define amdgpu_ps void @buffer_atomic_fadd_v2f16_offset_no_rtn(<2 x half> %val, <
   ; GFX908-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
   ; GFX908-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
   ; GFX908-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4
-  ; GFX908-NEXT:   BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8)
+  ; GFX908-NEXT:   BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
   ; GFX908-NEXT:   S_ENDPGM 0
   ;
   ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_v2f16_offset_no_rtn
@@ -29,7 +29,7 @@ define amdgpu_ps void @buffer_atomic_fadd_v2f16_offset_no_rtn(<2 x half> %val, <
   ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
   ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
   ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8)
+  ; GFX90A_GFX940-NEXT:   BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
   ; GFX90A_GFX940-NEXT:   S_ENDPGM 0
   %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0)
   ret void
@@ -48,7 +48,7 @@ define amdgpu_ps void @buffer_atomic_fadd_v2f16_offen_no_rtn(<2 x half> %val, <4
   ; GFX908-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
   ; GFX908-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
   ; GFX908-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
-  ; GFX908-NEXT:   BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8)
+  ; GFX908-NEXT:   BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
   ; GFX908-NEXT:   S_ENDPGM 0
   ;
   ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_v2f16_offen_no_rtn
@@ -63,7 +63,7 @@ define amdgpu_ps void @buffer_atomic_fadd_v2f16_offen_no_rtn(<2 x half> %val, <4
   ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
   ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
   ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
-  ; GFX90A_GFX940-NEXT:   BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8)
+  ; GFX90A_GFX940-NEXT:   BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
   ; GFX90A_GFX940-NEXT:   S_ENDPGM 0
   %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
   ret void
@@ -82,7 +82,7 @@ define amdgpu_ps void @buffer_atomic_fadd_v2f16_idxen_no_rtn(<2 x half> %val, <4
   ; GFX908-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
   ; GFX908-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
   ; GFX908-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
-  ; GFX908-NEXT:   BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>)...
[truncated]

``````````

https://github.com/llvm/llvm-project/pull/83554


More information about the llvm-commits mailing list