[llvm-branch-commits] [llvm] [AMDGPU] Return two MMOs for load-to-lds and store-from-lds intrinsics (PR #175845)
Krzysztof Drewniak via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Tue Jan 13 14:43:05 PST 2026
================
@@ -1626,35 +1642,64 @@ void SITargetLowering::getTgtMemIntrinsic(SmallVectorImpl<IntrinsicInfo> &Infos,
case Intrinsic::amdgcn_cluster_load_async_to_lds_b32:
case Intrinsic::amdgcn_cluster_load_async_to_lds_b64:
case Intrinsic::amdgcn_cluster_load_async_to_lds_b128: {
+ // Entry 0: Load from source (global/flat).
Info.opc = ISD::INTRINSIC_VOID;
Info.memVT = EVT::getIntegerVT(CI.getContext(), getIntrMemWidth(IntrID));
- Info.ptrVal = CI.getArgOperand(1);
- Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
+ Info.ptrVal = CI.getArgOperand(0); // Global pointer
+ Info.offset = cast<ConstantInt>(CI.getArgOperand(2))->getSExtValue();
+ Info.flags |= MachineMemOperand::MOLoad;
+ Infos.push_back(Info);
+
+ // Entry 1: Store to LDS (same offset).
+ Info.flags &= ~MachineMemOperand::MOLoad;
+ Info.flags |= MachineMemOperand::MOStore;
+ Info.ptrVal = CI.getArgOperand(1); // LDS pointer
Infos.push_back(Info);
return;
}
case Intrinsic::amdgcn_global_store_async_from_lds_b8:
case Intrinsic::amdgcn_global_store_async_from_lds_b32:
case Intrinsic::amdgcn_global_store_async_from_lds_b64:
case Intrinsic::amdgcn_global_store_async_from_lds_b128: {
+ // Entry 0: Load from LDS.
Info.opc = ISD::INTRINSIC_VOID;
Info.memVT = EVT::getIntegerVT(CI.getContext(), getIntrMemWidth(IntrID));
- Info.ptrVal = CI.getArgOperand(0);
- Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
+ Info.ptrVal = CI.getArgOperand(1); // LDS pointer
+ Info.offset = cast<ConstantInt>(CI.getArgOperand(2))->getSExtValue();
+ Info.flags |= MachineMemOperand::MOLoad;
+ Infos.push_back(Info);
+
+ // Entry 1: Store to global (same offset).
+ Info.flags &= ~MachineMemOperand::MOLoad;
+ Info.flags |= MachineMemOperand::MOStore;
+ Info.ptrVal = CI.getArgOperand(0); // Global pointer
Infos.push_back(Info);
return;
}
case Intrinsic::amdgcn_load_to_lds:
case Intrinsic::amdgcn_global_load_lds: {
- Info.opc = ISD::INTRINSIC_VOID;
unsigned Width = cast<ConstantInt>(CI.getArgOperand(2))->getZExtValue();
- Info.memVT = EVT::getIntegerVT(CI.getContext(), Width * 8);
- Info.ptrVal = CI.getArgOperand(1);
- Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
auto *Aux = cast<ConstantInt>(CI.getArgOperand(CI.arg_size() - 1));
- if (Aux->getZExtValue() & AMDGPU::CPol::VOLATILE)
+ bool IsVolatile = Aux->getZExtValue() & AMDGPU::CPol::VOLATILE;
+
+ // Entry 0: Load from source (global/flat).
+ Info.opc = ISD::INTRINSIC_VOID;
+ Info.memVT = EVT::getIntegerVT(CI.getContext(), Width * 8);
+ Info.ptrVal = CI.getArgOperand(0); // Source pointer
+ Info.offset = cast<ConstantInt>(CI.getArgOperand(3))->getSExtValue();
+ Info.flags |= MachineMemOperand::MOLoad;
+ if (IsVolatile)
Info.flags |= MachineMemOperand::MOVolatile;
Infos.push_back(Info);
+
+ // Entry 1: Store to LDS.
+ // Same offset from the instruction, but an additional per-lane offset is
+ // added. Represent that using a wider memory type.
+ Info.memVT = EVT::getIntegerVT(CI.getContext(), Width * 8 * 64);
----------------
krzysz00 wrote:
similarly
https://github.com/llvm/llvm-project/pull/175845
More information about the llvm-branch-commits
mailing list