[llvm] r321555 - AMDGPU: Implement getTgtMemIntrinsic for images
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 29 09:18:14 PST 2017
Author: arsenm
Date: Fri Dec 29 09:18:14 2017
New Revision: 321555
URL: http://llvm.org/viewvc/llvm-project?rev=321555&view=rev
Log:
AMDGPU: Implement getTgtMemIntrinsic for images
Currently all images are lowered to have a single
image PseudoSourceValue. Image stores happen to have
overly strict mayLoad/mayStore/hasSideEffects flags
set on them, so this happens to work. When these
are fixed to be correct, the scheduler breaks
this because the identical PSVs are assumed to
be the same address. These need to be unique
to the image resource value.
Modified:
llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td
llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.h
Modified: llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td?rev=321555&r1=321554&r2=321555&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td (original)
+++ llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td Fri Dec 29 09:18:14 2017
@@ -304,7 +304,8 @@ class AMDGPUImageLoad<bit NoMem = 0> : I
llvm_i1_ty, // slc(imm)
llvm_i1_ty, // lwe(imm)
llvm_i1_ty], // da(imm)
- !if(NoMem, [IntrNoMem], [IntrReadMem])>;
+ !if(NoMem, [IntrNoMem], [IntrReadMem]), "",
+ !if(NoMem, [], [SDNPMemOperand])>;
def int_amdgcn_image_load : AMDGPUImageLoad;
def int_amdgcn_image_load_mip : AMDGPUImageLoad;
@@ -320,7 +321,7 @@ class AMDGPUImageStore : Intrinsic <
llvm_i1_ty, // slc(imm)
llvm_i1_ty, // lwe(imm)
llvm_i1_ty], // da(imm)
- []>;
+ [IntrWriteMem], "", [SDNPMemOperand]>;
def int_amdgcn_image_store : AMDGPUImageStore;
def int_amdgcn_image_store_mip : AMDGPUImageStore;
@@ -336,7 +337,8 @@ class AMDGPUImageSample<bit NoMem = 0> :
llvm_i1_ty, // slc(imm)
llvm_i1_ty, // lwe(imm)
llvm_i1_ty], // da(imm)
- !if(NoMem, [IntrNoMem], [IntrReadMem])>;
+ !if(NoMem, [IntrNoMem], [IntrReadMem]), "",
+ !if(NoMem, [], [SDNPMemOperand])>;
// Basic sample
def int_amdgcn_image_sample : AMDGPUImageSample;
@@ -428,7 +430,7 @@ class AMDGPUImageAtomic : Intrinsic <
llvm_i1_ty, // r128(imm)
llvm_i1_ty, // da(imm)
llvm_i1_ty], // slc(imm)
- []>;
+ [], "", [SDNPMemOperand]>;
def int_amdgcn_image_atomic_swap : AMDGPUImageAtomic;
def int_amdgcn_image_atomic_add : AMDGPUImageAtomic;
@@ -451,7 +453,7 @@ def int_amdgcn_image_atomic_cmpswap : In
llvm_i1_ty, // r128(imm)
llvm_i1_ty, // da(imm)
llvm_i1_ty], // slc(imm)
- []>;
+ [], "", [SDNPMemOperand]>;
class AMDGPUBufferLoad : Intrinsic <
[llvm_anyfloat_ty],
Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=321555&r1=321554&r2=321555&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Fri Dec 29 09:18:14 2017
@@ -575,6 +575,157 @@ bool SITargetLowering::getTgtMemIntrinsi
return true;
}
+
+ // Image load.
+ case Intrinsic::amdgcn_image_load:
+ case Intrinsic::amdgcn_image_load_mip:
+
+ // Sample.
+ case Intrinsic::amdgcn_image_sample:
+ case Intrinsic::amdgcn_image_sample_cl:
+ case Intrinsic::amdgcn_image_sample_d:
+ case Intrinsic::amdgcn_image_sample_d_cl:
+ case Intrinsic::amdgcn_image_sample_l:
+ case Intrinsic::amdgcn_image_sample_b:
+ case Intrinsic::amdgcn_image_sample_b_cl:
+ case Intrinsic::amdgcn_image_sample_lz:
+ case Intrinsic::amdgcn_image_sample_cd:
+ case Intrinsic::amdgcn_image_sample_cd_cl:
+
+ // Sample with comparison.
+ case Intrinsic::amdgcn_image_sample_c:
+ case Intrinsic::amdgcn_image_sample_c_cl:
+ case Intrinsic::amdgcn_image_sample_c_d:
+ case Intrinsic::amdgcn_image_sample_c_d_cl:
+ case Intrinsic::amdgcn_image_sample_c_l:
+ case Intrinsic::amdgcn_image_sample_c_b:
+ case Intrinsic::amdgcn_image_sample_c_b_cl:
+ case Intrinsic::amdgcn_image_sample_c_lz:
+ case Intrinsic::amdgcn_image_sample_c_cd:
+ case Intrinsic::amdgcn_image_sample_c_cd_cl:
+
+ // Sample with offsets.
+ case Intrinsic::amdgcn_image_sample_o:
+ case Intrinsic::amdgcn_image_sample_cl_o:
+ case Intrinsic::amdgcn_image_sample_d_o:
+ case Intrinsic::amdgcn_image_sample_d_cl_o:
+ case Intrinsic::amdgcn_image_sample_l_o:
+ case Intrinsic::amdgcn_image_sample_b_o:
+ case Intrinsic::amdgcn_image_sample_b_cl_o:
+ case Intrinsic::amdgcn_image_sample_lz_o:
+ case Intrinsic::amdgcn_image_sample_cd_o:
+ case Intrinsic::amdgcn_image_sample_cd_cl_o:
+
+ // Sample with comparison and offsets.
+ case Intrinsic::amdgcn_image_sample_c_o:
+ case Intrinsic::amdgcn_image_sample_c_cl_o:
+ case Intrinsic::amdgcn_image_sample_c_d_o:
+ case Intrinsic::amdgcn_image_sample_c_d_cl_o:
+ case Intrinsic::amdgcn_image_sample_c_l_o:
+ case Intrinsic::amdgcn_image_sample_c_b_o:
+ case Intrinsic::amdgcn_image_sample_c_b_cl_o:
+ case Intrinsic::amdgcn_image_sample_c_lz_o:
+ case Intrinsic::amdgcn_image_sample_c_cd_o:
+ case Intrinsic::amdgcn_image_sample_c_cd_cl_o:
+
+ // Basic gather4
+ case Intrinsic::amdgcn_image_gather4:
+ case Intrinsic::amdgcn_image_gather4_cl:
+ case Intrinsic::amdgcn_image_gather4_l:
+ case Intrinsic::amdgcn_image_gather4_b:
+ case Intrinsic::amdgcn_image_gather4_b_cl:
+ case Intrinsic::amdgcn_image_gather4_lz:
+
+ // Gather4 with comparison
+ case Intrinsic::amdgcn_image_gather4_c:
+ case Intrinsic::amdgcn_image_gather4_c_cl:
+ case Intrinsic::amdgcn_image_gather4_c_l:
+ case Intrinsic::amdgcn_image_gather4_c_b:
+ case Intrinsic::amdgcn_image_gather4_c_b_cl:
+ case Intrinsic::amdgcn_image_gather4_c_lz:
+
+ // Gather4 with offsets
+ case Intrinsic::amdgcn_image_gather4_o:
+ case Intrinsic::amdgcn_image_gather4_cl_o:
+ case Intrinsic::amdgcn_image_gather4_l_o:
+ case Intrinsic::amdgcn_image_gather4_b_o:
+ case Intrinsic::amdgcn_image_gather4_b_cl_o:
+ case Intrinsic::amdgcn_image_gather4_lz_o:
+
+ // Gather4 with comparison and offsets
+ case Intrinsic::amdgcn_image_gather4_c_o:
+ case Intrinsic::amdgcn_image_gather4_c_cl_o:
+ case Intrinsic::amdgcn_image_gather4_c_l_o:
+ case Intrinsic::amdgcn_image_gather4_c_b_o:
+ case Intrinsic::amdgcn_image_gather4_c_b_cl_o:
+ case Intrinsic::amdgcn_image_gather4_c_lz_o: {
+ SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = MVT::getVT(CI.getType());
+ Info.ptrVal = MFI->getImagePSV(
+ *MF.getSubtarget<SISubtarget>().getInstrInfo(),
+ CI.getArgOperand(1));
+ Info.align = 0;
+ Info.flags = MachineMemOperand::MOLoad |
+ MachineMemOperand::MODereferenceable;
+ return true;
+ }
+ case Intrinsic::amdgcn_image_store:
+ case Intrinsic::amdgcn_image_store_mip: {
+ SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ Info.opc = ISD::INTRINSIC_VOID;
+ Info.memVT = MVT::getVT(CI.getArgOperand(0)->getType());
+ Info.ptrVal = MFI->getImagePSV(
+ *MF.getSubtarget<SISubtarget>().getInstrInfo(),
+ CI.getArgOperand(2));
+ Info.flags = MachineMemOperand::MOStore |
+ MachineMemOperand::MODereferenceable;
+ Info.align = 0;
+ return true;
+ }
+ case Intrinsic::amdgcn_image_atomic_swap:
+ case Intrinsic::amdgcn_image_atomic_add:
+ case Intrinsic::amdgcn_image_atomic_sub:
+ case Intrinsic::amdgcn_image_atomic_smin:
+ case Intrinsic::amdgcn_image_atomic_umin:
+ case Intrinsic::amdgcn_image_atomic_smax:
+ case Intrinsic::amdgcn_image_atomic_umax:
+ case Intrinsic::amdgcn_image_atomic_and:
+ case Intrinsic::amdgcn_image_atomic_or:
+ case Intrinsic::amdgcn_image_atomic_xor:
+ case Intrinsic::amdgcn_image_atomic_inc:
+ case Intrinsic::amdgcn_image_atomic_dec: {
+ SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = MVT::getVT(CI.getType());
+ Info.ptrVal = MFI->getImagePSV(
+ *MF.getSubtarget<SISubtarget>().getInstrInfo(),
+ CI.getArgOperand(2));
+
+ Info.flags = MachineMemOperand::MOLoad |
+ MachineMemOperand::MOStore |
+ MachineMemOperand::MODereferenceable;
+
+ // XXX - Should this be volatile without known ordering?
+ Info.flags |= MachineMemOperand::MOVolatile;
+ return true;
+ }
+ case Intrinsic::amdgcn_image_atomic_cmpswap: {
+ SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = MVT::getVT(CI.getType());
+ Info.ptrVal = MFI->getImagePSV(
+ *MF.getSubtarget<SISubtarget>().getInstrInfo(),
+ CI.getArgOperand(3));
+
+ Info.flags = MachineMemOperand::MOLoad |
+ MachineMemOperand::MOStore |
+ MachineMemOperand::MODereferenceable;
+
+ // XXX - Should this be volatile without known ordering?
+ Info.flags |= MachineMemOperand::MOVolatile;
+ return true;
+ }
default:
return false;
}
@@ -2946,24 +3097,12 @@ MachineBasicBlock *SITargetLowering::Emi
SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
if (TII->isMIMG(MI)) {
- if (!MI.memoperands_empty())
- return BB;
+ if (MI.memoperands_empty() && MI.mayLoadOrStore()) {
+ report_fatal_error("missing mem operand from MIMG instruction");
+ }
// Add a memoperand for mimg instructions so that they aren't assumed to
// be ordered memory instuctions.
- MachinePointerInfo PtrInfo(MFI->getImagePSV());
- MachineMemOperand::Flags Flags = MachineMemOperand::MODereferenceable;
- if (MI.mayStore())
- Flags |= MachineMemOperand::MOStore;
-
- if (MI.mayLoad())
- Flags |= MachineMemOperand::MOLoad;
-
- if (Flags != MachineMemOperand::MODereferenceable) {
- auto MMO = MF->getMachineMemOperand(PtrInfo, Flags, 0, 0);
- MI.addMemOperand(*MF, MMO);
- }
-
return BB;
}
Modified: llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp?rev=321555&r1=321554&r2=321555&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp Fri Dec 29 09:18:14 2017
@@ -29,7 +29,6 @@ using namespace llvm;
SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
: AMDGPUMachineFunction(MF),
BufferPSV(*(MF.getSubtarget().getInstrInfo())),
- ImagePSV(*(MF.getSubtarget().getInstrInfo())),
PrivateSegmentBuffer(false),
DispatchPtr(false),
QueuePtr(false),
Modified: llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.h?rev=321555&r1=321554&r2=321555&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.h Fri Dec 29 09:18:14 2017
@@ -34,12 +34,14 @@ namespace llvm {
class MachineFrameInfo;
class MachineFunction;
+class SIInstrInfo;
class TargetRegisterClass;
class AMDGPUImagePseudoSourceValue : public PseudoSourceValue {
public:
+ // TODO: Is the img rsrc useful?
explicit AMDGPUImagePseudoSourceValue(const TargetInstrInfo &TII) :
- PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) { }
+ PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) {}
bool isConstant(const MachineFrameInfo *) const override {
// This should probably be true for most images, but we will start by being
@@ -136,7 +138,10 @@ class SIMachineFunctionInfo final : publ
std::array<int, 3> DebuggerWorkItemIDStackObjectIndices = {{0, 0, 0}};
AMDGPUBufferPseudoSourceValue BufferPSV;
- AMDGPUImagePseudoSourceValue ImagePSV;
+
+ DenseMap<const Value *,
+ std::unique_ptr<const AMDGPUImagePseudoSourceValue>> ImagePSVs;
+
private:
unsigned LDSWaveSpillSize = 0;
@@ -629,12 +634,18 @@ public:
return LDSWaveSpillSize;
}
+ // FIXME: These should be unique
const AMDGPUBufferPseudoSourceValue *getBufferPSV() const {
return &BufferPSV;
}
- const AMDGPUImagePseudoSourceValue *getImagePSV() const {
- return &ImagePSV;
+ const AMDGPUImagePseudoSourceValue *getImagePSV(const SIInstrInfo &TII,
+ const Value *ImgRsrc) {
+ assert(ImgRsrc);
+ auto PSV = ImagePSVs.try_emplace(
+ ImgRsrc,
+ llvm::make_unique<AMDGPUImagePseudoSourceValue>(TII));
+ return PSV.first->second.get();
}
};
More information about the llvm-commits
mailing list