[llvm] r321555 - AMDGPU: Implement getTgtMemIntrinsic for images

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Fri Dec 29 09:18:14 PST 2017


Author: arsenm
Date: Fri Dec 29 09:18:14 2017
New Revision: 321555

URL: http://llvm.org/viewvc/llvm-project?rev=321555&view=rev
Log:
AMDGPU: Implement getTgtMemIntrinsic for images

Currently all images are lowered to have a single
image PseudoSourceValue. Image stores happen to have
overly strict mayLoad/mayStore/hasSideEffects flags
set on them, so this happens to work. When these
are fixed to be correct, the scheduler breaks
this because the identical PSVs are assumed to
be the same address. These need to be unique
to the image resource value.

Modified:
    llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td
    llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
    llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
    llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.h

Modified: llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td?rev=321555&r1=321554&r2=321555&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td (original)
+++ llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td Fri Dec 29 09:18:14 2017
@@ -304,7 +304,8 @@ class AMDGPUImageLoad<bit NoMem = 0> : I
    llvm_i1_ty,        // slc(imm)
    llvm_i1_ty,        // lwe(imm)
    llvm_i1_ty],       // da(imm)
-  !if(NoMem, [IntrNoMem], [IntrReadMem])>;
+  !if(NoMem, [IntrNoMem], [IntrReadMem]), "",
+  !if(NoMem, [], [SDNPMemOperand])>;
 
 def int_amdgcn_image_load : AMDGPUImageLoad;
 def int_amdgcn_image_load_mip : AMDGPUImageLoad;
@@ -320,7 +321,7 @@ class AMDGPUImageStore : Intrinsic <
    llvm_i1_ty,        // slc(imm)
    llvm_i1_ty,        // lwe(imm)
    llvm_i1_ty],       // da(imm)
-  []>;
+  [IntrWriteMem], "", [SDNPMemOperand]>;
 
 def int_amdgcn_image_store : AMDGPUImageStore;
 def int_amdgcn_image_store_mip : AMDGPUImageStore;
@@ -336,7 +337,8 @@ class AMDGPUImageSample<bit NoMem = 0> :
      llvm_i1_ty,        // slc(imm)
      llvm_i1_ty,        // lwe(imm)
      llvm_i1_ty],       // da(imm)
-     !if(NoMem, [IntrNoMem], [IntrReadMem])>;
+     !if(NoMem, [IntrNoMem], [IntrReadMem]), "",
+     !if(NoMem, [], [SDNPMemOperand])>;
 
 // Basic sample
 def int_amdgcn_image_sample : AMDGPUImageSample;
@@ -428,7 +430,7 @@ class AMDGPUImageAtomic : Intrinsic <
    llvm_i1_ty,        // r128(imm)
    llvm_i1_ty,        // da(imm)
    llvm_i1_ty],       // slc(imm)
-  []>;
+  [], "", [SDNPMemOperand]>;
 
 def int_amdgcn_image_atomic_swap : AMDGPUImageAtomic;
 def int_amdgcn_image_atomic_add : AMDGPUImageAtomic;
@@ -451,7 +453,7 @@ def int_amdgcn_image_atomic_cmpswap : In
    llvm_i1_ty,        // r128(imm)
    llvm_i1_ty,        // da(imm)
    llvm_i1_ty],       // slc(imm)
-  []>;
+  [], "", [SDNPMemOperand]>;
 
 class AMDGPUBufferLoad : Intrinsic <
   [llvm_anyfloat_ty],

Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=321555&r1=321554&r2=321555&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Fri Dec 29 09:18:14 2017
@@ -575,6 +575,157 @@ bool SITargetLowering::getTgtMemIntrinsi
 
     return true;
   }
+
+  // Image load.
+  case Intrinsic::amdgcn_image_load:
+  case Intrinsic::amdgcn_image_load_mip:
+
+  // Sample.
+  case Intrinsic::amdgcn_image_sample:
+  case Intrinsic::amdgcn_image_sample_cl:
+  case Intrinsic::amdgcn_image_sample_d:
+  case Intrinsic::amdgcn_image_sample_d_cl:
+  case Intrinsic::amdgcn_image_sample_l:
+  case Intrinsic::amdgcn_image_sample_b:
+  case Intrinsic::amdgcn_image_sample_b_cl:
+  case Intrinsic::amdgcn_image_sample_lz:
+  case Intrinsic::amdgcn_image_sample_cd:
+  case Intrinsic::amdgcn_image_sample_cd_cl:
+
+    // Sample with comparison.
+  case Intrinsic::amdgcn_image_sample_c:
+  case Intrinsic::amdgcn_image_sample_c_cl:
+  case Intrinsic::amdgcn_image_sample_c_d:
+  case Intrinsic::amdgcn_image_sample_c_d_cl:
+  case Intrinsic::amdgcn_image_sample_c_l:
+  case Intrinsic::amdgcn_image_sample_c_b:
+  case Intrinsic::amdgcn_image_sample_c_b_cl:
+  case Intrinsic::amdgcn_image_sample_c_lz:
+  case Intrinsic::amdgcn_image_sample_c_cd:
+  case Intrinsic::amdgcn_image_sample_c_cd_cl:
+
+    // Sample with offsets.
+  case Intrinsic::amdgcn_image_sample_o:
+  case Intrinsic::amdgcn_image_sample_cl_o:
+  case Intrinsic::amdgcn_image_sample_d_o:
+  case Intrinsic::amdgcn_image_sample_d_cl_o:
+  case Intrinsic::amdgcn_image_sample_l_o:
+  case Intrinsic::amdgcn_image_sample_b_o:
+  case Intrinsic::amdgcn_image_sample_b_cl_o:
+  case Intrinsic::amdgcn_image_sample_lz_o:
+  case Intrinsic::amdgcn_image_sample_cd_o:
+  case Intrinsic::amdgcn_image_sample_cd_cl_o:
+
+    // Sample with comparison and offsets.
+  case Intrinsic::amdgcn_image_sample_c_o:
+  case Intrinsic::amdgcn_image_sample_c_cl_o:
+  case Intrinsic::amdgcn_image_sample_c_d_o:
+  case Intrinsic::amdgcn_image_sample_c_d_cl_o:
+  case Intrinsic::amdgcn_image_sample_c_l_o:
+  case Intrinsic::amdgcn_image_sample_c_b_o:
+  case Intrinsic::amdgcn_image_sample_c_b_cl_o:
+  case Intrinsic::amdgcn_image_sample_c_lz_o:
+  case Intrinsic::amdgcn_image_sample_c_cd_o:
+  case Intrinsic::amdgcn_image_sample_c_cd_cl_o:
+
+    // Basic gather4
+  case Intrinsic::amdgcn_image_gather4:
+  case Intrinsic::amdgcn_image_gather4_cl:
+  case Intrinsic::amdgcn_image_gather4_l:
+  case Intrinsic::amdgcn_image_gather4_b:
+  case Intrinsic::amdgcn_image_gather4_b_cl:
+  case Intrinsic::amdgcn_image_gather4_lz:
+
+    // Gather4 with comparison
+  case Intrinsic::amdgcn_image_gather4_c:
+  case Intrinsic::amdgcn_image_gather4_c_cl:
+  case Intrinsic::amdgcn_image_gather4_c_l:
+  case Intrinsic::amdgcn_image_gather4_c_b:
+  case Intrinsic::amdgcn_image_gather4_c_b_cl:
+  case Intrinsic::amdgcn_image_gather4_c_lz:
+
+    // Gather4 with offsets
+  case Intrinsic::amdgcn_image_gather4_o:
+  case Intrinsic::amdgcn_image_gather4_cl_o:
+  case Intrinsic::amdgcn_image_gather4_l_o:
+  case Intrinsic::amdgcn_image_gather4_b_o:
+  case Intrinsic::amdgcn_image_gather4_b_cl_o:
+  case Intrinsic::amdgcn_image_gather4_lz_o:
+
+    // Gather4 with comparison and offsets
+  case Intrinsic::amdgcn_image_gather4_c_o:
+  case Intrinsic::amdgcn_image_gather4_c_cl_o:
+  case Intrinsic::amdgcn_image_gather4_c_l_o:
+  case Intrinsic::amdgcn_image_gather4_c_b_o:
+  case Intrinsic::amdgcn_image_gather4_c_b_cl_o:
+  case Intrinsic::amdgcn_image_gather4_c_lz_o: {
+    SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+    Info.opc = ISD::INTRINSIC_W_CHAIN;
+    Info.memVT = MVT::getVT(CI.getType());
+    Info.ptrVal = MFI->getImagePSV(
+      *MF.getSubtarget<SISubtarget>().getInstrInfo(),
+      CI.getArgOperand(1));
+    Info.align = 0;
+    Info.flags = MachineMemOperand::MOLoad |
+                 MachineMemOperand::MODereferenceable;
+    return true;
+  }
+  case Intrinsic::amdgcn_image_store:
+  case Intrinsic::amdgcn_image_store_mip: {
+    SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+    Info.opc = ISD::INTRINSIC_VOID;
+    Info.memVT = MVT::getVT(CI.getArgOperand(0)->getType());
+    Info.ptrVal = MFI->getImagePSV(
+      *MF.getSubtarget<SISubtarget>().getInstrInfo(),
+      CI.getArgOperand(2));
+    Info.flags = MachineMemOperand::MOStore |
+                 MachineMemOperand::MODereferenceable;
+    Info.align = 0;
+    return true;
+  }
+  case Intrinsic::amdgcn_image_atomic_swap:
+  case Intrinsic::amdgcn_image_atomic_add:
+  case Intrinsic::amdgcn_image_atomic_sub:
+  case Intrinsic::amdgcn_image_atomic_smin:
+  case Intrinsic::amdgcn_image_atomic_umin:
+  case Intrinsic::amdgcn_image_atomic_smax:
+  case Intrinsic::amdgcn_image_atomic_umax:
+  case Intrinsic::amdgcn_image_atomic_and:
+  case Intrinsic::amdgcn_image_atomic_or:
+  case Intrinsic::amdgcn_image_atomic_xor:
+  case Intrinsic::amdgcn_image_atomic_inc:
+  case Intrinsic::amdgcn_image_atomic_dec: {
+    SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+    Info.opc = ISD::INTRINSIC_W_CHAIN;
+    Info.memVT = MVT::getVT(CI.getType());
+    Info.ptrVal = MFI->getImagePSV(
+      *MF.getSubtarget<SISubtarget>().getInstrInfo(),
+      CI.getArgOperand(2));
+
+    Info.flags = MachineMemOperand::MOLoad |
+                 MachineMemOperand::MOStore |
+                 MachineMemOperand::MODereferenceable;
+
+    // XXX - Should this be volatile without known ordering?
+    Info.flags |= MachineMemOperand::MOVolatile;
+    return true;
+  }
+  case Intrinsic::amdgcn_image_atomic_cmpswap: {
+    SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+    Info.opc = ISD::INTRINSIC_W_CHAIN;
+    Info.memVT = MVT::getVT(CI.getType());
+    Info.ptrVal = MFI->getImagePSV(
+      *MF.getSubtarget<SISubtarget>().getInstrInfo(),
+      CI.getArgOperand(3));
+
+    Info.flags = MachineMemOperand::MOLoad |
+                 MachineMemOperand::MOStore |
+                 MachineMemOperand::MODereferenceable;
+
+    // XXX - Should this be volatile without known ordering?
+    Info.flags |= MachineMemOperand::MOVolatile;
+    return true;
+  }
   default:
     return false;
   }
@@ -2946,24 +3097,12 @@ MachineBasicBlock *SITargetLowering::Emi
   SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
 
   if (TII->isMIMG(MI)) {
-      if (!MI.memoperands_empty())
-        return BB;
+    if (MI.memoperands_empty() && MI.mayLoadOrStore()) {
+      report_fatal_error("missing mem operand from MIMG instruction");
+    }
     // Add a memoperand for mimg instructions so that they aren't assumed to
     // be ordered memory instuctions.
 
-    MachinePointerInfo PtrInfo(MFI->getImagePSV());
-    MachineMemOperand::Flags Flags = MachineMemOperand::MODereferenceable;
-    if (MI.mayStore())
-      Flags |= MachineMemOperand::MOStore;
-
-    if (MI.mayLoad())
-      Flags |= MachineMemOperand::MOLoad;
-
-    if (Flags != MachineMemOperand::MODereferenceable) {
-      auto MMO = MF->getMachineMemOperand(PtrInfo, Flags, 0, 0);
-      MI.addMemOperand(*MF, MMO);
-    }
-
     return BB;
   }
 

Modified: llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp?rev=321555&r1=321554&r2=321555&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp Fri Dec 29 09:18:14 2017
@@ -29,7 +29,6 @@ using namespace llvm;
 SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
   : AMDGPUMachineFunction(MF),
     BufferPSV(*(MF.getSubtarget().getInstrInfo())),
-    ImagePSV(*(MF.getSubtarget().getInstrInfo())),
     PrivateSegmentBuffer(false),
     DispatchPtr(false),
     QueuePtr(false),

Modified: llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.h?rev=321555&r1=321554&r2=321555&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.h Fri Dec 29 09:18:14 2017
@@ -34,12 +34,14 @@ namespace llvm {
 
 class MachineFrameInfo;
 class MachineFunction;
+class SIInstrInfo;
 class TargetRegisterClass;
 
 class AMDGPUImagePseudoSourceValue : public PseudoSourceValue {
 public:
+  // TODO: Is the img rsrc useful?
   explicit AMDGPUImagePseudoSourceValue(const TargetInstrInfo &TII) :
-    PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) { }
+    PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) {}
 
   bool isConstant(const MachineFrameInfo *) const override {
     // This should probably be true for most images, but we will start by being
@@ -136,7 +138,10 @@ class SIMachineFunctionInfo final : publ
   std::array<int, 3> DebuggerWorkItemIDStackObjectIndices = {{0, 0, 0}};
 
   AMDGPUBufferPseudoSourceValue BufferPSV;
-  AMDGPUImagePseudoSourceValue ImagePSV;
+
+  DenseMap<const Value *,
+           std::unique_ptr<const AMDGPUImagePseudoSourceValue>> ImagePSVs;
+
 
 private:
   unsigned LDSWaveSpillSize = 0;
@@ -629,12 +634,18 @@ public:
     return LDSWaveSpillSize;
   }
 
+  // FIXME: These should be unique
   const AMDGPUBufferPseudoSourceValue *getBufferPSV() const {
     return &BufferPSV;
   }
 
-  const AMDGPUImagePseudoSourceValue *getImagePSV() const {
-    return &ImagePSV;
+  const AMDGPUImagePseudoSourceValue *getImagePSV(const SIInstrInfo &TII,
+                                                  const Value *ImgRsrc) {
+    assert(ImgRsrc);
+    auto PSV = ImagePSVs.try_emplace(
+      ImgRsrc,
+      llvm::make_unique<AMDGPUImagePseudoSourceValue>(TII));
+    return PSV.first->second.get();
   }
 };
 




More information about the llvm-commits mailing list