[llvm] Image attribute access for the AMDGPU backend

Wed Jul 8 04:09:34 PDT 2015

> Why do we need separate 2d and 3d intrinsics?

libclc calls the intrinsics using the image argument, which has a
different type for 2d and 3d images. At the LLVM IR level, the signature of
llvm.AMDGPU.get.image.* will either contain an image2d_t or image3d_t
argument, i.e.
  declare i32 @llvm.AMDGPU.get.image.width(%opencl.image2d_t addrspace(1)*)
So if I want to get the width of a 3d image, LLVM has to compile in a
bitcast before the call to the intrinsic, i.e.
  %0 = bitcast %opencl.image3d_t addrspace(1)* %image to
%opencl.image2d_t addrspace(1)*
  %call = tail call i32 @llvm.AMDGPU.get.image.width(%opencl.image2d_t
addrspace(1)* %0)
Which means the pass will find the bitcast as a user instead of the call.

On Wed, Jul 8, 2015 at 2:13 AM, Tom Stellard <tom at stellard.net> wrote:
> On Wed, Jul 08, 2015 at 12:28:48AM +0200, Zoltán Gilián wrote:
>> I need a pass to obtain compile-time information about the ID of the
>> image in question. As far as I know, if the location of an image
>> attribute is known at kernel compilation time, the CF instruction
>> initiating the ALU clause which uses the attribute can lock the data
>> into the constant cache, so the attribute value is available to the
>> ALU clause.
>> Otherwise, if I eliminate this pass, the attribute data has to be
>> fetched using a vertex fetch instruction (using the run-time value
>> stored in the image kernel argument), so a vertex fetch clause is
>> needed before the ALU one using the attribute value.
>> Furthermore, the AMD Catalyst driver works like the former: the
>> location of the attribute values is known at kernel compilation time,
>> so the constant cache locking mechanism can be used.
>> I could implement five different intrinsics for the five attributes,
>> but that doesn't affect the problem above, I would still need a
>> conversion pass to get a compile-time constant image ID.
>> I believe I'll need a compile-time constant resource ID in case of
>> image reading (texture resource ID) and writing (RAT ID) anyway, so
>> I'll definitely need something similar to this.
>> I could implement an AMDGPU.get.image.id intrinsic, which could be
>> used to implement the image builtins, but I would still need a pass to
>> replace the calls to that with some compile-time ID. Or at least I
>> don't see how else it can be done.
>
> Thanks for the explanation.  I think this conversion pass is fine.  See
> the inline comments.
>
>> What are the problems with having a conversion pass?
>>
>> On Tue, Jul 7, 2015 at 9:33 PM, Tom Stellard <tom at stellard.net> wrote:
>> > On Thu, Jun 18, 2015 at 01:27:27PM +0200, Zoltan Gilian wrote:
>> >> Added an intrinsic to load an image attribute stored as an implicit kernel
>> >> argument.
>> >> Added a pass to the AMDGPU backend to replace image attribute getter
>> >> pseudointrinsics with the new image attribute reader intrinsic.
>> >
>> > Hi,
>> >
>> > Why not expose a high-level builtin for each image intrinsic
>> > rather than using one builtin and having a conversion pass?
>> >
>> > -Tom
>> >> ---
>> >>  include/llvm/IR/IntrinsicsR600.td                  |   5 +
>> >>  lib/Target/AMDGPU/AMDGPU.h                         |   1 +
>> >>  lib/Target/AMDGPU/AMDGPUTargetMachine.cpp          |   1 +
>> >>  lib/Target/AMDGPU/R600ISelLowering.cpp             |  14 ++
>> >>  .../R600ImageAttributeIntrinsicsReplacer.cpp       | 152 +++++++++++++++++++
>> >>  test/CodeGen/AMDGPU/image-attributes.ll            | 167 +++++++++++++++++++++
>> >>  6 files changed, 340 insertions(+)
>> >>  create mode 100644 lib/Target/AMDGPU/R600ImageAttributeIntrinsicsReplacer.cpp
>> >>  create mode 100644 test/CodeGen/AMDGPU/image-attributes.ll
>> >>
>> >> diff --git a/include/llvm/IR/IntrinsicsR600.td b/include/llvm/IR/IntrinsicsR600.td
>> >> index 5055667..635cf16 100644
>> >> --- a/include/llvm/IR/IntrinsicsR600.td
>> >> +++ b/include/llvm/IR/IntrinsicsR600.td
>> >> @@ -33,6 +33,11 @@ defm int_r600_read_tgid : R600ReadPreloadRegisterIntrinsic_xyz <
>> >>                                         "__builtin_r600_read_tgid">;
>> >>  defm int_r600_read_tidig : R600ReadPreloadRegisterIntrinsic_xyz <
>> >>                                         "__builtin_r600_read_tidig">;
>> >> +
>> >> +def int_r600_read_image_attribute
>> >> +  : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>,
>> >> +    GCCBuiltin<"__builtin_r600_read_image_attribute">;
>> >> +
>> >>  } // End TargetPrefix = "r600"
>> >>
>> >>  let TargetPrefix = "AMDGPU" in {
>> >> diff --git a/lib/Target/AMDGPU/AMDGPU.h b/lib/Target/AMDGPU/AMDGPU.h
>> >> index 0a05d25..4b5c5aa 100644
>> >> --- a/lib/Target/AMDGPU/AMDGPU.h
>> >> +++ b/lib/Target/AMDGPU/AMDGPU.h
>> >> @@ -27,6 +27,7 @@ class TargetMachine;
>> >>
>> >>  // R600 Passes
>> >>  FunctionPass *createR600VectorRegMerger(TargetMachine &tm);
>> >> +FunctionPass *createR600ImageAttributeIntrinsicsReplacer();
>> >>  FunctionPass *createR600TextureIntrinsicsReplacer();
>> >>  FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm);
>> >>  FunctionPass *createR600EmitClauseMarkers();
>> >> diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
>> >> index a9a911a..89285ba 100644
>> >> --- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
>> >> +++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
>> >> @@ -202,6 +202,7 @@ bool AMDGPUPassConfig::addInstSelector() {
>> >>
>> >>  bool R600PassConfig::addPreISel() {
>> >>    AMDGPUPassConfig::addPreISel();
>> >> +  addPass(createR600ImageAttributeIntrinsicsReplacer());
>> >>    addPass(createR600TextureIntrinsicsReplacer());
>> >>    return false;
>> >>  }
>> >> diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp
>> >> index 8357b6d..8ef1ad8 100644
>> >> --- a/lib/Target/AMDGPU/R600ISelLowering.cpp
>> >> +++ b/lib/Target/AMDGPU/R600ISelLowering.cpp
>> >> @@ -818,6 +818,20 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
>> >>      case Intrinsic::AMDGPU_read_workdim:
>> >>        return LowerImplicitParameter(DAG, VT, DL, MFI->ABIArgOffset / 4);
>> >>
>> >> +    case Intrinsic::r600_read_image_attribute: {
>> >> +      // intrinsic argument 0 (node operand 1): image index
>> >> +      // intrinsic argument 1 (node operand 2): attribute index
>> >> +
>> >> +      uint64_t DWordOffset = MFI->ABIArgOffset / 4;
>> >> +      // Skip grid dim and grid offset.
>> >> +      DWordOffset += 4;
>
> I think we should add a helper function for retrieving the offset
> for the various implicit values (e.g. grid dim, grid offset, image
> attributes).  The function should be added to AMDGPUISelLowering.cpp
> so it can be shared between R600 and SI.  You can use
> SIRegisterInfo::getPreloadedValue() as a template for what it should
> look like.
>
> I think it would make sense to add this function in a separate patch
> and implement support for grid dim and grid offset.  Then in this patch
> you can update the function to handle image attributes too.
>
>> >> +      // There are 5 dword attributes per image.
>> >> +      DWordOffset += 5 * Op.getConstantOperandVal(1);
>> >> +      // Skip to the requested attribute.
>> >> +      DWordOffset += Op.getConstantOperandVal(2);
>> >> +      return LowerImplicitParameter(DAG, VT, DL, DWordOffset);
>> >> +    }
>> >> +
>> >>      case Intrinsic::r600_read_tgid_x:
>> >>        return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
>> >>                                    AMDGPU::T1_X, VT);
>> >> diff --git a/lib/Target/AMDGPU/R600ImageAttributeIntrinsicsReplacer.cpp b/lib/Target/AMDGPU/R600ImageAttributeIntrinsicsReplacer.cpp
>> >> new file mode 100644
>> >> index 0000000..9727606
>> >> --- /dev/null
>> >> +++ b/lib/Target/AMDGPU/R600ImageAttributeIntrinsicsReplacer.cpp
>> >> @@ -0,0 +1,152 @@
>> >> +//===-- R600ImageAttributeIntrinsicsReplacer.cpp --------------------------===//
>> >> +//
>> >> +//                     The LLVM Compiler Infrastructure
>> >> +//
>> >> +// This file is distributed under the University of Illinois Open Source
>> >> +// License. See LICENSE.TXT for details.
>> >> +//
>> >> +//===----------------------------------------------------------------------===//
>> >> +//
>> >> +/// \file
>> >> +/// This pass replaces image attribute getter pseudointrinsics with the
>> >> +/// r600_read_image_attribute intrinsic. The pseudointrinsics are used to
>> >> +/// implement OpenCL C get_image_* builtins to avoid using mangled names here.
>> >> +///
>
> I think you can drop this last sentence.
>
>> >> +/// The r600_read_image_attribute intrinsic identifies the image in question
>> >> +/// using an index of the argument among image arguments. For each image
>> >> +/// argument, calls to getters using that particular argument are replaced
>> >> +/// with calls to the r600_read_image_attribute intrinsic. The image index is
>> >> +/// passed as a parameter along with the attribute index.
>> >> +//===----------------------------------------------------------------------===//
>> >> +
>> >> +#include "AMDGPU.h"
>> >> +#include "llvm/ADT/StringMap.h"
>> >> +#include "llvm/Analysis/Passes.h"
>> >> +#include "llvm/IR/Function.h"
>> >> +#include "llvm/IR/IRBuilder.h"
>> >> +#include "llvm/IR/Intrinsics.h"
>> >> +#include "llvm/IR/Module.h"
>> >> +
>> >> +#include <vector>
>> >> +
>> >> +using namespace llvm;
>> >> +
>> >> +namespace {
>> >> +
>> >> +enum ImageAttribute {
>> >> +  WIDTH = 0,
>> >> +  HEIGHT = 1,
>> >> +  DEPTH = 2,
>> >> +  CHANNEL_DATA_TYPE = 3,
>> >> +  CHANNEL_ORDER = 4
>> >> +};
>> >> +
>> >> +// Fixme: this should be replaced with initializer_list initialization of
>> >> +// StringMap if and when it gets one.
>> >> +#define INS2MAP(m, a, b) ((m).insert(std::make_pair(a, b)))
>> >> +StringMap<ImageAttribute> InitAttributeFromIntrinsic() {
>> >> +  StringMap<ImageAttribute> M;
>> >> +  INS2MAP(M, "llvm.AMDGPU.get.image.width.2d", WIDTH);
>> >> +  INS2MAP(M, "llvm.AMDGPU.get.image.width.3d", WIDTH);
>> >> +  INS2MAP(M, "llvm.AMDGPU.get.image.height.2d", HEIGHT);
>> >> +  INS2MAP(M, "llvm.AMDGPU.get.image.height.3d", HEIGHT);
>> >> +  INS2MAP(M, "llvm.AMDGPU.get.image.depth.3d", DEPTH);
>> >> +  INS2MAP(M, "llvm.AMDGPU.get.image.channel.data.type.2d", CHANNEL_DATA_TYPE);
>> >> +  INS2MAP(M, "llvm.AMDGPU.get.image.channel.data.type.3d", CHANNEL_DATA_TYPE);
>> >> +  INS2MAP(M, "llvm.AMDGPU.get.image.channel.order.2d", CHANNEL_ORDER);
>> >> +  INS2MAP(M, "llvm.AMDGPU.get.image.channel.order.3d", CHANNEL_ORDER);
>
> Why do we need separate 2d and 3d intrinsics?
>
> -Tom
>> >> +  return M;
>> >> +}
>> >> +#undef INS2MAP
>> >> +auto AttributeFromIntrinsic = InitAttributeFromIntrinsic();
>> >> +
>> >> +class R600ImageAttributeIntrinsicsReplacer : public FunctionPass {
>> >> +  static char ID;
>> >> +
>> >> +  Type *Int32Type;
>> >> +  Function *ReadAttributeFunc;
>> >> +
>> >> +public:
>> >> +  R600ImageAttributeIntrinsicsReplacer() : FunctionPass(ID) {}
>> >> +
>> >> +  bool doInitialization(Module &M) override {
>> >> +    Int32Type = Type::getInt32Ty(M.getContext());
>> >> +
>> >> +    // Create Function for the image attribute reader intrinsic.
>> >> +    ReadAttributeFunc =
>> >> +        Intrinsic::getDeclaration(&M, Intrinsic::r600_read_image_attribute);
>> >> +
>> >> +    return true;
>> >> +  }
>> >> +
>> >> +  unsigned IsImageIntrinsicCall(const CallInst *CallInst,
>> >> +                                ImageAttribute &Attribute) {
>> >> +    StringRef Name = CallInst->getCalledFunction()->getName();
>> >> +    auto It = AttributeFromIntrinsic.find(Name);
>> >> +    if (It == AttributeFromIntrinsic.end()) {
>> >> +      return false;
>> >> +    } else {
>> >> +      Attribute = It->second;
>> >> +      return true;
>> >> +    }
>> >> +  }
>> >> +
>> >> +  bool runOnFunction(Function &F) override {
>> >> +    bool modified = false;
>> >> +    unsigned NumImageArgs = 0;
>> >> +
>> >> +    std::vector<Instruction *> InstsToErase;
>> >> +
>> >> +    for (const auto &Arg : F.args()) {
>> >> +
>> >> +      // Skip non-image types.
>> >> +      Type *ArgType = Arg.getType();
>> >> +      if (!ArgType->isPointerTy())
>> >> +        continue;
>> >> +      Type *ElemType = ArgType->getPointerElementType();
>> >> +      if (!ElemType->isStructTy())
>> >> +        continue;
>> >> +      const llvm::StringRef &TypeName = ElemType->getStructName();
>> >> +      if (!TypeName.startswith("opencl.image2d_t") &&
>> >> +          !TypeName.startswith("opencl.image3d_t"))
>> >> +        continue;
>> >> +      auto ImageIndex = NumImageArgs++;
>> >> +
>> >> +      // Iterate uses to find attribute getters.
>> >> +      for (const auto &Use : Arg.uses()) {
>> >> +
>> >> +        // Only process calls to attribute intrinsics.
>> >> +        auto Inst = dyn_cast<CallInst>(Use.getUser());
>> >> +        if (!Inst)
>> >> +          continue;
>> >> +        ImageAttribute AttributeIndex;
>> >> +        if (!IsImageIntrinsicCall(Inst, AttributeIndex))
>> >> +          continue;
>> >> +
>> >> +        // Replace the instruction with a call to the image attribute reader.
>> >> +        IRBuilder<> Builder(Inst);
>> >> +        Value *Args[] = {ConstantInt::get(Int32Type, ImageIndex),
>> >> +                         ConstantInt::get(Int32Type, AttributeIndex)};
>> >> +        Inst->replaceAllUsesWith(Builder.CreateCall(ReadAttributeFunc, Args));
>> >> +        InstsToErase.push_back(Inst);
>> >> +        modified = true;
>> >> +      }
>> >> +    }
>> >> +    for (unsigned i = 0; i < InstsToErase.size(); ++i) {
>> >> +      InstsToErase[i]->eraseFromParent();
>> >> +    }
>> >> +
>> >> +    return modified;
>> >> +  }
>> >> +
>> >> +  const char *getPassName() const override {
>> >> +    return "R600 Image Attribute Intrinsics Replacer";
>> >> +  }
>> >> +};
>> >> +
>> >> +char R600ImageAttributeIntrinsicsReplacer::ID = 0;
>> >> +}
>> >> +
>> >> +FunctionPass *llvm::createR600ImageAttributeIntrinsicsReplacer() {
>> >> +  return new R600ImageAttributeIntrinsicsReplacer();
>> >> +}
>> >> diff --git a/test/CodeGen/AMDGPU/image-attributes.ll b/test/CodeGen/AMDGPU/image-attributes.ll
>> >> new file mode 100644
>> >> index 0000000..bdde5c6
>> >> --- /dev/null
>> >> +++ b/test/CodeGen/AMDGPU/image-attributes.ll
>> >> @@ -0,0 +1,167 @@
>> >> +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
>> >> +
>> >> +; === WIDTH =================================================================
>> >> +; 9 implicit args + 2 explicit args + 1 grid dim + 3 grid offset = 15 dwords
>> >> +; First width at dword index 15+0 -> KC0[3].W
>> >> +
>> >> +; FUNC-LABEL: {{^}}width_2d:
>> >> +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
>> >> +; EG: MOV [[VAL]], KC0[3].W
>> >> +define void @width_2d (%opencl.image2d_t addrspace(1)* %in,
>> >> +                                 i32 addrspace(1)* %out) {
>> >> +entry:
>> >> +  %0 = call i32 @llvm.AMDGPU.get.image.width.2d(
>> >> +      %opencl.image2d_t addrspace(1)* %in) #0
>> >> +  store i32 %0, i32 addrspace(1)* %out
>> >> +  ret void
>> >> +}
>> >> +
>> >> +; FUNC-LABEL: {{^}}width_3d:
>> >> +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
>> >> +; EG: MOV [[VAL]], KC0[3].W
>> >> +define void @width_3d (%opencl.image3d_t addrspace(1)* %in,
>> >> +                                 i32 addrspace(1)* %out) {
>> >> +entry:
>> >> +  %0 = call i32 @llvm.AMDGPU.get.image.width.3d(
>> >> +      %opencl.image3d_t addrspace(1)* %in) #0
>> >> +  store i32 %0, i32 addrspace(1)* %out
>> >> +  ret void
>> >> +}
>> >> +
>> >> +
>> >> +; === HEIGHT ================================================================
>> >> +; First height at dword index 15+1 -> KC0[4].X
>> >> +
>> >> +; FUNC-LABEL: {{^}}height_2d:
>> >> +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
>> >> +; EG: MOV [[VAL]], KC0[4].X
>> >> +define void @height_2d (%opencl.image2d_t addrspace(1)* %in,
>> >> +                                  i32 addrspace(1)* %out) {
>> >> +entry:
>> >> +  %0 = call i32 @llvm.AMDGPU.get.image.height.2d(
>> >> +      %opencl.image2d_t addrspace(1)* %in) #0
>> >> +  store i32 %0, i32 addrspace(1)* %out
>> >> +  ret void
>> >> +}
>> >> +
>> >> +; FUNC-LABEL: {{^}}height_3d:
>> >> +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
>> >> +; EG: MOV [[VAL]], KC0[4].X
>> >> +define void @height_3d (%opencl.image3d_t addrspace(1)* %in,
>> >> +                                  i32 addrspace(1)* %out) {
>> >> +entry:
>> >> +  %0 = call i32 @llvm.AMDGPU.get.image.height.3d(
>> >> +      %opencl.image3d_t addrspace(1)* %in) #0
>> >> +  store i32 %0, i32 addrspace(1)* %out
>> >> +  ret void
>> >> +}
>> >> +
>> >> +
>> >> +; === DEPTH ================================================================
>> >> +; First depth at dword index 15+2 -> KC0[4].Y
>> >> +
>> >> +; FUNC-LABEL: {{^}}depth_3d:
>> >> +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
>> >> +; EG: MOV [[VAL]], KC0[4].Y
>> >> +define void @depth_3d (%opencl.image3d_t addrspace(1)* %in,
>> >> +                                 i32 addrspace(1)* %out) {
>> >> +entry:
>> >> +  %0 = call i32 @llvm.AMDGPU.get.image.depth.3d(
>> >> +      %opencl.image3d_t addrspace(1)* %in) #0
>> >> +  store i32 %0, i32 addrspace(1)* %out
>> >> +  ret void
>> >> +}
>> >> +
>> >> +
>> >> +; === CHANNEL DATA TYPE =====================================================
>> >> +; First channel data type at dword index 15+3 -> KC0[4].Z
>> >> +
>> >> +; FUNC-LABEL: {{^}}data_type_2d:
>> >> +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
>> >> +; EG: MOV [[VAL]], KC0[4].Z
>> >> +define void @data_type_2d (%opencl.image2d_t addrspace(1)* %in,
>> >> +                                     i32 addrspace(1)* %out) {
>> >> +entry:
>> >> +  %0 = call i32 @llvm.AMDGPU.get.image.channel.data.type.2d(
>> >> +      %opencl.image2d_t addrspace(1)* %in) #0
>> >> +  store i32 %0, i32 addrspace(1)* %out
>> >> +  ret void
>> >> +}
>> >> +
>> >> +; FUNC-LABEL: {{^}}data_type_3d:
>> >> +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
>> >> +; EG: MOV [[VAL]], KC0[4].Z
>> >> +define void @data_type_3d (%opencl.image3d_t addrspace(1)* %in,
>> >> +                                     i32 addrspace(1)* %out) {
>> >> +entry:
>> >> +  %0 = call i32 @llvm.AMDGPU.get.image.channel.data.type.3d(
>> >> +      %opencl.image3d_t addrspace(1)* %in) #0
>> >> +  store i32 %0, i32 addrspace(1)* %out
>> >> +  ret void
>> >> +}
>> >> +
>> >> +
>> >> +; === CHANNEL ORDER =====================================================
>> >> +; First channel order at dword index 15+4 -> KC0[4].W
>> >> +
>> >> +; FUNC-LABEL: {{^}}channel_order_2d:
>> >> +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
>> >> +; EG: MOV [[VAL]], KC0[4].W
>> >> +define void @channel_order_2d (%opencl.image2d_t addrspace(1)* %in,
>> >> +                                         i32 addrspace(1)* %out) {
>> >> +entry:
>> >> +  %0 = call i32 @llvm.AMDGPU.get.image.channel.order.2d(
>> >> +      %opencl.image2d_t addrspace(1)* %in) #0
>> >> +  store i32 %0, i32 addrspace(1)* %out
>> >> +  ret void
>> >> +}
>> >> +
>> >> +; FUNC-LABEL: {{^}}channel_order_3d:
>> >> +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
>> >> +; EG: MOV [[VAL]], KC0[4].W
>> >> +define void @channel_order_3d (%opencl.image3d_t addrspace(1)* %in,
>> >> +                                         i32 addrspace(1)* %out) {
>> >> +entry:
>> >> +  %0 = call i32 @llvm.AMDGPU.get.image.channel.order.3d(
>> >> +      %opencl.image3d_t addrspace(1)* %in) #0
>> >> +  store i32 %0, i32 addrspace(1)* %out
>> >> +  ret void
>> >> +}
>> >> +
>> >> +
>> >> +; === 2ND IMAGE ==============================================================
>> >> +; 9 implicit args + 3 explicit args + 1 grid dim + 3 grid offset = 16 dwords
>> >> +; 16 dwords to first image attrib + 5 attribs for first image = 21 dwords
>> >> +; Height of the second image is at 21+1 -> KC0[5].Z
>> >> +;
>> >> +; FUNC-LABEL: {{^}}image_arg_2nd:
>> >> +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
>> >> +; EG: MOV [[VAL]], KC0[5].Z
>> >> +define void @image_arg_2nd (%opencl.image3d_t addrspace(1)* %in1,
>> >> +                            %opencl.image2d_t addrspace(1)* %in2,
>> >> +                            i32 addrspace(1)* %out) {
>> >> +entry:
>> >> +  %0 = call i32 @llvm.AMDGPU.get.image.height.2d(
>> >> +      %opencl.image2d_t addrspace(1)* %in2) #0
>> >> +  store i32 %0, i32 addrspace(1)* %out
>> >> +  ret void
>> >> +}
>> >> +
>> >> +%opencl.image2d_t = type opaque
>> >> +%opencl.image3d_t = type opaque
>> >> +
>> >> +declare i32 @llvm.AMDGPU.get.image.width.2d(%opencl.image2d_t addrspace(1)*) #0
>> >> +declare i32 @llvm.AMDGPU.get.image.width.3d(%opencl.image3d_t addrspace(1)*) #0
>> >> +declare i32 @llvm.AMDGPU.get.image.height.2d(%opencl.image2d_t addrspace(1)*) #0
>> >> +declare i32 @llvm.AMDGPU.get.image.height.3d(%opencl.image3d_t addrspace(1)*) #0
>> >> +declare i32 @llvm.AMDGPU.get.image.depth.3d(%opencl.image3d_t addrspace(1)*) #0
>> >> +declare i32 @llvm.AMDGPU.get.image.channel.data.type.2d(
>> >> +    %opencl.image2d_t addrspace(1)*) #0
>> >> +declare i32 @llvm.AMDGPU.get.image.channel.data.type.3d(
>> >> +    %opencl.image3d_t addrspace(1)*) #0
>> >> +declare i32 @llvm.AMDGPU.get.image.channel.order.2d(
>> >> +    %opencl.image2d_t addrspace(1)*) #0
>> >> +declare i32 @llvm.AMDGPU.get.image.channel.order.3d(
>> >> +    %opencl.image3d_t addrspace(1)*) #0
>> >> +
>> >> +attributes #0 = { readnone }
>> >> --
>> >> 2.4.2
>> >>