[llvm] Image attribute access for the AMDGPU backend

Tue Jul 7 12:33:14 PDT 2015

On Thu, Jun 18, 2015 at 01:27:27PM +0200, Zoltan Gilian wrote:
> Added an intrinsic to load an image attribute stored as an implicit kernel
> argument.
> Added a pass to the AMDGPU backend to replace image attribute getter
> pseudointrinsics with the new image attribute reader intrinsic.

Hi,

Why not expose a high-level builtin for each image intrinsic
rather than using one builtin and having a conversion pass?

-Tom
> ---
>  include/llvm/IR/IntrinsicsR600.td                  |   5 +
>  lib/Target/AMDGPU/AMDGPU.h                         |   1 +
>  lib/Target/AMDGPU/AMDGPUTargetMachine.cpp          |   1 +
>  lib/Target/AMDGPU/R600ISelLowering.cpp             |  14 ++
>  .../R600ImageAttributeIntrinsicsReplacer.cpp       | 152 +++++++++++++++++++
>  test/CodeGen/AMDGPU/image-attributes.ll            | 167 +++++++++++++++++++++
>  6 files changed, 340 insertions(+)
>  create mode 100644 lib/Target/AMDGPU/R600ImageAttributeIntrinsicsReplacer.cpp
>  create mode 100644 test/CodeGen/AMDGPU/image-attributes.ll
> 
> diff --git a/include/llvm/IR/IntrinsicsR600.td b/include/llvm/IR/IntrinsicsR600.td
> index 5055667..635cf16 100644
> --- a/include/llvm/IR/IntrinsicsR600.td
> +++ b/include/llvm/IR/IntrinsicsR600.td
> @@ -33,6 +33,11 @@ defm int_r600_read_tgid : R600ReadPreloadRegisterIntrinsic_xyz <
>                                         "__builtin_r600_read_tgid">;
>  defm int_r600_read_tidig : R600ReadPreloadRegisterIntrinsic_xyz <
>                                         "__builtin_r600_read_tidig">;
> +
> +def int_r600_read_image_attribute
> +  : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>,
> +    GCCBuiltin<"__builtin_r600_read_image_attribute">;
> +
>  } // End TargetPrefix = "r600"
>  
>  let TargetPrefix = "AMDGPU" in {
> diff --git a/lib/Target/AMDGPU/AMDGPU.h b/lib/Target/AMDGPU/AMDGPU.h
> index 0a05d25..4b5c5aa 100644
> --- a/lib/Target/AMDGPU/AMDGPU.h
> +++ b/lib/Target/AMDGPU/AMDGPU.h
> @@ -27,6 +27,7 @@ class TargetMachine;
>  
>  // R600 Passes
>  FunctionPass *createR600VectorRegMerger(TargetMachine &tm);
> +FunctionPass *createR600ImageAttributeIntrinsicsReplacer();
>  FunctionPass *createR600TextureIntrinsicsReplacer();
>  FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm);
>  FunctionPass *createR600EmitClauseMarkers();
> diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
> index a9a911a..89285ba 100644
> --- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
> +++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
> @@ -202,6 +202,7 @@ bool AMDGPUPassConfig::addInstSelector() {
>  
>  bool R600PassConfig::addPreISel() {
>    AMDGPUPassConfig::addPreISel();
> +  addPass(createR600ImageAttributeIntrinsicsReplacer());
>    addPass(createR600TextureIntrinsicsReplacer());
>    return false;
>  }
> diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp
> index 8357b6d..8ef1ad8 100644
> --- a/lib/Target/AMDGPU/R600ISelLowering.cpp
> +++ b/lib/Target/AMDGPU/R600ISelLowering.cpp
> @@ -818,6 +818,20 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
>      case Intrinsic::AMDGPU_read_workdim:
>        return LowerImplicitParameter(DAG, VT, DL, MFI->ABIArgOffset / 4);
>  
> +    case Intrinsic::r600_read_image_attribute: {
> +      // intrinsic argument 0 (node operand 1): image index
> +      // intrinsic argument 1 (node operand 2): attribute index
> +
> +      uint64_t DWordOffset = MFI->ABIArgOffset / 4;
> +      // Skip grid dim and grid offset.
> +      DWordOffset += 4;
> +      // There are 5 dword attributes per image.
> +      DWordOffset += 5 * Op.getConstantOperandVal(1);
> +      // Skip to the requested attribute.
> +      DWordOffset += Op.getConstantOperandVal(2);
> +      return LowerImplicitParameter(DAG, VT, DL, DWordOffset);
> +    }
> +
>      case Intrinsic::r600_read_tgid_x:
>        return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
>                                    AMDGPU::T1_X, VT);
> diff --git a/lib/Target/AMDGPU/R600ImageAttributeIntrinsicsReplacer.cpp b/lib/Target/AMDGPU/R600ImageAttributeIntrinsicsReplacer.cpp
> new file mode 100644
> index 0000000..9727606
> --- /dev/null
> +++ b/lib/Target/AMDGPU/R600ImageAttributeIntrinsicsReplacer.cpp
> @@ -0,0 +1,152 @@
> +//===-- R600ImageAttributeIntrinsicsReplacer.cpp --------------------------===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +/// \file
> +/// This pass replaces image attribute getter pseudointrinsics with the
> +/// r600_read_image_attribute intrinsic. The pseudointrinsics are used to
> +/// implement OpenCL C get_image_* builtins to avoid using mangled names here.
> +///
> +/// The r600_read_image_attribute intrinsic identifies the image in question
> +/// using an index of the argument among image arguments. For each image
> +/// argument, calls to getters using that particular argument are replaced
> +/// with calls to the r600_read_image_attribute intrinsic. The image index is
> +/// passed as a parameter along with the attribute index.
> +//===----------------------------------------------------------------------===//
> +
> +#include "AMDGPU.h"
> +#include "llvm/ADT/StringMap.h"
> +#include "llvm/Analysis/Passes.h"
> +#include "llvm/IR/Function.h"
> +#include "llvm/IR/IRBuilder.h"
> +#include "llvm/IR/Intrinsics.h"
> +#include "llvm/IR/Module.h"
> +
> +#include <vector>
> +
> +using namespace llvm;
> +
> +namespace {
> +
> +enum ImageAttribute {
> +  WIDTH = 0,
> +  HEIGHT = 1,
> +  DEPTH = 2,
> +  CHANNEL_DATA_TYPE = 3,
> +  CHANNEL_ORDER = 4
> +};
> +
> +// FIXME: this should be replaced with initializer_list initialization of
> +// StringMap if and when it gets one.
> +#define INS2MAP(m, a, b) ((m).insert(std::make_pair(a, b)))
> +StringMap<ImageAttribute> InitAttributeFromIntrinsic() {
> +  StringMap<ImageAttribute> M;
> +  INS2MAP(M, "llvm.AMDGPU.get.image.width.2d", WIDTH);
> +  INS2MAP(M, "llvm.AMDGPU.get.image.width.3d", WIDTH);
> +  INS2MAP(M, "llvm.AMDGPU.get.image.height.2d", HEIGHT);
> +  INS2MAP(M, "llvm.AMDGPU.get.image.height.3d", HEIGHT);
> +  INS2MAP(M, "llvm.AMDGPU.get.image.depth.3d", DEPTH);
> +  INS2MAP(M, "llvm.AMDGPU.get.image.channel.data.type.2d", CHANNEL_DATA_TYPE);
> +  INS2MAP(M, "llvm.AMDGPU.get.image.channel.data.type.3d", CHANNEL_DATA_TYPE);
> +  INS2MAP(M, "llvm.AMDGPU.get.image.channel.order.2d", CHANNEL_ORDER);
> +  INS2MAP(M, "llvm.AMDGPU.get.image.channel.order.3d", CHANNEL_ORDER);
> +  return M;
> +}
> +#undef INS2MAP
> +auto AttributeFromIntrinsic = InitAttributeFromIntrinsic();
> +
> +class R600ImageAttributeIntrinsicsReplacer : public FunctionPass {
> +  static char ID;
> +
> +  Type *Int32Type;
> +  Function *ReadAttributeFunc;
> +
> +public:
> +  R600ImageAttributeIntrinsicsReplacer() : FunctionPass(ID) {}
> +
> +  bool doInitialization(Module &M) override {
> +    Int32Type = Type::getInt32Ty(M.getContext());
> +
> +    // Create Function for the image attribute reader intrinsic.
> +    ReadAttributeFunc =
> +        Intrinsic::getDeclaration(&M, Intrinsic::r600_read_image_attribute);
> +
> +    return true;
> +  }
> +
> +  unsigned IsImageIntrinsicCall(const CallInst *CallInst,
> +                                ImageAttribute &Attribute) {
> +    StringRef Name = CallInst->getCalledFunction()->getName();
> +    auto It = AttributeFromIntrinsic.find(Name);
> +    if (It == AttributeFromIntrinsic.end()) {
> +      return false;
> +    } else {
> +      Attribute = It->second;
> +      return true;
> +    }
> +  }
> +
> +  bool runOnFunction(Function &F) override {
> +    bool modified = false;
> +    unsigned NumImageArgs = 0;
> +
> +    std::vector<Instruction *> InstsToErase;
> +
> +    for (const auto &Arg : F.args()) {
> +
> +      // Skip non-image types.
> +      Type *ArgType = Arg.getType();
> +      if (!ArgType->isPointerTy())
> +        continue;
> +      Type *ElemType = ArgType->getPointerElementType();
> +      if (!ElemType->isStructTy())
> +        continue;
> +      const llvm::StringRef &TypeName = ElemType->getStructName();
> +      if (!TypeName.startswith("opencl.image2d_t") &&
> +          !TypeName.startswith("opencl.image3d_t"))
> +        continue;
> +      auto ImageIndex = NumImageArgs++;
> +
> +      // Iterate uses to find attribute getters.
> +      for (const auto &Use : Arg.uses()) {
> +
> +        // Only process calls to attribute intrinsics.
> +        auto Inst = dyn_cast<CallInst>(Use.getUser());
> +        if (!Inst)
> +          continue;
> +        ImageAttribute AttributeIndex;
> +        if (!IsImageIntrinsicCall(Inst, AttributeIndex))
> +          continue;
> +
> +        // Replace the instruction with a call to the image attribute reader.
> +        IRBuilder<> Builder(Inst);
> +        Value *Args[] = {ConstantInt::get(Int32Type, ImageIndex),
> +                         ConstantInt::get(Int32Type, AttributeIndex)};
> +        Inst->replaceAllUsesWith(Builder.CreateCall(ReadAttributeFunc, Args));
> +        InstsToErase.push_back(Inst);
> +        modified = true;
> +      }
> +    }
> +    for (unsigned i = 0; i < InstsToErase.size(); ++i) {
> +      InstsToErase[i]->eraseFromParent();
> +    }
> +
> +    return modified;
> +  }
> +
> +  const char *getPassName() const override {
> +    return "R600 Image Attribute Intrinsics Replacer";
> +  }
> +};
> +
> +char R600ImageAttributeIntrinsicsReplacer::ID = 0;
> +}
> +
> +FunctionPass *llvm::createR600ImageAttributeIntrinsicsReplacer() {
> +  return new R600ImageAttributeIntrinsicsReplacer();
> +}
> diff --git a/test/CodeGen/AMDGPU/image-attributes.ll b/test/CodeGen/AMDGPU/image-attributes.ll
> new file mode 100644
> index 0000000..bdde5c6
> --- /dev/null
> +++ b/test/CodeGen/AMDGPU/image-attributes.ll
> @@ -0,0 +1,167 @@
> +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
> +
> +; === WIDTH =================================================================
> +; 9 implicit args + 2 explicit args + 1 grid dim + 3 grid offset = 15 dwords
> +; First width at dword index 15+0 -> KC0[3].W
> +
> +; FUNC-LABEL: {{^}}width_2d:
> +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
> +; EG: MOV [[VAL]], KC0[3].W
> +define void @width_2d (%opencl.image2d_t addrspace(1)* %in,
> +                                 i32 addrspace(1)* %out) {
> +entry:
> +  %0 = call i32 @llvm.AMDGPU.get.image.width.2d(
> +      %opencl.image2d_t addrspace(1)* %in) #0
> +  store i32 %0, i32 addrspace(1)* %out
> +  ret void
> +}
> +
> +; FUNC-LABEL: {{^}}width_3d:
> +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
> +; EG: MOV [[VAL]], KC0[3].W
> +define void @width_3d (%opencl.image3d_t addrspace(1)* %in,
> +                                 i32 addrspace(1)* %out) {
> +entry:
> +  %0 = call i32 @llvm.AMDGPU.get.image.width.3d(
> +      %opencl.image3d_t addrspace(1)* %in) #0
> +  store i32 %0, i32 addrspace(1)* %out
> +  ret void
> +}
> +
> +
> +; === HEIGHT ================================================================
> +; First height at dword index 15+1 -> KC0[4].X
> +
> +; FUNC-LABEL: {{^}}height_2d:
> +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
> +; EG: MOV [[VAL]], KC0[4].X
> +define void @height_2d (%opencl.image2d_t addrspace(1)* %in,
> +                                  i32 addrspace(1)* %out) {
> +entry:
> +  %0 = call i32 @llvm.AMDGPU.get.image.height.2d(
> +      %opencl.image2d_t addrspace(1)* %in) #0
> +  store i32 %0, i32 addrspace(1)* %out
> +  ret void
> +}
> +
> +; FUNC-LABEL: {{^}}height_3d:
> +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
> +; EG: MOV [[VAL]], KC0[4].X
> +define void @height_3d (%opencl.image3d_t addrspace(1)* %in,
> +                                  i32 addrspace(1)* %out) {
> +entry:
> +  %0 = call i32 @llvm.AMDGPU.get.image.height.3d(
> +      %opencl.image3d_t addrspace(1)* %in) #0
> +  store i32 %0, i32 addrspace(1)* %out
> +  ret void
> +}
> +
> +
> +; === DEPTH ================================================================
> +; First depth at dword index 15+2 -> KC0[4].Y
> +
> +; FUNC-LABEL: {{^}}depth_3d:
> +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
> +; EG: MOV [[VAL]], KC0[4].Y
> +define void @depth_3d (%opencl.image3d_t addrspace(1)* %in,
> +                                 i32 addrspace(1)* %out) {
> +entry:
> +  %0 = call i32 @llvm.AMDGPU.get.image.depth.3d(
> +      %opencl.image3d_t addrspace(1)* %in) #0
> +  store i32 %0, i32 addrspace(1)* %out
> +  ret void
> +}
> +
> +
> +; === CHANNEL DATA TYPE =====================================================
> +; First channel data type at dword index 15+3 -> KC0[4].Z
> +
> +; FUNC-LABEL: {{^}}data_type_2d:
> +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
> +; EG: MOV [[VAL]], KC0[4].Z
> +define void @data_type_2d (%opencl.image2d_t addrspace(1)* %in,
> +                                     i32 addrspace(1)* %out) {
> +entry:
> +  %0 = call i32 @llvm.AMDGPU.get.image.channel.data.type.2d(
> +      %opencl.image2d_t addrspace(1)* %in) #0
> +  store i32 %0, i32 addrspace(1)* %out
> +  ret void
> +}
> +
> +; FUNC-LABEL: {{^}}data_type_3d:
> +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
> +; EG: MOV [[VAL]], KC0[4].Z
> +define void @data_type_3d (%opencl.image3d_t addrspace(1)* %in,
> +                                     i32 addrspace(1)* %out) {
> +entry:
> +  %0 = call i32 @llvm.AMDGPU.get.image.channel.data.type.3d(
> +      %opencl.image3d_t addrspace(1)* %in) #0
> +  store i32 %0, i32 addrspace(1)* %out
> +  ret void
> +}
> +
> +
> +; === CHANNEL ORDER =====================================================
> +; First channel order at dword index 15+4 -> KC0[4].W
> +
> +; FUNC-LABEL: {{^}}channel_order_2d:
> +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
> +; EG: MOV [[VAL]], KC0[4].W
> +define void @channel_order_2d (%opencl.image2d_t addrspace(1)* %in,
> +                                         i32 addrspace(1)* %out) {
> +entry:
> +  %0 = call i32 @llvm.AMDGPU.get.image.channel.order.2d(
> +      %opencl.image2d_t addrspace(1)* %in) #0
> +  store i32 %0, i32 addrspace(1)* %out
> +  ret void
> +}
> +
> +; FUNC-LABEL: {{^}}channel_order_3d:
> +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
> +; EG: MOV [[VAL]], KC0[4].W
> +define void @channel_order_3d (%opencl.image3d_t addrspace(1)* %in,
> +                                         i32 addrspace(1)* %out) {
> +entry:
> +  %0 = call i32 @llvm.AMDGPU.get.image.channel.order.3d(
> +      %opencl.image3d_t addrspace(1)* %in) #0
> +  store i32 %0, i32 addrspace(1)* %out
> +  ret void
> +}
> +
> +
> +; === 2ND IMAGE ==============================================================
> +; 9 implicit args + 3 explicit args + 1 grid dim + 3 grid offset = 16 dwords
> +; 16 dwords to first image attrib + 5 attribs for first image = 21 dwords
> +; Height of the second image is at 21+1 -> KC0[5].Z
> +;
> +; FUNC-LABEL: {{^}}image_arg_2nd:
> +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
> +; EG: MOV [[VAL]], KC0[5].Z
> +define void @image_arg_2nd (%opencl.image3d_t addrspace(1)* %in1,
> +                            %opencl.image2d_t addrspace(1)* %in2,
> +                            i32 addrspace(1)* %out) {
> +entry:
> +  %0 = call i32 @llvm.AMDGPU.get.image.height.2d(
> +      %opencl.image2d_t addrspace(1)* %in2) #0
> +  store i32 %0, i32 addrspace(1)* %out
> +  ret void
> +}
> +
> +%opencl.image2d_t = type opaque
> +%opencl.image3d_t = type opaque
> +
> +declare i32 @llvm.AMDGPU.get.image.width.2d(%opencl.image2d_t addrspace(1)*) #0
> +declare i32 @llvm.AMDGPU.get.image.width.3d(%opencl.image3d_t addrspace(1)*) #0
> +declare i32 @llvm.AMDGPU.get.image.height.2d(%opencl.image2d_t addrspace(1)*) #0
> +declare i32 @llvm.AMDGPU.get.image.height.3d(%opencl.image3d_t addrspace(1)*) #0
> +declare i32 @llvm.AMDGPU.get.image.depth.3d(%opencl.image3d_t addrspace(1)*) #0
> +declare i32 @llvm.AMDGPU.get.image.channel.data.type.2d(
> +    %opencl.image2d_t addrspace(1)*) #0
> +declare i32 @llvm.AMDGPU.get.image.channel.data.type.3d(
> +    %opencl.image3d_t addrspace(1)*) #0
> +declare i32 @llvm.AMDGPU.get.image.channel.order.2d(
> +    %opencl.image2d_t addrspace(1)*) #0
> +declare i32 @llvm.AMDGPU.get.image.channel.order.3d(
> +    %opencl.image3d_t addrspace(1)*) #0
> +
> +attributes #0 = { readnone }
> -- 
> 2.4.2
>