[llvm] Image attribute access for the AMDGPU backend
Tom Stellard
tom at stellard.net
Tue Jul 7 12:33:14 PDT 2015
On Thu, Jun 18, 2015 at 01:27:27PM +0200, Zoltan Gilian wrote:
> Added an intrinsic to load an image attribute stored as an implicit kernel
> argument.
> Added a pass to the AMDGPU backend to replace image attribute getter
> pseudointrinsics with the new image attribute reader intrinsic.
Hi,
Why not expose a high-level builtin for each image intrinsic
rather than using one builtin and having a conversion pass?
-Tom
> ---
> include/llvm/IR/IntrinsicsR600.td | 5 +
> lib/Target/AMDGPU/AMDGPU.h | 1 +
> lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 1 +
> lib/Target/AMDGPU/R600ISelLowering.cpp | 14 ++
> .../R600ImageAttributeIntrinsicsReplacer.cpp | 152 +++++++++++++++++++
> test/CodeGen/AMDGPU/image-attributes.ll | 167 +++++++++++++++++++++
> 6 files changed, 340 insertions(+)
> create mode 100644 lib/Target/AMDGPU/R600ImageAttributeIntrinsicsReplacer.cpp
> create mode 100644 test/CodeGen/AMDGPU/image-attributes.ll
>
> diff --git a/include/llvm/IR/IntrinsicsR600.td b/include/llvm/IR/IntrinsicsR600.td
> index 5055667..635cf16 100644
> --- a/include/llvm/IR/IntrinsicsR600.td
> +++ b/include/llvm/IR/IntrinsicsR600.td
> @@ -33,6 +33,11 @@ defm int_r600_read_tgid : R600ReadPreloadRegisterIntrinsic_xyz <
> "__builtin_r600_read_tgid">;
> defm int_r600_read_tidig : R600ReadPreloadRegisterIntrinsic_xyz <
> "__builtin_r600_read_tidig">;
> +
> +def int_r600_read_image_attribute
> + : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>,
> + GCCBuiltin<"__builtin_r600_read_image_attribute">;
> +
> } // End TargetPrefix = "r600"
>
> let TargetPrefix = "AMDGPU" in {
> diff --git a/lib/Target/AMDGPU/AMDGPU.h b/lib/Target/AMDGPU/AMDGPU.h
> index 0a05d25..4b5c5aa 100644
> --- a/lib/Target/AMDGPU/AMDGPU.h
> +++ b/lib/Target/AMDGPU/AMDGPU.h
> @@ -27,6 +27,7 @@ class TargetMachine;
>
> // R600 Passes
> FunctionPass *createR600VectorRegMerger(TargetMachine &tm);
> +FunctionPass *createR600ImageAttributeIntrinsicsReplacer();
> FunctionPass *createR600TextureIntrinsicsReplacer();
> FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm);
> FunctionPass *createR600EmitClauseMarkers();
> diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
> index a9a911a..89285ba 100644
> --- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
> +++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
> @@ -202,6 +202,7 @@ bool AMDGPUPassConfig::addInstSelector() {
>
> bool R600PassConfig::addPreISel() {
> AMDGPUPassConfig::addPreISel();
> + addPass(createR600ImageAttributeIntrinsicsReplacer());
> addPass(createR600TextureIntrinsicsReplacer());
> return false;
> }
> diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp
> index 8357b6d..8ef1ad8 100644
> --- a/lib/Target/AMDGPU/R600ISelLowering.cpp
> +++ b/lib/Target/AMDGPU/R600ISelLowering.cpp
> @@ -818,6 +818,20 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
> case Intrinsic::AMDGPU_read_workdim:
> return LowerImplicitParameter(DAG, VT, DL, MFI->ABIArgOffset / 4);
>
> + case Intrinsic::r600_read_image_attribute: {
> + // operand 0: image index
> + // operand 1: attribute index
> +
> + uint64_t DWordOffset = MFI->ABIArgOffset / 4;
> + // Skip grid dim and grid offset.
> + DWordOffset += 4;
> + // There are 5 dword attributes per image.
> + DWordOffset += 5 * Op.getConstantOperandVal(1);
> + // Skip to the requested attribute.
> + DWordOffset += Op.getConstantOperandVal(2);
> + return LowerImplicitParameter(DAG, VT, DL, DWordOffset);
> + }
> +
> case Intrinsic::r600_read_tgid_x:
> return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
> AMDGPU::T1_X, VT);
> diff --git a/lib/Target/AMDGPU/R600ImageAttributeIntrinsicsReplacer.cpp b/lib/Target/AMDGPU/R600ImageAttributeIntrinsicsReplacer.cpp
> new file mode 100644
> index 0000000..9727606
> --- /dev/null
> +++ b/lib/Target/AMDGPU/R600ImageAttributeIntrinsicsReplacer.cpp
> @@ -0,0 +1,152 @@
> +//===-- R600ImageAttributeIntrinsicsReplacer.cpp --------------------------===//
> +//
> +// The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +/// \file
> +/// This pass replaces image attribute getter pseudointrinsics with the
> +/// r600_read_image_attribute intrinsic. The pseudointrinsics are used to
> +/// implement OpenCL C get_image_* builtins to avoid using mangled names here.
> +///
> +/// The r600_read_image_attribute intrinsic identifies the image in question
> +/// using an index of the argument among image arguments. For each image
> +/// argument, calls to getters using that particular argument are replaced
> +/// with calls to the r600_read_image_attribute intrinsic. The image index is
> +/// passed as a parameter along with the attribute index.
> +//===----------------------------------------------------------------------===//
> +
> +#include "AMDGPU.h"
> +#include "llvm/ADT/StringMap.h"
> +#include "llvm/Analysis/Passes.h"
> +#include "llvm/IR/Function.h"
> +#include "llvm/IR/IRBuilder.h"
> +#include "llvm/IR/Intrinsics.h"
> +#include "llvm/IR/Module.h"
> +
> +#include <vector>
> +
> +using namespace llvm;
> +
> +namespace {
> +
> +enum ImageAttribute {
> + WIDTH = 0,
> + HEIGHT = 1,
> + DEPTH = 2,
> + CHANNEL_DATA_TYPE = 3,
> + CHANNEL_ORDER = 4
> +};
> +
> +// FIXME: this should be replaced with initializer_list initialization of
> +// StringMap if and when it gets one.
> +#define INS2MAP(m, a, b) ((m).insert(std::make_pair(a, b)))
> +StringMap<ImageAttribute> InitAttributeFromIntrinsic() {
> + StringMap<ImageAttribute> M;
> + INS2MAP(M, "llvm.AMDGPU.get.image.width.2d", WIDTH);
> + INS2MAP(M, "llvm.AMDGPU.get.image.width.3d", WIDTH);
> + INS2MAP(M, "llvm.AMDGPU.get.image.height.2d", HEIGHT);
> + INS2MAP(M, "llvm.AMDGPU.get.image.height.3d", HEIGHT);
> + INS2MAP(M, "llvm.AMDGPU.get.image.depth.3d", DEPTH);
> + INS2MAP(M, "llvm.AMDGPU.get.image.channel.data.type.2d", CHANNEL_DATA_TYPE);
> + INS2MAP(M, "llvm.AMDGPU.get.image.channel.data.type.3d", CHANNEL_DATA_TYPE);
> + INS2MAP(M, "llvm.AMDGPU.get.image.channel.order.2d", CHANNEL_ORDER);
> + INS2MAP(M, "llvm.AMDGPU.get.image.channel.order.3d", CHANNEL_ORDER);
> + return M;
> +}
> +#undef INS2MAP
> +auto AttributeFromIntrinsic = InitAttributeFromIntrinsic();
> +
> +class R600ImageAttributeIntrinsicsReplacer : public FunctionPass {
> + static char ID;
> +
> + Type *Int32Type;
> + Function *ReadAttributeFunc;
> +
> +public:
> + R600ImageAttributeIntrinsicsReplacer() : FunctionPass(ID) {}
> +
> + bool doInitialization(Module &M) override {
> + Int32Type = Type::getInt32Ty(M.getContext());
> +
> + // Create Function for the image attribute reader intrinsic.
> + ReadAttributeFunc =
> + Intrinsic::getDeclaration(&M, Intrinsic::r600_read_image_attribute);
> +
> + return true;
> + }
> +
> + unsigned IsImageIntrinsicCall(const CallInst *CallInst,
> + ImageAttribute &Attribute) {
> + StringRef Name = CallInst->getCalledFunction()->getName();
> + auto It = AttributeFromIntrinsic.find(Name);
> + if (It == AttributeFromIntrinsic.end()) {
> + return false;
> + } else {
> + Attribute = It->second;
> + return true;
> + }
> + }
> +
> + bool runOnFunction(Function &F) override {
> + bool modified = false;
> + unsigned NumImageArgs = 0;
> +
> + std::vector<Instruction *> InstsToErase;
> +
> + for (const auto &Arg : F.args()) {
> +
> + // Skip non-image types.
> + Type *ArgType = Arg.getType();
> + if (!ArgType->isPointerTy())
> + continue;
> + Type *ElemType = ArgType->getPointerElementType();
> + if (!ElemType->isStructTy())
> + continue;
> + const llvm::StringRef &TypeName = ElemType->getStructName();
> + if (!TypeName.startswith("opencl.image2d_t") &&
> + !TypeName.startswith("opencl.image3d_t"))
> + continue;
> + auto ImageIndex = NumImageArgs++;
> +
> + // Iterate uses to find attribute getters.
> + for (const auto &Use : Arg.uses()) {
> +
> + // Only process calls to attribute intrinsics.
> + auto Inst = dyn_cast<CallInst>(Use.getUser());
> + if (!Inst)
> + continue;
> + ImageAttribute AttributeIndex;
> + if (!IsImageIntrinsicCall(Inst, AttributeIndex))
> + continue;
> +
> + // Replace the instruction with a call to the image attribute reader.
> + IRBuilder<> Builder(Inst);
> + Value *Args[] = {ConstantInt::get(Int32Type, ImageIndex),
> + ConstantInt::get(Int32Type, AttributeIndex)};
> + Inst->replaceAllUsesWith(Builder.CreateCall(ReadAttributeFunc, Args));
> + InstsToErase.push_back(Inst);
> + modified = true;
> + }
> + }
> + for (unsigned i = 0; i < InstsToErase.size(); ++i) {
> + InstsToErase[i]->eraseFromParent();
> + }
> +
> + return modified;
> + }
> +
> + const char *getPassName() const override {
> + return "R600 Image Attribute Intrinsics Replacer";
> + }
> +};
> +
> +char R600ImageAttributeIntrinsicsReplacer::ID = 0;
> +}
> +
> +FunctionPass *llvm::createR600ImageAttributeIntrinsicsReplacer() {
> + return new R600ImageAttributeIntrinsicsReplacer();
> +}
> diff --git a/test/CodeGen/AMDGPU/image-attributes.ll b/test/CodeGen/AMDGPU/image-attributes.ll
> new file mode 100644
> index 0000000..bdde5c6
> --- /dev/null
> +++ b/test/CodeGen/AMDGPU/image-attributes.ll
> @@ -0,0 +1,167 @@
> +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
> +
> +; === WIDTH =================================================================
> +; 9 implicit args + 2 explicit args + 1 grid dim + 3 grid offset = 15 dwords
> +; First width at dword index 15+0 -> KC0[3].W
> +
> +; FUNC-LABEL: {{^}}width_2d:
> +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
> +; EG: MOV [[VAL]], KC0[3].W
> +define void @width_2d (%opencl.image2d_t addrspace(1)* %in,
> + i32 addrspace(1)* %out) {
> +entry:
> + %0 = call i32 @llvm.AMDGPU.get.image.width.2d(
> + %opencl.image2d_t addrspace(1)* %in) #0
> + store i32 %0, i32 addrspace(1)* %out
> + ret void
> +}
> +
> +; FUNC-LABEL: {{^}}width_3d:
> +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
> +; EG: MOV [[VAL]], KC0[3].W
> +define void @width_3d (%opencl.image3d_t addrspace(1)* %in,
> + i32 addrspace(1)* %out) {
> +entry:
> + %0 = call i32 @llvm.AMDGPU.get.image.width.3d(
> + %opencl.image3d_t addrspace(1)* %in) #0
> + store i32 %0, i32 addrspace(1)* %out
> + ret void
> +}
> +
> +
> +; === HEIGHT ================================================================
> +; First height at dword index 15+1 -> KC0[4].X
> +
> +; FUNC-LABEL: {{^}}height_2d:
> +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
> +; EG: MOV [[VAL]], KC0[4].X
> +define void @height_2d (%opencl.image2d_t addrspace(1)* %in,
> + i32 addrspace(1)* %out) {
> +entry:
> + %0 = call i32 @llvm.AMDGPU.get.image.height.2d(
> + %opencl.image2d_t addrspace(1)* %in) #0
> + store i32 %0, i32 addrspace(1)* %out
> + ret void
> +}
> +
> +; FUNC-LABEL: {{^}}height_3d:
> +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
> +; EG: MOV [[VAL]], KC0[4].X
> +define void @height_3d (%opencl.image3d_t addrspace(1)* %in,
> + i32 addrspace(1)* %out) {
> +entry:
> + %0 = call i32 @llvm.AMDGPU.get.image.height.3d(
> + %opencl.image3d_t addrspace(1)* %in) #0
> + store i32 %0, i32 addrspace(1)* %out
> + ret void
> +}
> +
> +
> +; === DEPTH ================================================================
> +; First depth at dword index 15+2 -> KC0[4].Y
> +
> +; FUNC-LABEL: {{^}}depth_3d:
> +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
> +; EG: MOV [[VAL]], KC0[4].Y
> +define void @depth_3d (%opencl.image3d_t addrspace(1)* %in,
> + i32 addrspace(1)* %out) {
> +entry:
> + %0 = call i32 @llvm.AMDGPU.get.image.depth.3d(
> + %opencl.image3d_t addrspace(1)* %in) #0
> + store i32 %0, i32 addrspace(1)* %out
> + ret void
> +}
> +
> +
> +; === CHANNEL DATA TYPE =====================================================
> +; First channel data type at dword index 15+3 -> KC0[4].Z
> +
> +; FUNC-LABEL: {{^}}data_type_2d:
> +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
> +; EG: MOV [[VAL]], KC0[4].Z
> +define void @data_type_2d (%opencl.image2d_t addrspace(1)* %in,
> + i32 addrspace(1)* %out) {
> +entry:
> + %0 = call i32 @llvm.AMDGPU.get.image.channel.data.type.2d(
> + %opencl.image2d_t addrspace(1)* %in) #0
> + store i32 %0, i32 addrspace(1)* %out
> + ret void
> +}
> +
> +; FUNC-LABEL: {{^}}data_type_3d:
> +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
> +; EG: MOV [[VAL]], KC0[4].Z
> +define void @data_type_3d (%opencl.image3d_t addrspace(1)* %in,
> + i32 addrspace(1)* %out) {
> +entry:
> + %0 = call i32 @llvm.AMDGPU.get.image.channel.data.type.3d(
> + %opencl.image3d_t addrspace(1)* %in) #0
> + store i32 %0, i32 addrspace(1)* %out
> + ret void
> +}
> +
> +
> +; === CHANNEL ORDER =====================================================
> +; First channel order at dword index 15+4 -> KC0[4].W
> +
> +; FUNC-LABEL: {{^}}channel_order_2d:
> +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
> +; EG: MOV [[VAL]], KC0[4].W
> +define void @channel_order_2d (%opencl.image2d_t addrspace(1)* %in,
> + i32 addrspace(1)* %out) {
> +entry:
> + %0 = call i32 @llvm.AMDGPU.get.image.channel.order.2d(
> + %opencl.image2d_t addrspace(1)* %in) #0
> + store i32 %0, i32 addrspace(1)* %out
> + ret void
> +}
> +
> +; FUNC-LABEL: {{^}}channel_order_3d:
> +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
> +; EG: MOV [[VAL]], KC0[4].W
> +define void @channel_order_3d (%opencl.image3d_t addrspace(1)* %in,
> + i32 addrspace(1)* %out) {
> +entry:
> + %0 = call i32 @llvm.AMDGPU.get.image.channel.order.3d(
> + %opencl.image3d_t addrspace(1)* %in) #0
> + store i32 %0, i32 addrspace(1)* %out
> + ret void
> +}
> +
> +
> +; === 2ND IMAGE ==============================================================
> +; 9 implicit args + 3 explicit args + 1 grid dim + 3 grid offset = 16 dwords
> +; 16 dwords to first image attrib + 5 attribs for first image = 21 dwords
> +; Height of the second image is at 21+1 -> KC0[5].Z
> +;
> +; FUNC-LABEL: {{^}}image_arg_2nd:
> +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
> +; EG: MOV [[VAL]], KC0[5].Z
> +define void @image_arg_2nd (%opencl.image3d_t addrspace(1)* %in1,
> + %opencl.image2d_t addrspace(1)* %in2,
> + i32 addrspace(1)* %out) {
> +entry:
> + %0 = call i32 @llvm.AMDGPU.get.image.height.2d(
> + %opencl.image2d_t addrspace(1)* %in2) #0
> + store i32 %0, i32 addrspace(1)* %out
> + ret void
> +}
> +
> +%opencl.image2d_t = type opaque
> +%opencl.image3d_t = type opaque
> +
> +declare i32 @llvm.AMDGPU.get.image.width.2d(%opencl.image2d_t addrspace(1)*) #0
> +declare i32 @llvm.AMDGPU.get.image.width.3d(%opencl.image3d_t addrspace(1)*) #0
> +declare i32 @llvm.AMDGPU.get.image.height.2d(%opencl.image2d_t addrspace(1)*) #0
> +declare i32 @llvm.AMDGPU.get.image.height.3d(%opencl.image3d_t addrspace(1)*) #0
> +declare i32 @llvm.AMDGPU.get.image.depth.3d(%opencl.image3d_t addrspace(1)*) #0
> +declare i32 @llvm.AMDGPU.get.image.channel.data.type.2d(
> + %opencl.image2d_t addrspace(1)*) #0
> +declare i32 @llvm.AMDGPU.get.image.channel.data.type.3d(
> + %opencl.image3d_t addrspace(1)*) #0
> +declare i32 @llvm.AMDGPU.get.image.channel.order.2d(
> + %opencl.image2d_t addrspace(1)*) #0
> +declare i32 @llvm.AMDGPU.get.image.channel.order.3d(
> + %opencl.image3d_t addrspace(1)*) #0
> +
> +attributes #0 = { readnone }
> --
> 2.4.2
>
More information about the llvm-commits
mailing list