[llvm] Image attribute access for the AMDGPU backend
Zoltán Gilián
zoltan.gilian at gmail.com
Tue Jul 7 15:28:48 PDT 2015
I need a pass to obtain compile-time information about the ID of the
image in question. As far as I know, if the location of an image
attribute is known at kernel compilation time, the CF instruction
initiating the ALU clause which uses the attribute can lock the data
into the constant cache, so the attribute value is available to the
ALU clause.
Otherwise, if I eliminate this pass, the attribute data has to be
fetched using a vertex fetch instruction (using the run-time value
stored in the image kernel argument), so a vertex fetch clause is
needed before the ALU one using the attribute value.
Furthermore, the AMD Catalyst driver works like the former: the
location of the attribute values is known at kernel compilation time,
so the constant cache locking mechanism can be used.
I could implement five different intrinsics for the five attributes,
but that doesn't affect the problem above; I would still need a
conversion pass to get a compile-time constant image ID.
I believe I'll need a compile-time constant resource ID in case of
image reading (texture resource ID) and writing (RAT ID) anyway, so
I'll definitely need something similar to this.
I could implement an AMDGPU.get.image.id intrinsic, which could be
used to implement the image builtins, but I would still need a pass to
replace the calls to that with some compile-time ID. Or at least I
don't see how else it can be done.
What are the problems with having a conversion pass?
On Tue, Jul 7, 2015 at 9:33 PM, Tom Stellard <tom at stellard.net> wrote:
> On Thu, Jun 18, 2015 at 01:27:27PM +0200, Zoltan Gilian wrote:
>> Added an intrinsic to load an image attribute stored as an implicit kernel
>> argument.
>> Added a pass to the AMDGPU backend to replace image attribute getter
>> pseudointrinsics with the new image attribute reader intrinsic.
>
> Hi,
>
> Why not expose a high-level builtin for each image intrinsic
> rather than using one builtin and having a conversion pass?
>
> -Tom
>> ---
>> include/llvm/IR/IntrinsicsR600.td | 5 +
>> lib/Target/AMDGPU/AMDGPU.h | 1 +
>> lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 1 +
>> lib/Target/AMDGPU/R600ISelLowering.cpp | 14 ++
>> .../R600ImageAttributeIntrinsicsReplacer.cpp | 152 +++++++++++++++++++
>> test/CodeGen/AMDGPU/image-attributes.ll | 167 +++++++++++++++++++++
>> 6 files changed, 340 insertions(+)
>> create mode 100644 lib/Target/AMDGPU/R600ImageAttributeIntrinsicsReplacer.cpp
>> create mode 100644 test/CodeGen/AMDGPU/image-attributes.ll
>>
>> diff --git a/include/llvm/IR/IntrinsicsR600.td b/include/llvm/IR/IntrinsicsR600.td
>> index 5055667..635cf16 100644
>> --- a/include/llvm/IR/IntrinsicsR600.td
>> +++ b/include/llvm/IR/IntrinsicsR600.td
>> @@ -33,6 +33,11 @@ defm int_r600_read_tgid : R600ReadPreloadRegisterIntrinsic_xyz <
>> "__builtin_r600_read_tgid">;
>> defm int_r600_read_tidig : R600ReadPreloadRegisterIntrinsic_xyz <
>> "__builtin_r600_read_tidig">;
>> +
>> +def int_r600_read_image_attribute
>> + : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>,
>> + GCCBuiltin<"__builtin_r600_read_image_attribute">;
>> +
>> } // End TargetPrefix = "r600"
>>
>> let TargetPrefix = "AMDGPU" in {
>> diff --git a/lib/Target/AMDGPU/AMDGPU.h b/lib/Target/AMDGPU/AMDGPU.h
>> index 0a05d25..4b5c5aa 100644
>> --- a/lib/Target/AMDGPU/AMDGPU.h
>> +++ b/lib/Target/AMDGPU/AMDGPU.h
>> @@ -27,6 +27,7 @@ class TargetMachine;
>>
>> // R600 Passes
>> FunctionPass *createR600VectorRegMerger(TargetMachine &tm);
>> +FunctionPass *createR600ImageAttributeIntrinsicsReplacer();
>> FunctionPass *createR600TextureIntrinsicsReplacer();
>> FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm);
>> FunctionPass *createR600EmitClauseMarkers();
>> diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
>> index a9a911a..89285ba 100644
>> --- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
>> +++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
>> @@ -202,6 +202,7 @@ bool AMDGPUPassConfig::addInstSelector() {
>>
>> bool R600PassConfig::addPreISel() {
>> AMDGPUPassConfig::addPreISel();
>> + addPass(createR600ImageAttributeIntrinsicsReplacer());
>> addPass(createR600TextureIntrinsicsReplacer());
>> return false;
>> }
>> diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp
>> index 8357b6d..8ef1ad8 100644
>> --- a/lib/Target/AMDGPU/R600ISelLowering.cpp
>> +++ b/lib/Target/AMDGPU/R600ISelLowering.cpp
>> @@ -818,6 +818,20 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
>> case Intrinsic::AMDGPU_read_workdim:
>> return LowerImplicitParameter(DAG, VT, DL, MFI->ABIArgOffset / 4);
>>
>> + case Intrinsic::r600_read_image_attribute: {
>> + // operand 0: image index
>> + // operand 1: attribute index
>> +
>> + uint64_t DWordOffset = MFI->ABIArgOffset / 4;
>> + // Skip grid dim and grid offset.
>> + DWordOffset += 4;
>> + // There are 5 dword attributes per image.
>> + DWordOffset += 5 * Op.getConstantOperandVal(1);
>> + // Skip to the requested attribute.
>> + DWordOffset += Op.getConstantOperandVal(2);
>> + return LowerImplicitParameter(DAG, VT, DL, DWordOffset);
>> + }
>> +
>> case Intrinsic::r600_read_tgid_x:
>> return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
>> AMDGPU::T1_X, VT);
>> diff --git a/lib/Target/AMDGPU/R600ImageAttributeIntrinsicsReplacer.cpp b/lib/Target/AMDGPU/R600ImageAttributeIntrinsicsReplacer.cpp
>> new file mode 100644
>> index 0000000..9727606
>> --- /dev/null
>> +++ b/lib/Target/AMDGPU/R600ImageAttributeIntrinsicsReplacer.cpp
>> @@ -0,0 +1,152 @@
>> +//===-- R600ImageAttributeIntrinsicsReplacer.cpp --------------------------===//
>> +//
>> +// The LLVM Compiler Infrastructure
>> +//
>> +// This file is distributed under the University of Illinois Open Source
>> +// License. See LICENSE.TXT for details.
>> +//
>> +//===----------------------------------------------------------------------===//
>> +//
>> +/// \file
>> +/// This pass replaces image attribute getter pseudointrinsics with the
>> +/// r600_read_image_attribute intrinsic. The pseudointrinsics are used to
>> +/// implement OpenCL C get_image_* builtins to avoid using mangled names here.
>> +///
>> +/// The r600_read_image_attribute intrinsic identifies the image in question
>> +/// using an index of the argument among image arguments. For each image
>> +/// argument, calls to getters using that particular argument are replaced
>> +/// with calls to the r600_read_image_attribute intrinsic. The image index is
>> +/// passed as a parameter along with the attribute index.
>> +//===----------------------------------------------------------------------===//
>> +
>> +#include "AMDGPU.h"
>> +#include "llvm/ADT/StringMap.h"
>> +#include "llvm/Analysis/Passes.h"
>> +#include "llvm/IR/Function.h"
>> +#include "llvm/IR/IRBuilder.h"
>> +#include "llvm/IR/Intrinsics.h"
>> +#include "llvm/IR/Module.h"
>> +
>> +#include <vector>
>> +
>> +using namespace llvm;
>> +
>> +namespace {
>> +
>> +enum ImageAttribute {
>> + WIDTH = 0,
>> + HEIGHT = 1,
>> + DEPTH = 2,
>> + CHANNEL_DATA_TYPE = 3,
>> + CHANNEL_ORDER = 4
>> +};
>> +
>> +// Fixme: this should be replaced with initializer_list initialization of
>> +// StringMap if and when it gets one.
>> +#define INS2MAP(m, a, b) ((m).insert(std::make_pair(a, b)))
>> +StringMap<ImageAttribute> InitAttributeFromIntrinsic() {
>> + StringMap<ImageAttribute> M;
>> + INS2MAP(M, "llvm.AMDGPU.get.image.width.2d", WIDTH);
>> + INS2MAP(M, "llvm.AMDGPU.get.image.width.3d", WIDTH);
>> + INS2MAP(M, "llvm.AMDGPU.get.image.height.2d", HEIGHT);
>> + INS2MAP(M, "llvm.AMDGPU.get.image.height.3d", HEIGHT);
>> + INS2MAP(M, "llvm.AMDGPU.get.image.depth.3d", DEPTH);
>> + INS2MAP(M, "llvm.AMDGPU.get.image.channel.data.type.2d", CHANNEL_DATA_TYPE);
>> + INS2MAP(M, "llvm.AMDGPU.get.image.channel.data.type.3d", CHANNEL_DATA_TYPE);
>> + INS2MAP(M, "llvm.AMDGPU.get.image.channel.order.2d", CHANNEL_ORDER);
>> + INS2MAP(M, "llvm.AMDGPU.get.image.channel.order.3d", CHANNEL_ORDER);
>> + return M;
>> +}
>> +#undef INS2MAP
>> +auto AttributeFromIntrinsic = InitAttributeFromIntrinsic();
>> +
>> +class R600ImageAttributeIntrinsicsReplacer : public FunctionPass {
>> + static char ID;
>> +
>> + Type *Int32Type;
>> + Function *ReadAttributeFunc;
>> +
>> +public:
>> + R600ImageAttributeIntrinsicsReplacer() : FunctionPass(ID) {}
>> +
>> + bool doInitialization(Module &M) override {
>> + Int32Type = Type::getInt32Ty(M.getContext());
>> +
>> + // Create Function for the image attribute reader intrinsic.
>> + ReadAttributeFunc =
>> + Intrinsic::getDeclaration(&M, Intrinsic::r600_read_image_attribute);
>> +
>> + return true;
>> + }
>> +
>> + unsigned IsImageIntrinsicCall(const CallInst *CallInst,
>> + ImageAttribute &Attribute) {
>> + StringRef Name = CallInst->getCalledFunction()->getName();
>> + auto It = AttributeFromIntrinsic.find(Name);
>> + if (It == AttributeFromIntrinsic.end()) {
>> + return false;
>> + } else {
>> + Attribute = It->second;
>> + return true;
>> + }
>> + }
>> +
>> + bool runOnFunction(Function &F) override {
>> + bool modified = false;
>> + unsigned NumImageArgs = 0;
>> +
>> + std::vector<Instruction *> InstsToErase;
>> +
>> + for (const auto &Arg : F.args()) {
>> +
>> + // Skip non-image types.
>> + Type *ArgType = Arg.getType();
>> + if (!ArgType->isPointerTy())
>> + continue;
>> + Type *ElemType = ArgType->getPointerElementType();
>> + if (!ElemType->isStructTy())
>> + continue;
>> + const llvm::StringRef &TypeName = ElemType->getStructName();
>> + if (!TypeName.startswith("opencl.image2d_t") &&
>> + !TypeName.startswith("opencl.image3d_t"))
>> + continue;
>> + auto ImageIndex = NumImageArgs++;
>> +
>> + // Iterate uses to find attribute getters.
>> + for (const auto &Use : Arg.uses()) {
>> +
>> + // Only process calls to attribute intrinsics.
>> + auto Inst = dyn_cast<CallInst>(Use.getUser());
>> + if (!Inst)
>> + continue;
>> + ImageAttribute AttributeIndex;
>> + if (!IsImageIntrinsicCall(Inst, AttributeIndex))
>> + continue;
>> +
>> + // Replace the instruction with a call to the image attribute reader.
>> + IRBuilder<> Builder(Inst);
>> + Value *Args[] = {ConstantInt::get(Int32Type, ImageIndex),
>> + ConstantInt::get(Int32Type, AttributeIndex)};
>> + Inst->replaceAllUsesWith(Builder.CreateCall(ReadAttributeFunc, Args));
>> + InstsToErase.push_back(Inst);
>> + modified = true;
>> + }
>> + }
>> + for (unsigned i = 0; i < InstsToErase.size(); ++i) {
>> + InstsToErase[i]->eraseFromParent();
>> + }
>> +
>> + return modified;
>> + }
>> +
>> + const char *getPassName() const override {
>> + return "R600 Image Attribute Intrinsics Replacer";
>> + }
>> +};
>> +
>> +char R600ImageAttributeIntrinsicsReplacer::ID = 0;
>> +}
>> +
>> +FunctionPass *llvm::createR600ImageAttributeIntrinsicsReplacer() {
>> + return new R600ImageAttributeIntrinsicsReplacer();
>> +}
>> diff --git a/test/CodeGen/AMDGPU/image-attributes.ll b/test/CodeGen/AMDGPU/image-attributes.ll
>> new file mode 100644
>> index 0000000..bdde5c6
>> --- /dev/null
>> +++ b/test/CodeGen/AMDGPU/image-attributes.ll
>> @@ -0,0 +1,167 @@
>> +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
>> +
>> +; === WIDTH =================================================================
>> +; 9 implicit args + 2 explicit args + 1 grid dim + 3 grid offset = 15 dwords
>> +; First width at dword index 15+0 -> KC0[3].W
>> +
>> +; FUNC-LABEL: {{^}}width_2d:
>> +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
>> +; EG: MOV [[VAL]], KC0[3].W
>> +define void @width_2d (%opencl.image2d_t addrspace(1)* %in,
>> + i32 addrspace(1)* %out) {
>> +entry:
>> + %0 = call i32 @llvm.AMDGPU.get.image.width.2d(
>> + %opencl.image2d_t addrspace(1)* %in) #0
>> + store i32 %0, i32 addrspace(1)* %out
>> + ret void
>> +}
>> +
>> +; FUNC-LABEL: {{^}}width_3d:
>> +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
>> +; EG: MOV [[VAL]], KC0[3].W
>> +define void @width_3d (%opencl.image3d_t addrspace(1)* %in,
>> + i32 addrspace(1)* %out) {
>> +entry:
>> + %0 = call i32 @llvm.AMDGPU.get.image.width.3d(
>> + %opencl.image3d_t addrspace(1)* %in) #0
>> + store i32 %0, i32 addrspace(1)* %out
>> + ret void
>> +}
>> +
>> +
>> +; === HEIGHT ================================================================
>> +; First height at dword index 15+1 -> KC0[4].X
>> +
>> +; FUNC-LABEL: {{^}}height_2d:
>> +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
>> +; EG: MOV [[VAL]], KC0[4].X
>> +define void @height_2d (%opencl.image2d_t addrspace(1)* %in,
>> + i32 addrspace(1)* %out) {
>> +entry:
>> + %0 = call i32 @llvm.AMDGPU.get.image.height.2d(
>> + %opencl.image2d_t addrspace(1)* %in) #0
>> + store i32 %0, i32 addrspace(1)* %out
>> + ret void
>> +}
>> +
>> +; FUNC-LABEL: {{^}}height_3d:
>> +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
>> +; EG: MOV [[VAL]], KC0[4].X
>> +define void @height_3d (%opencl.image3d_t addrspace(1)* %in,
>> + i32 addrspace(1)* %out) {
>> +entry:
>> + %0 = call i32 @llvm.AMDGPU.get.image.height.3d(
>> + %opencl.image3d_t addrspace(1)* %in) #0
>> + store i32 %0, i32 addrspace(1)* %out
>> + ret void
>> +}
>> +
>> +
>> +; === DEPTH ================================================================
>> +; First depth at dword index 15+2 -> KC0[4].Y
>> +
>> +; FUNC-LABEL: {{^}}depth_3d:
>> +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
>> +; EG: MOV [[VAL]], KC0[4].Y
>> +define void @depth_3d (%opencl.image3d_t addrspace(1)* %in,
>> + i32 addrspace(1)* %out) {
>> +entry:
>> + %0 = call i32 @llvm.AMDGPU.get.image.depth.3d(
>> + %opencl.image3d_t addrspace(1)* %in) #0
>> + store i32 %0, i32 addrspace(1)* %out
>> + ret void
>> +}
>> +
>> +
>> +; === CHANNEL DATA TYPE =====================================================
>> +; First channel data type at dword index 15+3 -> KC0[4].Z
>> +
>> +; FUNC-LABEL: {{^}}data_type_2d:
>> +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
>> +; EG: MOV [[VAL]], KC0[4].Z
>> +define void @data_type_2d (%opencl.image2d_t addrspace(1)* %in,
>> + i32 addrspace(1)* %out) {
>> +entry:
>> + %0 = call i32 @llvm.AMDGPU.get.image.channel.data.type.2d(
>> + %opencl.image2d_t addrspace(1)* %in) #0
>> + store i32 %0, i32 addrspace(1)* %out
>> + ret void
>> +}
>> +
>> +; FUNC-LABEL: {{^}}data_type_3d:
>> +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
>> +; EG: MOV [[VAL]], KC0[4].Z
>> +define void @data_type_3d (%opencl.image3d_t addrspace(1)* %in,
>> + i32 addrspace(1)* %out) {
>> +entry:
>> + %0 = call i32 @llvm.AMDGPU.get.image.channel.data.type.3d(
>> + %opencl.image3d_t addrspace(1)* %in) #0
>> + store i32 %0, i32 addrspace(1)* %out
>> + ret void
>> +}
>> +
>> +
>> +; === CHANNEL ORDER =====================================================
>> +; First channel order at dword index 15+4 -> KC0[4].W
>> +
>> +; FUNC-LABEL: {{^}}channel_order_2d:
>> +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
>> +; EG: MOV [[VAL]], KC0[4].W
>> +define void @channel_order_2d (%opencl.image2d_t addrspace(1)* %in,
>> + i32 addrspace(1)* %out) {
>> +entry:
>> + %0 = call i32 @llvm.AMDGPU.get.image.channel.order.2d(
>> + %opencl.image2d_t addrspace(1)* %in) #0
>> + store i32 %0, i32 addrspace(1)* %out
>> + ret void
>> +}
>> +
>> +; FUNC-LABEL: {{^}}channel_order_3d:
>> +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
>> +; EG: MOV [[VAL]], KC0[4].W
>> +define void @channel_order_3d (%opencl.image3d_t addrspace(1)* %in,
>> + i32 addrspace(1)* %out) {
>> +entry:
>> + %0 = call i32 @llvm.AMDGPU.get.image.channel.order.3d(
>> + %opencl.image3d_t addrspace(1)* %in) #0
>> + store i32 %0, i32 addrspace(1)* %out
>> + ret void
>> +}
>> +
>> +
>> +; === 2ND IMAGE ==============================================================
>> +; 9 implicit args + 3 explicit args + 1 grid dim + 3 grid offset = 16 dwords
>> +; 16 dwords to first image attrib + 5 attribs for first image = 21 dwords
>> +; Height of the second image is at 21+1 -> KC0[5].Z
>> +;
>> +; FUNC-LABEL: {{^}}image_arg_2nd:
>> +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
>> +; EG: MOV [[VAL]], KC0[5].Z
>> +define void @image_arg_2nd (%opencl.image3d_t addrspace(1)* %in1,
>> + %opencl.image2d_t addrspace(1)* %in2,
>> + i32 addrspace(1)* %out) {
>> +entry:
>> + %0 = call i32 @llvm.AMDGPU.get.image.height.2d(
>> + %opencl.image2d_t addrspace(1)* %in2) #0
>> + store i32 %0, i32 addrspace(1)* %out
>> + ret void
>> +}
>> +
>> +%opencl.image2d_t = type opaque
>> +%opencl.image3d_t = type opaque
>> +
>> +declare i32 @llvm.AMDGPU.get.image.width.2d(%opencl.image2d_t addrspace(1)*) #0
>> +declare i32 @llvm.AMDGPU.get.image.width.3d(%opencl.image3d_t addrspace(1)*) #0
>> +declare i32 @llvm.AMDGPU.get.image.height.2d(%opencl.image2d_t addrspace(1)*) #0
>> +declare i32 @llvm.AMDGPU.get.image.height.3d(%opencl.image3d_t addrspace(1)*) #0
>> +declare i32 @llvm.AMDGPU.get.image.depth.3d(%opencl.image3d_t addrspace(1)*) #0
>> +declare i32 @llvm.AMDGPU.get.image.channel.data.type.2d(
>> + %opencl.image2d_t addrspace(1)*) #0
>> +declare i32 @llvm.AMDGPU.get.image.channel.data.type.3d(
>> + %opencl.image3d_t addrspace(1)*) #0
>> +declare i32 @llvm.AMDGPU.get.image.channel.order.2d(
>> + %opencl.image2d_t addrspace(1)*) #0
>> +declare i32 @llvm.AMDGPU.get.image.channel.order.3d(
>> + %opencl.image3d_t addrspace(1)*) #0
>> +
>> +attributes #0 = { readnone }
>> --
>> 2.4.2
>>
More information about the llvm-commits
mailing list