[llvm] Image attribute access for the AMDGPU backend
Zoltan Gilian
zoltan.gilian at gmail.com
Thu Jun 18 04:27:27 PDT 2015
Added an intrinsic to load an image attribute stored as an implicit kernel
argument.
Added a pass to the AMDGPU backend to replace image attribute getter
pseudointrinsics with the new image attribute reader intrinsic.
---
include/llvm/IR/IntrinsicsR600.td | 5 +
lib/Target/AMDGPU/AMDGPU.h | 1 +
lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 1 +
lib/Target/AMDGPU/R600ISelLowering.cpp | 14 ++
.../R600ImageAttributeIntrinsicsReplacer.cpp | 152 +++++++++++++++++++
test/CodeGen/AMDGPU/image-attributes.ll | 167 +++++++++++++++++++++
6 files changed, 340 insertions(+)
create mode 100644 lib/Target/AMDGPU/R600ImageAttributeIntrinsicsReplacer.cpp
create mode 100644 test/CodeGen/AMDGPU/image-attributes.ll
diff --git a/include/llvm/IR/IntrinsicsR600.td b/include/llvm/IR/IntrinsicsR600.td
index 5055667..635cf16 100644
--- a/include/llvm/IR/IntrinsicsR600.td
+++ b/include/llvm/IR/IntrinsicsR600.td
@@ -33,6 +33,11 @@ defm int_r600_read_tgid : R600ReadPreloadRegisterIntrinsic_xyz <
"__builtin_r600_read_tgid">;
defm int_r600_read_tidig : R600ReadPreloadRegisterIntrinsic_xyz <
"__builtin_r600_read_tidig">;
+
+def int_r600_read_image_attribute
+ : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>,
+ GCCBuiltin<"__builtin_r600_read_image_attribute">;
+
} // End TargetPrefix = "r600"
let TargetPrefix = "AMDGPU" in {
diff --git a/lib/Target/AMDGPU/AMDGPU.h b/lib/Target/AMDGPU/AMDGPU.h
index 0a05d25..4b5c5aa 100644
--- a/lib/Target/AMDGPU/AMDGPU.h
+++ b/lib/Target/AMDGPU/AMDGPU.h
@@ -27,6 +27,7 @@ class TargetMachine;
// R600 Passes
FunctionPass *createR600VectorRegMerger(TargetMachine &tm);
+FunctionPass *createR600ImageAttributeIntrinsicsReplacer();
FunctionPass *createR600TextureIntrinsicsReplacer();
FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm);
FunctionPass *createR600EmitClauseMarkers();
diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index a9a911a..89285ba 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -202,6 +202,7 @@ bool AMDGPUPassConfig::addInstSelector() {
bool R600PassConfig::addPreISel() {
AMDGPUPassConfig::addPreISel();
+ addPass(createR600ImageAttributeIntrinsicsReplacer());
addPass(createR600TextureIntrinsicsReplacer());
return false;
}
diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp
index 8357b6d..8ef1ad8 100644
--- a/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -818,6 +818,20 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
case Intrinsic::AMDGPU_read_workdim:
return LowerImplicitParameter(DAG, VT, DL, MFI->ABIArgOffset / 4);
+ case Intrinsic::r600_read_image_attribute: {
+ // intrinsic argument 0 (Op operand 1): image index
+ // intrinsic argument 1 (Op operand 2): attribute index
+
+ uint64_t DWordOffset = MFI->ABIArgOffset / 4;
+ // Skip grid dim and grid offset.
+ DWordOffset += 4;
+ // There are 5 dword attributes per image.
+ DWordOffset += 5 * Op.getConstantOperandVal(1);
+ // Skip to the requested attribute.
+ DWordOffset += Op.getConstantOperandVal(2);
+ return LowerImplicitParameter(DAG, VT, DL, DWordOffset);
+ }
+
case Intrinsic::r600_read_tgid_x:
return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
AMDGPU::T1_X, VT);
diff --git a/lib/Target/AMDGPU/R600ImageAttributeIntrinsicsReplacer.cpp b/lib/Target/AMDGPU/R600ImageAttributeIntrinsicsReplacer.cpp
new file mode 100644
index 0000000..9727606
--- /dev/null
+++ b/lib/Target/AMDGPU/R600ImageAttributeIntrinsicsReplacer.cpp
@@ -0,0 +1,152 @@
+//===-- R600ImageAttributeIntrinsicsReplacer.cpp --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This pass replaces image attribute getter pseudointrinsics with the
+/// r600_read_image_attribute intrinsic. The pseudointrinsics are used to
+/// implement OpenCL C get_image_* builtins to avoid using mangled names here.
+///
+/// The r600_read_image_attribute intrinsic identifies the image in question
+/// using an index of the argument among image arguments. For each image
+/// argument, calls to getters using that particular argument are replaced
+/// with calls to the r600_read_image_attribute intrinsic. The image index is
+/// passed as a parameter along with the attribute index.
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+
+#include <vector>
+
+using namespace llvm;
+
+namespace {
+
+/// Index of an attribute within the group of attributes stored for one image.
+/// The value is passed as the second argument of the
+/// r600_read_image_attribute intrinsic.
+enum ImageAttribute {
+  WIDTH,
+  HEIGHT,
+  DEPTH,
+  CHANNEL_DATA_TYPE,
+  CHANNEL_ORDER
+};
+
+/// Builds the table that maps the name of each image attribute getter
+/// pseudointrinsic to the attribute it reads.
+// FIXME: Replace this with initializer_list initialization of StringMap if
+// and when it gets one.
+StringMap<ImageAttribute> InitAttributeFromIntrinsic() {
+  static const struct {
+    const char *Name;
+    ImageAttribute Attribute;
+  } Getters[] = {
+      {"llvm.AMDGPU.get.image.width.2d", WIDTH},
+      {"llvm.AMDGPU.get.image.width.3d", WIDTH},
+      {"llvm.AMDGPU.get.image.height.2d", HEIGHT},
+      {"llvm.AMDGPU.get.image.height.3d", HEIGHT},
+      {"llvm.AMDGPU.get.image.depth.3d", DEPTH},
+      {"llvm.AMDGPU.get.image.channel.data.type.2d", CHANNEL_DATA_TYPE},
+      {"llvm.AMDGPU.get.image.channel.data.type.3d", CHANNEL_DATA_TYPE},
+      {"llvm.AMDGPU.get.image.channel.order.2d", CHANNEL_ORDER},
+      {"llvm.AMDGPU.get.image.channel.order.3d", CHANNEL_ORDER},
+  };
+
+  StringMap<ImageAttribute> Result;
+  for (const auto &Getter : Getters)
+    Result.insert(std::make_pair(Getter.Name, Getter.Attribute));
+  return Result;
+}
+
+/// Getter name -> attribute lookup table, built during static initialization.
+auto AttributeFromIntrinsic = InitAttributeFromIntrinsic();
+
+/// Function pass that rewrites calls to the image attribute getter
+/// pseudointrinsics into calls to the r600_read_image_attribute intrinsic.
+class R600ImageAttributeIntrinsicsReplacer : public FunctionPass {
+  static char ID;
+
+  // Both are set in doInitialization() before any function is processed.
+  Type *Int32Type;
+  Function *ReadAttributeFunc;
+
+public:
+  R600ImageAttributeIntrinsicsReplacer()
+      : FunctionPass(ID), Int32Type(nullptr), ReadAttributeFunc(nullptr) {}
+
+  /// Caches the i32 type and obtains the declaration of the image attribute
+  /// reader intrinsic for module \p M.
+  bool doInitialization(Module &M) override {
+    Int32Type = Type::getInt32Ty(M.getContext());
+
+    // Create Function for the image attribute reader intrinsic.
+    ReadAttributeFunc =
+        Intrinsic::getDeclaration(&M, Intrinsic::r600_read_image_attribute);
+
+    return true;
+  }
+
+  /// Checks whether \p CallInst calls one of the image attribute getter
+  /// pseudointrinsics.
+  ///
+  /// \param CallInst   the call to inspect.
+  /// \param Attribute  set to the attribute read by the getter; left
+  ///                   untouched when the function returns false.
+  /// \returns true if the callee is a known attribute getter.
+  bool IsImageIntrinsicCall(const CallInst *CallInst,
+                            ImageAttribute &Attribute) {
+    // An indirect call has no statically known callee, so it cannot be one
+    // of the getters (and must not be dereferenced).
+    const Function *Callee = CallInst->getCalledFunction();
+    if (!Callee)
+      return false;
+
+    auto It = AttributeFromIntrinsic.find(Callee->getName());
+    if (It == AttributeFromIntrinsic.end())
+      return false;
+
+    Attribute = It->second;
+    return true;
+  }
+
+  /// Replaces every attribute getter call that takes an image argument of
+  /// \p F with a call to the attribute reader intrinsic. The image is
+  /// identified by its position among the image arguments of \p F.
+  ///
+  /// \returns true if at least one call was replaced.
+  bool runOnFunction(Function &F) override {
+    unsigned NumImageArgs = 0;
+    std::vector<Instruction *> InstsToErase;
+
+    for (const auto &Arg : F.args()) {
+      // Skip non-image types.
+      Type *ArgType = Arg.getType();
+      if (!ArgType->isPointerTy())
+        continue;
+      // Literal (unnamed) structs have no name to query, so they can never
+      // be one of the named opencl.image*_t types.
+      auto *ElemType = dyn_cast<StructType>(ArgType->getPointerElementType());
+      if (!ElemType || ElemType->isLiteral())
+        continue;
+      StringRef TypeName = ElemType->getName();
+      if (!TypeName.startswith("opencl.image2d_t") &&
+          !TypeName.startswith("opencl.image3d_t"))
+        continue;
+      const unsigned ImageIndex = NumImageArgs++;
+
+      // Iterate uses to find attribute getters.
+      for (const auto &Use : Arg.uses()) {
+        // Only process calls to attribute intrinsics.
+        auto *Inst = dyn_cast<CallInst>(Use.getUser());
+        if (!Inst)
+          continue;
+        ImageAttribute AttributeIndex;
+        if (!IsImageIntrinsicCall(Inst, AttributeIndex))
+          continue;
+
+        // Replace the instruction with a call to the image attribute reader.
+        IRBuilder<> Builder(Inst);
+        Value *Args[] = {ConstantInt::get(Int32Type, ImageIndex),
+                         ConstantInt::get(Int32Type, AttributeIndex)};
+        Inst->replaceAllUsesWith(Builder.CreateCall(ReadAttributeFunc, Args));
+        // Erasing is deferred so the use list being walked is not modified.
+        InstsToErase.push_back(Inst);
+      }
+    }
+
+    for (Instruction *Dead : InstsToErase)
+      Dead->eraseFromParent();
+
+    return !InstsToErase.empty();
+  }
+
+  const char *getPassName() const override {
+    return "R600 Image Attribute Intrinsics Replacer";
+  }
+};
+
+char R600ImageAttributeIntrinsicsReplacer::ID = 0;
+}
+
+/// Factory for the image attribute intrinsics replacer pass; declared in
+/// AMDGPU.h.
+FunctionPass *llvm::createR600ImageAttributeIntrinsicsReplacer() {
+  FunctionPass *Replacer = new R600ImageAttributeIntrinsicsReplacer();
+  return Replacer;
+}
diff --git a/test/CodeGen/AMDGPU/image-attributes.ll b/test/CodeGen/AMDGPU/image-attributes.ll
new file mode 100644
index 0000000..bdde5c6
--- /dev/null
+++ b/test/CodeGen/AMDGPU/image-attributes.ll
@@ -0,0 +1,167 @@
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+; === WIDTH =================================================================
+; 9 implicit args + 2 explicit args + 1 grid dim + 3 grid offset = 15 dwords
+; First width at dword index 15+0 -> KC0[3].W
+
+; FUNC-LABEL: {{^}}width_2d:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], KC0[3].W
+define void @width_2d (%opencl.image2d_t addrspace(1)* %in,
+ i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.AMDGPU.get.image.width.2d(
+ %opencl.image2d_t addrspace(1)* %in) #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}width_3d:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], KC0[3].W
+define void @width_3d (%opencl.image3d_t addrspace(1)* %in,
+ i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.AMDGPU.get.image.width.3d(
+ %opencl.image3d_t addrspace(1)* %in) #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+
+; === HEIGHT ================================================================
+; First height at dword index 15+1 -> KC0[4].X
+
+; FUNC-LABEL: {{^}}height_2d:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], KC0[4].X
+define void @height_2d (%opencl.image2d_t addrspace(1)* %in,
+ i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.AMDGPU.get.image.height.2d(
+ %opencl.image2d_t addrspace(1)* %in) #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}height_3d:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], KC0[4].X
+define void @height_3d (%opencl.image3d_t addrspace(1)* %in,
+ i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.AMDGPU.get.image.height.3d(
+ %opencl.image3d_t addrspace(1)* %in) #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+
+; === DEPTH ================================================================
+; First depth at dword index 15+2 -> KC0[4].Y
+
+; FUNC-LABEL: {{^}}depth_3d:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], KC0[4].Y
+define void @depth_3d (%opencl.image3d_t addrspace(1)* %in,
+ i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.AMDGPU.get.image.depth.3d(
+ %opencl.image3d_t addrspace(1)* %in) #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+
+; === CHANNEL DATA TYPE =====================================================
+; First channel data type at dword index 15+3 -> KC0[4].Z
+
+; FUNC-LABEL: {{^}}data_type_2d:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], KC0[4].Z
+define void @data_type_2d (%opencl.image2d_t addrspace(1)* %in,
+ i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.AMDGPU.get.image.channel.data.type.2d(
+ %opencl.image2d_t addrspace(1)* %in) #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}data_type_3d:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], KC0[4].Z
+define void @data_type_3d (%opencl.image3d_t addrspace(1)* %in,
+ i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.AMDGPU.get.image.channel.data.type.3d(
+ %opencl.image3d_t addrspace(1)* %in) #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+
+; === CHANNEL ORDER =====================================================
+; First channel order at dword index 15+4 -> KC0[4].W
+
+; FUNC-LABEL: {{^}}channel_order_2d:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], KC0[4].W
+define void @channel_order_2d (%opencl.image2d_t addrspace(1)* %in,
+ i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.AMDGPU.get.image.channel.order.2d(
+ %opencl.image2d_t addrspace(1)* %in) #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}channel_order_3d:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], KC0[4].W
+define void @channel_order_3d (%opencl.image3d_t addrspace(1)* %in,
+ i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.AMDGPU.get.image.channel.order.3d(
+ %opencl.image3d_t addrspace(1)* %in) #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+
+; === 2ND IMAGE ==============================================================
+; 9 implicit args + 3 explicit args + 1 grid dim + 3 grid offset = 16 dwords
+; 16 dwords to first image attrib + 5 attribs for first image = 21 dwords
+; Height of the second image is at 21+1 -> KC0[5].Z
+;
+; FUNC-LABEL: {{^}}image_arg_2nd:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], KC0[5].Z
+define void @image_arg_2nd (%opencl.image3d_t addrspace(1)* %in1,
+ %opencl.image2d_t addrspace(1)* %in2,
+ i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.AMDGPU.get.image.height.2d(
+ %opencl.image2d_t addrspace(1)* %in2) #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+%opencl.image2d_t = type opaque
+%opencl.image3d_t = type opaque
+
+declare i32 @llvm.AMDGPU.get.image.width.2d(%opencl.image2d_t addrspace(1)*) #0
+declare i32 @llvm.AMDGPU.get.image.width.3d(%opencl.image3d_t addrspace(1)*) #0
+declare i32 @llvm.AMDGPU.get.image.height.2d(%opencl.image2d_t addrspace(1)*) #0
+declare i32 @llvm.AMDGPU.get.image.height.3d(%opencl.image3d_t addrspace(1)*) #0
+declare i32 @llvm.AMDGPU.get.image.depth.3d(%opencl.image3d_t addrspace(1)*) #0
+declare i32 @llvm.AMDGPU.get.image.channel.data.type.2d(
+ %opencl.image2d_t addrspace(1)*) #0
+declare i32 @llvm.AMDGPU.get.image.channel.data.type.3d(
+ %opencl.image3d_t addrspace(1)*) #0
+declare i32 @llvm.AMDGPU.get.image.channel.order.2d(
+ %opencl.image2d_t addrspace(1)*) #0
+declare i32 @llvm.AMDGPU.get.image.channel.order.3d(
+ %opencl.image3d_t addrspace(1)*) #0
+
+attributes #0 = { readnone }
--
2.4.2
More information about the llvm-commits
mailing list