[llvm] Image attribute access for the AMDGPU backend
Zoltan Gilian
zoltan.gilian at gmail.com
Tue Jun 16 06:46:37 PDT 2015
Added an intrinsic to load an image attribute stored as an implicit kernel
argument.
Added a pass to the AMDGPU backend to replace image attribute getter
pseudointrinsics with the new image attribute reader intrinsic.
---
include/llvm/IR/IntrinsicsR600.td | 5 +
lib/Target/AMDGPU/AMDGPU.h | 1 +
lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 1 +
lib/Target/AMDGPU/R600ISelLowering.cpp | 15 +
.../R600ImageAttributeIntrinsicsReplacer.cpp | 198 ++++++++++++
test/CodeGen/AMDGPU/image-attributes.ll | 353 +++++++++++++++++++++
6 files changed, 573 insertions(+)
create mode 100644 lib/Target/AMDGPU/R600ImageAttributeIntrinsicsReplacer.cpp
create mode 100644 test/CodeGen/AMDGPU/image-attributes.ll
diff --git a/include/llvm/IR/IntrinsicsR600.td b/include/llvm/IR/IntrinsicsR600.td
index 5055667..635cf16 100644
--- a/include/llvm/IR/IntrinsicsR600.td
+++ b/include/llvm/IR/IntrinsicsR600.td
@@ -33,6 +33,11 @@ defm int_r600_read_tgid : R600ReadPreloadRegisterIntrinsic_xyz <
"__builtin_r600_read_tgid">;
defm int_r600_read_tidig : R600ReadPreloadRegisterIntrinsic_xyz <
"__builtin_r600_read_tidig">;
+
+// Reads an image attribute stored as an implicit kernel argument.
+// Operand 0 is the image index, operand 1 is the attribute index; the
+// result is the 32-bit attribute value.
+def int_r600_read_image_attribute
+ : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>,
+ GCCBuiltin<"__builtin_r600_read_image_attribute">;
+
} // End TargetPrefix = "r600"
let TargetPrefix = "AMDGPU" in {
diff --git a/lib/Target/AMDGPU/AMDGPU.h b/lib/Target/AMDGPU/AMDGPU.h
index 0a05d25..4b5c5aa 100644
--- a/lib/Target/AMDGPU/AMDGPU.h
+++ b/lib/Target/AMDGPU/AMDGPU.h
@@ -27,6 +27,7 @@ class TargetMachine;
// R600 Passes
FunctionPass *createR600VectorRegMerger(TargetMachine &tm);
+FunctionPass *createR600ImageAttributeIntrinsicsReplacer();
FunctionPass *createR600TextureIntrinsicsReplacer();
FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm);
FunctionPass *createR600EmitClauseMarkers();
diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index a9a911a..89285ba 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -202,6 +202,7 @@ bool AMDGPUPassConfig::addInstSelector() {
bool R600PassConfig::addPreISel() {
AMDGPUPassConfig::addPreISel();
+ addPass(createR600ImageAttributeIntrinsicsReplacer());
addPass(createR600TextureIntrinsicsReplacer());
return false;
}
diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp
index 8357b6d..7bea7117 100644
--- a/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -818,6 +818,21 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
case Intrinsic::AMDGPU_read_workdim:
return LowerImplicitParameter(DAG, VT, DL, MFI->ABIArgOffset / 4);
+ case Intrinsic::r600_read_image_attribute:
+ {
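+ // Op operand 1: image index (argument 0 of the intrinsic)
+ // Op operand 2: attribute index (argument 1 of the intrinsic)
+ // (Op operand 0 is presumably the intrinsic ID, hence the shift by one.)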
+
+ uint64_t DWordOffset = MFI->ABIArgOffset / 4;
+ // Skip grid dim and grid offset.
+ DWordOffset += 4;
+ // There are 5 dword attributes per image.
+ DWordOffset += 5 * Op.getConstantOperandVal(1);
+ // Skip to the requested attribute.
+ DWordOffset += Op.getConstantOperandVal(2);
+ return LowerImplicitParameter(DAG, VT, DL, DWordOffset);
+ }
+
case Intrinsic::r600_read_tgid_x:
return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
AMDGPU::T1_X, VT);
diff --git a/lib/Target/AMDGPU/R600ImageAttributeIntrinsicsReplacer.cpp b/lib/Target/AMDGPU/R600ImageAttributeIntrinsicsReplacer.cpp
new file mode 100644
index 0000000..a0785e5
--- /dev/null
+++ b/lib/Target/AMDGPU/R600ImageAttributeIntrinsicsReplacer.cpp
@@ -0,0 +1,198 @@
+//===-- R600ImageAttributeIntrinsicsReplacer.cpp --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This pass replaces image attribute getter pseudointrinsics with the
+/// r600_read_image_attribute intrinsic. The pseudointrinsics are used to
+/// implement OpenCL C get_image_* builtins to avoid using mangled names here.
+///
+/// The r600_read_image_attribute intrinsic identifies the image in question
+/// using an index defined by the following ordering of the image arguments:
+/// write-only images are ordered before read-only ones; images having the
+/// same access qualifier follow the order of the image arguments as defined
+/// by the kernel signature. For each call to an attribute getter the pass
+/// determines the index of the image operand and passes it to the
+/// r600_read_image_attribute intrinsic.
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallBitVector.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstVisitor.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Metadata.h"
+
+using namespace llvm;
+
+namespace {
+
+// Names of the attribute getter pseudointrinsics.
+// Name prefixes of the attribute getter pseudointrinsics. The position of a
+// name in this table is the attribute index that is handed to the
+// r600_read_image_attribute intrinsic.
+const char * ImageAttributeIntrinsics[] = {
+  "llvm.r600.get.image.width",
+  "llvm.r600.get.image.height",
+  "llvm.r600.get.image.depth",
+  "llvm.r600.get.image.channel.data.type",
+  "llvm.r600.get.image.channel.order"
+};
+// Number of entries in ImageAttributeIntrinsics.
+const unsigned NumImageAttributes =
+    sizeof(ImageAttributeIntrinsics) / sizeof(ImageAttributeIntrinsics[0]);
+
+/// Function pass that rewrites calls to the llvm.r600.get.image.*
+/// pseudointrinsics into calls to the r600_read_image_attribute intrinsic.
+///
+/// doInitialization() records, for every kernel listed in the
+/// "opencl.kernels" named metadata, which arguments are qualified
+/// "write_only". runOnFunction() then gives every image argument an index
+/// (write-only images first, the remaining images after them, each group in
+/// signature order) and visitCallInst() replaces each getter call whose
+/// operand is such an argument with a read of (image index, attribute index).
+class R600ImageAttributeIntrinsicsReplacer :
+    public FunctionPass,
+    public InstVisitor<R600ImageAttributeIntrinsicsReplacer> {
+  static char ID;
+
+  /// Access qualifier summary of a single kernel.
+  struct AccessQualInfo {
+    /// Number of arguments qualified "write_only".
+    unsigned NumWriteOnlyArgs;
+    /// Bit i is set iff argument i is qualified "write_only".
+    SmallBitVector IsWriteOnly;
+    AccessQualInfo(unsigned NumWriteOnlyArgs_, SmallBitVector IsWriteOnly_):
+      NumWriteOnlyArgs(NumWriteOnlyArgs_), IsWriteOnly(IsWriteOnly_) {}
+  };
+
+  // Per-module state, (re)built by doInitialization.
+  Type *Int32Type;
+  Function *ReadImageAttributeFun;
+  DenseMap<const Function*, AccessQualInfo> FunctionAccessQualInfo;
+
+  // Per-function state shared between runOnFunction and visitCallInst.
+  bool Modified;
+  DenseMap<const Value*, unsigned> ImageArgIndices;
+
+public:
+  R600ImageAttributeIntrinsicsReplacer():
+    FunctionPass(ID), Int32Type(nullptr), ReadImageAttributeFun(nullptr),
+    Modified(false) {
+  }
+
+  /// Collects the access qualifiers of every kernel in \p M and declares the
+  /// r600_read_image_attribute intrinsic.
+  ///
+  /// \returns true if the intrinsic declaration was requested (i.e. the
+  /// module has "opencl.kernels" metadata), false otherwise.
+  bool doInitialization(Module &M) override {
+    Int32Type = Type::getInt32Ty(M.getContext());
+
+    // Forget everything recorded for a previously initialized module so no
+    // stale Function pointers survive.
+    ReadImageAttributeFun = nullptr;
+    FunctionAccessQualInfo.clear();
+
+    auto KernelsMD = M.getNamedMetadata("opencl.kernels");
+    if (!KernelsMD)
+      return false;
+
+    for (const MDNode *Op : KernelsMD->operands()) {
+      // Operand 0 is the kernel function, operand 2 the access qualifier
+      // node. Malformed entries are skipped instead of asserting.
+      if (Op->getNumOperands() < 3 || !Op->getOperand(0))
+        continue;
+      auto KernelFun = mdconst::dyn_extract<Function>(Op->getOperand(0));
+      if (!KernelFun)
+        continue;
+      auto AccessQualMD = dyn_cast_or_null<MDNode>(Op->getOperand(2).get());
+      if (!AccessQualMD || AccessQualMD->getNumOperands() < 1)
+        continue;
+      auto MDName =
+          dyn_cast_or_null<MDString>(AccessQualMD->getOperand(0).get());
+      if (!MDName || MDName->getString() != "kernel_arg_access_qual")
+        continue;
+
+      // Build the bit vector of write-only arguments and count them. The
+      // first operand is the node name, so argument i lives at operand i + 1.
+      unsigned NumArgs = AccessQualMD->getNumOperands() - 1;
+      SmallBitVector WriteOnlyArgs(NumArgs);
+      unsigned NumWriteOnlyArgs = 0;
+      for (unsigned i = 0; i < NumArgs; ++i) {
+        auto AccessQual =
+            dyn_cast_or_null<MDString>(AccessQualMD->getOperand(i + 1).get());
+        if (AccessQual && AccessQual->getString() == "write_only") {
+          WriteOnlyArgs.set(i);
+          ++NumWriteOnlyArgs;
+        }
+      }
+      FunctionAccessQualInfo.insert(std::make_pair(
+          KernelFun, AccessQualInfo(NumWriteOnlyArgs, WriteOnlyArgs)));
+    }
+
+    // Declare the intrinsic that the getter calls are rewritten to.
+    ReadImageAttributeFun = Intrinsic::getDeclaration(&M,
+        Intrinsic::r600_read_image_attribute);
+
+    return true;
+  }
+
+  /// Assigns an index to every image argument of \p F and rewrites the
+  /// attribute getter calls in its body.
+  ///
+  /// Functions without recorded access qualifier info are left untouched.
+  /// \returns true if at least one call was replaced.
+  bool runOnFunction(Function &F) override {
+    auto InfoIt = FunctionAccessQualInfo.find(&F);
+    if (InfoIt == FunctionAccessQualInfo.end())
+      return false;
+    const AccessQualInfo &Info = InfoIt->second;
+
+    Modified = false;
+    ImageArgIndices.clear();
+
+    unsigned NumWriteOnlySoFar = 0, NumReadOnlySoFar = 0, NextArgNo = 0;
+    for (const Argument &Arg : F.args()) {
+      // ArgNo counts every argument, image or not, because the qualifier
+      // bit vector is indexed by position in the signature.
+      unsigned ArgNo = NextArgNo++;
+
+      // Only pointers to the opaque image2d/image3d structs are images.
+      Type *ArgType = Arg.getType();
+      if (!ArgType->isPointerTy())
+        continue;
+      Type *ElemType = ArgType->getPointerElementType();
+      if (!ElemType->isStructTy())
+        continue;
+      StringRef TypeName = ElemType->getStructName();
+      if (!TypeName.startswith("opencl.image2d_t") &&
+          !TypeName.startswith("opencl.image3d_t"))
+        continue;
+
+      // Write-only images come first; all other images are offset by the
+      // number of write-only arguments. An argument with no metadata entry
+      // is treated as not write-only rather than indexing out of bounds.
+      bool WriteOnly =
+          ArgNo < Info.IsWriteOnly.size() && Info.IsWriteOnly[ArgNo];
+      unsigned Index = WriteOnly
+          ? NumWriteOnlySoFar++
+          : Info.NumWriteOnlyArgs + NumReadOnlySoFar++;
+      ImageArgIndices[&Arg] = Index;
+    }
+
+    visit(F);
+
+    return Modified;
+  }
+
+  /// Replaces \p I with a call to r600_read_image_attribute if it is a call
+  /// to one of the getter pseudointrinsics on a known image argument.
+  void visitCallInst(CallInst &I) {
+    Function *Callee = I.getCalledFunction();
+    if (!Callee)
+      return;
+
+    // The table position of the matching name prefix is the attribute index.
+    StringRef Name = Callee->getName();
+    unsigned Attribute = 0;
+    while (Attribute < NumImageAttributes &&
+           !Name.startswith(ImageAttributeIntrinsics[Attribute]))
+      ++Attribute;
+    if (Attribute >= NumImageAttributes)
+      return; // Not an image attribute getter pseudointrinsic.
+
+    // The replacement yields an i32 and needs the image operand; leave
+    // calls of any other shape alone.
+    if (I.getNumArgOperands() < 1 || I.getType() != Int32Type)
+      return;
+
+    // Only calls made directly on an image argument can be resolved.
+    auto ImageArgIt = ImageArgIndices.find(I.getArgOperand(0));
+    if (ImageArgIt == ImageArgIndices.end())
+      return;
+
+    // Replace instruction with call to r600_read_image_attribute intrinsic.
+    IRBuilder<> Builder(&I);
+    Value *Args[] = { ConstantInt::get(Int32Type, ImageArgIt->second),
+                      ConstantInt::get(Int32Type, Attribute) };
+    I.replaceAllUsesWith(Builder.CreateCall(ReadImageAttributeFun, Args));
+    I.eraseFromParent();
+    Modified = true;
+  }
+
+  const char *getPassName() const override {
+    return "R600 Image Attribute Intrinsics Replacer";
+  }
+};
+
+char R600ImageAttributeIntrinsicsReplacer::ID = 0;
+
+}
+
+/// Creates a new instance of the image attribute intrinsics replacer pass.
+FunctionPass *llvm::createR600ImageAttributeIntrinsicsReplacer() {
+  FunctionPass *Replacer = new R600ImageAttributeIntrinsicsReplacer();
+  return Replacer;
+}
diff --git a/test/CodeGen/AMDGPU/image-attributes.ll b/test/CodeGen/AMDGPU/image-attributes.ll
new file mode 100644
index 0000000..a5fd4d7
--- /dev/null
+++ b/test/CodeGen/AMDGPU/image-attributes.ll
@@ -0,0 +1,353 @@
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+; === WIDTH =================================================================
+; 9 implicit args + 2 explicit args + 1 grid dim + 3 grid offset = 15 dwords
+; First width at dword index 15+0 -> KC0[3].W
+
+; FUNC-LABEL: {{^}}read_only_2d_width:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], KC0[3].W
+define void @read_only_2d_width (%opencl.image2d_t addrspace(1)* %in,
+ i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.r600.get.image.width.2d(
+ %opencl.image2d_t addrspace(1)* %in) #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}read_only_3d_width:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], KC0[3].W
+define void @read_only_3d_width (%opencl.image3d_t addrspace(1)* %in,
+ i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.r600.get.image.width.3d(
+ %opencl.image3d_t addrspace(1)* %in) #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}write_only_2d_width:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], KC0[3].W
+define void @write_only_2d_width (%opencl.image2d_t addrspace(1)* %in,
+ i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.r600.get.image.width.2d(
+ %opencl.image2d_t addrspace(1)* %in) #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}write_only_3d_width:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], KC0[3].W
+define void @write_only_3d_width (%opencl.image3d_t addrspace(1)* %in,
+ i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.r600.get.image.width.3d(
+ %opencl.image3d_t addrspace(1)* %in) #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+
+; === HEIGHT ================================================================
+; First height at dword index 15+1 -> KC0[4].X
+
+; FUNC-LABEL: {{^}}read_only_2d_height:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], KC0[4].X
+define void @read_only_2d_height (%opencl.image2d_t addrspace(1)* %in,
+ i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.r600.get.image.height.2d(
+ %opencl.image2d_t addrspace(1)* %in) #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}read_only_3d_height:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], KC0[4].X
+define void @read_only_3d_height (%opencl.image3d_t addrspace(1)* %in,
+ i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.r600.get.image.height.3d(
+ %opencl.image3d_t addrspace(1)* %in) #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}write_only_2d_height:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], KC0[4].X
+define void @write_only_2d_height (%opencl.image2d_t addrspace(1)* %in,
+ i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.r600.get.image.height.2d(
+ %opencl.image2d_t addrspace(1)* %in) #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}write_only_3d_height:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], KC0[4].X
+define void @write_only_3d_height (%opencl.image3d_t addrspace(1)* %in,
+ i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.r600.get.image.height.3d(
+ %opencl.image3d_t addrspace(1)* %in) #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+
+; === DEPTH ================================================================
+; First depth at dword index 15+2 -> KC0[4].Y
+
+; FUNC-LABEL: {{^}}read_only_3d_depth:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], KC0[4].Y
+define void @read_only_3d_depth (%opencl.image3d_t addrspace(1)* %in,
+ i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.r600.get.image.depth.3d(
+ %opencl.image3d_t addrspace(1)* %in) #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}write_only_3d_depth:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], KC0[4].Y
+define void @write_only_3d_depth (%opencl.image3d_t addrspace(1)* %in,
+ i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.r600.get.image.depth.3d(
+ %opencl.image3d_t addrspace(1)* %in) #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+
+; === CHANNEL DATA TYPE =====================================================
+; First channel data type at dword index 15+3 -> KC0[4].Z
+
+; FUNC-LABEL: {{^}}read_only_2d_data_type:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], KC0[4].Z
+define void @read_only_2d_data_type (%opencl.image2d_t addrspace(1)* %in,
+ i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.r600.get.image.channel.data.type.2d(
+ %opencl.image2d_t addrspace(1)* %in) #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}read_only_3d_data_type:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], KC0[4].Z
+define void @read_only_3d_data_type (%opencl.image3d_t addrspace(1)* %in,
+ i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.r600.get.image.channel.data.type.3d(
+ %opencl.image3d_t addrspace(1)* %in) #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}write_only_2d_data_type:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], KC0[4].Z
+define void @write_only_2d_data_type (%opencl.image2d_t addrspace(1)* %in,
+ i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.r600.get.image.channel.data.type.2d(
+ %opencl.image2d_t addrspace(1)* %in) #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}write_only_3d_data_type:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], KC0[4].Z
+define void @write_only_3d_data_type (%opencl.image3d_t addrspace(1)* %in,
+ i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.r600.get.image.channel.data.type.3d(
+ %opencl.image3d_t addrspace(1)* %in) #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+
+; === CHANNEL ORDER =====================================================
+; First channel order at dword index 15+4 -> KC0[4].W
+
+; FUNC-LABEL: {{^}}read_only_2d_channel_order:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], KC0[4].W
+define void @read_only_2d_channel_order (%opencl.image2d_t addrspace(1)* %in,
+ i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.r600.get.image.channel.order.2d(
+ %opencl.image2d_t addrspace(1)* %in) #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}read_only_3d_channel_order:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], KC0[4].W
+define void @read_only_3d_channel_order (%opencl.image3d_t addrspace(1)* %in,
+ i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.r600.get.image.channel.order.3d(
+ %opencl.image3d_t addrspace(1)* %in) #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}write_only_2d_channel_order:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], KC0[4].W
+define void @write_only_2d_channel_order (%opencl.image2d_t addrspace(1)* %in,
+ i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.r600.get.image.channel.order.2d(
+ %opencl.image2d_t addrspace(1)* %in) #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}write_only_3d_channel_order:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], KC0[4].W
+define void @write_only_3d_channel_order (%opencl.image3d_t addrspace(1)* %in,
+ i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.r600.get.image.channel.order.3d(
+ %opencl.image3d_t addrspace(1)* %in) #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+
+; === WRITE-ONLY FIRST =======================================================
+; 9 implicit args + 5 explicit args + 1 grid dim + 3 grid offset = 18 dwords
+; 18 dwords to first write-only attrib + 2 x 5 write-only attribs = 28 dwords
+; Height of the first read-only image is at 28+1 -> KC0[7].Y
+;
+; FUNC-LABEL: {{^}}write_only_first:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], KC0[7].Y
+define void @write_only_first (%opencl.image2d_t addrspace(1)* %in1, ; ro
+ %opencl.image3d_t addrspace(1)* %in2, ; wo
+ %opencl.image3d_t addrspace(1)* %in3, ; ro
+ %opencl.image2d_t addrspace(1)* %in4, ; wo
+ i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.r600.get.image.height.2d(
+ %opencl.image2d_t addrspace(1)* %in1) #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+%opencl.image2d_t = type opaque
+%opencl.image3d_t = type opaque
+
+declare i32 @llvm.r600.get.image.width.2d(%opencl.image2d_t addrspace(1)*) #0
+declare i32 @llvm.r600.get.image.width.3d(%opencl.image3d_t addrspace(1)*) #0
+declare i32 @llvm.r600.get.image.height.2d(%opencl.image2d_t addrspace(1)*) #0
+declare i32 @llvm.r600.get.image.height.3d(%opencl.image3d_t addrspace(1)*) #0
+declare i32 @llvm.r600.get.image.depth.3d(%opencl.image3d_t addrspace(1)*) #0
+declare i32 @llvm.r600.get.image.channel.data.type.2d(
+ %opencl.image2d_t addrspace(1)*) #0
+declare i32 @llvm.r600.get.image.channel.data.type.3d(
+ %opencl.image3d_t addrspace(1)*) #0
+declare i32 @llvm.r600.get.image.channel.order.2d(
+ %opencl.image2d_t addrspace(1)*) #0
+declare i32 @llvm.r600.get.image.channel.order.3d(
+ %opencl.image3d_t addrspace(1)*) #0
+
+attributes #0 = { readnone }
+
+
+!opencl.kernels = !{ !0, !6, !9, !11, ; width
+ !12, !13, !14, !15, ; height
+ !16, !17, ; depth
+ !18, !19, !20, !21, ; data type
+ !22, !23, !24, !25, ; channel order
+ !109 ; other
+ }
+
+!1 = !{!"kernel_arg_addr_space", i32 1, i32 1}
+!2 = !{!"kernel_arg_access_qual", !"read_only", !"none"}
+!10 = !{!"kernel_arg_access_qual", !"write_only", !"none"}
+!3 = !{!"kernel_arg_type", !"image2d_t", !"int*"}
+!7 = !{!"kernel_arg_type", !"image3d_t", !"int*"}
+!4 = !{!"kernel_arg_base_type", !"image2d_t", !"int*"}
+!8 = !{!"kernel_arg_base_type", !"image3d_t", !"int*"}
+!5 = !{!"kernel_arg_type_qual", !"", !""}
+
+!0 = !{void (%opencl.image2d_t addrspace(1)*, i32 addrspace(1)*)*
+ @read_only_2d_width, !1, !2, !3, !4, !5}
+!6 = !{void (%opencl.image3d_t addrspace(1)*, i32 addrspace(1)*)*
+ @read_only_3d_width, !1, !2, !7, !8, !5}
+!9 = !{void (%opencl.image2d_t addrspace(1)*, i32 addrspace(1)*)*
+ @write_only_2d_width, !1, !10, !3, !4, !5}
+!11 = !{void (%opencl.image3d_t addrspace(1)*, i32 addrspace(1)*)*
+ @write_only_3d_width, !1, !10, !7, !8, !5}
+
+!12 = !{void (%opencl.image2d_t addrspace(1)*, i32 addrspace(1)*)*
+ @read_only_2d_height, !1, !2, !3, !4, !5}
+!13 = !{void (%opencl.image3d_t addrspace(1)*, i32 addrspace(1)*)*
+ @read_only_3d_height, !1, !2, !7, !8, !5}
+!14 = !{void (%opencl.image2d_t addrspace(1)*, i32 addrspace(1)*)*
+ @write_only_2d_height, !1, !10, !3, !4, !5}
+!15 = !{void (%opencl.image3d_t addrspace(1)*, i32 addrspace(1)*)*
+ @write_only_3d_height, !1, !10, !7, !8, !5}
+
+!16 = !{void (%opencl.image3d_t addrspace(1)*, i32 addrspace(1)*)*
+ @read_only_3d_depth, !1, !2, !7, !8, !5}
+!17 = !{void (%opencl.image3d_t addrspace(1)*, i32 addrspace(1)*)*
+ @write_only_3d_depth, !1, !10, !7, !8, !5}
+
+!18 = !{void (%opencl.image2d_t addrspace(1)*, i32 addrspace(1)*)*
+ @read_only_2d_data_type, !1, !2, !3, !4, !5}
+!19 = !{void (%opencl.image3d_t addrspace(1)*, i32 addrspace(1)*)*
+ @read_only_3d_data_type, !1, !2, !7, !8, !5}
+!20 = !{void (%opencl.image2d_t addrspace(1)*, i32 addrspace(1)*)*
+ @write_only_2d_data_type, !1, !10, !3, !4, !5}
+!21 = !{void (%opencl.image3d_t addrspace(1)*, i32 addrspace(1)*)*
+ @write_only_3d_data_type, !1, !10, !7, !8, !5}
+
+!22 = !{void (%opencl.image2d_t addrspace(1)*, i32 addrspace(1)*)*
+ @read_only_2d_channel_order, !1, !2, !3, !4, !5}
+!23 = !{void (%opencl.image3d_t addrspace(1)*, i32 addrspace(1)*)*
+ @read_only_3d_channel_order, !1, !2, !7, !8, !5}
+!24 = !{void (%opencl.image2d_t addrspace(1)*, i32 addrspace(1)*)*
+ @write_only_2d_channel_order, !1, !10, !3, !4, !5}
+!25 = !{void (%opencl.image3d_t addrspace(1)*, i32 addrspace(1)*)*
+ @write_only_3d_channel_order, !1, !10, !7, !8, !5}
+
+
+!109 = !{void (%opencl.image2d_t addrspace(1)*,
+ %opencl.image3d_t addrspace(1)*,
+ %opencl.image3d_t addrspace(1)*,
+ %opencl.image2d_t addrspace(1)*,
+ i32 addrspace(1)*)*
+ @write_only_first, !110, !111, !112, !113, !114}
+!110 = !{!"kernel_arg_addr_space", i32 1, i32 1, i32 1, i32 1, i32 1}
+!111 = !{!"kernel_arg_access_qual", !"read_only", !"write_only",
+ !"read_only", !"write_only", !"none"}
+!112 = !{!"kernel_arg_type", !"image2d_t", !"image3d_t",
+ !"image3d_t", !"image2d_t", !"int*"}
+!113 = !{!"kernel_arg_base_type", !"image2d_t", !"image3d_t",
+ !"image3d_t", !"image2d_t", !"int*"}
+!114 = !{!"kernel_arg_type_qual", !"", !"", !"", !"", !""}
+
--
2.4.2
More information about the llvm-commits
mailing list