[llvm] Image attribute access for the AMDGPU backend

Zoltan Gilian zoltan.gilian at gmail.com
Tue Jun 16 06:46:37 PDT 2015


Added an intrinsic to load an image attribute stored as an implicit kernel
argument.
Added a pass to the AMDGPU backend to replace image attribute getter
pseudointrinsics with the new image attribute reader intrinsic.
---
 include/llvm/IR/IntrinsicsR600.td                  |   5 +
 lib/Target/AMDGPU/AMDGPU.h                         |   1 +
 lib/Target/AMDGPU/AMDGPUTargetMachine.cpp          |   1 +
 lib/Target/AMDGPU/R600ISelLowering.cpp             |  15 +
 .../R600ImageAttributeIntrinsicsReplacer.cpp       | 198 ++++++++++++
 test/CodeGen/AMDGPU/image-attributes.ll            | 353 +++++++++++++++++++++
 6 files changed, 573 insertions(+)
 create mode 100644 lib/Target/AMDGPU/R600ImageAttributeIntrinsicsReplacer.cpp
 create mode 100644 test/CodeGen/AMDGPU/image-attributes.ll

diff --git a/include/llvm/IR/IntrinsicsR600.td b/include/llvm/IR/IntrinsicsR600.td
index 5055667..635cf16 100644
--- a/include/llvm/IR/IntrinsicsR600.td
+++ b/include/llvm/IR/IntrinsicsR600.td
@@ -33,6 +33,11 @@ defm int_r600_read_tgid : R600ReadPreloadRegisterIntrinsic_xyz <
                                        "__builtin_r600_read_tgid">;
 defm int_r600_read_tidig : R600ReadPreloadRegisterIntrinsic_xyz <
                                        "__builtin_r600_read_tidig">;
+// Reads one 32-bit image attribute; operands: image index, attribute index.
+def int_r600_read_image_attribute
+  : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>,
+    GCCBuiltin<"__builtin_r600_read_image_attribute">;
+
 } // End TargetPrefix = "r600"
 
 let TargetPrefix = "AMDGPU" in {
diff --git a/lib/Target/AMDGPU/AMDGPU.h b/lib/Target/AMDGPU/AMDGPU.h
index 0a05d25..4b5c5aa 100644
--- a/lib/Target/AMDGPU/AMDGPU.h
+++ b/lib/Target/AMDGPU/AMDGPU.h
@@ -27,6 +27,7 @@ class TargetMachine;
 
 // R600 Passes
 FunctionPass *createR600VectorRegMerger(TargetMachine &tm);
+FunctionPass *createR600ImageAttributeIntrinsicsReplacer();
 FunctionPass *createR600TextureIntrinsicsReplacer();
 FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm);
 FunctionPass *createR600EmitClauseMarkers();
diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index a9a911a..89285ba 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -202,6 +202,7 @@ bool AMDGPUPassConfig::addInstSelector() {
 
 bool R600PassConfig::addPreISel() {
   AMDGPUPassConfig::addPreISel();
+  addPass(createR600ImageAttributeIntrinsicsReplacer());
   addPass(createR600TextureIntrinsicsReplacer());
   return false;
 }
diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp
index 8357b6d..7bea7117 100644
--- a/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -818,6 +818,21 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
     case Intrinsic::AMDGPU_read_workdim:
       return LowerImplicitParameter(DAG, VT, DL, MFI->ABIArgOffset / 4);
 
+    case Intrinsic::r600_read_image_attribute:
+      {
+        // intrinsic argument 0 (node operand 1): image index
+        // intrinsic argument 1 (node operand 2): attribute index
+
+        uint64_t DWordOffset = MFI->ABIArgOffset / 4;
+        // Skip grid dim and grid offset.
+        DWordOffset += 4;
+        // There are 5 dword attributes per image.
+        DWordOffset += 5 * Op.getConstantOperandVal(1);
+        // Skip to the requested attribute.
+        DWordOffset += Op.getConstantOperandVal(2);
+        return LowerImplicitParameter(DAG, VT, DL, DWordOffset);
+      }
+
     case Intrinsic::r600_read_tgid_x:
       return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                   AMDGPU::T1_X, VT);
diff --git a/lib/Target/AMDGPU/R600ImageAttributeIntrinsicsReplacer.cpp b/lib/Target/AMDGPU/R600ImageAttributeIntrinsicsReplacer.cpp
new file mode 100644
index 0000000..a0785e5
--- /dev/null
+++ b/lib/Target/AMDGPU/R600ImageAttributeIntrinsicsReplacer.cpp
@@ -0,0 +1,198 @@
+//===-- R600ImageAttributeIntrinsicsReplacer.cpp --------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This pass replaces image attribute getter pseudointrinsics with the
+/// r600_read_image_attribute intrinsic. The pseudointrinsics are used to
+/// implement OpenCL C get_image_* builtins to avoid using mangled names here.
+///
+/// The r600_read_image_attribute intrinsic identifies the image in question
+/// using an index defined by the following ordering of the image arguments:
+/// write-only images are ordered before read-only ones; images having the
+/// same access qualifier follow the order of the image arguments as defined
+/// by the kernel signature. For each call to an attribute getter the pass
+/// determines the index of the image operand and passes it to the
+/// r600_read_image_attribute intrinsic.
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallBitVector.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstVisitor.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Metadata.h"
+
+using namespace llvm;
+
+namespace {
+
+// Getter name prefixes; a prefix's position is its attribute index.
+const char *const ImageAttributeIntrinsics[] = {
+  "llvm.r600.get.image.width",
+  "llvm.r600.get.image.height",
+  "llvm.r600.get.image.depth",
+  "llvm.r600.get.image.channel.data.type",
+  "llvm.r600.get.image.channel.order"
+};
+const unsigned NumImageAttributes = sizeof(ImageAttributeIntrinsics) /
+                                    sizeof(ImageAttributeIntrinsics[0]);
+
+/// Function pass that replaces calls to the image attribute getter
+/// pseudointrinsics with calls to the r600_read_image_attribute intrinsic.
+class R600ImageAttributeIntrinsicsReplacer :
+    public FunctionPass,
+    public InstVisitor<R600ImageAttributeIntrinsicsReplacer> {
+  static char ID;
+
+  /// Access qualifiers of one kernel's arguments.
+  struct AccessQualInfo {
+    unsigned NumWriteOnlyArgs;  // Number of "write_only" arguments.
+    SmallBitVector IsWriteOnly; // Bit i is set iff argument i is write_only.
+    AccessQualInfo(unsigned NumWriteOnlyArgs_, SmallBitVector IsWriteOnly_):
+        NumWriteOnlyArgs(NumWriteOnlyArgs_), IsWriteOnly(IsWriteOnly_) {}
+  };
+
+  // Per-module state.
+  Type *Int32Type;
+  Function *ReadImageAttributeFun;
+  DenseMap<const Function*, AccessQualInfo> FunctionAccessQualInfo;
+
+  // Per-function state for visitCallInst.
+  bool modified;
+  DenseMap<const Value*, unsigned> ImageArgIndices;
+
+public:
+  R600ImageAttributeIntrinsicsReplacer():
+    FunctionPass(ID), Int32Type(nullptr), ReadImageAttributeFun(nullptr),
+    modified(false) {}
+
+  /// Collects the argument access qualifiers of the kernels listed in the
+  /// opencl.kernels metadata and declares the r600_read_image_attribute
+  /// intrinsic. Returns false if the module has no such metadata.
+  bool doInitialization(Module &M) override {
+    Int32Type = Type::getInt32Ty(M.getContext());
+    FunctionAccessQualInfo.clear(); // Forget kernels of an earlier module.
+    auto KernelsMD = M.getNamedMetadata("opencl.kernels");
+    if (!KernelsMD) return false;
+    for (const MDNode* Op: KernelsMD->operands()) {
+      // Operand 0 is the kernel, operand 2 its access qualifier node. Use
+      // dyn_cast_or_null: cast<> asserts on a mismatch and never yields null.
+      if (Op->getNumOperands() < 3 || !Op->getOperand(0)) continue;
+      auto KernelFun = mdconst::dyn_extract<Function>(Op->getOperand(0));
+      if (!KernelFun) continue;
+      auto AccessQualMD = dyn_cast_or_null<MDNode>(Op->getOperand(2));
+      if (!AccessQualMD || AccessQualMD->getNumOperands() < 1) continue;
+      auto MDName = dyn_cast_or_null<MDString>(AccessQualMD->getOperand(0));
+      if (!MDName || MDName->getString() != "kernel_arg_access_qual") continue;
+
+      // Create a bit vector of write only args and count them.
+      unsigned NumArgs = AccessQualMD->getNumOperands() - 1;
+      SmallBitVector WriteOnlyArgs(NumArgs);
+      unsigned NumWriteOnlyArgs = 0;
+      for (unsigned i = 0; i < NumArgs; ++i) {
+        auto AccessQual =
+            dyn_cast_or_null<MDString>(AccessQualMD->getOperand(i + 1));
+        if (!AccessQual || AccessQual->getString() != "write_only") continue;
+        WriteOnlyArgs.set(i);
+        ++NumWriteOnlyArgs;
+      }
+      auto Info = AccessQualInfo(NumWriteOnlyArgs, WriteOnlyArgs);
+      FunctionAccessQualInfo.insert(std::make_pair(KernelFun, Info));
+    }
+
+    // Create Function for the image attribute reading intrinsic.
+    ReadImageAttributeFun = Intrinsic::getDeclaration(&M,
+        Intrinsic::r600_read_image_attribute);
+    return true;
+  }
+
+  /// Numbers the image arguments of \p F (write only images first, each group
+  /// in signature order) and rewrites the attribute getter calls in \p F.
+  /// Functions without access qualifier info are left alone.
+  bool runOnFunction(Function &F) override {
+    auto InfoIt = FunctionAccessQualInfo.find(&F);
+    if (InfoIt == FunctionAccessQualInfo.end()) return false;
+    const AccessQualInfo &Info = InfoIt->second;
+    modified = false;
+
+    // Store indices of image arguments.
+    ImageArgIndices.clear();
+    unsigned NumWriteOnlyArgsSoFar = 0, NumReadOnlyArgsSoFar = 0, i = 0;
+    for (auto arg = F.arg_begin(), E = F.arg_end(); arg != E; ++arg, ++i) {
+      // Skip non-image types; literal structs have no name to query.
+      Type *arg_type = arg->getType();
+      if (!arg_type->isPointerTy()) continue;
+      auto elem_type = dyn_cast<StructType>(arg_type->getPointerElementType());
+      if (!elem_type || elem_type->isLiteral()) continue;
+      StringRef type_name = elem_type->getName();
+      if (!type_name.startswith("opencl.image2d_t") &&
+          !type_name.startswith("opencl.image3d_t")) continue;
+
+      // Offset read only image indices by the number of write only ones.
+      // An argument without a qualifier entry counts as not write only.
+      unsigned Index = 0;
+      if (i < Info.IsWriteOnly.size() && Info.IsWriteOnly[i]) {
+        Index = NumWriteOnlyArgsSoFar++;
+      } else {
+        Index = Info.NumWriteOnlyArgs + NumReadOnlyArgsSoFar++;
+      }
+      ImageArgIndices[arg] = Index;
+    }
+
+    visit(F);
+
+    return modified;
+  }
+
+  /// Replaces a call to an attribute getter pseudointrinsic whose first
+  /// operand is an image argument of the current function with a call to the
+  /// r600_read_image_attribute intrinsic. Other calls are left untouched.
+  void visitCallInst(CallInst &I) {
+    Function* F = I.getCalledFunction();
+    if (!F) return;
+
+    // Find out if this is a call to one of the pseudointrinsics.
+    StringRef Name = F->getName();
+    unsigned Attribute;
+    for (Attribute = 0; Attribute < NumImageAttributes; Attribute++) {
+      if (Name.startswith(ImageAttributeIntrinsics[Attribute])) break;
+    }
+    if (Attribute >= NumImageAttributes) return; // Not an attribute getter.
+
+    // Get image argument index; calls without an operand or with a result
+    // other than i32 cannot be replaced and are skipped.
+    if (I.getNumArgOperands() < 1 || I.getType() != Int32Type) return;
+    auto ImageArgIt = ImageArgIndices.find(I.getArgOperand(0));
+    if (ImageArgIt == ImageArgIndices.end()) return;
+    auto ImageArgIndex = ImageArgIt->second;
+
+    // Replace instruction with call to r600_read_image_attribute intrinsic.
+    IRBuilder<> Builder(&I);
+    Value* Args[] = { ConstantInt::get(Int32Type, ImageArgIndex),
+                      ConstantInt::get(Int32Type, Attribute) };
+    I.replaceAllUsesWith(Builder.CreateCall(ReadImageAttributeFun, Args));
+    I.eraseFromParent();
+    modified = true;
+  }
+
+  const char *getPassName() const override {
+    return "R600 Image Attribute Intrinsics Replacer";
+  }
+};
+
+char R600ImageAttributeIntrinsicsReplacer::ID = 0;
+
+}
+/// Creates the pass that replaces image attribute getter pseudointrinsics.
+FunctionPass *llvm::createR600ImageAttributeIntrinsicsReplacer() {
+  return new R600ImageAttributeIntrinsicsReplacer();
+}
diff --git a/test/CodeGen/AMDGPU/image-attributes.ll b/test/CodeGen/AMDGPU/image-attributes.ll
new file mode 100644
index 0000000..a5fd4d7
--- /dev/null
+++ b/test/CodeGen/AMDGPU/image-attributes.ll
@@ -0,0 +1,353 @@
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+; === WIDTH =================================================================
+; 9 implicit args + 2 explicit args + 1 grid dim + 3 grid offset = 15 dwords
+; First width at dword index 15+0 -> KC0[3].W
+
+; FUNC-LABEL: {{^}}read_only_2d_width:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], KC0[3].W
+define void @read_only_2d_width (%opencl.image2d_t addrspace(1)* %in,
+                                 i32 addrspace(1)* %out) {
+entry:
+  %0 = call i32 @llvm.r600.get.image.width.2d(
+      %opencl.image2d_t addrspace(1)* %in) #0
+  store i32 %0, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}read_only_3d_width:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], KC0[3].W
+define void @read_only_3d_width (%opencl.image3d_t addrspace(1)* %in,
+                                 i32 addrspace(1)* %out) {
+entry:
+  %0 = call i32 @llvm.r600.get.image.width.3d(
+      %opencl.image3d_t addrspace(1)* %in) #0
+  store i32 %0, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}write_only_2d_width:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], KC0[3].W
+define void @write_only_2d_width (%opencl.image2d_t addrspace(1)* %in,
+                                  i32 addrspace(1)* %out) {
+entry:
+  %0 = call i32 @llvm.r600.get.image.width.2d(
+      %opencl.image2d_t addrspace(1)* %in) #0
+  store i32 %0, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}write_only_3d_width:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], KC0[3].W
+define void @write_only_3d_width (%opencl.image3d_t addrspace(1)* %in,
+                                  i32 addrspace(1)* %out) {
+entry:
+  %0 = call i32 @llvm.r600.get.image.width.3d(
+      %opencl.image3d_t addrspace(1)* %in) #0
+  store i32 %0, i32 addrspace(1)* %out
+  ret void
+}
+
+
+; === HEIGHT ================================================================
+; First height at dword index 15+1 -> KC0[4].X
+
+; FUNC-LABEL: {{^}}read_only_2d_height:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], KC0[4].X
+define void @read_only_2d_height (%opencl.image2d_t addrspace(1)* %in,
+                                  i32 addrspace(1)* %out) {
+entry:
+  %0 = call i32 @llvm.r600.get.image.height.2d(
+      %opencl.image2d_t addrspace(1)* %in) #0
+  store i32 %0, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}read_only_3d_height:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], KC0[4].X
+define void @read_only_3d_height (%opencl.image3d_t addrspace(1)* %in,
+                                  i32 addrspace(1)* %out) {
+entry:
+  %0 = call i32 @llvm.r600.get.image.height.3d(
+      %opencl.image3d_t addrspace(1)* %in) #0
+  store i32 %0, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}write_only_2d_height:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], KC0[4].X
+define void @write_only_2d_height (%opencl.image2d_t addrspace(1)* %in,
+                                   i32 addrspace(1)* %out) {
+entry:
+  %0 = call i32 @llvm.r600.get.image.height.2d(
+      %opencl.image2d_t addrspace(1)* %in) #0
+  store i32 %0, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}write_only_3d_height:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], KC0[4].X
+define void @write_only_3d_height (%opencl.image3d_t addrspace(1)* %in,
+                                   i32 addrspace(1)* %out) {
+entry:
+  %0 = call i32 @llvm.r600.get.image.height.3d(
+      %opencl.image3d_t addrspace(1)* %in) #0
+  store i32 %0, i32 addrspace(1)* %out
+  ret void
+}
+
+
+; === DEPTH ================================================================
+; First depth at dword index 15+2 -> KC0[4].Y
+
+; FUNC-LABEL: {{^}}read_only_3d_depth:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], KC0[4].Y
+define void @read_only_3d_depth (%opencl.image3d_t addrspace(1)* %in,
+                                 i32 addrspace(1)* %out) {
+entry:
+  %0 = call i32 @llvm.r600.get.image.depth.3d(
+      %opencl.image3d_t addrspace(1)* %in) #0
+  store i32 %0, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}write_only_3d_depth:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], KC0[4].Y
+define void @write_only_3d_depth (%opencl.image3d_t addrspace(1)* %in,
+                                  i32 addrspace(1)* %out) {
+entry:
+  %0 = call i32 @llvm.r600.get.image.depth.3d(
+      %opencl.image3d_t addrspace(1)* %in) #0
+  store i32 %0, i32 addrspace(1)* %out
+  ret void
+}
+
+
+; === CHANNEL DATA TYPE =====================================================
+; First channel data type at dword index 15+3 -> KC0[4].Z
+
+; FUNC-LABEL: {{^}}read_only_2d_data_type:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], KC0[4].Z
+define void @read_only_2d_data_type (%opencl.image2d_t addrspace(1)* %in,
+                                     i32 addrspace(1)* %out) {
+entry:
+  %0 = call i32 @llvm.r600.get.image.channel.data.type.2d(
+      %opencl.image2d_t addrspace(1)* %in) #0
+  store i32 %0, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}read_only_3d_data_type:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], KC0[4].Z
+define void @read_only_3d_data_type (%opencl.image3d_t addrspace(1)* %in,
+                                     i32 addrspace(1)* %out) {
+entry:
+  %0 = call i32 @llvm.r600.get.image.channel.data.type.3d(
+      %opencl.image3d_t addrspace(1)* %in) #0
+  store i32 %0, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}write_only_2d_data_type:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], KC0[4].Z
+define void @write_only_2d_data_type (%opencl.image2d_t addrspace(1)* %in,
+                                      i32 addrspace(1)* %out) {
+entry:
+  %0 = call i32 @llvm.r600.get.image.channel.data.type.2d(
+      %opencl.image2d_t addrspace(1)* %in) #0
+  store i32 %0, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}write_only_3d_data_type:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], KC0[4].Z
+define void @write_only_3d_data_type (%opencl.image3d_t addrspace(1)* %in,
+                                      i32 addrspace(1)* %out) {
+entry:
+  %0 = call i32 @llvm.r600.get.image.channel.data.type.3d(
+      %opencl.image3d_t addrspace(1)* %in) #0
+  store i32 %0, i32 addrspace(1)* %out
+  ret void
+}
+
+
+; === CHANNEL ORDER =====================================================
+; First channel order at dword index 15+4 -> KC0[4].W
+
+; FUNC-LABEL: {{^}}read_only_2d_channel_order:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], KC0[4].W
+define void @read_only_2d_channel_order (%opencl.image2d_t addrspace(1)* %in,
+                                         i32 addrspace(1)* %out) {
+entry:
+  %0 = call i32 @llvm.r600.get.image.channel.order.2d(
+      %opencl.image2d_t addrspace(1)* %in) #0
+  store i32 %0, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}read_only_3d_channel_order:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], KC0[4].W
+define void @read_only_3d_channel_order (%opencl.image3d_t addrspace(1)* %in,
+                                         i32 addrspace(1)* %out) {
+entry:
+  %0 = call i32 @llvm.r600.get.image.channel.order.3d(
+      %opencl.image3d_t addrspace(1)* %in) #0
+  store i32 %0, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}write_only_2d_channel_order:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], KC0[4].W
+define void @write_only_2d_channel_order (%opencl.image2d_t addrspace(1)* %in,
+                                          i32 addrspace(1)* %out) {
+entry:
+  %0 = call i32 @llvm.r600.get.image.channel.order.2d(
+      %opencl.image2d_t addrspace(1)* %in) #0
+  store i32 %0, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}write_only_3d_channel_order:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], KC0[4].W
+define void @write_only_3d_channel_order (%opencl.image3d_t addrspace(1)* %in,
+                                          i32 addrspace(1)* %out) {
+entry:
+  %0 = call i32 @llvm.r600.get.image.channel.order.3d(
+      %opencl.image3d_t addrspace(1)* %in) #0
+  store i32 %0, i32 addrspace(1)* %out
+  ret void
+}
+
+
+; === WRITE-ONLY FIRST =======================================================
+; 9 implicit args + 5 explicit args + 1 grid dim + 3 grid offset = 18 dwords
+; 18 dwords to first write-only attrib + 2 x 5 write-only attribs = 28 dwords
+; Height of the first read-only image is at 28+1 -> KC0[7].Y
+;
+; FUNC-LABEL: {{^}}write_only_first:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], KC0[7].Y
+define void @write_only_first (%opencl.image2d_t addrspace(1)* %in1, ; ro
+                               %opencl.image3d_t addrspace(1)* %in2, ; wo
+                               %opencl.image3d_t addrspace(1)* %in3, ; ro
+                               %opencl.image2d_t addrspace(1)* %in4, ; wo
+                               i32 addrspace(1)* %out) {
+entry:
+  %0 = call i32 @llvm.r600.get.image.height.2d(
+      %opencl.image2d_t addrspace(1)* %in1) #0
+  store i32 %0, i32 addrspace(1)* %out
+  ret void
+}
+
+%opencl.image2d_t = type opaque
+%opencl.image3d_t = type opaque
+
+declare i32 @llvm.r600.get.image.width.2d(%opencl.image2d_t addrspace(1)*) #0
+declare i32 @llvm.r600.get.image.width.3d(%opencl.image3d_t addrspace(1)*) #0
+declare i32 @llvm.r600.get.image.height.2d(%opencl.image2d_t addrspace(1)*) #0
+declare i32 @llvm.r600.get.image.height.3d(%opencl.image3d_t addrspace(1)*) #0
+declare i32 @llvm.r600.get.image.depth.3d(%opencl.image3d_t addrspace(1)*) #0
+declare i32 @llvm.r600.get.image.channel.data.type.2d(
+    %opencl.image2d_t addrspace(1)*) #0
+declare i32 @llvm.r600.get.image.channel.data.type.3d(
+    %opencl.image3d_t addrspace(1)*) #0
+declare i32 @llvm.r600.get.image.channel.order.2d(
+    %opencl.image2d_t addrspace(1)*) #0
+declare i32 @llvm.r600.get.image.channel.order.3d(
+    %opencl.image3d_t addrspace(1)*) #0
+
+attributes #0 = { readnone }
+
+
+!opencl.kernels = !{ !0,  !6,  !9, !11, ; width
+                    !12, !13, !14, !15, ; height
+                    !16, !17,           ; depth
+                    !18, !19, !20, !21, ; data type
+                    !22, !23, !24, !25, ; channel order
+                    !109                ; other
+                   }
+
+!1 = !{!"kernel_arg_addr_space", i32 1, i32 1}
+!2 = !{!"kernel_arg_access_qual", !"read_only", !"none"}
+!10 = !{!"kernel_arg_access_qual", !"write_only", !"none"}
+!3 = !{!"kernel_arg_type", !"image2d_t", !"int*"}
+!7 = !{!"kernel_arg_type", !"image3d_t", !"int*"}
+!4 = !{!"kernel_arg_base_type", !"image2d_t", !"int*"}
+!8 = !{!"kernel_arg_base_type", !"image3d_t", !"int*"}
+!5 = !{!"kernel_arg_type_qual", !"", !""}
+
+!0 = !{void (%opencl.image2d_t addrspace(1)*, i32 addrspace(1)*)*
+       @read_only_2d_width, !1, !2, !3, !4, !5}
+!6 = !{void (%opencl.image3d_t addrspace(1)*, i32 addrspace(1)*)*
+       @read_only_3d_width, !1, !2, !7, !8, !5}
+!9 = !{void (%opencl.image2d_t addrspace(1)*, i32 addrspace(1)*)*
+       @write_only_2d_width, !1, !10, !3, !4, !5}
+!11 = !{void (%opencl.image3d_t addrspace(1)*, i32 addrspace(1)*)*
+       @write_only_3d_width, !1, !10, !7, !8, !5}
+
+!12 = !{void (%opencl.image2d_t addrspace(1)*, i32 addrspace(1)*)*
+       @read_only_2d_height, !1, !2, !3, !4, !5}
+!13 = !{void (%opencl.image3d_t addrspace(1)*, i32 addrspace(1)*)*
+       @read_only_3d_height, !1, !2, !7, !8, !5}
+!14 = !{void (%opencl.image2d_t addrspace(1)*, i32 addrspace(1)*)*
+       @write_only_2d_height, !1, !10, !3, !4, !5}
+!15 = !{void (%opencl.image3d_t addrspace(1)*, i32 addrspace(1)*)*
+       @write_only_3d_height, !1, !10, !7, !8, !5}
+
+!16 = !{void (%opencl.image3d_t addrspace(1)*, i32 addrspace(1)*)*
+       @read_only_3d_depth, !1, !2, !7, !8, !5}
+!17 = !{void (%opencl.image3d_t addrspace(1)*, i32 addrspace(1)*)*
+       @write_only_3d_depth, !1, !10, !7, !8, !5}
+
+!18 = !{void (%opencl.image2d_t addrspace(1)*, i32 addrspace(1)*)*
+       @read_only_2d_data_type, !1, !2, !3, !4, !5}
+!19 = !{void (%opencl.image3d_t addrspace(1)*, i32 addrspace(1)*)*
+       @read_only_3d_data_type, !1, !2, !7, !8, !5}
+!20 = !{void (%opencl.image2d_t addrspace(1)*, i32 addrspace(1)*)*
+       @write_only_2d_data_type, !1, !10, !3, !4, !5}
+!21 = !{void (%opencl.image3d_t addrspace(1)*, i32 addrspace(1)*)*
+       @write_only_3d_data_type, !1, !10, !7, !8, !5}
+
+!22 = !{void (%opencl.image2d_t addrspace(1)*, i32 addrspace(1)*)*
+       @read_only_2d_channel_order, !1, !2, !3, !4, !5}
+!23 = !{void (%opencl.image3d_t addrspace(1)*, i32 addrspace(1)*)*
+       @read_only_3d_channel_order, !1, !2, !7, !8, !5}
+!24 = !{void (%opencl.image2d_t addrspace(1)*, i32 addrspace(1)*)*
+       @write_only_2d_channel_order, !1, !10, !3, !4, !5}
+!25 = !{void (%opencl.image3d_t addrspace(1)*, i32 addrspace(1)*)*
+       @write_only_3d_channel_order, !1, !10, !7, !8, !5}
+
+
+!109 = !{void (%opencl.image2d_t addrspace(1)*,
+               %opencl.image3d_t addrspace(1)*,
+               %opencl.image3d_t addrspace(1)*,
+               %opencl.image2d_t addrspace(1)*,
+               i32 addrspace(1)*)*
+        @write_only_first, !110, !111, !112, !113, !114}
+!110 = !{!"kernel_arg_addr_space", i32 1, i32 1, i32 1, i32 1, i32 1}
+!111 = !{!"kernel_arg_access_qual", !"read_only", !"write_only",
+                                    !"read_only", !"write_only", !"none"}
+!112 = !{!"kernel_arg_type", !"image2d_t", !"image3d_t",
+                             !"image3d_t", !"image2d_t", !"int*"}
+!113 = !{!"kernel_arg_base_type", !"image2d_t", !"image3d_t",
+                                  !"image3d_t", !"image2d_t", !"int*"}
+!114 = !{!"kernel_arg_type_qual", !"", !"", !"", !"", !""}
+
-- 
2.4.2




More information about the llvm-commits mailing list