[llvm] [PATCH 1/2] AMDGPU: Support OpenCL image ID getter intrinsics.

Zoltan Gilian zoltan.gilian at gmail.com
Thu Jul 9 22:45:46 PDT 2015


Add a pass that replaces calls to the llvm.AMDGPU.get.image.id.[23]d dummy
intrinsics with a compile-time constant image ID for OpenCL images.
The image ID is the zero-based index of the image argument among the
image[23]d_t arguments of the OpenCL kernel.
---
 lib/Target/AMDGPU/AMDGPU.h                         |   1 +
 .../AMDGPU/AMDGPUImageIDIntrinsicsReplacer.cpp     |  98 ++++++++++++++++
 lib/Target/AMDGPU/AMDGPUTargetMachine.cpp          |   1 +
 test/CodeGen/AMDGPU/image-id.ll                    | 129 +++++++++++++++++++++
 4 files changed, 229 insertions(+)
 create mode 100644 lib/Target/AMDGPU/AMDGPUImageIDIntrinsicsReplacer.cpp
 create mode 100644 test/CodeGen/AMDGPU/image-id.ll

diff --git a/lib/Target/AMDGPU/AMDGPU.h b/lib/Target/AMDGPU/AMDGPU.h
index 0a05d25..9586afd 100644
--- a/lib/Target/AMDGPU/AMDGPU.h
+++ b/lib/Target/AMDGPU/AMDGPU.h
@@ -64,6 +64,7 @@ FunctionPass *createAMDGPUPromoteAlloca(const AMDGPUSubtarget &ST);
 Pass *createAMDGPUStructurizeCFGPass();
 FunctionPass *createAMDGPUISelDag(TargetMachine &tm);
 ModulePass *createAMDGPUAlwaysInlinePass();
+FunctionPass *createAMDGPUImageIDIntrinsicsReplacer();
 
 void initializeSIFixControlFlowLiveIntervalsPass(PassRegistry&);
 extern char &SIFixControlFlowLiveIntervalsID;
diff --git a/lib/Target/AMDGPU/AMDGPUImageIDIntrinsicsReplacer.cpp b/lib/Target/AMDGPU/AMDGPUImageIDIntrinsicsReplacer.cpp
new file mode 100644
index 0000000..68fff54
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDGPUImageIDIntrinsicsReplacer.cpp
@@ -0,0 +1,98 @@
+//===-- AMDGPUImageIDIntrinsicsReplacer.cpp --------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This pass replaces image ID getter pseudointrinsics with compile-time
+/// constant ID values for OpenCL images. The ID is the index of the image
+/// argument among the image[23]d_t arguments of the OpenCL kernel.
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+
+#include <vector>
+
+using namespace llvm;
+
+namespace {
+
+/// Folds calls to the llvm.AMDGPU.get.image.id.[23]d pseudointrinsics into i32
+/// constants: an image's ID is its zero-based index among the image arguments.
+class AMDGPUImageIDIntrinsicsReplacer : public FunctionPass {
+  static char ID;
+  Type *Int32Type = nullptr; // Cached i32 type; set in doInitialization().
+
+  /// Returns true if \p Ty is a pointer to a named struct whose name starts
+  /// with "opencl.image2d_t" or "opencl.image3d_t".
+  static bool isImageType(Type *Ty) {
+    Type *Elem = Ty->isPointerTy() ? Ty->getPointerElementType() : nullptr;
+    auto *STy = dyn_cast_or_null<StructType>(Elem);
+    // Literal structs have no name; StructType::getName() asserts on them.
+    if (!STy || STy->isLiteral())
+      return false;
+    StringRef Name = STy->getName();
+    return Name.startswith("opencl.image2d_t") ||
+           Name.startswith("opencl.image3d_t");
+  }
+
+  /// Returns true if \p Call directly calls one of the ID getter intrinsics.
+  static bool isImageIDGetter(const CallInst &Call) {
+    const Function *Callee = Call.getCalledFunction(); // Null if indirect.
+    if (!Callee)
+      return false;
+    StringRef Name = Callee->getName();
+    return Name == "llvm.AMDGPU.get.image.id.2d" ||
+           Name == "llvm.AMDGPU.get.image.id.3d";
+  }
+
+public:
+  AMDGPUImageIDIntrinsicsReplacer() : FunctionPass(ID) {}
+
+  /// Caches the i32 type. Returns false because the module is not modified.
+  bool doInitialization(Module &M) override {
+    Int32Type = Type::getInt32Ty(M.getContext());
+    return false;
+  }
+
+  /// Folds ID getter calls on \p F's image arguments; returns true if changed.
+  bool runOnFunction(Function &F) override {
+    int32_t NumImageArgs = 0;
+    std::vector<Instruction *> InstsToErase; // Deferred: keeps use walk valid.
+    for (const auto &Arg : F.args()) {
+      if (!isImageType(Arg.getType()))
+        continue;
+      Constant *ImageID = ConstantInt::get(Int32Type, NumImageArgs++);
+      for (const auto &U : Arg.uses()) {
+        auto *Call = dyn_cast<CallInst>(U.getUser());
+        if (!Call || !isImageIDGetter(*Call))
+          continue;
+        Call->replaceAllUsesWith(ImageID);
+        InstsToErase.push_back(Call);
+      }
+    }
+    for (Instruction *Inst : InstsToErase)
+      Inst->eraseFromParent();
+    return !InstsToErase.empty();
+  }
+
+  const char *getPassName() const override {
+    return "AMDGPU Image ID Intrinsics Replacer";
+  }
+};
+
+char AMDGPUImageIDIntrinsicsReplacer::ID = 0;
+}
+
+FunctionPass *llvm::createAMDGPUImageIDIntrinsicsReplacer() {
+  return new AMDGPUImageIDIntrinsicsReplacer; // Heap-allocated pass instance.
+}
diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index f395565..d69858e 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -188,6 +188,7 @@ AMDGPUPassConfig::addPreISel() {
   addPass(createFlattenCFGPass());
   if (ST.IsIRStructurizerEnabled())
     addPass(createStructurizeCFGPass());
+  addPass(createAMDGPUImageIDIntrinsicsReplacer());
   return false;
 }
 
diff --git a/test/CodeGen/AMDGPU/image-id.ll b/test/CodeGen/AMDGPU/image-id.ll
new file mode 100644
index 0000000..a5da037
--- /dev/null
+++ b/test/CodeGen/AMDGPU/image-id.ll
@@ -0,0 +1,129 @@
+; RUN: llc -march=r600 -mcpu=juniper < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+; FUNC-LABEL: {{^}}test_2d_1:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], literal.x
+; EG-NEXT: LSHR
+; EG-NEXT: 0(
+define void @test_2d_1(%opencl.image2d_t addrspace(1)* %in,
+                                 i32 addrspace(1)* %out) {
+entry: ; %in is the only image argument, so the expected image ID is 0.
+  %0 = call i32 @llvm.AMDGPU.get.image.id.2d(
+      %opencl.image2d_t addrspace(1)* %in) #0
+  store i32 %0, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}test_2d_21:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], literal.x
+; EG-NEXT: LSHR
+; EG-NEXT: 0(
+define void @test_2d_21(%opencl.image2d_t addrspace(1)* %in1,
+                     %opencl.image2d_t addrspace(1)* %in2,
+                                 i32 addrspace(1)* %out) {
+entry: ; %in1 is the first of two image2d_t arguments: expected image ID 0.
+  %0 = call i32 @llvm.AMDGPU.get.image.id.2d(
+      %opencl.image2d_t addrspace(1)* %in1) #0
+  store i32 %0, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}test_2d_22:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], literal.x
+; EG-NEXT: LSHR
+; EG-NEXT: 1(
+define void @test_2d_22(%opencl.image2d_t addrspace(1)* %in1,
+                     %opencl.image2d_t addrspace(1)* %in2,
+                                 i32 addrspace(1)* %out) {
+entry: ; %in2 is the second of two image2d_t arguments: expected image ID 1.
+  %0 = call i32 @llvm.AMDGPU.get.image.id.2d(
+      %opencl.image2d_t addrspace(1)* %in2) #0
+  store i32 %0, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}test_2d_3:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], literal.x
+; EG-NEXT: LSHR
+; EG-NEXT: 2(
+define void @test_2d_3(%opencl.image2d_t addrspace(1)* %in1,
+                    %opencl.image3d_t addrspace(1)* %in2,
+                    %opencl.image2d_t addrspace(1)* %in3,
+                                 i32 addrspace(1)* %out) {
+entry: ; %in3 follows an image2d_t and an image3d_t: expected image ID 2.
+  %0 = call i32 @llvm.AMDGPU.get.image.id.2d(
+      %opencl.image2d_t addrspace(1)* %in3) #0
+  store i32 %0, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}test_3d_1:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], literal.x
+; EG-NEXT: LSHR
+; EG-NEXT: 0(
+define void @test_3d_1(%opencl.image3d_t addrspace(1)* %in,
+                                 i32 addrspace(1)* %out) {
+entry: ; %in is the only image argument, so the expected image ID is 0.
+  %0 = call i32 @llvm.AMDGPU.get.image.id.3d(
+      %opencl.image3d_t addrspace(1)* %in) #0
+  store i32 %0, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}test_3d_21:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], literal.x
+; EG-NEXT: LSHR
+; EG-NEXT: 0(
+define void @test_3d_21(%opencl.image3d_t addrspace(1)* %in1,
+                     %opencl.image3d_t addrspace(1)* %in2,
+                                 i32 addrspace(1)* %out) {
+entry: ; %in1 is the first of two image3d_t arguments: expected image ID 0.
+  %0 = call i32 @llvm.AMDGPU.get.image.id.3d(
+      %opencl.image3d_t addrspace(1)* %in1) #0
+  store i32 %0, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}test_3d_22:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], literal.x
+; EG-NEXT: LSHR
+; EG-NEXT: 1(
+define void @test_3d_22(%opencl.image3d_t addrspace(1)* %in1,
+                     %opencl.image3d_t addrspace(1)* %in2,
+                                 i32 addrspace(1)* %out) {
+entry: ; %in2 is the second of two image3d_t arguments: expected image ID 1.
+  %0 = call i32 @llvm.AMDGPU.get.image.id.3d(
+      %opencl.image3d_t addrspace(1)* %in2) #0
+  store i32 %0, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}test_3d_3:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], literal.x
+; EG-NEXT: LSHR
+; EG-NEXT: 2(
+define void @test_3d_3(%opencl.image3d_t addrspace(1)* %in1,
+                    %opencl.image2d_t addrspace(1)* %in2,
+                    %opencl.image3d_t addrspace(1)* %in3,
+                                 i32 addrspace(1)* %out) {
+entry: ; %in3 follows an image3d_t and an image2d_t: expected image ID 2.
+  %0 = call i32 @llvm.AMDGPU.get.image.id.3d(
+      %opencl.image3d_t addrspace(1)* %in3) #0
+  store i32 %0, i32 addrspace(1)* %out
+  ret void
+}
+
+%opencl.image2d_t = type opaque
+%opencl.image3d_t = type opaque
+
+declare i32 @llvm.AMDGPU.get.image.id.2d(%opencl.image2d_t addrspace(1)*) #0
+declare i32 @llvm.AMDGPU.get.image.id.3d(%opencl.image3d_t addrspace(1)*) #0
+
+attributes #0 = { readnone }
-- 
2.4.2




More information about the llvm-commits mailing list