[llvm] [PATCH 1/2] AMDGPU: Support OpenCL image ID getter intrinsics.
Tom Stellard
tom at stellard.net
Fri Jul 10 14:08:52 PDT 2015
On Fri, Jul 10, 2015 at 07:45:46AM +0200, Zoltan Gilian wrote:
> Added a pass to replace calls to the llvm.AMDGPU.get.image.id.[23]d dummy
> intrinsics with a compile-time constant image ID for OpenCL images.
> The image ID is the index of the image argument among the image[23]d_t
> arguments of the OpenCL kernel.
LGTM.
> ---
> lib/Target/AMDGPU/AMDGPU.h | 1 +
> .../AMDGPU/AMDGPUImageIDIntrinsicsReplacer.cpp | 98 ++++++++++++++++
> lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 1 +
> test/CodeGen/AMDGPU/image-id.ll | 129 +++++++++++++++++++++
> 4 files changed, 229 insertions(+)
> create mode 100644 lib/Target/AMDGPU/AMDGPUImageIDIntrinsicsReplacer.cpp
> create mode 100644 test/CodeGen/AMDGPU/image-id.ll
>
> diff --git a/lib/Target/AMDGPU/AMDGPU.h b/lib/Target/AMDGPU/AMDGPU.h
> index 0a05d25..9586afd 100644
> --- a/lib/Target/AMDGPU/AMDGPU.h
> +++ b/lib/Target/AMDGPU/AMDGPU.h
> @@ -64,6 +64,7 @@ FunctionPass *createAMDGPUPromoteAlloca(const AMDGPUSubtarget &ST);
> Pass *createAMDGPUStructurizeCFGPass();
> FunctionPass *createAMDGPUISelDag(TargetMachine &tm);
> ModulePass *createAMDGPUAlwaysInlinePass();
> +FunctionPass *createAMDGPUImageIDIntrinsicsReplacer();
>
> void initializeSIFixControlFlowLiveIntervalsPass(PassRegistry&);
> extern char &SIFixControlFlowLiveIntervalsID;
> diff --git a/lib/Target/AMDGPU/AMDGPUImageIDIntrinsicsReplacer.cpp b/lib/Target/AMDGPU/AMDGPUImageIDIntrinsicsReplacer.cpp
> new file mode 100644
> index 0000000..68fff54
> --- /dev/null
> +++ b/lib/Target/AMDGPU/AMDGPUImageIDIntrinsicsReplacer.cpp
> @@ -0,0 +1,98 @@
> +//===-- AMDGPUImageIDIntrinsicsReplacer.cpp --------------------------===//
> +//
> +// The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +/// \file
> +/// This pass replaces image ID getter pseudointrinsics with compile-time
> +/// constant ID values for OpenCL images. The ID is the index of the image
> +/// argument among the image[23]d_t arguments of the OpenCL kernel.
> +//===----------------------------------------------------------------------===//
> +
> +#include "AMDGPU.h"
> +#include "llvm/Analysis/Passes.h"
> +#include "llvm/IR/Constants.h"
> +#include "llvm/IR/Function.h"
> +#include "llvm/IR/Instructions.h"
> +#include "llvm/IR/Module.h"
> +
> +#include <vector>
> +
> +using namespace llvm;
> +
> +namespace {
> +
> +class AMDGPUImageIDIntrinsicsReplacer : public FunctionPass {
> + static char ID;
> + Type *Int32Type;
> +
> +public:
> + AMDGPUImageIDIntrinsicsReplacer() : FunctionPass(ID) {}
> +
> + bool doInitialization(Module &M) override {
> + Int32Type = Type::getInt32Ty(M.getContext());
> + return true;
> + }
> +
> + bool runOnFunction(Function &F) override {
> + bool modified = false;
> + int32_t NumImageArgs = 0;
> +
> + std::vector<Instruction *> InstsToErase;
> +
> + for (const auto &Arg : F.args()) {
> +
> + // Skip non-image types.
> + Type *ArgType = Arg.getType();
> + if (!ArgType->isPointerTy())
> + continue;
> + Type *ElemType = ArgType->getPointerElementType();
> + if (!ElemType->isStructTy())
> + continue;
> + const llvm::StringRef &TypeName = ElemType->getStructName();
> + if (!TypeName.startswith("opencl.image2d_t") &&
> + !TypeName.startswith("opencl.image3d_t"))
> + continue;
> + int32_t ImageIndex = NumImageArgs++;
> +
> + // Iterate uses to find ID getters.
> + for (const auto &Use : Arg.uses()) {
> +
> + // Only process calls to ID getter intrinsics.
> + auto Inst = dyn_cast<CallInst>(Use.getUser());
> + if (!Inst)
> + continue;
> +
> + StringRef Name = Inst->getCalledFunction()->getName();
> + if (Name != "llvm.AMDGPU.get.image.id.2d" &&
> + Name != "llvm.AMDGPU.get.image.id.3d") {
> + continue;
> + }
> +
> + Inst->replaceAllUsesWith(ConstantInt::get(Int32Type, ImageIndex));
> + InstsToErase.push_back(Inst);
> + modified = true;
> + }
> + }
> + for (size_t i = 0; i < InstsToErase.size(); ++i) {
> + InstsToErase[i]->eraseFromParent();
> + }
> +
> + return modified;
> + }
> +
> + const char *getPassName() const override {
> + return "AMDGPU Image ID Intrinsics Replacer";
> + }
> +};
> +
> +char AMDGPUImageIDIntrinsicsReplacer::ID = 0;
> +}
> +
> +FunctionPass *llvm::createAMDGPUImageIDIntrinsicsReplacer() {
> + return new AMDGPUImageIDIntrinsicsReplacer();
> +}
> diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
> index f395565..d69858e 100644
> --- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
> +++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
> @@ -188,6 +188,7 @@ AMDGPUPassConfig::addPreISel() {
> addPass(createFlattenCFGPass());
> if (ST.IsIRStructurizerEnabled())
> addPass(createStructurizeCFGPass());
> + addPass(createAMDGPUImageIDIntrinsicsReplacer());
> return false;
> }
>
> diff --git a/test/CodeGen/AMDGPU/image-id.ll b/test/CodeGen/AMDGPU/image-id.ll
> new file mode 100644
> index 0000000..a5da037
> --- /dev/null
> +++ b/test/CodeGen/AMDGPU/image-id.ll
> @@ -0,0 +1,129 @@
> +; RUN: llc -march=r600 -mcpu=juniper < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
> +
> +; FUNC-LABEL: {{^}}test_2d_1:
> +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
> +; EG: MOV [[VAL]], literal.x
> +; EG-NEXT: LSHR
> +; EG-NEXT: 0(
> +define void @test_2d_1(%opencl.image2d_t addrspace(1)* %in,
> + i32 addrspace(1)* %out) {
> +entry:
> + %0 = call i32 @llvm.AMDGPU.get.image.id.2d(
> + %opencl.image2d_t addrspace(1)* %in) #0
> + store i32 %0, i32 addrspace(1)* %out
> + ret void
> +}
> +
> +; FUNC-LABEL: {{^}}test_2d_21:
> +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
> +; EG: MOV [[VAL]], literal.x
> +; EG-NEXT: LSHR
> +; EG-NEXT: 0(
> +define void @test_2d_21(%opencl.image2d_t addrspace(1)* %in1,
> + %opencl.image2d_t addrspace(1)* %in2,
> + i32 addrspace(1)* %out) {
> +entry:
> + %0 = call i32 @llvm.AMDGPU.get.image.id.2d(
> + %opencl.image2d_t addrspace(1)* %in1) #0
> + store i32 %0, i32 addrspace(1)* %out
> + ret void
> +}
> +
> +; FUNC-LABEL: {{^}}test_2d_22:
> +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
> +; EG: MOV [[VAL]], literal.x
> +; EG-NEXT: LSHR
> +; EG-NEXT: 1(
> +define void @test_2d_22(%opencl.image2d_t addrspace(1)* %in1,
> + %opencl.image2d_t addrspace(1)* %in2,
> + i32 addrspace(1)* %out) {
> +entry:
> + %0 = call i32 @llvm.AMDGPU.get.image.id.2d(
> + %opencl.image2d_t addrspace(1)* %in2) #0
> + store i32 %0, i32 addrspace(1)* %out
> + ret void
> +}
> +
> +; FUNC-LABEL: {{^}}test_2d_3:
> +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
> +; EG: MOV [[VAL]], literal.x
> +; EG-NEXT: LSHR
> +; EG-NEXT: 2(
> +define void @test_2d_3(%opencl.image2d_t addrspace(1)* %in1,
> + %opencl.image3d_t addrspace(1)* %in2,
> + %opencl.image2d_t addrspace(1)* %in3,
> + i32 addrspace(1)* %out) {
> +entry:
> + %0 = call i32 @llvm.AMDGPU.get.image.id.2d(
> + %opencl.image2d_t addrspace(1)* %in3) #0
> + store i32 %0, i32 addrspace(1)* %out
> + ret void
> +}
> +
> +; FUNC-LABEL: {{^}}test_3d_1:
> +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
> +; EG: MOV [[VAL]], literal.x
> +; EG-NEXT: LSHR
> +; EG-NEXT: 0(
> +define void @test_3d_1(%opencl.image3d_t addrspace(1)* %in,
> + i32 addrspace(1)* %out) {
> +entry:
> + %0 = call i32 @llvm.AMDGPU.get.image.id.3d(
> + %opencl.image3d_t addrspace(1)* %in) #0
> + store i32 %0, i32 addrspace(1)* %out
> + ret void
> +}
> +
> +; FUNC-LABEL: {{^}}test_3d_21:
> +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
> +; EG: MOV [[VAL]], literal.x
> +; EG-NEXT: LSHR
> +; EG-NEXT: 0(
> +define void @test_3d_21(%opencl.image3d_t addrspace(1)* %in1,
> + %opencl.image3d_t addrspace(1)* %in2,
> + i32 addrspace(1)* %out) {
> +entry:
> + %0 = call i32 @llvm.AMDGPU.get.image.id.3d(
> + %opencl.image3d_t addrspace(1)* %in1) #0
> + store i32 %0, i32 addrspace(1)* %out
> + ret void
> +}
> +
> +; FUNC-LABEL: {{^}}test_3d_22:
> +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
> +; EG: MOV [[VAL]], literal.x
> +; EG-NEXT: LSHR
> +; EG-NEXT: 1(
> +define void @test_3d_22(%opencl.image3d_t addrspace(1)* %in1,
> + %opencl.image3d_t addrspace(1)* %in2,
> + i32 addrspace(1)* %out) {
> +entry:
> + %0 = call i32 @llvm.AMDGPU.get.image.id.3d(
> + %opencl.image3d_t addrspace(1)* %in2) #0
> + store i32 %0, i32 addrspace(1)* %out
> + ret void
> +}
> +
> +; FUNC-LABEL: {{^}}test_3d_3:
> +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
> +; EG: MOV [[VAL]], literal.x
> +; EG-NEXT: LSHR
> +; EG-NEXT: 2(
> +define void @test_3d_3(%opencl.image3d_t addrspace(1)* %in1,
> + %opencl.image2d_t addrspace(1)* %in2,
> + %opencl.image3d_t addrspace(1)* %in3,
> + i32 addrspace(1)* %out) {
> +entry:
> + %0 = call i32 @llvm.AMDGPU.get.image.id.3d(
> + %opencl.image3d_t addrspace(1)* %in3) #0
> + store i32 %0, i32 addrspace(1)* %out
> + ret void
> +}
> +
> +%opencl.image2d_t = type opaque
> +%opencl.image3d_t = type opaque
> +
> +declare i32 @llvm.AMDGPU.get.image.id.2d(%opencl.image2d_t addrspace(1)*) #0
> +declare i32 @llvm.AMDGPU.get.image.id.3d(%opencl.image3d_t addrspace(1)*) #0
> +
> +attributes #0 = { readnone }
> --
> 2.4.2
>
More information about the llvm-commits mailing list