[llvm] [PATCH 2/2] AMDGPU: Add image attribute reader intrinsic.

Tom Stellard tom at stellard.net
Fri Jul 10 14:08:05 PDT 2015


On Fri, Jul 10, 2015 at 07:45:47AM +0200, Zoltan Gilian wrote:
> Added the llvm.r600.read.image.attribute intrinsic to read OpenCL image
> attributes. The intrinsic is lowered to an implicit parameter load.
> ---
>  include/llvm/IR/IntrinsicsAMDGPU.td      |   5 +
>  lib/Target/AMDGPU/AMDGPUISelLowering.cpp |   2 +
>  lib/Target/AMDGPU/AMDGPUISelLowering.h   |   3 +-
>  lib/Target/AMDGPU/R600ISelLowering.cpp   |  13 +++
>  test/CodeGen/AMDGPU/image-attributes.ll  | 168 +++++++++++++++++++++++++++++++
>  5 files changed, 190 insertions(+), 1 deletion(-)
>  create mode 100644 test/CodeGen/AMDGPU/image-attributes.ll
> 
> diff --git a/include/llvm/IR/IntrinsicsAMDGPU.td b/include/llvm/IR/IntrinsicsAMDGPU.td
> index 510e5ad..05adcfa 100644
> --- a/include/llvm/IR/IntrinsicsAMDGPU.td
> +++ b/include/llvm/IR/IntrinsicsAMDGPU.td
> @@ -33,6 +33,11 @@ defm int_r600_read_tgid : R600ReadPreloadRegisterIntrinsic_xyz <
>                                         "__builtin_r600_read_tgid">;
>  defm int_r600_read_tidig : R600ReadPreloadRegisterIntrinsic_xyz <
>                                         "__builtin_r600_read_tidig">;
> +
> +def int_r600_read_image_attribute
> +  : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>,
> +    GCCBuiltin<"__builtin_r600_read_image_attribute">;
> +

This should use the amdgpu target prefix.

>  } // End TargetPrefix = "r600"
>  
>  let TargetPrefix = "AMDGPU" in {
> diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
> index c65abf9..6d60d61 100644
> --- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
> +++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
> @@ -2653,6 +2653,8 @@ uint32_t AMDGPUTargetLowering::getImplicitParameterOffset(
>      return ArgOffset;
>    case GRID_OFFSET:
>      return ArgOffset + 4;
> +  case IMAGE_ATTRIBUTES:
> +    return ArgOffset + 16;

Why is this +16 and not +8?

>    }
>    llvm_unreachable("unexpected implicit parameter type");
>  }
> diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.h b/lib/Target/AMDGPU/AMDGPUISelLowering.h
> index 27ceaf1..b05505e 100644
> --- a/lib/Target/AMDGPU/AMDGPUISelLowering.h
> +++ b/lib/Target/AMDGPU/AMDGPUISelLowering.h
> @@ -210,7 +210,8 @@ public:
>  
>    enum ImplicitParameter {
>      GRID_DIM,
> -    GRID_OFFSET
> +    GRID_OFFSET,
> +    IMAGE_ATTRIBUTES
>    };
>  
>    /// \brief Helper function that returns the byte offset of the given
> diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp
> index 9552c72..6f5b318 100644
> --- a/lib/Target/AMDGPU/R600ISelLowering.cpp
> +++ b/lib/Target/AMDGPU/R600ISelLowering.cpp
> @@ -820,6 +820,19 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
>        return LowerImplicitParameter(DAG, VT, DL, ByteOffset / 4);
>      }
>  
> +    case Intrinsic::r600_read_image_attribute: {
> +      // intrinsic argument 0 (Op operand 1): image index
> +      // intrinsic argument 1 (Op operand 2): attribute index
> +
> +      auto ByteOffset = getImplicitParameterOffset(MFI, IMAGE_ATTRIBUTES);
> +      auto DWordOffset = ByteOffset / 4;
> +      // There are 5 dword attributes per image.
> +      DWordOffset += 5 * Op.getConstantOperandVal(1);
> +      // Skip to the requested attribute.
> +      DWordOffset += Op.getConstantOperandVal(2);
> +      return LowerImplicitParameter(DAG, VT, DL, DWordOffset);
> +    }
> +
>      case Intrinsic::r600_read_tgid_x:
>        return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
>                                    AMDGPU::T1_X, VT);
> diff --git a/test/CodeGen/AMDGPU/image-attributes.ll b/test/CodeGen/AMDGPU/image-attributes.ll
> new file mode 100644
> index 0000000..b70e2f4
> --- /dev/null
> +++ b/test/CodeGen/AMDGPU/image-attributes.ll
> @@ -0,0 +1,168 @@
> +; RUN: llc -march=r600 -mcpu=juniper < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
> +
> +; === WIDTH ==================================================================
> +; 9 implicit args + 2 explicit args + 1 grid dim + 3 grid offset = 15 dwords
> +; First width at dword index 15+0 -> KC0[3].W
> +
> +; FUNC-LABEL: {{^}}width_2d:
> +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
> +; EG: MOV [[VAL]], KC0[3].W
> +define void @width_2d (%opencl.image2d_t addrspace(1)* %in,
> +                                 i32 addrspace(1)* %out) {
> +entry:
> +  %0 = call i32 @llvm.AMDGPU.get.image.id.2d(
> +      %opencl.image2d_t addrspace(1)* %in) #0
> +  %1 = call i32 @llvm.r600.read.image.attribute(i32 %0, i32 0) #0
> +  store i32 %1, i32 addrspace(1)* %out
> +  ret void
> +}
> +
> +; FUNC-LABEL: {{^}}width_3d:
> +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
> +; EG: MOV [[VAL]], KC0[3].W
> +define void @width_3d (%opencl.image3d_t addrspace(1)* %in,
> +                                 i32 addrspace(1)* %out) {
> +entry:
> +  %0 = call i32 @llvm.AMDGPU.get.image.id.3d(
> +      %opencl.image3d_t addrspace(1)* %in) #0
> +  %1 = call i32 @llvm.r600.read.image.attribute(i32 %0, i32 0) #0
> +  store i32 %1, i32 addrspace(1)* %out
> +  ret void
> +}
> +
> +
> +; === HEIGHT =================================================================
> +; First height at dword index 15+1 -> KC0[4].X
> +
> +; FUNC-LABEL: {{^}}height_2d:
> +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
> +; EG: MOV [[VAL]], KC0[4].X
> +define void @height_2d (%opencl.image2d_t addrspace(1)* %in,
> +                                  i32 addrspace(1)* %out) {
> +entry:
> +  %0 = call i32 @llvm.AMDGPU.get.image.id.2d(
> +      %opencl.image2d_t addrspace(1)* %in) #0
> +  %1 = call i32 @llvm.r600.read.image.attribute(i32 %0, i32 1) #0
> +  store i32 %1, i32 addrspace(1)* %out
> +  ret void
> +}
> +
> +; FUNC-LABEL: {{^}}height_3d:
> +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
> +; EG: MOV [[VAL]], KC0[4].X
> +define void @height_3d (%opencl.image3d_t addrspace(1)* %in,
> +                                  i32 addrspace(1)* %out) {
> +entry:
> +  %0 = call i32 @llvm.AMDGPU.get.image.id.3d(
> +      %opencl.image3d_t addrspace(1)* %in) #0
> +  %1 = call i32 @llvm.r600.read.image.attribute(i32 %0, i32 1) #0
> +  store i32 %1, i32 addrspace(1)* %out
> +  ret void
> +}
> +
> +
> +; === DEPTH ==================================================================
> +; First depth at dword index 15+2 -> KC0[4].Y
> +
> +; FUNC-LABEL: {{^}}depth_3d:
> +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
> +; EG: MOV [[VAL]], KC0[4].Y
> +define void @depth_3d (%opencl.image3d_t addrspace(1)* %in,
> +                                 i32 addrspace(1)* %out) {
> +entry:
> +  %0 = call i32 @llvm.AMDGPU.get.image.id.3d(
> +      %opencl.image3d_t addrspace(1)* %in) #0
> +  %1 = call i32 @llvm.r600.read.image.attribute(i32 %0, i32 2) #0
> +  store i32 %1, i32 addrspace(1)* %out
> +  ret void
> +}
> +
> +
> +; === CHANNEL DATA TYPE ======================================================
> +; First channel data type at dword index 15+3 -> KC0[4].Z
> +
> +; FUNC-LABEL: {{^}}data_type_2d:
> +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
> +; EG: MOV [[VAL]], KC0[4].Z
> +define void @data_type_2d (%opencl.image2d_t addrspace(1)* %in,
> +                                     i32 addrspace(1)* %out) {
> +entry:
> +  %0 = call i32 @llvm.AMDGPU.get.image.id.2d(
> +      %opencl.image2d_t addrspace(1)* %in) #0
> +  %1 = call i32 @llvm.r600.read.image.attribute(i32 %0, i32 3) #0
> +  store i32 %1, i32 addrspace(1)* %out
> +  ret void
> +}
> +
> +; FUNC-LABEL: {{^}}data_type_3d:
> +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
> +; EG: MOV [[VAL]], KC0[4].Z
> +define void @data_type_3d (%opencl.image3d_t addrspace(1)* %in,
> +                                     i32 addrspace(1)* %out) {
> +entry:
> +  %0 = call i32 @llvm.AMDGPU.get.image.id.3d(
> +      %opencl.image3d_t addrspace(1)* %in) #0
> +  %1 = call i32 @llvm.r600.read.image.attribute(i32 %0, i32 3) #0
> +  store i32 %1, i32 addrspace(1)* %out
> +  ret void
> +}
> +
> +
> +; === CHANNEL ORDER ==========================================================
> +; First channel order at dword index 15+4 -> KC0[4].W
> +
> +; FUNC-LABEL: {{^}}channel_order_2d:
> +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
> +; EG: MOV [[VAL]], KC0[4].W
> +define void @channel_order_2d (%opencl.image2d_t addrspace(1)* %in,
> +                                         i32 addrspace(1)* %out) {
> +entry:
> +  %0 = call i32 @llvm.AMDGPU.get.image.id.2d(
> +      %opencl.image2d_t addrspace(1)* %in) #0
> +  %1 = call i32 @llvm.r600.read.image.attribute(i32 %0, i32 4) #0
> +  store i32 %1, i32 addrspace(1)* %out
> +  ret void
> +}
> +
> +; FUNC-LABEL: {{^}}channel_order_3d:
> +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
> +; EG: MOV [[VAL]], KC0[4].W
> +define void @channel_order_3d (%opencl.image3d_t addrspace(1)* %in,
> +                                         i32 addrspace(1)* %out) {
> +entry:
> +  %0 = call i32 @llvm.AMDGPU.get.image.id.3d(
> +      %opencl.image3d_t addrspace(1)* %in) #0
> +  %1 = call i32 @llvm.r600.read.image.attribute(i32 %0, i32 4) #0
> +  store i32 %1, i32 addrspace(1)* %out
> +  ret void
> +}
> +
> +
> +; === 2ND IMAGE ==============================================================
> +; 9 implicit args + 4 explicit args + 1 grid dim + 3 grid offset = 17 dwords
> +; 17 dwords to first image attrib + 5 attribs for first image = 22 dwords
> +; Height of the second image is at 22+1 -> KC0[5].W
> +;
> +; FUNC-LABEL: {{^}}image_arg_2nd:
> +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
> +; EG: MOV [[VAL]], KC0[5].W
> +define void @image_arg_2nd (%opencl.image3d_t addrspace(1)* %in1,
> +                            i32 %x,
> +                            %opencl.image2d_t addrspace(1)* %in2,
> +                            i32 addrspace(1)* %out) {
> +entry:
> +  %0 = call i32 @llvm.AMDGPU.get.image.id.2d(
> +      %opencl.image2d_t addrspace(1)* %in2) #0
> +  %1 = call i32 @llvm.r600.read.image.attribute(i32 %0, i32 1) #0
> +  store i32 %1, i32 addrspace(1)* %out
> +  ret void
> +}
> +
> +%opencl.image2d_t = type opaque
> +%opencl.image3d_t = type opaque
> +
> +declare i32 @llvm.AMDGPU.get.image.id.2d(%opencl.image2d_t addrspace(1)*) #0
> +declare i32 @llvm.AMDGPU.get.image.id.3d(%opencl.image3d_t addrspace(1)*) #0
> +declare i32 @llvm.r600.read.image.attribute(i32, i32) #0
> +
> +attributes #0 = { readnone }
> -- 
> 2.4.2
> 



More information about the llvm-commits mailing list