[llvm] [PATCH 2/2] AMDGPU: Add image attribute reader intrinsic.
Zoltan Gilian
zoltan.gilian at gmail.com
Thu Jul 9 22:45:47 PDT 2015
Added the llvm.r600.read.image.attribute intrinsic to read OpenCL image
attributes. The intrinsic is lowered to an implicit parameter load.
---
include/llvm/IR/IntrinsicsAMDGPU.td | 5 +
lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 2 +
lib/Target/AMDGPU/AMDGPUISelLowering.h | 3 +-
lib/Target/AMDGPU/R600ISelLowering.cpp | 13 +++
test/CodeGen/AMDGPU/image-attributes.ll | 168 +++++++++++++++++++++++++++++++
5 files changed, 190 insertions(+), 1 deletion(-)
create mode 100644 test/CodeGen/AMDGPU/image-attributes.ll
diff --git a/include/llvm/IR/IntrinsicsAMDGPU.td b/include/llvm/IR/IntrinsicsAMDGPU.td
index 510e5ad..05adcfa 100644
--- a/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -33,6 +33,11 @@ defm int_r600_read_tgid : R600ReadPreloadRegisterIntrinsic_xyz <
"__builtin_r600_read_tgid">;
defm int_r600_read_tidig : R600ReadPreloadRegisterIntrinsic_xyz <
"__builtin_r600_read_tidig">;
+
+def int_r600_read_image_attribute
+ : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>,
+ GCCBuiltin<"__builtin_r600_read_image_attribute">;
+
} // End TargetPrefix = "r600"
let TargetPrefix = "AMDGPU" in {
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index c65abf9..6d60d61 100644
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -2653,6 +2653,8 @@ uint32_t AMDGPUTargetLowering::getImplicitParameterOffset(
return ArgOffset;
case GRID_OFFSET:
return ArgOffset + 4;
+ case IMAGE_ATTRIBUTES:
+ return ArgOffset + 16;
}
llvm_unreachable("unexpected implicit parameter type");
}
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.h b/lib/Target/AMDGPU/AMDGPUISelLowering.h
index 27ceaf1..b05505e 100644
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -210,7 +210,8 @@ public:
enum ImplicitParameter {
GRID_DIM,
- GRID_OFFSET
+ GRID_OFFSET,
+ IMAGE_ATTRIBUTES
};
/// \brief Helper function that returns the byte offset of the given
diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp
index 9552c72..6f5b318 100644
--- a/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -820,6 +820,19 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
return LowerImplicitParameter(DAG, VT, DL, ByteOffset / 4);
}
+ case Intrinsic::r600_read_image_attribute: {
+ // intrinsic argument 0 (read below as DAG operand 1): image index
+ // intrinsic argument 1 (read below as DAG operand 2): attribute index
+
+ auto ByteOffset = getImplicitParameterOffset(MFI, IMAGE_ATTRIBUTES);
+ auto DWordOffset = ByteOffset / 4;
+ // There are 5 dword attributes per image.
+ DWordOffset += 5 * Op.getConstantOperandVal(1);
+ // Skip to the requested attribute.
+ DWordOffset += Op.getConstantOperandVal(2);
+ return LowerImplicitParameter(DAG, VT, DL, DWordOffset);
+ }
+
case Intrinsic::r600_read_tgid_x:
return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
AMDGPU::T1_X, VT);
diff --git a/test/CodeGen/AMDGPU/image-attributes.ll b/test/CodeGen/AMDGPU/image-attributes.ll
new file mode 100644
index 0000000..b70e2f4
--- /dev/null
+++ b/test/CodeGen/AMDGPU/image-attributes.ll
@@ -0,0 +1,168 @@
+; RUN: llc -march=r600 -mcpu=juniper < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+; === WIDTH ==================================================================
+; 9 implicit args + 2 explicit args + 1 grid dim + 3 grid offset = 15 dwords
+; First width at dword index 15+0 -> KC0[3].W
+
+; FUNC-LABEL: {{^}}width_2d:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], KC0[3].W
+define void @width_2d (%opencl.image2d_t addrspace(1)* %in,
+ i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.AMDGPU.get.image.id.2d(
+ %opencl.image2d_t addrspace(1)* %in) #0
+ %1 = call i32 @llvm.r600.read.image.attribute(i32 %0, i32 0) #0
+ store i32 %1, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}width_3d:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], KC0[3].W
+define void @width_3d (%opencl.image3d_t addrspace(1)* %in,
+ i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.AMDGPU.get.image.id.3d(
+ %opencl.image3d_t addrspace(1)* %in) #0
+ %1 = call i32 @llvm.r600.read.image.attribute(i32 %0, i32 0) #0
+ store i32 %1, i32 addrspace(1)* %out
+ ret void
+}
+
+
+; === HEIGHT =================================================================
+; First height at dword index 15+1 -> KC0[4].X
+
+; FUNC-LABEL: {{^}}height_2d:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], KC0[4].X
+define void @height_2d (%opencl.image2d_t addrspace(1)* %in,
+ i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.AMDGPU.get.image.id.2d(
+ %opencl.image2d_t addrspace(1)* %in) #0
+ %1 = call i32 @llvm.r600.read.image.attribute(i32 %0, i32 1) #0
+ store i32 %1, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}height_3d:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], KC0[4].X
+define void @height_3d (%opencl.image3d_t addrspace(1)* %in,
+ i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.AMDGPU.get.image.id.3d(
+ %opencl.image3d_t addrspace(1)* %in) #0
+ %1 = call i32 @llvm.r600.read.image.attribute(i32 %0, i32 1) #0
+ store i32 %1, i32 addrspace(1)* %out
+ ret void
+}
+
+
+; === DEPTH ==================================================================
+; First depth at dword index 15+2 -> KC0[4].Y
+
+; FUNC-LABEL: {{^}}depth_3d:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], KC0[4].Y
+define void @depth_3d (%opencl.image3d_t addrspace(1)* %in,
+ i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.AMDGPU.get.image.id.3d(
+ %opencl.image3d_t addrspace(1)* %in) #0
+ %1 = call i32 @llvm.r600.read.image.attribute(i32 %0, i32 2) #0
+ store i32 %1, i32 addrspace(1)* %out
+ ret void
+}
+
+
+; === CHANNEL DATA TYPE ======================================================
+; First channel data type at dword index 15+3 -> KC0[4].Z
+
+; FUNC-LABEL: {{^}}data_type_2d:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], KC0[4].Z
+define void @data_type_2d (%opencl.image2d_t addrspace(1)* %in,
+ i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.AMDGPU.get.image.id.2d(
+ %opencl.image2d_t addrspace(1)* %in) #0
+ %1 = call i32 @llvm.r600.read.image.attribute(i32 %0, i32 3) #0
+ store i32 %1, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}data_type_3d:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], KC0[4].Z
+define void @data_type_3d (%opencl.image3d_t addrspace(1)* %in,
+ i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.AMDGPU.get.image.id.3d(
+ %opencl.image3d_t addrspace(1)* %in) #0
+ %1 = call i32 @llvm.r600.read.image.attribute(i32 %0, i32 3) #0
+ store i32 %1, i32 addrspace(1)* %out
+ ret void
+}
+
+
+; === CHANNEL ORDER ==========================================================
+; First channel order at dword index 15+4 -> KC0[4].W
+
+; FUNC-LABEL: {{^}}channel_order_2d:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], KC0[4].W
+define void @channel_order_2d (%opencl.image2d_t addrspace(1)* %in,
+ i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.AMDGPU.get.image.id.2d(
+ %opencl.image2d_t addrspace(1)* %in) #0
+ %1 = call i32 @llvm.r600.read.image.attribute(i32 %0, i32 4) #0
+ store i32 %1, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}channel_order_3d:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], KC0[4].W
+define void @channel_order_3d (%opencl.image3d_t addrspace(1)* %in,
+ i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.AMDGPU.get.image.id.3d(
+ %opencl.image3d_t addrspace(1)* %in) #0
+ %1 = call i32 @llvm.r600.read.image.attribute(i32 %0, i32 4) #0
+ store i32 %1, i32 addrspace(1)* %out
+ ret void
+}
+
+
+; === 2ND IMAGE ==============================================================
+; 9 implicit args + 4 explicit args + 1 grid dim + 3 grid offset = 17 dwords
+; 17 dwords to first image attrib + 5 attribs for first image = 22 dwords
+; Height of the second image is at 22+1 -> KC0[5].W
+;
+; FUNC-LABEL: {{^}}image_arg_2nd:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], KC0[5].W
+define void @image_arg_2nd (%opencl.image3d_t addrspace(1)* %in1,
+ i32 %x,
+ %opencl.image2d_t addrspace(1)* %in2,
+ i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.AMDGPU.get.image.id.2d(
+ %opencl.image2d_t addrspace(1)* %in2) #0
+ %1 = call i32 @llvm.r600.read.image.attribute(i32 %0, i32 1) #0
+ store i32 %1, i32 addrspace(1)* %out
+ ret void
+}
+
+%opencl.image2d_t = type opaque
+%opencl.image3d_t = type opaque
+
+declare i32 @llvm.AMDGPU.get.image.id.2d(%opencl.image2d_t addrspace(1)*) #0
+declare i32 @llvm.AMDGPU.get.image.id.3d(%opencl.image3d_t addrspace(1)*) #0
+declare i32 @llvm.r600.read.image.attribute(i32, i32) #0
+
+attributes #0 = { readnone }
--
2.4.2
More information about the llvm-commits
mailing list