[llvm] AMDGPU: Try to add some more amdgpu-perf-hint tests (PR #102644)
Stanislav Mekhanoshin via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 9 13:18:31 PDT 2024
================
@@ -168,4 +312,179 @@ bb2: ; preds = %bb1
ret void
}
+define void @test_membound_func(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(1) nocapture %arg1) {
+; CHECK-LABEL: define void @test_membound_func(
+; CHECK-SAME: ptr addrspace(1) nocapture readonly [[ARG:%.*]], ptr addrspace(1) nocapture [[ARG1:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[BB:.*:]]
+; CHECK-NEXT: [[TMP:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x()
+; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP]] to i64
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[ARG]], i64 [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP3]], align 16
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[ARG1]], i64 [[TMP2]]
+; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr addrspace(1) [[TMP5]], align 16
+; CHECK-NEXT: [[TMP6:%.*]] = add nuw nsw i64 [[TMP2]], 1
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[ARG]], i64 [[TMP6]]
+; CHECK-NEXT: [[TMP8:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP7]], align 16
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[ARG1]], i64 [[TMP6]]
+; CHECK-NEXT: store <4 x i32> [[TMP8]], ptr addrspace(1) [[TMP9]], align 16
+; CHECK-NEXT: ret void
+;
+bb:
+ %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
+ %tmp2 = zext i32 %tmp to i64
+ %tmp3 = getelementptr inbounds <4 x i32>, ptr addrspace(1) %arg, i64 %tmp2
+ %tmp4 = load <4 x i32>, ptr addrspace(1) %tmp3, align 16
+ %tmp5 = getelementptr inbounds <4 x i32>, ptr addrspace(1) %arg1, i64 %tmp2
+ store <4 x i32> %tmp4, ptr addrspace(1) %tmp5, align 16
+ %tmp6 = add nuw nsw i64 %tmp2, 1
+ %tmp7 = getelementptr inbounds <4 x i32>, ptr addrspace(1) %arg, i64 %tmp6
+ %tmp8 = load <4 x i32>, ptr addrspace(1) %tmp7, align 16
+ %tmp9 = getelementptr inbounds <4 x i32>, ptr addrspace(1) %arg1, i64 %tmp6
+ store <4 x i32> %tmp8, ptr addrspace(1) %tmp9, align 16
+ ret void
+}
+
+; GCN-LABEL: {{^}}kernel_call_test_membound_func:
+; GCN: MemoryBound: 1
+; GCN: WaveLimiterHint : 1
+define amdgpu_kernel void @kernel_call_test_membound_func(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(1) nocapture %arg1) {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_call_test_membound_func(
+; CHECK-SAME: ptr addrspace(1) nocapture readonly [[ARG:%.*]], ptr addrspace(1) nocapture [[ARG1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: call void @test_membound_func(ptr addrspace(1) nocapture readonly [[ARG]], ptr addrspace(1) nocapture [[ARG1]])
+; CHECK-NEXT: ret void
+;
+ call void @test_membound_func(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(1) nocapture %arg1)
+ ret void
+}
+
+; TODO: Probably should assume yes?
----------------
rampitec wrote:
No, setting membound allows to pessimise scheduling. Safe assumption it is not memory bound.
https://github.com/llvm/llvm-project/pull/102644
More information about the llvm-commits
mailing list