[llvm] AMDGPU: Try to add some more amdgpu-perf-hint tests (PR #102644)

Stanislav Mekhanoshin via llvm-commits llvm-commits at lists.llvm.org
Fri Aug 9 13:18:31 PDT 2024


================
@@ -168,4 +312,179 @@ bb2:                                              ; preds = %bb1
   ret void
 }
 
+define void @test_membound_func(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(1) nocapture %arg1) {
+; CHECK-LABEL: define void @test_membound_func(
+; CHECK-SAME: ptr addrspace(1) nocapture readonly [[ARG:%.*]], ptr addrspace(1) nocapture [[ARG1:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:  [[BB:.*:]]
+; CHECK-NEXT:    [[TMP:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x()
+; CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP]] to i64
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[ARG]], i64 [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP3]], align 16
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[ARG1]], i64 [[TMP2]]
+; CHECK-NEXT:    store <4 x i32> [[TMP4]], ptr addrspace(1) [[TMP5]], align 16
+; CHECK-NEXT:    [[TMP6:%.*]] = add nuw nsw i64 [[TMP2]], 1
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[ARG]], i64 [[TMP6]]
+; CHECK-NEXT:    [[TMP8:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP7]], align 16
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[ARG1]], i64 [[TMP6]]
+; CHECK-NEXT:    store <4 x i32> [[TMP8]], ptr addrspace(1) [[TMP9]], align 16
+; CHECK-NEXT:    ret void
+;
+bb:
+  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
+  %tmp2 = zext i32 %tmp to i64
+  %tmp3 = getelementptr inbounds <4 x i32>, ptr addrspace(1) %arg, i64 %tmp2
+  %tmp4 = load <4 x i32>, ptr addrspace(1) %tmp3, align 16
+  %tmp5 = getelementptr inbounds <4 x i32>, ptr addrspace(1) %arg1, i64 %tmp2
+  store <4 x i32> %tmp4, ptr addrspace(1) %tmp5, align 16
+  %tmp6 = add nuw nsw i64 %tmp2, 1
+  %tmp7 = getelementptr inbounds <4 x i32>, ptr addrspace(1) %arg, i64 %tmp6
+  %tmp8 = load <4 x i32>, ptr addrspace(1) %tmp7, align 16
+  %tmp9 = getelementptr inbounds <4 x i32>, ptr addrspace(1) %arg1, i64 %tmp6
+  store <4 x i32> %tmp8, ptr addrspace(1) %tmp9, align 16
+  ret void
+}
+
+; GCN-LABEL: {{^}}kernel_call_test_membound_func:
+; GCN: MemoryBound: 1
+; GCN: WaveLimiterHint : 1
+define amdgpu_kernel void @kernel_call_test_membound_func(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(1) nocapture %arg1) {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_call_test_membound_func(
+; CHECK-SAME: ptr addrspace(1) nocapture readonly [[ARG:%.*]], ptr addrspace(1) nocapture [[ARG1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    call void @test_membound_func(ptr addrspace(1) nocapture readonly [[ARG]], ptr addrspace(1) nocapture [[ARG1]])
+; CHECK-NEXT:    ret void
+;
+  call void @test_membound_func(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(1) nocapture %arg1)
+  ret void
+}
+
+; TODO: Probably should assume yes?
----------------
rampitec wrote:

No, setting membound allows to pessimise scheduling. Safe assumption it is not memory bound.

https://github.com/llvm/llvm-project/pull/102644


More information about the llvm-commits mailing list