[PATCH] D32831: [AMDGPU] In the new waitcnt insertion pass, use getHeader instead of getTopBlock to find the loop header.

Wed May 3 15:18:45 PDT 2017

arsenm added inline comments.

================
Comment at: test/CodeGen/AMDGPU/waitcnt-looptest.ll:1
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=-flat-for-global | FileCheck --check-prefix=DEFAULT %s
+
----------------
s/DEFAULT/GCN

================
Comment at: test/CodeGen/AMDGPU/waitcnt-looptest.ll:26-29
+; ModuleID = 'testfile.opt.bc'
+source_filename = "llvm-link"
+target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
+target triple = "amdgcn-amd-amdhsa-opencl"
----------------
Remove these

================
Comment at: test/CodeGen/AMDGPU/waitcnt-looptest.ll:34
+; Function Attrs: nounwind
+define amdgpu_kernel void @testKernel(i32 addrspace(1)* nocapture) local_unnamed_addr #2 !kernel_arg_addr_space !5 !kernel_arg_access_qual !6 !kernel_arg_type !7 !kernel_arg_base_type !7 !kernel_arg_type_qual !8 !kernel_arg_name !9 {
+  %2 = tail call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #3
----------------
How is this function testing loops when it doesn't have any control flow?

================
Comment at: test/CodeGen/AMDGPU/waitcnt-looptest.ll:38
+  %4 = tail call i32 @llvm.amdgcn.workgroup.id.x() #3
+  %5 = getelementptr inbounds i8, i8 addrspace(2)* %2, i64 4
+  %6 = bitcast i8 addrspace(2)* %5 to i16 addrspace(2)*
----------------
Can you run instnamer on this test?

================
Comment at: test/CodeGen/AMDGPU/waitcnt-looptest.ll:84-86
+attributes #0 = { norecurse nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="fiji" "target-features"="+16-bit-insts,+dpp,+fp64-fp16-denormals,+s-memrealtime,-fp32-denormals" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="fiji" "target-features"="+16-bit-insts,+dpp,+fp64-fp16-denormals,+s-memrealtime,-fp32-denormals" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="fiji" "target-features"="+16-bit-insts,+dpp,+fp64-fp16-denormals,+s-memrealtime,-fp32-denormals" "unsafe-fp-math"="false" "use-soft-float"="false" }
----------------
Remove extra string attributes

================
Comment at: test/CodeGen/AMDGPU/waitcnt-looptest.ll:89-104
+!opencl.ocl.version = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 2, i32 0}
+!1 = !{!"clang version 4.0 "}
+!2 = !{!3, !3, i64 0}
+!3 = !{!"omnipotent char", !4, i64 0}
----------------
Remove metadata

https://reviews.llvm.org/D32831