[llvm] r280399 - AMDGPU: Add runtime metadata for pointee alignment of argument.

Yaxun Liu via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 1 11:46:49 PDT 2016


Author: yaxunl
Date: Thu Sep  1 13:46:49 2016
New Revision: 280399

URL: http://llvm.org/viewvc/llvm-project?rev=280399&view=rev
Log:
AMDGPU: Add runtime metadata for pointee alignment of argument.

Add runtime metadata for the pointee alignment of pointer-type kernel arguments. The key is KeyArgPointeeAlign and the value is a 32-bit unsigned integer.

Differential Revision: https://reviews.llvm.org/D24145

Modified:
    llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
    llvm/trunk/lib/Target/AMDGPU/AMDGPURuntimeMetadata.h
    llvm/trunk/test/CodeGen/AMDGPU/runtime-metadata.ll

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp?rev=280399&r1=280398&r2=280399&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp Thu Sep  1 13:46:49 2016
@@ -886,13 +886,19 @@ void AMDGPUAsmPrinter::emitRuntimeMetada
     unsigned I = Arg.getArgNo();
     OutStreamer->EmitIntValue(RuntimeMD::KeyArgBegin, 1);
 
-    // Emit KeyArgSize and KeyArgAlign.
+    // Emit KeyArgSize, KeyArgAlign and KeyArgPointeeAlign.
     Type *T = Arg.getType();
     const DataLayout &DL = F.getParent()->getDataLayout();
     emitRuntimeMDIntValue(*OutStreamer, RuntimeMD::KeyArgSize,
                           DL.getTypeAllocSize(T), 4);
     emitRuntimeMDIntValue(*OutStreamer, RuntimeMD::KeyArgAlign,
                           DL.getABITypeAlignment(T), 4);
+    if (auto PT = dyn_cast<PointerType>(T)) {
+      auto ET = PT->getElementType();
+      if (ET->isSized())
+        emitRuntimeMDIntValue(*OutStreamer, RuntimeMD::KeyArgPointeeAlign,
+                          DL.getABITypeAlignment(ET), 4);
+    }
 
     // Emit KeyArgTypeName.
     auto TypeName = dyn_cast<MDString>(F.getMetadata(

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPURuntimeMetadata.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPURuntimeMetadata.h?rev=280399&r1=280398&r2=280399&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPURuntimeMetadata.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPURuntimeMetadata.h Thu Sep  1 13:46:49 2016
@@ -84,6 +84,7 @@ namespace RuntimeMD {
     KeyFlatWorkGroupSizeLimits  = 29, // Flat work group size limits
     KeyMaxWorkGroupSize         = 30, // Maximum work group size
     KeyNoPartialWorkGroups      = 31, // No partial work groups
+    KeyArgPointeeAlign          = 32, // Alignment of pointee type
   };
 
   enum Language : uint8_t {

Modified: llvm/trunk/test/CodeGen/AMDGPU/runtime-metadata.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/runtime-metadata.ll?rev=280399&r1=280398&r2=280399&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/runtime-metadata.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/runtime-metadata.ll Thu Sep  1 13:46:49 2016
@@ -217,6 +217,8 @@ define amdgpu_kernel void @test_double16
 ; CHECK-NEXT: .long	8
 ; CHECK-NEXT: .byte	10
 ; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	32
+; CHECK-NEXT: .long	4
 ; CHECK-NEXT: .byte	11
 ; CHECK-NEXT: .long	5
 ; CHECK-NEXT: .ascii	"int *"
@@ -331,6 +333,8 @@ define amdgpu_kernel void @test_queue(%o
 ; CHECK-NEXT: .long	4
 ; CHECK-NEXT: .byte	10
 ; CHECK-NEXT: .long	4
+; CHECK-NEXT: .byte	32
+; CHECK-NEXT: .long	4
 ; CHECK-NEXT: .byte	11
 ; CHECK-NEXT: .long	8
 ; CHECK-NEXT: .ascii	"struct A"
@@ -444,6 +448,8 @@ define amdgpu_kernel void @test_multi_ar
 ; CHECK-NEXT: .long	8
 ; CHECK-NEXT: .byte	10
 ; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	32
+; CHECK-NEXT: .long	4
 ; CHECK-NEXT: .byte	11
 ; CHECK-NEXT: .long	5
 ; CHECK-NEXT: .ascii	"int *"
@@ -461,6 +467,8 @@ define amdgpu_kernel void @test_multi_ar
 ; CHECK-NEXT: .long	8
 ; CHECK-NEXT: .byte	10
 ; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	32
+; CHECK-NEXT: .long	4
 ; CHECK-NEXT: .byte	11
 ; CHECK-NEXT: .long	5
 ; CHECK-NEXT: .ascii	"int *"
@@ -478,6 +486,8 @@ define amdgpu_kernel void @test_multi_ar
 ; CHECK-NEXT: .long	4
 ; CHECK-NEXT: .byte	10
 ; CHECK-NEXT: .long	4
+; CHECK-NEXT: .byte	32
+; CHECK-NEXT: .long	4
 ; CHECK-NEXT: .byte	11
 ; CHECK-NEXT: .long	5
 ; CHECK-NEXT: .ascii	"int *"
@@ -507,6 +517,8 @@ define amdgpu_kernel void @test_addr_spa
 ; CHECK-NEXT: .long	8
 ; CHECK-NEXT: .byte	10
 ; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	32
+; CHECK-NEXT: .long	4
 ; CHECK-NEXT: .byte	11
 ; CHECK-NEXT: .long	5
 ; CHECK-NEXT: .ascii	"int *"
@@ -525,6 +537,8 @@ define amdgpu_kernel void @test_addr_spa
 ; CHECK-NEXT: .long	8
 ; CHECK-NEXT: .byte	10
 ; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	32
+; CHECK-NEXT: .long	4
 ; CHECK-NEXT: .byte	11
 ; CHECK-NEXT: .long	5
 ; CHECK-NEXT: .ascii	"int *"
@@ -915,6 +929,8 @@ define amdgpu_kernel void @test_wgs_hint
 ; CHECK-NEXT: .long	8
 ; CHECK-NEXT: .byte	10
 ; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	32
+; CHECK-NEXT: .long	4
 ; CHECK-NEXT: .byte	11
 ; CHECK-NEXT: .long	6
 ; CHECK-NEXT: .ascii	"int **"
@@ -944,6 +960,8 @@ define amdgpu_kernel void @test_arg_ptr_
 ; CHECK-NEXT: .long	4
 ; CHECK-NEXT: .byte	10
 ; CHECK-NEXT: .long	4
+; CHECK-NEXT: .byte	32
+; CHECK-NEXT: .long	8
 ; CHECK-NEXT: .byte	11
 ; CHECK-NEXT: .long	8
 ; CHECK-NEXT: .ascii	"struct B"
@@ -1018,6 +1036,151 @@ define amdgpu_kernel void @test_arg_unkn
   ret void
 }
 
+; CHECK-LABEL:{{^}}test_pointee_align:
+; CHECK: .section        .AMDGPU.runtime_metadata
+; CHECK-NEXT: .byte	4
+; CHECK-NEXT: .byte	6
+; CHECK-NEXT: .long	18
+; CHECK-NEXT: .ascii	"test_pointee_align"
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	32
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	11
+; CHECK-NEXT: .long	6
+; CHECK-NEXT: .ascii	"long *"
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	1
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	16
+; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .byte	15
+; CHECK-NEXT: .byte	1
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	4
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	4
+; CHECK-NEXT: .byte	32
+; CHECK-NEXT: .long	1
+; CHECK-NEXT: .byte	11
+; CHECK-NEXT: .long	6
+; CHECK-NEXT: .ascii	"char *"
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	1
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	1
+; CHECK-NEXT: .byte	16
+; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .byte	15
+; CHECK-NEXT: .byte	3
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	4
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	4
+; CHECK-NEXT: .byte	32
+; CHECK-NEXT: .long	2
+; CHECK-NEXT: .byte	11
+; CHECK-NEXT: .long	7
+; CHECK-NEXT: .ascii	"char2 *"
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	1
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	1
+; CHECK-NEXT: .byte	16
+; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .byte	15
+; CHECK-NEXT: .byte	3
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	4
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	4
+; CHECK-NEXT: .byte	32
+; CHECK-NEXT: .long	4
+; CHECK-NEXT: .byte	11
+; CHECK-NEXT: .long	7
+; CHECK-NEXT: .ascii	"char3 *"
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	1
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	1
+; CHECK-NEXT: .byte	16
+; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .byte	15
+; CHECK-NEXT: .byte	3
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	4
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	4
+; CHECK-NEXT: .byte	32
+; CHECK-NEXT: .long	4
+; CHECK-NEXT: .byte	11
+; CHECK-NEXT: .long	7
+; CHECK-NEXT: .ascii	"char4 *"
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	1
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	1
+; CHECK-NEXT: .byte	16
+; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .byte	15
+; CHECK-NEXT: .byte	3
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	4
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	4
+; CHECK-NEXT: .byte	32
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	11
+; CHECK-NEXT: .long	7
+; CHECK-NEXT: .ascii	"char8 *"
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	1
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	1
+; CHECK-NEXT: .byte	16
+; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .byte	15
+; CHECK-NEXT: .byte	3
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	4
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	4
+; CHECK-NEXT: .byte	32
+; CHECK-NEXT: .long	16
+; CHECK-NEXT: .byte	11
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .ascii	"char16 *"
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	1
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	1
+; CHECK-NEXT: .byte	16
+; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .byte	15
+; CHECK-NEXT: .byte	3
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	5
+
+define amdgpu_kernel void @test_pointee_align(i64 addrspace(1)* %a, i8 addrspace(3)* %b, <2 x i8> addrspace(3)* %c, <3 x i8> addrspace(3)* %d, <4 x i8> addrspace(3)* %e, <8 x i8> addrspace(3)* %f, <16 x i8> addrspace(3)* %g) !kernel_arg_addr_space !91 !kernel_arg_access_qual !92 !kernel_arg_type !93 !kernel_arg_base_type !93 !kernel_arg_type_qual !94 {
+  ret void
+}
+
 !1 = !{i32 0}
 !2 = !{!"none"}
 !3 = !{!"int"}
@@ -1063,3 +1226,7 @@ define amdgpu_kernel void @test_arg_unkn
 !84 = !{!"clk_event_t"}
 !opencl.ocl.version = !{!90}
 !90 = !{i32 2, i32 0}
+!91 = !{i32 0, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3}
+!92 = !{!"none", !"none", !"none", !"none", !"none", !"none", !"none"}
+!93 = !{!"long *", !"char *", !"char2 *", !"char3 *", !"char4 *", !"char8 *", !"char16 *"}
+!94 = !{!"", !"", !"", !"", !"", !"", !""}




More information about the llvm-commits mailing list