[Libclc-dev] [PATCH 6/9] Add optimized generic addrspace(0) vload implementation

Aaron Watry awatry at gmail.com
Tue Jul 22 18:46:47 PDT 2014


These functions are not used yet, but they will be very soon.

Signed-off-by: Aaron Watry <awatry at gmail.com>
---
 generic/lib/shared/vload_impl.ll | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/generic/lib/shared/vload_impl.ll b/generic/lib/shared/vload_impl.ll
index 33ba996..7c5e580 100644
--- a/generic/lib/shared/vload_impl.ll
+++ b/generic/lib/shared/vload_impl.ll
@@ -1,6 +1,36 @@
 ; This provides optimized implementations of vload2/3/4/8/16 for 32-bit int/uint
 ; The address spaces get mapped to data types in target-specific usages
 
+define <2 x i32> @__clc_vload2_i32__addr0(i32 addrspace(0)* nocapture %addr) alwaysinline nounwind readonly { ; loads 2 consecutive i32 values starting at %addr
+  %vec_ptr = bitcast i32 addrspace(0)* %addr to <2 x i32> addrspace(0)* ; reinterpret the scalar pointer as a <2 x i32> pointer
+  %vec = load <2 x i32> addrspace(0)* %vec_ptr, align 4, !tbaa !3 ; single vector load; align 4 is the i32 element alignment, not the vector's
+  ret <2 x i32> %vec
+}
+
+define <3 x i32> @__clc_vload3_i32__addr0(i32 addrspace(0)* nocapture %addr) alwaysinline nounwind readonly { ; loads 3 consecutive i32 values starting at %addr
+  %vec_ptr = bitcast i32 addrspace(0)* %addr to <3 x i32> addrspace(0)* ; reinterpret the scalar pointer as a <3 x i32> pointer
+  %vec = load <3 x i32> addrspace(0)* %vec_ptr, align 4, !tbaa !3 ; single vector load; align 4 is the i32 element alignment, not the vector's
+  ret <3 x i32> %vec
+}
+
+define <4 x i32> @__clc_vload4_i32__addr0(i32 addrspace(0)* nocapture %addr) alwaysinline nounwind readonly { ; loads 4 consecutive i32 values starting at %addr
+  %vec_ptr = bitcast i32 addrspace(0)* %addr to <4 x i32> addrspace(0)* ; reinterpret the scalar pointer as a <4 x i32> pointer
+  %vec = load <4 x i32> addrspace(0)* %vec_ptr, align 4, !tbaa !3 ; single vector load; align 4 is the i32 element alignment, not the vector's
+  ret <4 x i32> %vec
+}
+
+define <8 x i32> @__clc_vload8_i32__addr0(i32 addrspace(0)* nocapture %addr) alwaysinline nounwind readonly { ; loads 8 consecutive i32 values starting at %addr
+  %vec_ptr = bitcast i32 addrspace(0)* %addr to <8 x i32> addrspace(0)* ; reinterpret the scalar pointer as an <8 x i32> pointer
+  %vec = load <8 x i32> addrspace(0)* %vec_ptr, align 4, !tbaa !3 ; single vector load; align 4 is the i32 element alignment, not the vector's
+  ret <8 x i32> %vec
+}
+
+define <16 x i32> @__clc_vload16_i32__addr0(i32 addrspace(0)* nocapture %addr) alwaysinline nounwind readonly { ; loads 16 consecutive i32 values starting at %addr
+  %vec_ptr = bitcast i32 addrspace(0)* %addr to <16 x i32> addrspace(0)* ; reinterpret the scalar pointer as a <16 x i32> pointer
+  %vec = load <16 x i32> addrspace(0)* %vec_ptr, align 4, !tbaa !3 ; single vector load; align 4 is the i32 element alignment, not the vector's
+  ret <16 x i32> %vec
+}
+
 define <2 x i32> @__clc_vload2_i32__addr1(i32 addrspace(1)* nocapture %addr) nounwind readonly alwaysinline {
   %1 = bitcast i32 addrspace(1)* %addr to <2 x i32> addrspace(1)*
   %2 = load <2 x i32> addrspace(1)* %1, align 4, !tbaa !3
-- 
1.9.1





More information about the Libclc-dev mailing list