[llvm-commits] [vector_llvm] CVS: llvm/examples/SIMD/Transpose/Makefile main.c transpose.altivec.handwritten.c transpose.sse.handwritten.c transpose.vectorc.c
Robert L. Bocchino Jr.
bocchino at persephone.cs.uiuc.edu
Sun Oct 23 15:50:24 PDT 2005
Changes in directory llvm/examples/SIMD/Transpose:
Makefile added (r1.1.2.1)
main.c added (r1.1.2.1)
transpose.altivec.handwritten.c added (r1.1.2.1)
transpose.sse.handwritten.c added (r1.1.2.1)
transpose.vectorc.c added (r1.1.2.1)
---
Log message:
Examples to illustrate Vector LLVM's SIMD support.
---
Diffs of the changes: (+226 -0)
Makefile | 4 +
main.c | 98 ++++++++++++++++++++++++++++++++++++++++
transpose.altivec.handwritten.c | 44 +++++++++++++++++
transpose.sse.handwritten.c | 38 +++++++++++++++
transpose.vectorc.c | 42 +++++++++++++++++
5 files changed, 226 insertions
Index: llvm/examples/SIMD/Transpose/Makefile
diff -c /dev/null llvm/examples/SIMD/Transpose/Makefile:1.1.2.1
*** /dev/null Sun Oct 23 17:50:00 2005
--- llvm/examples/SIMD/Transpose/Makefile Sun Oct 23 17:49:42 2005
***************
*** 0 ****
--- 1,4 ----
+ NAME= transpose
+ # NOTE(review): NAME is presumably consumed by the shared rules included below -- confirm in ../Makefile.common
+ include ../Makefile.common
+
Index: llvm/examples/SIMD/Transpose/main.c
diff -c /dev/null llvm/examples/SIMD/Transpose/main.c:1.1.2.1
*** /dev/null Sun Oct 23 17:50:24 2005
--- llvm/examples/SIMD/Transpose/main.c Sun Oct 23 17:49:42 2005
***************
*** 0 ****
--- 1,98 ----
+ #define N 1024 // number of shorts in each buffer; run() walks it in N/64 tiles of 64 shorts
+
+ #include <stdio.h>
+ #include <stdlib.h>
+ #include <sys/time.h>
+ #include <sys/times.h>
+ #include <assert.h>
+ #include "../_malloc.h"
+
+ inline void transpose_scalar(short*, short*); // scalar reference version, defined at the end of this file
+ void transpose_vector(short*, short*); // not defined in this file; NOTE(review): presumably supplied by one of the transpose.*.c variants -- confirm in the build rules
+
+ short *in; // source data; allocated and filled by init()
+ short *out_vector; // destination of transpose_vector() in run()
+ short *out_scalar; // destination of transpose_scalar() in run()
+
+ void init() { // Allocates the three N-short buffers and fills in[] with test data.
+   int i;
+   // Force 16-byte alignment
+   // NOTE(review): relies on _malloc (../_malloc.h, not shown) for that alignment and assumes it returns NULL on failure -- confirm
+   in = (short*) _malloc(N*sizeof(short));
+   out_vector = (short*) _malloc(N*sizeof(short));
+   out_scalar = (short*) _malloc(N*sizeof(short));
+   if (!in || !out_vector || !out_scalar) {
+     fprintf(stderr, "init: buffer allocation failed\n");
+     exit(1);
+   }
+   // Populate in with a range of values: N/2 down to -N/2+1
+   for (i = 0; i < N; ++i) {
+     in[i] = N/2-i;
+   }
+ }
+
+ float run() { // Times the scalar and vector transposes over the whole buffer, checks they agree, returns scalar/vector time.
+   int i,j;
+   struct tms buf_s, buf_e;
+   long scalar_time = 0, vector_time = 0;
+
+   // Each pass makes 1000000 sweeps over the N/64 tiles of 64 shorts (one 8x8 block per call).
+   times(&buf_s);
+   for (i = 0; i < 1000000; ++i)
+     for (j = 0; j < N/64; ++j)
+       transpose_scalar(in+64*j, out_scalar+64*j);
+   times(&buf_e);
+   scalar_time = buf_e.tms_utime - buf_s.tms_utime; // user CPU time, in clock ticks
+   printf("scalar time=%ld, ", scalar_time); // %ld: the argument is a long
+
+   times(&buf_s);
+   for (i = 0; i < 1000000; ++i)
+     for (j = 0; j < N/64; ++j)
+       transpose_vector(in+64*j, out_vector+64*j);
+   times(&buf_e);
+   vector_time = buf_e.tms_utime - buf_s.tms_utime;
+   printf("vector time=%ld, ", vector_time); // %ld: the argument is a long
+
+   float speedup = ((float) scalar_time)/vector_time; // NOTE(review): a vector_time of 0 ticks makes this a float division by zero
+   printf("speedup=%f\n", speedup);
+
+   for (i = 0; i < N; i++) { // the two outputs must match element for element
+     if (out_vector[i] != out_scalar[i]) {
+       printf("FAILED\n");
+       exit(1);
+     }
+   }
+
+   return speedup;
+ }
+
+ /* Driver: initialise the buffers, repeat the timed comparison NRUNS times
+    (NRUNS is not defined in this file) and report the largest speedup. */
+ int main (void)
+ {
+   float top = 0;
+   unsigned pass = 0;
+   init();
+   while (pass < NRUNS) {
+     float current = run();
+     top = (current > top) ? current : top;
+     ++pass;
+   }
+   printf("best speedup=%f\n", top);
+   printf ("PASSED\n");
+   return 0;
+ }
+
+ /* Scalar reference transpose of one 8x8 tile of shorts stored row by row:
+    element (row, col) of input_scalar lands at (col, row) of output_scalar,
+    i.e. output_scalar[8*col + row] = input_scalar[8*row + col]. */
+ void transpose_scalar ( short *input_scalar, short *output_scalar) {
+   unsigned row, col;
+   for (row = 0; row < 8; ++row) {
+     const short *src = input_scalar + 8*row;
+     for (col = 0; col < 8; ++col) {
+       /* same read/write order as the unrolled form: columns 0..7 within each row */
+       output_scalar[8*col + row] = src[col];
+     }
+   }
+ }
Index: llvm/examples/SIMD/Transpose/transpose.altivec.handwritten.c
diff -c /dev/null llvm/examples/SIMD/Transpose/transpose.altivec.handwritten.c:1.1.2.1
*** /dev/null Sun Oct 23 17:50:24 2005
--- llvm/examples/SIMD/Transpose/transpose.altivec.handwritten.c Sun Oct 23 17:49:42 2005
***************
*** 0 ****
--- 1,44 ----
+ void print_vector(vector short v) { /* Debug helper: prints the 8 short lanes of v as 4-digit hex, then a newline. */
+   unsigned i;
+   short *p = ((short*) &v);
+   for (i = 0; i < 8; ++i)
+     printf("%04X ", (unsigned) (unsigned short) p[i]); /* cast: a negative short would otherwise be sign-extended to int and print more than 4 digits */
+   printf("\n");
+ }
+
+ void transpose_vector ( short *input_scalar, short *output_scalar)
+ { /* no 'inline': main.c calls this from another translation unit, so an external definition is required */
+   vector signed short *input = (vector signed short*) input_scalar; /* NOTE(review): the casts assume 16-byte-aligned pointers (main.c allocates under a "Force 16-byte alignment" comment) -- confirm */
+   vector signed short *output = (vector signed short*) output_scalar;
+   /* 8x8 transpose of shorts in three rounds of vec_mergeh/vec_mergel; each lane comment lists "row col" of the elements held */
+   vector signed short a0, a1, a2, a3, a4, a5, a6, a7;
+   vector signed short b0, b1, b2, b3, b4, b5, b6, b7;
+   /* Round 1: merge input row r with row r+4 */
+   b0 = vec_mergeh( input[0], input[4] ); /* [ 00 40 01 41 02 42 03 43 ]*/
+   b1 = vec_mergel( input[0], input[4] ); /* [ 04 44 05 45 06 46 07 47 ]*/
+   b2 = vec_mergeh( input[1], input[5] ); /* [ 10 50 11 51 12 52 13 53 ]*/
+   b3 = vec_mergel( input[1], input[5] ); /* [ 14 54 15 55 16 56 17 57 ]*/
+   b4 = vec_mergeh( input[2], input[6] ); /* [ 20 60 21 61 22 62 23 63 ]*/
+   b5 = vec_mergel( input[2], input[6] ); /* [ 24 64 25 65 26 66 27 67 ]*/
+   b6 = vec_mergeh( input[3], input[7] ); /* [ 30 70 31 71 32 72 33 73 ]*/
+   b7 = vec_mergel( input[3], input[7] ); /* [ 34 74 35 75 36 76 37 77 ]*/
+   /* Round 2: merge b(k) with b(k+4) */
+   a0 = vec_mergeh( b0, b4 ); /* [ 00 20 40 60 01 21 41 61 ]*/
+   a1 = vec_mergel( b0, b4 ); /* [ 02 22 42 62 03 23 43 63 ]*/
+   a2 = vec_mergeh( b1, b5 ); /* [ 04 24 44 64 05 25 45 65 ]*/
+   a3 = vec_mergel( b1, b5 ); /* [ 06 26 46 66 07 27 47 67 ]*/
+   a4 = vec_mergeh( b2, b6 ); /* [ 10 30 50 70 11 31 51 71 ]*/
+   a5 = vec_mergel( b2, b6 ); /* [ 12 32 52 72 13 33 53 73 ]*/
+   a6 = vec_mergeh( b3, b7 ); /* [ 14 34 54 74 15 35 55 75 ]*/
+   a7 = vec_mergel( b3, b7 ); /* [ 16 36 56 76 17 37 57 77 ]*/
+   /* Round 3: merge a(k) with a(k+4); each result is one row of the transposed tile */
+   output[0] = vec_mergeh( a0, a4 ); /* [ 00 10 20 30 40 50 60 70 ]*/
+   output[1] = vec_mergel( a0, a4 ); /* [ 01 11 21 31 41 51 61 71 ]*/
+   output[2] = vec_mergeh( a1, a5 ); /* [ 02 12 22 32 42 52 62 72 ]*/
+   output[3] = vec_mergel( a1, a5 ); /* [ 03 13 23 33 43 53 63 73 ]*/
+   output[4] = vec_mergeh( a2, a6 ); /* [ 04 14 24 34 44 54 64 74 ]*/
+   output[5] = vec_mergel( a2, a6 ); /* [ 05 15 25 35 45 55 65 75 ]*/
+   output[6] = vec_mergeh( a3, a7 ); /* [ 06 16 26 36 46 56 66 76 ]*/
+   output[7] = vec_mergel( a3, a7 ); /* [ 07 17 27 37 47 57 67 77 ]*/
+
+ }
Index: llvm/examples/SIMD/Transpose/transpose.sse.handwritten.c
diff -c /dev/null llvm/examples/SIMD/Transpose/transpose.sse.handwritten.c:1.1.2.1
*** /dev/null Sun Oct 23 17:50:24 2005
--- llvm/examples/SIMD/Transpose/transpose.sse.handwritten.c Sun Oct 23 17:49:42 2005
***************
*** 0 ****
--- 1,38 ----
+ #include "SSE.h"
+
+ void transpose_vector ( short *input_scalar, short *output_scalar)
+ { /* no 'inline': main.c calls this from another translation unit, so an external definition is required */
+   __m128i *input = (__m128i*) input_scalar; /* NOTE(review): the casts assume 16-byte-aligned pointers (main.c allocates under a "Force 16-byte alignment" comment) -- confirm */
+   __m128i *output = (__m128i*) output_scalar;
+   /* 8x8 transpose of shorts in three rounds of _mm_unpacklo_epi16/_mm_unpackhi_epi16; each lane comment lists "row col" of the elements held */
+   __m128i a0, a1, a2, a3, a4, a5, a6, a7;
+   __m128i b0, b1, b2, b3, b4, b5, b6, b7;
+   /* Round 1: interleave input row r with row r+4 */
+   b0 = _mm_unpacklo_epi16( input[0], input[4] ); /* [ 00 40 01 41 02 42 03 43 ]*/
+   b1 = _mm_unpackhi_epi16( input[0], input[4] ); /* [ 04 44 05 45 06 46 07 47 ]*/
+   b2 = _mm_unpacklo_epi16( input[1], input[5] ); /* [ 10 50 11 51 12 52 13 53 ]*/
+   b3 = _mm_unpackhi_epi16( input[1], input[5] ); /* [ 14 54 15 55 16 56 17 57 ]*/
+   b4 = _mm_unpacklo_epi16( input[2], input[6] ); /* [ 20 60 21 61 22 62 23 63 ]*/
+   b5 = _mm_unpackhi_epi16( input[2], input[6] ); /* [ 24 64 25 65 26 66 27 67 ]*/
+   b6 = _mm_unpacklo_epi16( input[3], input[7] ); /* [ 30 70 31 71 32 72 33 73 ]*/
+   b7 = _mm_unpackhi_epi16( input[3], input[7] ); /* [ 34 74 35 75 36 76 37 77 ]*/
+   /* Round 2: interleave b(k) with b(k+4) */
+   a0 = _mm_unpacklo_epi16( b0, b4 ); /* [ 00 20 40 60 01 21 41 61 ]*/
+   a1 = _mm_unpackhi_epi16( b0, b4 ); /* [ 02 22 42 62 03 23 43 63 ]*/
+   a2 = _mm_unpacklo_epi16( b1, b5 ); /* [ 04 24 44 64 05 25 45 65 ]*/
+   a3 = _mm_unpackhi_epi16( b1, b5 ); /* [ 06 26 46 66 07 27 47 67 ]*/
+   a4 = _mm_unpacklo_epi16( b2, b6 ); /* [ 10 30 50 70 11 31 51 71 ]*/
+   a5 = _mm_unpackhi_epi16( b2, b6 ); /* [ 12 32 52 72 13 33 53 73 ]*/
+   a6 = _mm_unpacklo_epi16( b3, b7 ); /* [ 14 34 54 74 15 35 55 75 ]*/
+   a7 = _mm_unpackhi_epi16( b3, b7 ); /* [ 16 36 56 76 17 37 57 77 ]*/
+   /* Round 3: interleave a(k) with a(k+4); each result is one row of the transposed tile */
+   output[0] = _mm_unpacklo_epi16( a0, a4 ); /* [ 00 10 20 30 40 50 60 70 ]*/
+   output[1] = _mm_unpackhi_epi16( a0, a4 ); /* [ 01 11 21 31 41 51 61 71 ]*/
+   output[2] = _mm_unpacklo_epi16( a1, a5 ); /* [ 02 12 22 32 42 52 62 72 ]*/
+   output[3] = _mm_unpackhi_epi16( a1, a5 ); /* [ 03 13 23 33 43 53 63 73 ]*/
+   output[4] = _mm_unpacklo_epi16( a2, a6 ); /* [ 04 14 24 34 44 54 64 74 ]*/
+   output[5] = _mm_unpackhi_epi16( a2, a6 ); /* [ 05 15 25 35 45 55 65 75 ]*/
+   output[6] = _mm_unpacklo_epi16( a3, a7 ); /* [ 06 16 26 36 46 56 66 76 ]*/
+   output[7] = _mm_unpackhi_epi16( a3, a7 ); /* [ 07 17 27 37 47 57 67 77 ]*/
+
+ }
Index: llvm/examples/SIMD/Transpose/transpose.vectorc.c
diff -c /dev/null llvm/examples/SIMD/Transpose/transpose.vectorc.c:1.1.2.1
*** /dev/null Sun Oct 23 17:50:24 2005
--- llvm/examples/SIMD/Transpose/transpose.vectorc.c Sun Oct 23 17:49:42 2005
***************
*** 0 ****
--- 1,42 ----
+ #include "VectorC.h"
+ /* MERGE(out01, out0, out1, in0, in1): declares out01, out0 and out1; combines in0 and in1 into out01, then extracts out0 and out1 from it. NOTE(review): the meaning of the numeric arguments to the vllvm_* calls comes from VectorC.h, not shown -- confirm there. */
+ #define MERGE(out01, out0, out1, in0, in1) \
+ short out01 = vllvm_fixed_vimm_short(0, 16); \
+ out01 = vllvm_fixed_combine_short(out01, 16, in0, 8, 0, 1); \
+ out01 = vllvm_fixed_combine_short(out01, 16, in1, 8, 8, 1); \
+ short out0 = vllvm_extract_short(out01, 0, 2, 8); \
+ short out1 = vllvm_extract_short(out01, 1, 2, 8)
+ /* IN(x): expands to vllvm_load_short(input_scalar, 8, x); needs an input_scalar in scope at the point of use. */
+ #define IN(x) \
+ vllvm_load_short(input_scalar, 8, x)
+ /* STORE(out, idx): expands to vllvm_store_short(out, output_scalar, idx); needs an output_scalar in scope at the point of use. */
+ #define STORE(out, idx) \
+ vllvm_store_short(out, output_scalar, idx)
+
+ void transpose_vector (short *input_scalar, short *output_scalar) { /* Vector C version of the tile transpose; not 'inline' because main.c calls it from another translation unit */
+   MERGE(b01, b0, b1, IN(0), IN(4)); /* first round: pair IN(r) with IN(r+4) */
+   MERGE(b23, b2, b3, IN(1), IN(5));
+   MERGE(b45, b4, b5, IN(2), IN(6));
+   MERGE(b67, b6, b7, IN(3), IN(7));
+   /* second round: pair b(k) with b(k+4) */
+   MERGE(a01, a0, a1, b0, b4);
+   MERGE(a23, a2, a3, b1, b5);
+   MERGE(a45, a4, a5, b2, b6);
+   MERGE(a67, a6, a7, b3, b7);
+   /* third round: pair a(k) with a(k+4) */
+   MERGE(out01, out0, out1, a0, a4);
+   MERGE(out23, out2, out3, a1, a5);
+   MERGE(out45, out4, out5, a2, a6);
+   MERGE(out67, out6, out7, a3, a7);
+   /* write the eight results; STORE(out, idx) expands to vllvm_store_short(out, output_scalar, idx) */
+   STORE(out0, 0);
+   STORE(out1, 1);
+   STORE(out2, 2);
+   STORE(out3, 3);
+   STORE(out4, 4);
+   STORE(out5, 5);
+   STORE(out6, 6);
+   STORE(out7, 7);
+
+ }
+
More information about the llvm-commits
mailing list