[llvm-commits] [vector_llvm] CVS: llvm/examples/SIMD/Transpose/Makefile main.c transpose.altivec.handwritten.c transpose.sse.handwritten.c transpose.vectorc.c

Sun Oct 23 15:50:24 PDT 2005

Changes in directory llvm/examples/SIMD/Transpose:

Makefile added (r1.1.2.1)
main.c added (r1.1.2.1)
transpose.altivec.handwritten.c added (r1.1.2.1)
transpose.sse.handwritten.c added (r1.1.2.1)
transpose.vectorc.c added (r1.1.2.1)
---
Log message:

Examples to illustrate Vector LLVM's SIMD support.

---
Diffs of the changes:  (+226 -0)

 Makefile                        |    4 +
 main.c                          |   98 ++++++++++++++++++++++++++++++++++++++++
 transpose.altivec.handwritten.c |   44 +++++++++++++++++
 transpose.sse.handwritten.c     |   38 +++++++++++++++
 transpose.vectorc.c             |   42 +++++++++++++++++
 5 files changed, 226 insertions

Index: llvm/examples/SIMD/Transpose/Makefile
diff -c /dev/null llvm/examples/SIMD/Transpose/Makefile:1.1.2.1
*** /dev/null	Sun Oct 23 17:50:00 2005
--- llvm/examples/SIMD/Transpose/Makefile	Sun Oct 23 17:49:42 2005
***************
*** 0 ****
--- 1,4 ----
+ # Name of this example; presumably read by the shared rules in
+ # ../Makefile.common -- confirm there.
+ NAME= transpose
+ 
+ include ../Makefile.common
+ 

Index: llvm/examples/SIMD/Transpose/main.c
diff -c /dev/null llvm/examples/SIMD/Transpose/main.c:1.1.2.1
*** /dev/null	Sun Oct 23 17:50:24 2005
--- llvm/examples/SIMD/Transpose/main.c	Sun Oct 23 17:49:42 2005
***************
*** 0 ****
--- 1,98 ----
+ #define N 1024
+ 
+ #include <stdio.h>
+ #include <stdlib.h>
+ #include <sys/time.h>
+ #include <sys/times.h>
+ #include <assert.h>
+ #include "../_malloc.h"
+ 
+ /* Scalar reference implementation; defined at the bottom of this file. */
+ inline void transpose_scalar(short*, short*);
+ /* Vector implementation; not defined in this file (presumably supplied by one
+  * of the transpose.*.c files in this directory -- confirm against the build). */
+ void transpose_vector(short*, short*);
+ 
+ /* Buffers allocated in init(): the shared input and one output per variant. */
+ short *in;
+ short *out_vector;
+ short *out_scalar;
+ 
+ /* Allocate the three N-element buffers (in, out_vector, out_scalar) and fill
+  * `in` with the descending sequence N/2, N/2-1, ..., N/2-(N-1).
+  * Prints a message to stderr and exits with status 1 if an allocation fails. */
+ void init() {
+   int i;
+ 
+   // _malloc (from ../_malloc.h) is used instead of malloc to force
+   // 16-byte alignment of each buffer.
+   in = (short*) _malloc(N*sizeof(short));
+   out_vector = (short*) _malloc(N*sizeof(short));
+   out_scalar = (short*) _malloc(N*sizeof(short));
+ 
+   // Without this guard the fill loop below would write through a null
+   // pointer when an allocation fails.
+   if (in == NULL || out_vector == NULL || out_scalar == NULL) {
+     fprintf(stderr, "init: buffer allocation failed\n");
+     exit(1);
+   }
+ 
+   // Populate in with a range of values
+   for (i = 0; i < N; ++i) {
+     in[i] = N/2-i;
+   }
+ }
+ 
+ /* Run one benchmark round.
+  *
+  * Times 1000000 passes of transpose_scalar, then 1000000 passes of
+  * transpose_vector, over every 64-element block of `in`.  Prints both
+  * user-CPU tick counts (tms_utime deltas from times()) and their ratio.
+  * Then compares out_vector with out_scalar element by element; on the
+  * first mismatch it prints "FAILED" and exits with status 1.
+  *
+  * Returns scalar_time / vector_time as a float.
+  * NOTE(review): if the vector loop takes 0 ticks the ratio is not finite. */
+ float run() {
+     int i,j;
+     struct tms buf_s, buf_e;
+     long scalar_time = 0, vector_time = 0;
+ 
+     times(&buf_s);
+     for (i = 0; i < 1000000; ++i)
+       for (j = 0; j < N/64; ++j)
+         transpose_scalar(in+64*j, out_scalar+64*j);
+     times(&buf_e);
+     scalar_time = (long) (buf_e.tms_utime - buf_s.tms_utime);
+     /* %ld, not %d: the tick counts are held in longs. */
+     printf("scalar time=%ld, ", scalar_time);
+ 
+     times(&buf_s);
+     for (i = 0; i < 1000000; ++i)
+       for (j = 0; j < N/64; ++j)
+         transpose_vector(in+64*j, out_vector+64*j);
+     times(&buf_e);
+     vector_time = (long) (buf_e.tms_utime - buf_s.tms_utime);
+     printf("vector time=%ld, ", vector_time);
+ 
+     float speedup = ((float) scalar_time)/vector_time;
+     printf("speedup=%f\n", speedup);
+ 
+     /* Both variants must have produced the same transposed data. */
+     for (i = 0; i < N; i++) {
+       if (out_vector[i] != out_scalar[i]) {
+         printf("FAILED\n");
+         exit(1);
+       }
+     }
+ 
+     return speedup;
+ }
+ 
+ /* Entry point: call init() once, call run() NRUNS times, then print the
+  * largest speedup returned and "PASSED".  NRUNS is not defined in this file;
+  * presumably the build supplies it -- TODO confirm in ../Makefile.common. */
+ int main (void)
+ {
+   float best = 0;
+   unsigned pass;
+ 
+   init();
+   for (pass = 0; pass < NRUNS; ++pass) {
+     float speedup = run();
+     best = (speedup > best) ? speedup : best;
+   }
+ 
+   printf("best speedup=%f\n", best);
+   printf ("PASSED\n");
+   return 0;
+ }
+ 
+ /* Scalar reference transpose of one 8x8 block of shorts.
+  * input_scalar and output_scalar each address 64 row-major elements; the
+  * element at row r, column c of the input is written to row c, column r of
+  * the output.  The input is walked one row at a time. */
+ void transpose_scalar ( short *input_scalar, short *output_scalar) {
+   unsigned row, col;
+   for (row = 0; row < 8; ++row) {
+     /* Scatter input row `row` down output column `row`. */
+     for (col = 0; col < 8; ++col) {
+       output_scalar[8*col + row] = input_scalar[8*row + col];
+     }
+   }
+ }

Index: llvm/examples/SIMD/Transpose/transpose.altivec.handwritten.c
diff -c /dev/null llvm/examples/SIMD/Transpose/transpose.altivec.handwritten.c:1.1.2.1
*** /dev/null	Sun Oct 23 17:50:24 2005
--- llvm/examples/SIMD/Transpose/transpose.altivec.handwritten.c	Sun Oct 23 17:49:42 2005
***************
*** 0 ****
--- 1,44 ----
+ /* Debug helper: print the 8 16-bit lanes of v as 4-digit hex words separated
+  * by spaces, followed by a newline. */
+ void print_vector(vector short v) {
+   unsigned i;
+   /* Read the lanes as unsigned: a negative short promoted to int would be
+    * printed by %X as 8 digits (FFFFxxxx) instead of 4. */
+   const unsigned short *lane = (const unsigned short*) &v;
+   for (i = 0; i < 8; ++i)
+     printf("%04X ", (unsigned) lane[i]);
+   printf("\n");
+ }
+ 
+ /* AltiVec 8x8 transpose of 16-bit elements.
+  *
+  * input_scalar and output_scalar each address 64 shorts, accessed here as
+  * eight `vector signed short` rows.  Three rounds of vec_mergeh/vec_mergel
+  * interleaving turn rows into columns.  In the lane diagrams "rc" means the
+  * element at row r, column c of the input block.
+  *
+  * Not declared `inline`: main.c calls this function from another translation
+  * unit, and under C99 a plain `inline` definition emits no external symbol.
+  *
+  * NOTE(review): the pointer casts presumably require 16-byte-aligned
+  * buffers -- confirm against the caller's allocator. */
+ void transpose_vector ( short *input_scalar, short *output_scalar)
+ {
+   vector signed short *input = (vector signed short*) input_scalar;
+   vector signed short *output = (vector signed short*) output_scalar;
+ 
+   vector signed short a0, a1, a2, a3, a4, a5, a6, a7;
+   vector signed short b0, b1, b2, b3, b4, b5, b6, b7;
+ 
+   /* Round 1: interleave row k with row k+4. */
+   b0 = vec_mergeh( input[0], input[4] );     /* [ 00 40 01 41 02 42 03 43 ]*/
+   b1 = vec_mergel( input[0], input[4] );     /* [ 04 44 05 45 06 46 07 47 ]*/
+   b2 = vec_mergeh( input[1], input[5] );     /* [ 10 50 11 51 12 52 13 53 ]*/
+   b3 = vec_mergel( input[1], input[5] );     /* [ 14 54 15 55 16 56 17 57 ]*/
+   b4 = vec_mergeh( input[2], input[6] );     /* [ 20 60 21 61 22 62 23 63 ]*/
+   b5 = vec_mergel( input[2], input[6] );     /* [ 24 64 25 65 26 66 27 67 ]*/
+   b6 = vec_mergeh( input[3], input[7] );     /* [ 30 70 31 71 32 72 33 73 ]*/
+   b7 = vec_mergel( input[3], input[7] );     /* [ 34 74 35 75 36 76 37 77 ]*/
+ 
+   /* Round 2: interleave b[k] with b[k+4]; even rows now sit together. */
+   a0 = vec_mergeh( b0, b4 );                 /* [ 00 20 40 60 01 21 41 61 ]*/
+   a1 = vec_mergel( b0, b4 );                 /* [ 02 22 42 62 03 23 43 63 ]*/
+   a2 = vec_mergeh( b1, b5 );                 /* [ 04 24 44 64 05 25 45 65 ]*/
+   a3 = vec_mergel( b1, b5 );                 /* [ 06 26 46 66 07 27 47 67 ]*/
+   a4 = vec_mergeh( b2, b6 );                 /* [ 10 30 50 70 11 31 51 71 ]*/
+   a5 = vec_mergel( b2, b6 );                 /* [ 12 32 52 72 13 33 53 73 ]*/
+   a6 = vec_mergeh( b3, b7 );                 /* [ 14 34 54 74 15 35 55 75 ]*/
+   a7 = vec_mergel( b3, b7 );                 /* [ 16 36 56 76 17 37 57 77 ]*/
+ 
+   /* Round 3: interleave a[k] with a[k+4]; each result is one input column. */
+   output[0] = vec_mergeh( a0, a4 );          /* [ 00 10 20 30 40 50 60 70 ]*/
+   output[1] = vec_mergel( a0, a4 );          /* [ 01 11 21 31 41 51 61 71 ]*/
+   output[2] = vec_mergeh( a1, a5 );          /* [ 02 12 22 32 42 52 62 72 ]*/
+   output[3] = vec_mergel( a1, a5 );          /* [ 03 13 23 33 43 53 63 73 ]*/
+   output[4] = vec_mergeh( a2, a6 );          /* [ 04 14 24 34 44 54 64 74 ]*/
+   output[5] = vec_mergel( a2, a6 );          /* [ 05 15 25 35 45 55 65 75 ]*/
+   output[6] = vec_mergeh( a3, a7 );          /* [ 06 16 26 36 46 56 66 76 ]*/
+   output[7] = vec_mergel( a3, a7 );          /* [ 07 17 27 37 47 57 67 77 ]*/
+ 
+ }

Index: llvm/examples/SIMD/Transpose/transpose.sse.handwritten.c
diff -c /dev/null llvm/examples/SIMD/Transpose/transpose.sse.handwritten.c:1.1.2.1
*** /dev/null	Sun Oct 23 17:50:24 2005
--- llvm/examples/SIMD/Transpose/transpose.sse.handwritten.c	Sun Oct 23 17:49:42 2005
***************
*** 0 ****
--- 1,38 ----
+ #include "SSE.h"
+ 
+ /* SSE2 8x8 transpose of 16-bit elements.
+  *
+  * input_scalar and output_scalar each address 64 shorts, accessed here as
+  * eight __m128i rows.  Three rounds of _mm_unpacklo_epi16 /
+  * _mm_unpackhi_epi16 interleaving turn rows into columns.  In the lane
+  * diagrams "rc" means the element at row r, column c of the input block.
+  *
+  * Not declared `inline`: main.c calls this function from another translation
+  * unit, and under C99 a plain `inline` definition emits no external symbol.
+  *
+  * NOTE(review): the __m128i pointer accesses presumably require
+  * 16-byte-aligned buffers -- confirm against the caller's allocator. */
+ void transpose_vector ( short *input_scalar, short *output_scalar)
+ {
+   __m128i *input = (__m128i*) input_scalar;
+   __m128i *output = (__m128i*) output_scalar;
+ 
+   __m128i a0, a1, a2, a3, a4, a5, a6, a7;
+   __m128i b0, b1, b2, b3, b4, b5, b6, b7;
+ 
+   /* Round 1: interleave row k with row k+4. */
+   b0 = _mm_unpacklo_epi16( input[0], input[4] );     /* [ 00 40 01 41 02 42 03 43 ]*/
+   b1 = _mm_unpackhi_epi16( input[0], input[4] );     /* [ 04 44 05 45 06 46 07 47 ]*/
+   b2 = _mm_unpacklo_epi16( input[1], input[5] );     /* [ 10 50 11 51 12 52 13 53 ]*/
+   b3 = _mm_unpackhi_epi16( input[1], input[5] );     /* [ 14 54 15 55 16 56 17 57 ]*/
+   b4 = _mm_unpacklo_epi16( input[2], input[6] );     /* [ 20 60 21 61 22 62 23 63 ]*/
+   b5 = _mm_unpackhi_epi16( input[2], input[6] );     /* [ 24 64 25 65 26 66 27 67 ]*/
+   b6 = _mm_unpacklo_epi16( input[3], input[7] );     /* [ 30 70 31 71 32 72 33 73 ]*/
+   b7 = _mm_unpackhi_epi16( input[3], input[7] );     /* [ 34 74 35 75 36 76 37 77 ]*/
+ 
+   /* Round 2: interleave b[k] with b[k+4]; even rows now sit together. */
+   a0 = _mm_unpacklo_epi16( b0, b4 );                 /* [ 00 20 40 60 01 21 41 61 ]*/
+   a1 = _mm_unpackhi_epi16( b0, b4 );                 /* [ 02 22 42 62 03 23 43 63 ]*/
+   a2 = _mm_unpacklo_epi16( b1, b5 );                 /* [ 04 24 44 64 05 25 45 65 ]*/
+   a3 = _mm_unpackhi_epi16( b1, b5 );                 /* [ 06 26 46 66 07 27 47 67 ]*/
+   a4 = _mm_unpacklo_epi16( b2, b6 );                 /* [ 10 30 50 70 11 31 51 71 ]*/
+   a5 = _mm_unpackhi_epi16( b2, b6 );                 /* [ 12 32 52 72 13 33 53 73 ]*/
+   a6 = _mm_unpacklo_epi16( b3, b7 );                 /* [ 14 34 54 74 15 35 55 75 ]*/
+   a7 = _mm_unpackhi_epi16( b3, b7 );                 /* [ 16 36 56 76 17 37 57 77 ]*/
+ 
+   /* Round 3: interleave a[k] with a[k+4]; each result is one input column. */
+   output[0] = _mm_unpacklo_epi16( a0, a4 );          /* [ 00 10 20 30 40 50 60 70 ]*/
+   output[1] = _mm_unpackhi_epi16( a0, a4 );          /* [ 01 11 21 31 41 51 61 71 ]*/
+   output[2] = _mm_unpacklo_epi16( a1, a5 );          /* [ 02 12 22 32 42 52 62 72 ]*/
+   output[3] = _mm_unpackhi_epi16( a1, a5 );          /* [ 03 13 23 33 43 53 63 73 ]*/
+   output[4] = _mm_unpacklo_epi16( a2, a6 );          /* [ 04 14 24 34 44 54 64 74 ]*/
+   output[5] = _mm_unpackhi_epi16( a2, a6 );          /* [ 05 15 25 35 45 55 65 75 ]*/
+   output[6] = _mm_unpacklo_epi16( a3, a7 );          /* [ 06 16 26 36 46 56 66 76 ]*/
+   output[7] = _mm_unpackhi_epi16( a3, a7 );          /* [ 07 17 27 37 47 57 67 77 ]*/
+ 
+ }

Index: llvm/examples/SIMD/Transpose/transpose.vectorc.c
diff -c /dev/null llvm/examples/SIMD/Transpose/transpose.vectorc.c:1.1.2.1
*** /dev/null	Sun Oct 23 17:50:24 2005
--- llvm/examples/SIMD/Transpose/transpose.vectorc.c	Sun Oct 23 17:49:42 2005
***************
*** 0 ****
--- 1,42 ----
+ #include "VectorC.h"
+ 
+ /* MERGE(out01, out0, out1, in0, in1) expands to declarations of three new
+  * `short` variables: out01 is built from in0 and in1 by two
+  * vllvm_fixed_combine_short calls, then out0 and out1 are taken from out01
+  * by vllvm_extract_short.  Because it declares out01/out0/out1, each name
+  * can be used only once per scope.
+  * NOTE(review): presumably this is the interleave that the hand-written
+  * versions do with a merge-high/merge-low pair -- confirm the argument
+  * meanings against VectorC.h. */
+ #define MERGE(out01, out0, out1, in0, in1) \
+   short out01 = vllvm_fixed_vimm_short(0, 16); \
+   out01 = vllvm_fixed_combine_short(out01, 16, in0, 8, 0, 1); \
+   out01 = vllvm_fixed_combine_short(out01, 16, in1, 8, 8, 1); \
+   short out0 = vllvm_extract_short(out01, 0, 2, 8); \
+   short out1 = vllvm_extract_short(out01, 1, 2, 8)
+ 
+ /* IN(x) expands to vllvm_load_short(input_scalar, 8, x); it relies on a
+  * variable named input_scalar being in scope at the point of use. */
+ #define IN(x) \
+   vllvm_load_short(input_scalar, 8, x)
+ 
+ /* STORE(out, idx) expands to vllvm_store_short(out, output_scalar, idx); it
+  * relies on a variable named output_scalar being in scope. */
+ #define STORE(out, idx) \
+   vllvm_store_short(out, output_scalar, idx)
+ 
+ /* Vector C version of the 8x8 short transpose, written with the MERGE / IN /
+  * STORE macros above.  It follows the same three-round structure as the
+  * hand-written versions: rows k and k+4 are merged, then the intermediate
+  * results k and k+4 are merged twice more, and the eight final values are
+  * stored at indices 0..7 of output_scalar.
+  *
+  * Not declared `inline`: main.c calls this function from another translation
+  * unit, and under C99 a plain `inline` definition emits no external symbol. */
+ void transpose_vector (short *input_scalar, short *output_scalar) {
+   /* Round 1: merge input row k with row k+4. */
+   MERGE(b01, b0, b1, IN(0), IN(4));
+   MERGE(b23, b2, b3, IN(1), IN(5));
+   MERGE(b45, b4, b5, IN(2), IN(6));
+   MERGE(b67, b6, b7, IN(3), IN(7));
+ 
+   /* Round 2: merge b[k] with b[k+4]. */
+   MERGE(a01, a0, a1, b0, b4);
+   MERGE(a23, a2, a3, b1, b5);
+   MERGE(a45, a4, a5, b2, b6);
+   MERGE(a67, a6, a7, b3, b7);
+ 
+   /* Round 3: merge a[k] with a[k+4]. */
+   MERGE(out01, out0, out1, a0, a4);
+   MERGE(out23, out2, out3, a1, a5);
+   MERGE(out45, out4, out5, a2, a6);
+   MERGE(out67, out6, out7, a3, a7);
+ 
+   /* All stores come after all loads. */
+   STORE(out0, 0);
+   STORE(out1, 1);
+   STORE(out2, 2);
+   STORE(out3, 3);
+   STORE(out4, 4);
+   STORE(out5, 5);
+   STORE(out6, 6);
+   STORE(out7, 7);
+ 
+ }
+