[llvm-commits] [test-suite] r160413 - in /test-suite/trunk/SingleSource/Benchmarks/Misc: matmul_f64_4x4.c matmul_f64_4x4.reference_output

Tue Jul 17 17:23:16 PDT 2012

Author: stoklund
Date: Tue Jul 17 19:23:16 2012
New Revision: 160413

URL: http://llvm.org/viewvc/llvm-project?rev=160413&view=rev
Log:
Add a test case for an unrolled 4x4 matrix multiplication.

Unrolled matrix multiply is an interesting problem for scheduling and
register allocation.

This particular test also exposes a problem with ARM legalization after
SROA turns the double[16] array into a single i1024 scalar.

Added:
    test-suite/trunk/SingleSource/Benchmarks/Misc/matmul_f64_4x4.c
    test-suite/trunk/SingleSource/Benchmarks/Misc/matmul_f64_4x4.reference_output

Added: test-suite/trunk/SingleSource/Benchmarks/Misc/matmul_f64_4x4.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/SingleSource/Benchmarks/Misc/matmul_f64_4x4.c?rev=160413&view=auto
==============================================================================

--- test-suite/trunk/SingleSource/Benchmarks/Misc/matmul_f64_4x4.c (added)
+++ test-suite/trunk/SingleSource/Benchmarks/Misc/matmul_f64_4x4.c Tue Jul 17 19:23:16 2012
@@ -0,0 +1,70 @@
+#include <stdio.h>
+
+/* Timing test for unrolled 4x4 matrix multiplication, double precision. */
+
+static void mul4(double *Out, const double A[4][4], const double B[4][4]) {
+  unsigned n;
+
+  /* Assume that Out may alias A or B. The simple array also lures SROA into
+   * creating a single i1024 scalar. */
+  double Res[16];
+
+  Res[ 0] = A[0][0]*B[0][0] + A[0][1]*B[1][0] + A[0][2]*B[2][0] + A[0][3]*B[3][0];
+  Res[ 1] = A[0][0]*B[0][1] + A[0][1]*B[1][1] + A[0][2]*B[2][1] + A[0][3]*B[3][1];
+  Res[ 2] = A[0][0]*B[0][2] + A[0][1]*B[1][2] + A[0][2]*B[2][2] + A[0][3]*B[3][2];
+  Res[ 3] = A[0][0]*B[0][3] + A[0][1]*B[1][3] + A[0][2]*B[2][3] + A[0][3]*B[3][3];
+  Res[ 4] = A[1][0]*B[0][0] + A[1][1]*B[1][0] + A[1][2]*B[2][0] + A[1][3]*B[3][0];
+  Res[ 5] = A[1][0]*B[0][1] + A[1][1]*B[1][1] + A[1][2]*B[2][1] + A[1][3]*B[3][1];
+  Res[ 6] = A[1][0]*B[0][2] + A[1][1]*B[1][2] + A[1][2]*B[2][2] + A[1][3]*B[3][2];
+  Res[ 7] = A[1][0]*B[0][3] + A[1][1]*B[1][3] + A[1][2]*B[2][3] + A[1][3]*B[3][3];
+  Res[ 8] = A[2][0]*B[0][0] + A[2][1]*B[1][0] + A[2][2]*B[2][0] + A[2][3]*B[3][0];
+  Res[ 9] = A[2][0]*B[0][1] + A[2][1]*B[1][1] + A[2][2]*B[2][1] + A[2][3]*B[3][1];
+  Res[10] = A[2][0]*B[0][2] + A[2][1]*B[1][2] + A[2][2]*B[2][2] + A[2][3]*B[3][2];
+  Res[11] = A[2][0]*B[0][3] + A[2][1]*B[1][3] + A[2][2]*B[2][3] + A[2][3]*B[3][3];
+  Res[12] = A[3][0]*B[0][0] + A[3][1]*B[1][0] + A[3][2]*B[2][0] + A[3][3]*B[3][0];
+  Res[13] = A[3][0]*B[0][1] + A[3][1]*B[1][1] + A[3][2]*B[2][1] + A[3][3]*B[3][1];
+  Res[14] = A[3][0]*B[0][2] + A[3][1]*B[1][2] + A[3][2]*B[2][2] + A[3][3]*B[3][2];
+  Res[15] = A[3][0]*B[0][3] + A[3][1]*B[1][3] + A[3][2]*B[2][3] + A[3][3]*B[3][3];
+
+  for (n = 0; n < 16; ++n)
+    Out[n] = Res[n];
+}
+
+/* wrap_mul4 itself is marked noinline, but the static mul4 may still be
+ * inlined into it. That inlining actually enables further optimizations. */
+__attribute__((__noinline__))
+void wrap_mul4(double *Out, const double A[4][4], const double B[4][4])
+{
+  mul4(Out, A, B);
+}
+
+int main() {
+#ifdef SMALL_PROBLEM_SIZE
+  const unsigned Iterations = 1000000;
+#else
+  const unsigned Iterations = 50000000;
+#endif
+  const double A[4][4] = {
+    { 4.5, 1.3, 6.0, 4.1 },
+    { 2.5, 7.2, 7.7, 1.7 },
+    { 6.7, 1.3, 9.4, 1.3 },
+    { 1.1, 2.2, 3.0, 2.1 }
+  };
+  const double B[4][4] = {
+    { 1.0, 7.9, 5.1, 3.4 },
+    { 6.6, 2.8, 5.4, 9.2 },
+    { 5.0, 4.1, 4.1, 9.9 },
+    { 8.4, 3.7, 9.5, 6.4 }
+  };
+  double C[4][4];
+  unsigned n, m;
+
+  for (n = 0; n != Iterations; ++n)
+    wrap_mul4(&C[0][0], A, B);
+
+  for (n = 0; n != 4; ++n) {
+    for (m = 0; m != 4; ++m)
+      printf("%8.2f", C[n][m]);
+    puts("");
+  }
+}

Added: test-suite/trunk/SingleSource/Benchmarks/Misc/matmul_f64_4x4.reference_output
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/SingleSource/Benchmarks/Misc/matmul_f64_4x4.reference_output?rev=160413&view=auto
==============================================================================
--- test-suite/trunk/SingleSource/Benchmarks/Misc/matmul_f64_4x4.reference_output (added)
+++ test-suite/trunk/SingleSource/Benchmarks/Misc/matmul_f64_4x4.reference_output Tue Jul 17 19:23:16 2012
@@ -0,0 +1,5 @@
+   77.52   78.96   93.52  112.90
+  102.80   77.77   99.35  161.85
+   73.20   99.92   92.08  136.12
+   48.26   34.92   49.74   67.12
+exit 0