[llvm-commits] CVS: llvm/test/Programs/MultiSource/aha/Makefile aha.c aha.h aha.pdf read.me userfun.c
Chris Lattner
lattner at cs.uiuc.edu
Sun May 11 21:01:01 PDT 2003
Changes in directory llvm/test/Programs/MultiSource/aha:
Makefile added (r1.1)
aha.c added (r1.1)
aha.h added (r1.1)
aha.pdf added (r1.1)
read.me added (r1.1)
userfun.c added (r1.1)
---
Log message:
New test program
---
Diffs of the changes:
Index: llvm/test/Programs/MultiSource/aha/Makefile
diff -c /dev/null llvm/test/Programs/MultiSource/aha/Makefile:1.1
*** /dev/null Sun May 11 21:00:20 2003
--- llvm/test/Programs/MultiSource/aha/Makefile Sun May 11 21:00:10 2003
***************
*** 0 ****
--- 1,5 ----
+ LEVEL = ../../../..
+ PROG = aha
+ CPPFLAGS =
+ LDFLAGS =
+ include ../Makefile.multisrc
Index: llvm/test/Programs/MultiSource/aha/aha.c
diff -c /dev/null llvm/test/Programs/MultiSource/aha/aha.c:1.1
*** /dev/null Sun May 11 21:00:20 2003
--- llvm/test/Programs/MultiSource/aha/aha.c Sun May 11 21:00:10 2003
***************
*** 0 ****
--- 1,470 ----
+ /* A Hacker's Assistant */
+
+ // Copyright (C) 2002 by Henry S. Warren, Jr.
+ #include <stdlib.h>
+ #include <stdio.h>
+ #include "aha.h"
+
+ // --------------------------- print_expr ------------------------------
+
+ /* Print the value held in register opn as a C-like expression: an
+    immediate prints as a number, an argument register as "x" or "y", and
+    an instruction result as its operator applied (recursively) to the
+    expressions of its operands. */
+ void
+ print_expr(int opn)
+ {
+     int insn, op, n;
+
+     if (opn == RX) {                    // First argument.
+         printf("x");
+         return;
+     }
+ #if NARGS >= 2
+     if (opn == RY) {                    // Second argument.
+         printf("y");
+         return;
+     }
+ #endif
+     if (opn < RX) {                     // Immediate value.
+         // Small magnitudes read best in decimal, everything else in hex.
+         if (r[opn] < -31 || r[opn] > 31)
+             printf("0x%X", r[opn]);
+         else
+             printf("%d", r[opn]);
+         return;
+     }
+
+     // Anything above the argument registers is an instruction result.
+     insn = opn - RI0;
+     op = pgm[insn].op;
+     printf("%s", isa[op].fun_name);
+     for (n = 0; n < isa[op].numopnds; n++) {
+         print_expr(pgm[insn].opnd[n]);
+         if (n == isa[op].numopnds - 1)
+             printf(")");                // Close after the last operand,
+         else
+             printf("%s", isa[op].op_name);  // else print the operator.
+     }
+ }
+
+ // --------------------------- print_pgm -------------------------------
+
+ /* Print the current trial program, one instruction per line in an
+    assembly-like form, followed by the same program written as a single
+    expression.  When debug is set, each line also shows the value last
+    computed into that instruction's result register. */
+ void
+ print_pgm()
+ {
+     int insn;
+
+     for (insn = 0; insn < numi; insn++) {
+         const int op = pgm[insn].op;
+         const int count = isa[op].numopnds;
+         int n;
+
+         printf(" %-5s r%d,", isa[op].mnemonic, insn + 1);
+         for (n = 0; n < count; n++) {
+             const int reg = pgm[insn].opnd[n];
+
+             if (reg < RX) {
+                 // Immediates are shown by value, not by register number.
+                 const int value = r[reg];
+
+                 if (value < -31 || value > 31)
+                     printf("0x%X", value);
+                 else
+                     printf("%d", value);
+             }
+             else if (reg == RX) {
+                 printf("rx");
+             }
+ #if NARGS > 1
+             else if (reg == RY) {
+                 printf("ry");
+             }
+ #endif
+             else {
+                 // Result registers are numbered from r1 in the listing.
+                 printf("r%d", reg - RI0 + 1);
+             }
+             if (n != count - 1)
+                 printf(",");
+         }
+         if (debug)
+             printf(" ==> %d (0x%X)\n", r[insn+RI0], r[insn+RI0]);
+         else
+             printf("\n");
+     }
+
+     // The last instruction's result is the value of the whole program.
+     printf(" Expr: ");
+     print_expr(numi - 1 + RI0);
+     printf("\n");
+ }
+
+ // -------------------- simulate_one_instruction -----------------------
+
+ /* Execute instruction i of the trial program: fetch its three operand
+    registers, call the simulator routine for its opcode, and store the
+    result in register i + RI0.  When `counters` is set, the evaluation is
+    tallied in counter[i].
+
+    All three operand slots are read regardless of how many operands the
+    opcode really has; every simulator routine takes three ints and simply
+    ignores the ones it does not need.
+
+    Declared `static inline` rather than plain `inline`: in C99 and later a
+    plain `inline` definition does not provide an external definition, so
+    any call the compiler chooses not to inline would fail to link. */
+ static inline void
+ simulate_one_instruction(int i)
+ {
+     int arg0, arg1, arg2;
+
+     arg0 = r[pgm[i].opnd[0]];
+     arg1 = r[pgm[i].opnd[1]];
+     arg2 = r[pgm[i].opnd[2]];
+
+     r[i + RI0] = (*isa[pgm[i].op].proc)(arg0, arg1, arg2);
+     if (counters) counter[i] = counter[i] + 1;
+ }
+
+ // ----------------------------- check ---------------------------------
+
+ /* Test the current trial program.
+
+    First, instructions i through numi - 1 are simulated for the trial
+    value(s) currently in r[RX] (and r[RY]); the results of instructions
+    before i are taken from r[] as they were left by an earlier call.  If
+    the final result equals corr_result, the whole program is then re-run
+    from instruction 0 for each of the other trial values.
+
+    Returns 1 if the program gave the correct result every time, 0 as soon
+    as it gives a wrong result or executes an unacceptable operation (in
+    which case `unacceptable` is cleared again).
+
+    The trial indices are static, so after a failure r[RX]/r[RY] and
+    corr_result stay at the trial value that failed; the next program is
+    tested against that value first. */
+ int
+ check(int i)
+ {
+     int kx;
+     static int itrialx;                 // Init 0.
+ #if NARGS == 2
+     static int itrialy;
+ #endif
+
+     if (debug) {
+ #if NARGS == 1
+         printf("\nSimulating with trial arg x = %d (0x%X):\n",
+                r[RX],r[RX]);
+ #else
+         printf("\nSimulating with (x, y) = (%d, %d) ((0x%X, 0x%X)):\n",
+                r[RX], r[RY], r[RX], r[RY]);
+ #endif
+     }
+
+     // Simulate the i'th insn and every insn after it.  At least one
+     // instruction is always simulated.
+     do {
+         simulate_one_instruction(i);
+         i = i + 1;
+     } while (i < numi);
+
+     if (unacceptable) {                 // E.g., if divide by 0:
+         if (debug) printf("Unacceptable program (invalid operation).\n");
+         unacceptable = 0;
+         return 0;
+     }
+
+     if (debug) {
+         print_pgm();                    // print_pgm takes no arguments.
+         printf("Computed result = %d, correct result = %d, %s\n",
+                r[numi-1+RI0], corr_result, r[numi-1+RI0] == corr_result ? "ok" : "fail");
+     }
+     if (r[numi-1+RI0] != corr_result)   // If not the correct
+         return 0;                       // result, failure.
+
+     // Got the correct result.  Check this program using all trial values.
+
+     for (kx = 0; kx < NTRIALX - 1; kx++) {
+         itrialx += 1;
+         if (itrialx >= NTRIALX) itrialx = 0;
+         r[RX] = trialx[itrialx];
+ #if NARGS == 1
+         corr_result = correct_result[itrialx];
+ #else
+         for (int ky = 0; ky < NTRIALY - 1; ky++) {
+             itrialy += 1;
+             if (itrialy >= NTRIALY) itrialy = 0;
+             r[RX] = trialx[itrialx];
+             r[RY] = trialy[itrialy];
+             corr_result = correct_result[itrialx][itrialy];
+ #endif
+
+         /* Now we simulate the current program, i.e., the instructions
+            from 0 to numi-1.  The result of instruction i goes in
+            register i + RI0. */
+
+         if (debug) {
+ #if NARGS == 1
+             printf("\nContinuing this pgm with arg x = %d (0x%X):\n",
+                    r[RX], r[RX]);
+ #else
+             printf("\nContinuing this pgm with (x, y) = (%d, %d) ((0x%X, 0x%X)):\n",
+                    r[RX], r[RY], r[RX], r[RY]);
+ #endif
+         }
+         for (i = 0; i < numi; i++) {    // Simulate program from
+             simulate_one_instruction(i);   // beginning to end.
+         }
+         if (unacceptable) {unacceptable = 0; return 0;}
+         if (debug) {
+             print_pgm();
+             printf("Computed result = %d, correct result = %d, %s\n",
+                    r[numi+RI0-1], corr_result, r[numi+RI0-1] == corr_result ? "ok" : "fail");
+         }
+         if (r[numi+RI0-1] != corr_result) return 0;
+ #if NARGS == 2
+         } // end ky
+ #endif
+     } // end kx
+     return 1;                           // Passed all tests, found a
+                                         // probably correct program.
+ }
+
+ // -------------------------- fix_operands -----------------------------
+
+ void
+ fix_operands(int i)
+ {
+
+ /* This program fixes instruction i so that:
+
+ (1) if it is the last instruction, at least one operand uses the
+ result of the immediately preceding instruction, and furthermore if
+ the second from last instruction does not use the result of its
+ predecessor, then the last instruction must use that result also.
+ (2) not all operands are immediate values, and (We assume it would be
+ a waste of time to process an instruction with all immediate
+ operands).
+ (3) if it is commutative, operand 0 >= operand 1,
+
+ It does these fixes by "increasing" the instruction by a minimal
+ amount, so that the incrementing of instructions is kept in order and no
+ legitimate instructions are skipped.
+ A hard part to understand is the logic of (1) above. Let us assume
+ for illustration that the program has four instructions (numi = 4).
+ Then when this subroutine is called to process the last instruction (i =
+ numi - 1), the operands may be in any of the configurations shown below.
+ The last instruction sets r4, the second from last instruction sets r3,
+ and its predecessor sets r2. ii denotes a register containing an
+ immediate value, or a register <= RY; in particular ii < r2. We assume
+ the last instruction ("op") has three input operands, as that is the
+ more difficult case, and that the second from last instruction does not
+ use r2. Therefore the last instruction must be altered so that it uses
+ both r2 and r3.
+
+ operand: 0 1 2 0 1 2
+ op r4,ii,ii,ii ==> op r4,r3,r2,ii Add r2 and r3.
+ op r4,ii,r2,ii ==> op r4,r3,r2,ii Add r3.
+ op r4,ii,r3,ii ==> op r4,r2,r3,ii Add r2.
+ op r4,ii,ii,r2 ==> op r4,r3,ii,r2 Add r3.
+ op r4,ii,r2,r2 ==> op r4,r3,r2,r2 Add r3.
+ op r4,ii,r3,r2 ==> no change
+ op r4,ii,ii,r3 ==> op r4,r2,ii,r3 Add r2.
+ op r4,ii,r2,r3 ==> no change
+ op r4,ii,r3,r3 ==> op r4,r2,r3,r3
+
+ These are the only possibilities. The first input operand cannot be
+ r2 or r3, because if it were, then it must have just been incremented
+ from r1 or r2 resp., and in this case "increment" does not call
+ "fix_operands."
+ The first row above means that if none of the last instruction's
+ operands are r2 or r3, then the change that adds r2 and r3 and that
+ "minimizes" the resulting instruction is to change operand 0 to r3 and
+ operand 1 to r2. The second row shows a case in which r2 is already
+ present, but r3 is not. The minimal change is to change operand 0 to r3.
+ Examination of all the possibilities reveals that a workable simple
+ rule is:
+ (1) If r3 is not used, then change operand 0 to be r3.
+ (2) Then, if r2 is not used, change operand 0 to r2 unless that
+ decreases the instruction, in which case change operand 1 to r2.
+ These rules are coded in the block headed by "if (i == numi - 1)".
+ It might seem that the program should test that pgm[i].opnd[0] is not
+ equal to rs or rt; however, as noted above operand 0 is never equal
+ to those registers at this point.
+ This scheme is sufficient to ensure that if numi = 3, no trial
+ program has an unused computed value. If numi = 4, a small percentage
+ of trial programs will have an unused computed value. Incorporation
+ of the r2 part of it improved the execution time by about a factor of
+ 1.4 if numi = 3, and a factor of 1.8 if numi = 4. If numi = 5, there
+ is probably a substantial percentage of trial programs with one or
+ more unused computed values; it hasn't been tried. */
+
+     int rs, rt, k;
+
+     k = pgm[i].op;
+
+     if (i == numi - 1) {                // If this is the last insn:
+         rs = numi + RI0 - 2;            // Second from last reg.
+         if (pgm[i].opnd[1] != rs && pgm[i].opnd[2] != rs) {
+             pgm[i].opnd[0] = rs;
+         }
+         rt = rs - 1;                    // Third from last reg.
+
+         // Test rt >= RI0 FIRST.  It holds only when numi >= 3, which
+         // guarantees that pgm[i-1] exists; testing it last (as was done
+         // originally) read pgm[-1] whenever numi == 1.
+         if (rt >= RI0 &&
+             pgm[i-1].opnd[0] != rt && pgm[i-1].opnd[1] != rt &&
+             pgm[i-1].opnd[2] != rt && pgm[i].opnd[1] != rt &&
+             pgm[i].opnd[2] != rt) {
+
+             // The last instruction needs to reference rt.
+
+             if (pgm[i].opnd[0] < rt) pgm[i].opnd[0] = rt;
+             else if (isa[k].numopnds > 1) pgm[i].opnd[1] = rt;
+
+             // else (unary op), forget it.
+         }
+     }
+
+     if (isa[k].commutative) {
+         if (pgm[i].opnd[0] < pgm[i].opnd[1])
+             pgm[i].opnd[0] = pgm[i].opnd[1];
+         return;                 // No need to do next check, as opnd[0]
+     }                           // is always a reg containing a variable.
+
+     // Only non-commutative ops reach this point, so no commutativity
+     // re-check is needed here.
+     if (i != numi - 1) {
+         if (pgm[i].opnd[0] < RX && pgm[i].opnd[1] < RX &&
+             pgm[i].opnd[2] < RX) {
+             pgm[i].opnd[0] = RX;        // Avoid all-immediate operands.
+         }
+     }
+ }
+
+ // --------------------------- increment -------------------------------
+
+ /* Advance pgm[] to the next trial program.  Returns the lowest index of
+    the instructions that were modified, or -1 when every program of the
+    current length has been generated.
+
+    Declared `static inline` rather than plain `inline`: in C99 and later a
+    plain `inline` definition does not provide an external definition, so
+    any call the compiler chooses not to inline would fail to link. */
+ static inline int
+ increment(void)
+ {
+
+ /* This routine "increments" the instruction list, in a manner
+ similar to counting. The instruction list changes basically
+ like this:
+
+ i0 r0,r0 i0 r0,r0 i0 r0,r0 i0 r0,r0
+ i0 r0,r0 ==> i0 r0,r0 ==> i0 r0,r0 ==> i0 r0,r0 etc.
+ i0 r0,r0 i0 r1,r0 i0 r2,r0 i0 r0,r1
+
+ The bottom left operand is tested. If it has not reached its
+ maximum value, it is incremented. If it has reached its maximum
+ value, it is reset to its starting value and the operand to its right
+ is incremented if possible. If all operands have reached their
+ maxima, the last instruction is replaced with the next instruction
+ in the isa list, if possible, etc.
+ The returned value is the lowest index i of the instructions
+ modified, or -1 if the instruction list cannot be incremented anymore
+ ("done").
+ As far as incrementing goes, there are only three types of operands:
+
+ 1. Goes through the ordinary immediate values, skips the shift
+ immediate values, and then goes through the registers.
+ 2. Goes through the shift immediate values followed by the registers.
+ 3. Goes through the registers only.
+
+ Which range an operand is in can be determined by its register number
+ alone, so we don't need operand types in the ISA. However, opnd[0]
+ of a commutative op is an exception in that it doesn't go through
+ all the register values; it skips register values for which it is
+ less than opnd[1].
+ There's no doubt a faster way to program this, maybe by using
+ some fairly large tables. */
+
+     int i, j, k, opndj, nopnds;
+
+     for (i = numi - 1; i >= 0; i--) {
+         k = pgm[i].op;
+         nopnds = isa[k].numopnds;
+         for (j = 0; j < nopnds; j++) {
+             opndj = pgm[i].opnd[j];
+
+             if (opndj < NIM - 1) {      // If ordinary imm. and not last,
+                 pgm[i].opnd[j] += 1;    // increment the operand.
+                 break;
+             }
+             else if (opndj == NIM - 1) {   // If last ordinary imm. operand,
+                 pgm[i].opnd[j] = RX;    // skip to first register.
+                 break;
+             }
+             else if (opndj < i + RI0 - 1) {// If shift imm. or reg and not
+                 pgm[i].opnd[j] += 1;    // last, increment the operand.
+                 break;
+             }
+             // We're at the end for opnd j.
+             pgm[i].opnd[j] = isa[k].opndstart[j]; // Reset it and
+                                         // increment next operand to
+                                         // its right.
+         } // end for j
+
+         if (j == 0)                     // If we just incremented the
+             return i;                   // leftmost operand, return; the
+                                         // following check is not necessary.
+         if (j < nopnds) {
+
+             /* We just incremented some operand other than the rightmost,
+                which means we reset one or more operands. Must ensure that if
+                the instruction is commutative then opnd[0] >= opnd[1], that
+                the operands are not all immediate values, and if this is the
+                last instruction, that at least one operand refers to the
+                second from last instruction and possibly to the instruction
+                before that. */
+
+             fix_operands(i);
+             return i;
+         }
+
+         /* Have gone through all of insn i's opnds.
+            Increment the instruction itself (if possible). */
+
+         if (k < NUM_INSNS_IN_ISA - 1) {
+             k = k + 1;                  // Increment to next isa instruction.
+             pgm[i].op = k;
+             pgm[i].opnd[0] = isa[k].opndstart[0];
+             pgm[i].opnd[1] = isa[k].opndstart[1];
+             pgm[i].opnd[2] = isa[k].opndstart[2];
+
+             fix_operands(i);
+             return i;
+         }
+
+         /* Cannot increment to next isa insn. Reset it to the first
+            isa insn and look at next insn down in the program. Furthermore,
+            if the insn being reset is the last insn in the program, make
+            its first opnd pick up the previous insn's result. */
+
+         pgm[i].op = 0;                  // Index first insn in isa.
+         pgm[i].opnd[0] = isa[0].opndstart[0];
+         pgm[i].opnd[1] = isa[0].opndstart[1];
+         pgm[i].opnd[2] = isa[0].opndstart[2];
+
+         fix_operands(i);
+     } // end for i
+     return -1;                          // Return "done" indication.
+ }
+
+ // ----------------------------- search --------------------------------
+
+ /* Enumerate every trial program of the current length (numi), starting
+    from the program already loaded in pgm[], and print each one that
+    check() accepts.  Returns the number of solutions found; the search
+    stops early once 10 have been printed. */
+ int
+ search(void)
+ {
+
+     int ok, i, num_solutions;
+
+ #if NARGS == 1
+     r[RX] = trialx[0];                  // Must initialize these for
+     corr_result = correct_result[0];    // speed-up thing in "check."
+ #else
+     r[RX] = trialx[0];
+     r[RY] = trialy[0];
+     corr_result = correct_result[0][0];
+ #endif
+     num_solutions = 0;
+     i = 0;
+     do {
+         ok = check(i);                  // Simulate the program from i on.
+         if (ok) {
+             num_solutions += 1;
+             printf("\nFound a %d-operation program:\n", numi);
+             print_pgm();                // print_pgm takes no arguments.
+             if (num_solutions == 10) return num_solutions; // bail out early
+         }
+         i = increment();                // Increment to next program.
+     } while (i >= 0);
+     return num_solutions;
+ }
+
+ // -------------------------- Main Program -----------------------------
+
+ /* Search for the shortest program that computes userfun: try programs of
+    1, 2, ... up to MAXNUMI instructions and stop after the first length
+    for which at least one solution is found.  For each length the number
+    of solutions and, if `counters` is set, the per-level evaluation counts
+    are printed.  Note that counter[] is never reset, so the counts are
+    cumulative over all lengths tried so far.  Always returns 0. */
+ int main(int argc, char *argv[]) {
+     int i, num_sol = 0;
+
+     for (numi = 1; numi <= MAXNUMI && num_sol == 0; ++numi) {
+         printf("Searching for programs with %d operations.\n", numi);
+
+         // Compute all the correct answers and save them in an array.
+
+         for (i = 0; i < NTRIALX; i++) {
+ #if NARGS == 1
+             correct_result[i] = userfun(trialx[i]);
+ #else
+             for (int j = 0; j < NTRIALY; j++)
+                 correct_result[i][j] = userfun(trialx[i], trialy[j]);
+ #endif
+         }
+
+         /* Preload the instruction array with the first instruction and
+            the lowest register number, with copies of this instruction
+            filling the whole array from 0 to numi - 1. */
+
+         for (i = 0; i < numi; i++) {
+             pgm[i].op = 0;              // Index first insn in isa.
+             pgm[i].opnd[0] = isa[0].opndstart[0];
+             pgm[i].opnd[1] = isa[0].opndstart[1];
+             pgm[i].opnd[2] = isa[0].opndstart[2];
+
+             /* Ensure that the instruction does not have all immediate
+                operands, etc. */
+
+             fix_operands(i);
+         }
+
+         // Check the above program, generate the next, check it, etc.
+         num_sol = search();
+
+         printf("Found %d solutions.\n", num_sol);
+         if (counters) {
+             // counter[] is unsigned, so sum and print it as unsigned;
+             // "%d" with an unsigned argument is a type mismatch and
+             // misprints counts above INT_MAX.
+             unsigned total = 0;
+             printf("Counters = ");
+             for (i = 0; i < numi; i++) {
+                 printf("%u, ", counter[i]);
+                 total = total + counter[i];
+             }
+             printf("total = %u\n", total);
+         }
+     }
+     return 0;
+ }
Index: llvm/test/Programs/MultiSource/aha/aha.h
diff -c /dev/null llvm/test/Programs/MultiSource/aha/aha.h:1.1
*** /dev/null Sun May 11 21:00:20 2003
--- llvm/test/Programs/MultiSource/aha/aha.h Sun May 11 21:00:10 2003
***************
*** 0 ****
--- 1,195 ----
+ // Copyright (C) 2002 by Henry S. Warren, Jr.
+ // User-adjustable settings: run-time flags, the number of arguments of
+ // userfun, the trial argument values, and the immediate values that
+ // trial programs may use.
+ const int debug = 0; // 0 or 1; debugging printouts if 1.
+ const int counters = 1; // 0 or 1; count number of evaluations.
+ #define NARGS 1 // Number of args in userfun, 1 or 2.
+
+ /* A note about the registers:
+
+ They are divided into four groups. The first group, starting with
+ register 0, holds ordinary immediate values. The second group, starting
+ with register NIM, holds the shift immediate values. The next 1 or 2
+ regs are the arguments to the user-defined function. The last group
+ holds the results of computations done by the trial programs.
+
+ 0 Start of ordinary immediate values (those given by IMMEDS)
+ NIM Start of shift immediate values (those given by SHIMMEDS)
+ RX First (or only) user function argument
+ RY Second user function argument
+ RI0 Result of instruction 0 goes here
+ RI0 + i Result of instruction i goes here
+ where:
+ NIM = number of ordinary immediate values
+ NSHIM = number of shift immediate values
+ */
+
+ // NOTE: 0x80000000 does not fit in a 32-bit int, so MAXNEG has type
+ // unsigned int; it becomes the most negative int only when it is
+ // converted to int, e.g. when stored in the int arrays below.
+ #define MAXNEG 0x80000000
+ #define MAXPOS 0x7FFFFFFF
+ #define NBSM 63 // Shift mask. Use 63 for mod 64
+ // shifts, or 31 for mod 32.
+
+ // Values of x with which every candidate program is simulated.
+ int trialx[] = {1, 0, -1, MAXNEG, MAXPOS, \
+ MAXNEG + 1, MAXPOS - 1, 0x01234567, 0x89ABCDEF, -2, 2, -3, 3, \
+ -64, 64, -5, -31415};
+ #if NARGS == 2
+ // Values of y, used only for two-argument functions.
+ int trialy[] = {0};
+ #endif
+ // First three values of IMMEDS must be 0, -1, and 1.
+ #define IMMEDS 0, -1, 1, MAXNEG, -2, 2, 3
+ #define SHIMMEDS 1, 2, 30, 31
+
+ // dummy1 and dummy2 exist only so that NIM and NSHIM can be computed
+ // with sizeof from the IMMEDS and SHIMMEDS lists.
+ int dummy1[] = {IMMEDS}; // These get optimized out of existence.
+ int dummy2[] = {SHIMMEDS};
+
+ #define NIM (int)(sizeof(dummy1)/sizeof(dummy1[0]))
+ #define NSHIM (int)(sizeof(dummy2)/sizeof(dummy2[0]))
+ #define RX (NIM + NSHIM) // First (or only) user function argument
+ #define RY (RX + 1) // Second user function argument
+ #define RI0 (RX + NARGS) // Result of instruction 0 goes here
+
+ int unacceptable; // Code below sets this to 1 for an
+ // unacceptable operation, such as
+ // divide by 0. It is initially 0.
+
+ /* Collection of simulator routines for the instructions in the isa. */
+ /* Negate x with 32-bit wraparound; y and z are ignored.  The subtraction
+    is done in unsigned arithmetic because -x overflows (undefined
+    behavior) when x is the most negative int, which is one of the trial
+    values. */
+ int neg(int x, int y, int z) {
+     (void)y; (void)z;
+     return (int)(0u - (unsigned)x);
+ }
+ /* One's complement of x; y and z are ignored. */
+ int _not(int x, int y, int z)
+ {
+     const int inverted = ~x;
+
+     return inverted;
+ }
+ /* Population count: the number of 1-bits in the 32-bit value xx.
+    y and z are ignored. */
+ int pop(int xx, int y, int z) {
+     unsigned bits = xx;
+
+     bits -= (bits >> 1) & 0x55555555;                         // 2-bit sums.
+     bits = ((bits >> 2) & 0x33333333) + (bits & 0x33333333);  // 4-bit sums.
+     bits = (bits + (bits >> 4)) & 0x0F0F0F0F;                 // Byte sums.
+     bits += bits << 8;          // Accumulate the four byte sums
+     bits += bits << 16;         // into the top byte.
+     return bits >> 24;
+ }
+
+ /* Number of leading zero bits in the 32-bit value xx (32 when xx is 0),
+    found by binary search on the position of the highest 1-bit.
+    y and z are ignored. */
+ int nlz(int xx, int y, int z) {
+     unsigned v = xx;
+     int zeros = 0;
+
+     if (v == 0) return(32);
+     if ((v >> 16) == 0) {zeros += 16; v <<= 16;}
+     if ((v >> 24) == 0) {zeros += 8; v <<= 8;}
+     if ((v >> 28) == 0) {zeros += 4; v <<= 4;}
+     if ((v >> 30) == 0) {zeros += 2; v <<= 2;}
+     if ((v >> 31) == 0) {zeros += 1;}
+     return zeros;
+ }
+
+ /* Reverse the order of the 32 bits of xi: swap adjacent bits, then
+    adjacent bit pairs, then nibbles, and finally reverse the four bytes.
+    y and z are ignored. */
+ int rev(int xi, int y, int z) {
+     unsigned w = xi;
+     unsigned up, down;
+
+     up = (w & 0x55555555) << 1;  down = (w >> 1) & 0x55555555;  w = up | down;
+     up = (w & 0x33333333) << 2;  down = (w >> 2) & 0x33333333;  w = up | down;
+     up = (w & 0x0F0F0F0F) << 4;  down = (w >> 4) & 0x0F0F0F0F;  w = up | down;
+
+     // Byte reversal.
+     up = (w << 24) | ((w & 0xFF00) << 8);
+     down = ((w >> 8) & 0xFF00) | (w >> 24);
+     w = up | down;
+     return w;
+ }
+
+ /* x + y with 32-bit wraparound; z is ignored.  Computed in unsigned
+    arithmetic because signed overflow is undefined behavior and the trial
+    values include the largest and smallest ints. */
+ int add (int x, int y, int z) {
+     (void)z;
+     return (int)((unsigned)x + (unsigned)y);
+ }
+ /* x - y with 32-bit wraparound; z is ignored.  Computed in unsigned
+    arithmetic because signed overflow is undefined behavior and the trial
+    values include the largest and smallest ints. */
+ int sub (int x, int y, int z) {
+     (void)z;
+     return (int)((unsigned)x - (unsigned)y);
+ }
+ /* Low 32 bits of x * y; z is ignored.  Computed in unsigned arithmetic
+    because signed overflow is undefined behavior; the low 32 bits of the
+    product are the same for signed and unsigned operands. */
+ int mul (int x, int y, int z) {
+     (void)z;
+     return (int)((unsigned)x * (unsigned)y);
+ }
+ /* Signed division x / y; z is ignored.  Division by zero and the one
+    overflowing case (most negative int divided by -1) cannot be computed:
+    for those, `unacceptable` is set to 1 and 0 is returned, so the caller
+    can reject the program.  (User must check solutions, in general.) */
+ int divide (int x, int y, int z) {
+     const int overflows = (y == -1 && x == (int)0x80000000);
+
+     if (y == 0 || overflows) {
+         unacceptable = 1;
+         return 0;
+     }
+     return x/y;
+ }
+ /* Unsigned division of x by y, both reinterpreted as unsigned; z is
+    ignored.  Division by zero sets `unacceptable` to 1 and returns 0. */
+ int divu(int x, int y, int z) {
+     const unsigned dividend = (unsigned)x;
+     const unsigned divisor = (unsigned)y;
+
+     if (divisor == 0) {
+         unacceptable = 1;
+         return 0;
+     }
+     return dividend/divisor;
+ }
+ /* Bitwise AND, OR and exclusive OR of x and y; z is ignored. */
+ int _and(int x, int y, int z)
+ {
+     return y & x;
+ }
+ int _or (int x, int y, int z)
+ {
+     return y | x;
+ }
+ int _xor(int x, int y, int z)
+ {
+     return y ^ x;
+ }
+ /* Rotate the 32-bit value x left by y positions; z is ignored.
+    A rotation is cyclic, so after applying the machine's shift mask the
+    amount is reduced modulo 32.  The original expression shifted by 32
+    when the amount was 0 and by 32 or more (or by a negative count) when
+    NBSM is 63, all of which are undefined behavior in C; it also left
+    shifted a possibly negative signed value. */
+ int rotl(int x, int y, int z) {
+     const unsigned ux = (unsigned)x;
+     const unsigned s = (unsigned)y & NBSM & 31;
+
+     if (s == 0) return x;               // Nothing to rotate.
+     return (int)(ux << s | ux >> (32 - s));
+ }
+ /* Shift x left by (y & NBSM) positions, giving 0 for amounts of 32 or
+    more; z is ignored.  The shift is done on the unsigned representation
+    because left shifting a negative int, or shifting a 1-bit into or past
+    the sign bit, is undefined behavior in C. */
+ int shl (int x, int y, int z) {
+     const int s = y & NBSM;
+
+     if (s >= 32) return 0;
+     return (int)((unsigned)x << s);
+ }
+ /* Logical (zero-filling) right shift of x by (y & NBSM) positions;
+    amounts of 32 or more give 0.  z is ignored. */
+ int shr(int x, int y, int z) {
+     const int amount = y & NBSM;
+
+     return amount < 32 ? (unsigned)x >> amount : 0;
+ }
+ /* Signed right shift of x by (y & NBSM) positions; amounts of 32 or
+    more behave like a shift by 31.  z is ignored. */
+ int shrs(int x, int y, int z) {
+     const int amount = y & NBSM;
+
+     return x >> (amount >= 32 ? 31 : amount);
+ }
+ /* Comparison predicates: each returns 1 if the relation holds between x
+    and y and 0 otherwise; z is ignored.  cmpltu compares the operands as
+    unsigned values. */
+ int cmpeq(int x, int y, int z)
+ {
+     return y == x;
+ }
+ int cmplt(int x, int y, int z)
+ {
+     return y > x;
+ }
+ int cmpltu(int x, int y, int z)
+ {
+     const unsigned left = (unsigned)(x);
+     const unsigned right = (unsigned)(y);
+
+     return left < right;
+ }
+ /* Select operations: return y when x satisfies the condition (x == 0,
+    x < 0, or x <= 0 respectively) and z otherwise. */
+ int seleq(int x, int y, int z)
+ {
+     if (x == 0) return y;
+     return z;
+ }
+ int sellt(int x, int y, int z)
+ {
+     if (x < 0) return y;
+     return z;
+ }
+ int selle(int x, int y, int z)
+ {
+     if (x <= 0) return y;
+     return z;
+ }
+
+ // The machine's instruction set:
+ // Note: Commutative ops are commutative in operands 0 and 1.
+ //
+ // opndstart[j] is the register number given to operand j whenever an
+ // instruction is (re)initialized. Registers 0 .. NIM-1 hold the IMMEDS
+ // values in order (0, -1, 1, MAXNEG, ... as defined in this file), so a
+ // start of 1 skips the immediate 0, a start of 2 skips 0 and -1, and a
+ // start of 3 skips 0, -1 and 1. A start of NIM begins at the first shift
+ // immediate, and a start of RX begins at the first argument register.
+ // Unused operand slots are given 0.
+ //
+ // Rows that are commented out are not part of the array, so those
+ // instructions are never tried; uncomment a row to enable it.
+ struct {
+ int (*proc)(int, int, int); // Procedure for simulating the op.
+ int numopnds; // Number of operands, 1 to 3.
+ int commutative; // 1 if opnds 0 and 1 commutative.
+ int opndstart[3]; // Starting reg no. for each operand.
+ char *mnemonic; // Name of op, for printing.
+ char *fun_name; // Function name, for printing.
+ char *op_name; // Operator name, for printing.
+ } isa[] = {
+ {neg, 1, 0, {RX, 0, 0}, "neg", "-(", "" }, // Negate.
+ {_not, 1, 0, {RX, 0, 0}, "not", "~(", "" }, // One's-complement.
+ // {pop, 1, 0, {RX, 0, 0}, "pop", "pop(", "" }, // Population count.
+ // {nlz, 1, 0, {RX, 0, 0}, "nlz", "nlz(", "" }, // Num leading 0's.
+ // {rev, 1, 0, {RX, 0, 0}, "rev", "rev(", "" }, // Bit reversal.
+ {add, 2, 1, {RX, 2, 0}, "add", "(", " + " }, // Add.
+ {sub, 2, 0, { 2, 2, 0}, "sub", "(", " - " }, // Subtract.
+ {mul, 2, 1, {RX, 3, 0}, "mul", "(", "*" }, // Multiply.
+ {divide, 2, 0, { 1, 3, 0}, "div", "(", "/" }, // Divide signed.
+ {divu, 2, 0, { 1, 1, 0}, "divu", "(", " /u " }, // Divide unsigned.
+ {_and, 2, 1, {RX, 2, 0}, "and", "(", " & " }, // AND.
+ {_or, 2, 1, {RX, 2, 0}, "or", "(", " | " }, // OR.
+ {_xor, 2, 1, {RX, 2, 0}, "xor", "(", " ^ " }, // XOR.
+ // {rotl, 2, 0, { 1,NIM, 0}, "rotl", "(", " <<r "}, // Rotate shift left.
+ {shl, 2, 0, { 1,NIM, 0}, "shl", "(", " << " }, // Shift left.
+ {shr, 2, 0, { 1,NIM, 0}, "shr", "(", " >>u "}, // Shift right.
+ {shrs, 2, 0, { 3,NIM, 0}, "shrs", "(", " >>s "}, // Shift right signed.
+ // {cmpeq, 2, 1, {RX, 0, 0}, "cmpeq", "(", " == " }, // Compare equal.
+ // {cmplt, 2, 0, { 0, 0, 0}, "cmplt", "(", " < " }, // Compare less than.
+ // {cmpltu, 2, 0, { 1, 1, 0}, "cmpltu","(", " <u " }, // Compare less than unsigned.
+ // {seleq, 3, 0, {RX, 0, 0}, "seleq", "seleq(", ", " }, // Select if = 0.
+ // {sellt, 3, 0, {RX, 0, 0}, "sellt", "sellt(", ", " }, // Select if < 0.
+ // {selle, 3, 0, {RX, 0, 0}, "selle", "selle(", ", " }, // Select if <= 0.
+ };
+
+ /* ------------------- End of user-setup Portion -------------------- */
+
+ #define MAXNUMI 5 // Max num of insns that can be tried.
+ // The function being searched for; it is defined in a separate file.
+ #if NARGS == 1
+ int userfun(int);
+ #else
+ int userfun(int, int);
+ #endif
+
+ // Number of trial values for x and y. NTRIALY refers to trialy, which
+ // exists only when NARGS == 2.
+ #define NTRIALX (int)(sizeof(trialx)/sizeof(trialx[0]))
+ #define NTRIALY (int)(sizeof(trialy)/sizeof(trialy[0]))
+
+ // Value of userfun for every trial value (or pair of trial values).
+ #if NARGS == 1
+ int correct_result[NTRIALX];
+ #else
+ int correct_result[NTRIALX][NTRIALY];
+ #endif
+
+ int corr_result; // Correct result for current trial.
+
+ #define NUM_INSNS_IN_ISA (int)(sizeof(isa)/sizeof(isa[0]))
+
+ struct { // The current program.
+ int op; // Index into isa.
+ int opnd[3]; // Operands of op: register numbers,
+ // i.e. indices into r[]. Immediates
+ // are referenced through the registers
+ // that hold them.
+ } pgm[MAXNUMI];
+
+ int numi; // Current size of the trial programs,
+ // must be from 1 to MAXNUMI.
+
+ /* GPR array: First NIM slots hold ordinary immediate values (IMMEDS),
+ next NSHIM slots hold shift immediate values (SHIMMEDS), next NARGS
+ slots hold the arguments x and, optionally, y, and the last numi slots
+ hold the result of instructions 0 through numi - 1. */
+
+ int r[NIM + NSHIM + NARGS + MAXNUMI] = {IMMEDS, SHIMMEDS};
+ unsigned counter[MAXNUMI]; // Count num times insn at level i is
+ // evaluated.
Index: llvm/test/Programs/MultiSource/aha/aha.pdf
Index: llvm/test/Programs/MultiSource/aha/read.me
diff -c /dev/null llvm/test/Programs/MultiSource/aha/read.me:1.1
*** /dev/null Sun May 11 21:00:20 2003
--- llvm/test/Programs/MultiSource/aha/read.me Sun May 11 21:00:10 2003
***************
*** 0 ****
--- 1,125 ----
+ Effect of Improvements
+
+ Changing from calculating the correct answer for each new program to
+ calculating them in advance and storing in a table, reduced the
+ execution time by about 2.7%.
+
+ An 8% improvement resulted from adding the "commutative" bit to the five
+ commutative operations (add, mul, and, or, xor). Perhaps more
+ importantly, it reduced the printout of essentially duplicate solutions.
+
+ An improvement by a factor of 2.58 (25.3/9.8) resulted from ensuring
+ that the last register operand of the last instruction, when this
+ instruction is created, refers to the result of the immediately
+ preceding instruction.
+
+ Continued the above idea for other register operands, i.e., ensured that
+ SOME operand of the last instruction always refers to the result of the
+ immediately preceding instruction. Got an improvement by a factor of
+ 1.04 (9.8/9.4).
+
+ 3/16/02: Got a factor of 1.85 by having it simulate the program only
+ from the last changed instruction to the end, which means that usually
+ only the last instruction is simulated. Also, changed the trial
+ value(s) so they "stick" at the last failed one(s). When a trial value
+ is changed, which happens after a success, the whole program must be
+ simulated.
+
+ 3/16/02: Got a factor of 1.010 by moving the assignment to
+ computed_result inside the loop just ahead of where it was. (The loop
+ is usually executed only once.)
+
+ 3/16/02: Got a factor of 1.020 by computing corr_res only when sticky_i
+ and/or sticky_j change.
+
+ 3/17/02: Tried making "numi" a constant defined with #define. Got a 5%
+ improvement. Decided not to do this.
+
+ 3/19/02: Got a factor of 1.166 by inlining "increment."
+
+ 3/23/02: Took 1614 secs (26.9 min) to search with numi = 4.
+
+ 9/19/02: Got a factor of 1.131 by requiring that immediate values be in
+ the order 0, -1, 1, ... and using isa.opndstart[3] to avoid certain
+ silly cases like ADD of 0, ADD of -1 (we do a subtract of 1), AND of 0
+ or -1, etc. This was made kind of necessary because the compare ops
+ should have an immediate value of 0 as a possibility, whereas for most
+ other ops, immediate 0 would never be used.
+
+ 9/22/02: Changed shift immediate amounts to be given in an array
+ (shimmed), so that fewer than 31 values can be specified. This gave no
+ change to execution time if all 31 are specified (1.222 secs for
+ absolute value problem on a basic RISC, running on my 1.8 GHz Thinkpad).
+ If only 4 values are specified, e.g. 1, 2, 30, and 31, the execution
+ dropped to 0.450 secs, a factor of 2.71 improvement.
+ I don't quite understand this, because the number of evaluations of
+ the third instruction reduced from 14.2 million to 2.74 million, a
+ factor of 5.18.
+ It's partly explained by the program load time. If you run aha with
+ an argument of 1, it takes 0.140 secs. Thus the time to start and end
+ the program is about that amount. So the ratio of actual execution
+ time is (1.222 - 0.140)/(0.450 - 0.140) = 3.49. Closer to 5.18, but not
+ very close.
+
+ 9/24/02: Put ALL immediates (both ordinary and shift amounts) in the
+ registers. This did not affect the execution time (if compiled -O2),
+ but it allowed deleting the operand type info in the isa table, and
+ simplified the code a little (by 77 lines).
+ Execution time for the standard run is now 0.591 secs on my 667 MHz
+ machine (compiled -O2, which I guess I'll use from now on).
+
+ 9/25/02: Made Aha! measure and print its own execution time, using
+ clock(). I believe this is user + system time for the Aha! process,
+ rather than wall clock time. Found that -O2 and -O3 make no difference;
+ the assembly language files search.s and check.s are identical. Am
+ using -O2. The standard job runs in from 0.520 to 0.540 seconds process
+ time on my 667 MHz office machine.
+ The number of instruction evaluations is 62248 + 82618 + 2743328 (for
+ the first, second, and third instruction resp.), or 2888194 total. This
+ corresponds to 122 cycles per evaluation.
+
+ 9/30/02: Before today, the program consisted of three .cc files: aha,
+ search, and check. Made it all one file (aha.cc), mainly because of
+ problems with C in defining a preset array of values and not requiring
+ the user to also set a variable equal to the number of values in the
+ array. No change to execution time. Build (mainly compilation) time
+ dropped from about 2.0 secs to 1.2 secs. This change also permits
+ inlining fix_operands, but trying that did not change execution time
+ measurably (so it is not inlined now).
+
+ 10/14/02: Before today, the incrementing of instructions was done with
+ the rightmost operand varying most rapidly. Today it was changed so
+ that the leftmost operand varies most rapidly. This simplifies the
+ handling of commutative ops, and permits a few other minor
+ simplifications.
+ This gave a factor of 1.05 improvement in execution time. Quite
+ minor, but the program is a little simpler and I think it will simplify
+ more complicated optimizations that may be done, such as (somehow)
+ avoiding programs that have an instruction whose result is unused.
+ A preliminary investigation of this shows that for a typical RISC
+ instruction set, 39% of three-instruction programs have an unused
+ result, and 70% of four-instruction programs have an unused result.
+ This is compared to the present program, which ensures only that the
+ second from last computed result does not go unused. Thus there is hay
+ to be made here.
+ An attempt to skip ALL these silly programs resulted in a net
+ increase in execution time, because it was implemented inefficiently.
+ It seems to be hard to devise an efficient way to do this. Some
+ compromise might be practical, such as ensuring only that the second and
+ third from last results are not both unused.
+
+ 10/15/02: Changed the program as just mentioned, i.e., to ensure that
+ instruction n (the last) uses the result of instruction n-1 and, if
+ instruction n-1 does not use the result of instruction n-2, then the
+ last instruction does. This improved execution time by a factor of 1.4
+ for three-instruction programs, and a factor of 1.8 for four-instruction
+ programs.
+
+ 4/22/03: Ran Aha! on a two-input problem with n = 5 and 17 instructions
+ enabled. Was searching for 5-instruction programs to compute the average
+ of two signed integers (without overflowing). Shut it off after 144
+ hours (6 days). I should make it display a "progress report" for such
+ long jobs, such as printing out the first instruction in the list each
+ time a new opcode is selected for it. Otherwise, you don't know if it
+ somehow got into an infinite loop and you have no idea how long the run
+ will take.
Index: llvm/test/Programs/MultiSource/aha/userfun.c
diff -c /dev/null llvm/test/Programs/MultiSource/aha/userfun.c:1.1
*** /dev/null Sun May 11 21:00:20 2003
--- llvm/test/Programs/MultiSource/aha/userfun.c Sun May 11 21:00:10 2003
***************
*** 0 ****
--- 1,20 ----
+ /* The target function: Aha! searches for a short instruction sequence
+    that computes the same value.  It takes one or two int arguments
+    (matching NARGS in aha.h, where it is declared) and returns a 32-bit
+    int.  To search for a different function, replace the active body
+    with one of the commented-out alternatives or with your own. */
+
+ int userfun(int x) {
+ // if (x > 0) return 1; // x > 0 predicate.
+ // else return 0; // Turn off div & divu.
+ // Found a new formula for HD.
+
+ // if (x >= 32) return 0;
+ // return 1 << (unsigned)x;
+
+ // return 3*x + 1;
+
+ // if (x >= 0) return x; // Absolute value.
+ // else return -x;
+
+     // Swap rightmost 2 bits: keep bits 2..31, move bit 0 up one place
+     // and bit 1 down one place.
+     unsigned upper = x & 0xfffffffc;
+     int bit0_moved_up = (x & 1) << 1;
+     int bit1_moved_down = (x & 2) >> 1;
+
+     return upper | bit0_moved_up | bit1_moved_down;
+ }
More information about the llvm-commits
mailing list