[llvm-commits] CVS: llvm/utils/BuildShuffleTable/BuildShuffleTable.cpp Makefile

Chris Lattner lattner at cs.uiuc.edu
Sun Apr 16 17:30:53 PDT 2006



Changes in directory llvm/utils/BuildShuffleTable:

BuildShuffleTable.cpp added (r1.1)
Makefile added (r1.1)
---
Log message:

Initial checking of a perfect shuffle generation program for 4-element 
Altivec vectors.


---
Diffs of the changes:  (+487 -0)

 BuildShuffleTable.cpp |  474 ++++++++++++++++++++++++++++++++++++++++++++++++++
 Makefile              |   13 +
 2 files changed, 487 insertions(+)


Index: llvm/utils/BuildShuffleTable/BuildShuffleTable.cpp
diff -c /dev/null llvm/utils/BuildShuffleTable/BuildShuffleTable.cpp:1.1
*** /dev/null	Sun Apr 16 19:30:51 2006
--- llvm/utils/BuildShuffleTable/BuildShuffleTable.cpp	Sun Apr 16 19:30:41 2006
***************
*** 0 ****
--- 1,474 ----
+ //===-- BuildShuffleTable.cpp - Perfect Shuffle Generator -----------------===//
+ //
+ //                     The LLVM Compiler Infrastructure
+ //
+ // This file was developed by Chris Lattner and is distributed under
+ // the University of Illinois Open Source License. See LICENSE.TXT for details.
+ //
+ //===----------------------------------------------------------------------===//
+ //
+ // This file computes an optimal sequence of instructions for doing all shuffles
+ // of two 4-element vectors.  With a release build and when configured to emit
+ // an altivec instruction table, this takes about 30s to run on a 2.7Ghz
+ // PowerPC G5.
+ //
+ //===----------------------------------------------------------------------===//
+ 
+ #include <iostream>
+ #include <vector>
+ 
+ struct Operator;
+ 
+ // Masks are 4-nibble hex numbers.  Values 0-7 in any nibble means that it takes
+ // an element from that value of the input vectors.  A value of 8 means the 
+ // entry is undefined.
+ 
+ // Mask manipulation functions.
+ static inline unsigned short MakeMask(unsigned V0, unsigned V1, 
+                                       unsigned V2, unsigned V3) {
+   return (V0 << (3*4)) | (V1 << (2*4)) | (V2 << (1*4)) | (V3 << (0*4));
+ }
+ 
+ /// getMaskElt - Return element N of the specified mask.
+ static unsigned getMaskElt(unsigned Mask, unsigned Elt) {
+   return (Mask >> ((3-Elt)*4)) & 0xF;
+ }
+ 
+ static unsigned setMaskElt(unsigned Mask, unsigned Elt, unsigned NewVal) {
+   unsigned FieldShift = ((3-Elt)*4);
+   return (Mask & ~(0xF << FieldShift)) | (NewVal << FieldShift);
+ }
+ 
+ // Reject elements where the values are 9-15.
+ static bool isValidMask(unsigned short Mask) {
+   unsigned short UndefBits = Mask & 0x8888;
+   return (Mask & ((UndefBits >> 1)|(UndefBits>>2)|(UndefBits>>3))) == 0;
+ }
+ 
+ /// hasUndefElements - Return true if any of the elements in the mask are undefs
+ ///
+ static bool hasUndefElements(unsigned short Mask) {
+   return (Mask & 0x8888) != 0;
+ }
+ 
+ /// isOnlyLHSMask - Return true if this mask only refers to its LHS, not
+ /// including undef values..
+ static bool isOnlyLHSMask(unsigned short Mask) {
+   return (Mask & 0x4444) == 0;
+ }
+ 
+ /// getLHSOnlyMask - Given a mask that refers to its LHS and RHS, modify it to
+ /// refer to the LHS only (for when one argument value is passed into the same
+ /// function twice).
+ static unsigned short getLHSOnlyMask(unsigned short Mask) {
+   return Mask & 0xBBBB;  // Keep only LHS and Undefs.
+ }
+ 
+ /// getCompressedMask - Turn a 16-bit uncompressed mask (where each elt uses 4
+ /// bits) into a compressed 13-bit mask, where each elt is multiplied by 9.
+ static unsigned getCompressedMask(unsigned short Mask) {
+   return getMaskElt(Mask, 0)*9*9*9 + getMaskElt(Mask, 1)*9*9 + 
+          getMaskElt(Mask, 2)*9     + getMaskElt(Mask, 3);
+ }
+ 
+ static void PrintMask(unsigned i, std::ostream &OS) {
+   OS << "<" << (char)(getMaskElt(i, 0) == 8 ? 'u' : ('0'+getMaskElt(i, 0)))
+      << "," << (char)(getMaskElt(i, 1) == 8 ? 'u' : ('0'+getMaskElt(i, 1)))
+      << "," << (char)(getMaskElt(i, 2) == 8 ? 'u' : ('0'+getMaskElt(i, 2)))
+      << "," << (char)(getMaskElt(i, 3) == 8 ? 'u' : ('0'+getMaskElt(i, 3)))
+      << ">";
+ }
+ 
+ /// ShuffleVal - This represents a shufflevector operation.
+ struct ShuffleVal {
+   unsigned Cost;  // Number of instrs used to generate this value.
+   Operator *Op;   // The Operation used to generate this value.
+   unsigned short Arg0, Arg1;  // Input operands for this value.
+   
+   ShuffleVal() : Cost(1000000) {}
+ };
+ 
+ 
+ /// ShufTab - This is the actual shuffle table that we are trying to generate.
+ ///
+ static ShuffleVal ShufTab[65536];
+ 
+ /// TheOperators - All of the operators that this target supports.
+ static std::vector<Operator*> TheOperators;
+ 
+ /// Operator - This is a vector operation that is available for use.
+ struct Operator {
+   unsigned short ShuffleMask;
+   unsigned short OpNum;
+   const char *Name;
+   
+   Operator(unsigned short shufflemask, const char *name)
+     : ShuffleMask(shufflemask), Name(name) {
+     OpNum = TheOperators.size();
+     TheOperators.push_back(this);
+   }
+   ~Operator() {
+     assert(TheOperators.back() == this);
+     TheOperators.pop_back();
+   }
+   
+   bool isOnlyLHSOperator() const {
+     return isOnlyLHSMask(ShuffleMask);
+   }
+   
+   const char *getName() const { return Name; }
+   
+   unsigned short getTransformedMask(unsigned short LHSMask, unsigned RHSMask) {
+     // Extract the elements from LHSMask and RHSMask, as appropriate.
+     unsigned Result = 0;
+     for (unsigned i = 0; i != 4; ++i) {
+       unsigned SrcElt = (ShuffleMask >> (4*i)) & 0xF;
+       unsigned ResElt;
+       if (SrcElt < 4)
+         ResElt = getMaskElt(LHSMask, SrcElt);
+       else if (SrcElt < 8)
+         ResElt = getMaskElt(RHSMask, SrcElt-4);
+       else {
+         assert(SrcElt == 8 && "Bad src elt!");
+         ResElt = 8;
+       }
+       Result |= ResElt << (4*i);
+     }
+     return Result;
+   }
+ };
+ 
+ static const char *getZeroCostOpName(unsigned short Op) {
+   if (ShufTab[Op].Arg0 == 0x0123)
+     return "LHS";
+   else if (ShufTab[Op].Arg0 == 0x4567)
+     return "RHS";
+   else {
+     assert(0 && "bad zero cost operation");
+     abort();
+   }
+ }
+ 
+ static void PrintOperation(unsigned ValNo, unsigned short Vals[]) {
+   unsigned short ThisOp = Vals[ValNo];
+   std::cerr << "t" << ValNo;
+   PrintMask(ThisOp, std::cerr);
+   std::cerr << " = " << ShufTab[ThisOp].Op->getName() << "(";
+     
+   if (ShufTab[ShufTab[ThisOp].Arg0].Cost == 0) {
+     std::cerr << getZeroCostOpName(ShufTab[ThisOp].Arg0);
+     PrintMask(ShufTab[ThisOp].Arg0, std::cerr);
+   } else {
+     // Figure out what tmp # it is.
+     for (unsigned i = 0; ; ++i)
+       if (Vals[i] == ShufTab[ThisOp].Arg0) {
+         std::cerr << "t" << i;
+         break;
+       }
+   }
+   
+   if (!ShufTab[Vals[ValNo]].Op->isOnlyLHSOperator()) {
+     std::cerr << ", ";
+     if (ShufTab[ShufTab[ThisOp].Arg1].Cost == 0) {
+       std::cerr << getZeroCostOpName(ShufTab[ThisOp].Arg1);
+       PrintMask(ShufTab[ThisOp].Arg1, std::cerr);
+     } else {
+       // Figure out what tmp # it is.
+       for (unsigned i = 0; ; ++i)
+         if (Vals[i] == ShufTab[ThisOp].Arg1) {
+           std::cerr << "t" << i;
+           break;
+         }
+     }
+   }
+   std::cerr << ")  ";
+ }
+ 
+ static unsigned getNumEntered() {
+   unsigned Count = 0;
+   for (unsigned i = 0; i != 65536; ++i)
+     Count += ShufTab[i].Cost < 100;
+   return Count;
+ }
+ 
+ static void EvaluateOps(unsigned short Elt, unsigned short Vals[], 
+                         unsigned &NumVals) {
+   if (ShufTab[Elt].Cost == 0) return;
+ 
+   // If this value has already been evaluated, it is free.  FIXME: match undefs.
+   for (unsigned i = 0, e = NumVals; i != e; ++i)
+     if (Vals[i] == Elt) return;
+   
+   // Otherwise, get the operands of the value, then add it.
+   unsigned Arg0 = ShufTab[Elt].Arg0, Arg1 = ShufTab[Elt].Arg1;
+   if (ShufTab[Arg0].Cost)
+     EvaluateOps(Arg0, Vals, NumVals);
+   if (Arg0 != Arg1 && ShufTab[Arg1].Cost)
+     EvaluateOps(Arg1, Vals, NumVals);
+   
+   Vals[NumVals++] = Elt;
+ }
+ 
+ 
+ int main() {
+   // Seed the table with accesses to the LHS and RHS.
+   ShufTab[0x0123].Cost = 0;
+   ShufTab[0x0123].Op = 0;
+   ShufTab[0x0123].Arg0 = 0x0123;
+   ShufTab[0x4567].Cost = 0;
+   ShufTab[0x4567].Op = 0;
+   ShufTab[0x4567].Arg0 = 0x4567;
+   
+   // Seed the first-level of shuffles, shuffles whose inputs are the input to
+   // the vectorshuffle operation.
+   bool MadeChange = true;
+   unsigned OpCount = 0;
+   while (MadeChange) {
+     MadeChange = false;
+     ++OpCount;
+     std::cerr << "Starting iteration #" << OpCount << " with "
+               << getNumEntered() << " entries established.\n";
+     
+     // Scan the table for two reasons: First, compute the maximum cost of any
+     // operation left in the table.  Second, make sure that values with undefs
+     // have the cheapest alternative that they match.
+     unsigned MaxCost = ShufTab[0].Cost;
+     for (unsigned i = 1; i != 0x8889; ++i) {
+       if (!isValidMask(i)) continue;
+       if (ShufTab[i].Cost > MaxCost)
+         MaxCost = ShufTab[i].Cost;
+       
+       // If this value has an undef, make it be computed the cheapest possible
+       // way of any of the things that it matches.
+       if (hasUndefElements(i)) {
+         // This code is a little bit tricky, so here's the idea: consider some
+         // permutation, like 7u4u.  To compute the lowest cost for 7u4u, we
+         // need to take the minimum cost of all of 7[0-8]4[0-8], 81 entries.  If
+         // there are 3 undefs, the number rises to 729 entries we have to scan,
+         // and for the 4 undef case, we have to scan the whole table.
+         //
+         // Instead of doing this huge amount of scanning, we process the table
+         // entries *in order*, and use the fact that 'u' is 8, larger than any
+         // valid index.  Given an entry like 7u4u then, we only need to scan
+         // 7[0-7]4u - 8 entries.  We can get away with this, because we already
+         // know that each of 704u, 714u, 724u, etc contain the minimum value of
+         // all of the 704[0-8], 714[0-8] and 724[0-8] entries respectively.
+         unsigned UndefIdx;
+         if (i & 0x8000)
+           UndefIdx = 0;
+         else if (i & 0x0800)
+           UndefIdx = 1;
+         else if (i & 0x0080)
+           UndefIdx = 2;
+         else if (i & 0x0008)
+           UndefIdx = 3;
+         else
+           abort();
+         
+         unsigned MinVal  = i;
+         unsigned MinCost = ShufTab[i].Cost;
+         
+         // Scan the 8 entries.
+         for (unsigned j = 0; j != 8; ++j) {
+           unsigned NewElt = setMaskElt(i, UndefIdx, j);
+           if (ShufTab[NewElt].Cost < MinCost) {
+             MinCost = ShufTab[NewElt].Cost;
+             MinVal = NewElt;
+           }
+         }
+         
+         // If we found something cheaper than what was here before, use it.
+         if (i != MinVal) {
+           MadeChange = true;
+           ShufTab[i] = ShufTab[MinVal];
+         }
+       } 
+     }
+     
+     for (unsigned LHS = 0; LHS != 0x8889; ++LHS) {
+       if (!isValidMask(LHS)) continue;
+       if (ShufTab[LHS].Cost > 1000) continue;
+ 
+       // If nothing involving this operand could possibly be cheaper than what
+       // we already have, don't consider it.
+       if (ShufTab[LHS].Cost + 1 >= MaxCost)
+         continue;
+         
+       for (unsigned opnum = 0, e = TheOperators.size(); opnum != e; ++opnum) {
+         Operator *Op = TheOperators[opnum];
+         unsigned short Mask = Op->ShuffleMask;
+ 
+         // Evaluate op(LHS,LHS)
+         unsigned ResultMask = Op->getTransformedMask(LHS, LHS);
+ 
+         unsigned Cost = ShufTab[LHS].Cost + 1;
+         if (Cost < ShufTab[ResultMask].Cost) {
+           ShufTab[ResultMask].Cost = Cost;
+           ShufTab[ResultMask].Op = Op;
+           ShufTab[ResultMask].Arg0 = LHS;
+           ShufTab[ResultMask].Arg1 = LHS;
+           MadeChange = true;
+         }
+         
+         // If this is a two input instruction, include the op(x,y) cases.  If
+         // this is a one input instruction, skip this.
+         if (Op->isOnlyLHSOperator()) continue;
+         
+         for (unsigned RHS = 0; RHS != 0x8889; ++RHS) {
+           if (!isValidMask(RHS)) continue;
+           if (ShufTab[RHS].Cost > 1000) continue;
+           
+           // If nothing involving this operand could possibly be cheaper than
+           // what we already have, don't consider it.
+           if (ShufTab[RHS].Cost + 1 >= MaxCost)
+             continue;
+           
+ 
+           // Evaluate op(LHS,RHS)
+           unsigned ResultMask = Op->getTransformedMask(LHS, RHS);
+ 
+           if (ShufTab[ResultMask].Cost <= OpCount ||
+               ShufTab[ResultMask].Cost <= ShufTab[LHS].Cost ||
+               ShufTab[ResultMask].Cost <= ShufTab[RHS].Cost)
+             continue;
+           
+           // Figure out the cost to evaluate this, knowing that CSE's only need
+           // to be evaluated once.
+           unsigned short Vals[30];
+           unsigned NumVals = 0;
+           EvaluateOps(LHS, Vals, NumVals);
+           EvaluateOps(RHS, Vals, NumVals);
+ 
+           unsigned Cost = NumVals + 1;
+           if (Cost < ShufTab[ResultMask].Cost) {
+             ShufTab[ResultMask].Cost = Cost;
+             ShufTab[ResultMask].Op = Op;
+             ShufTab[ResultMask].Arg0 = LHS;
+             ShufTab[ResultMask].Arg1 = RHS;
+             MadeChange = true;
+           }
+         }
+       }
+     }
+   }
+   
+   std::cerr << "Finished Table has " << getNumEntered()
+             << " entries established.\n";
+   
+   unsigned CostArray[10] = { 0 };
+ 
+   // Compute a cost histogram.
+   for (unsigned i = 0; i != 65536; ++i) {
+     if (!isValidMask(i)) continue;
+     if (ShufTab[i].Cost > 9)
+       ++CostArray[9];
+     else
+       ++CostArray[ShufTab[i].Cost];
+   }
+   
+   for (unsigned i = 0; i != 9; ++i)
+     if (CostArray[i])
+       std::cout << "// " << CostArray[i] << " entries have cost " << i << "\n";
+   if (CostArray[9])
+     std::cout << "// " << CostArray[9] << " entries have higher cost!\n";
+   
+   
+   // Build up the table to emit.
+   std::cout << "\n// This table is 6561*4 = 26244 bytes in size.\n";
+   std::cout << "static const unsigned InstrTab[6561+1] = {\n";
+   
+   for (unsigned i = 0; i != 0x8889; ++i) {
+     if (!isValidMask(i)) continue;
+     
+     // CostSat - The cost of this operation saturated to two bits.
+     unsigned CostSat = ShufTab[i].Cost;
+     if (CostSat > 3) CostSat = 3;
+     
+     unsigned OpNum = ShufTab[i].Op ? ShufTab[i].Op->OpNum : 0;
+     assert(OpNum < 16 && "Too few bits to encode operation!");
+     
+     unsigned LHS = getCompressedMask(ShufTab[i].Arg0);
+     unsigned RHS = getCompressedMask(ShufTab[i].Arg1);
+     
+     // Encode this as 2 bits of saturated cost, 4 bits of opcodes, 13 bits of
+     // LHS, and 13 bits of RHS = 32 bits.
+     unsigned Val = (CostSat << 30) | (OpNum << 27) | (LHS << 13) | RHS;
+ 
+     std::cout << "  " << Val << "U,\t// ";
+     PrintMask(i, std::cout);
+     std::cout << ": Cost " << ShufTab[i].Cost;
+     std::cout << " " << (ShufTab[i].Op ? ShufTab[i].Op->getName() : "copy");
+     std::cout << " ";
+     if (ShufTab[ShufTab[i].Arg0].Cost == 0) {
+       std::cout << getZeroCostOpName(ShufTab[i].Arg0);
+     } else {
+       PrintMask(ShufTab[i].Arg0, std::cout);
+     }
+ 
+     if (ShufTab[i].Op && !ShufTab[i].Op->isOnlyLHSOperator()) {
+       std::cout << ", ";
+       if (ShufTab[ShufTab[i].Arg1].Cost == 0) {
+         std::cout << getZeroCostOpName(ShufTab[i].Arg1);
+       } else {
+         PrintMask(ShufTab[i].Arg1, std::cout);
+       }
+     }
+     std::cout << "\n";
+   }  
+   std::cout << "  0\n};\n";
+ 
+   if (0) {
+     // Print out the table.
+     for (unsigned i = 0; i != 0x8889; ++i) {
+       if (!isValidMask(i)) continue;
+       if (ShufTab[i].Cost < 1000) {
+         PrintMask(i, std::cerr);
+         std::cerr << " - Cost " << ShufTab[i].Cost << " - ";
+         
+         unsigned short Vals[30];
+         unsigned NumVals = 0;
+         EvaluateOps(i, Vals, NumVals);
+ 
+         for (unsigned j = 0, e = NumVals; j != e; ++j)
+           PrintOperation(j, Vals);
+         std::cerr << "\n";
+       }
+     }
+   }
+ }
+ 
+ 
+ 
+ ///===---------------------------------------------------------------------===//
+ /// The altivec instruction definitions.  This is the altivec-specific part of
+ /// this file.
+ ///===---------------------------------------------------------------------===//
+ 
+ struct vmrghw : public Operator {
+   vmrghw() : Operator(0x0415, "vmrghw") {}
+ } the_vmrghw;
+ 
+ struct vmrglw : public Operator {
+   vmrglw() : Operator(0x2637, "vmrglw") {}
+ } the_vmrglw;
+ 
+ template<unsigned Elt>
+ struct vspltisw : public Operator {
+   vspltisw(const char *N) : Operator(MakeMask(Elt, Elt, Elt, Elt), N) {}
+ };
+ 
+ vspltisw<0> the_vspltisw0("vspltisw0");
+ vspltisw<1> the_vspltisw1("vspltisw1");
+ vspltisw<2> the_vspltisw2("vspltisw2");
+ vspltisw<3> the_vspltisw3("vspltisw3");
+ 
+ template<unsigned N>
+ struct vsldoi : public Operator {
+   vsldoi(const char *n) : Operator(MakeMask(N&7, (N+1)&7, (N+2)&7, (N+3)&7), n){
+   }
+ };
+ 
+ vsldoi<1> the_vsldoi1("vsldoi4");
+ vsldoi<2> the_vsldoi2("vsldoi8");
+ vsldoi<3> the_vsldoi3("vsldoi12");
+ 


Index: llvm/utils/BuildShuffleTable/Makefile
diff -c /dev/null llvm/utils/BuildShuffleTable/Makefile:1.1
*** /dev/null	Sun Apr 16 19:30:53 2006
--- llvm/utils/BuildShuffleTable/Makefile	Sun Apr 16 19:30:41 2006
***************
*** 0 ****
--- 1,13 ----
+ ##===- utils/BuildShuffleTable/Makefile --------------------*- Makefile -*-===##
+ # 
+ #                     The LLVM Compiler Infrastructure
+ #
+ # This file was developed by Chris Lattner and is distributed under
+ # the University of Illinois Open Source License. See LICENSE.TXT for details.
+ # 
+ ##===----------------------------------------------------------------------===##
+ 
+ LEVEL = ../..
+ TOOLNAME = llvm-BuildShuffleTable
+ include $(LEVEL)/Makefile.common
+ 






More information about the llvm-commits mailing list