[llvm-branch-commits] [llvm-branch] r164384 - in /llvm/branches/R600: ./ docs/ include/llvm-c/ include/llvm/ include/llvm/ADT/ include/llvm/Analysis/ include/llvm/CodeGen/ include/llvm/MC/MCParser/ include/llvm/Object/ include/llvm/Support/ include/llvm/TableGen/ include/llvm/Transforms/Utils/ lib/Analysis/ lib/AsmParser/ lib/Bitcode/Reader/ lib/CodeGen/ lib/CodeGen/AsmPrinter/ lib/CodeGen/SelectionDAG/ lib/MC/ lib/MC/MCParser/ lib/Support/ lib/Support/Unix/ lib/Support/Windows/ lib/TableGen/ lib/Target/ARM/ lib/Target/ARM/As...

Tom Stellard thomas.stellard at amd.com
Fri Sep 21 10:29:50 PDT 2012


Author: tstellar
Date: Fri Sep 21 12:29:50 2012
New Revision: 164384

URL: http://llvm.org/viewvc/llvm-project?rev=164384&view=rev
Log:
Merge branch 'master' into git-svn-R600

Added:
    llvm/branches/R600/docs/DebuggingJITedCode.rst
    llvm/branches/R600/include/llvm/Transforms/Utils/IntegerDivision.h
    llvm/branches/R600/lib/Transforms/Utils/IntegerDivision.cpp
    llvm/branches/R600/test/CodeGen/ARM/2012-09-18-ARMv4ISelBug.ll
    llvm/branches/R600/test/CodeGen/PowerPC/structsinregs.ll
    llvm/branches/R600/test/CodeGen/SPARC/load_to_switch.ll
    llvm/branches/R600/test/CodeGen/X86/atomic16.ll
    llvm/branches/R600/test/CodeGen/X86/atomic32.ll
    llvm/branches/R600/test/CodeGen/X86/atomic64.ll
    llvm/branches/R600/test/CodeGen/X86/atomic6432.ll
    llvm/branches/R600/test/CodeGen/X86/atomic8.ll
    llvm/branches/R600/test/CodeGen/X86/pr13458.ll
    llvm/branches/R600/test/CodeGen/X86/pr5145.ll
    llvm/branches/R600/test/MC/AsmParser/macros-darwin.s
    llvm/branches/R600/test/Other/FileCheck-space.txt
    llvm/branches/R600/test/Transforms/Inline/recursive.ll
    llvm/branches/R600/test/Transforms/SimplifyCFG/sink-common-code.ll
    llvm/branches/R600/unittests/Support/MemoryTest.cpp
Modified:
    llvm/branches/R600/README.txt
    llvm/branches/R600/docs/CodingStandards.rst
    llvm/branches/R600/docs/DeveloperPolicy.rst
    llvm/branches/R600/docs/programming.rst
    llvm/branches/R600/docs/subsystems.rst
    llvm/branches/R600/docs/userguides.rst
    llvm/branches/R600/include/llvm-c/Core.h
    llvm/branches/R600/include/llvm/ADT/MapVector.h
    llvm/branches/R600/include/llvm/Analysis/InlineCost.h
    llvm/branches/R600/include/llvm/Analysis/IntervalPartition.h
    llvm/branches/R600/include/llvm/Attributes.h
    llvm/branches/R600/include/llvm/CodeGen/ValueTypes.h
    llvm/branches/R600/include/llvm/CodeGen/ValueTypes.td
    llvm/branches/R600/include/llvm/Intrinsics.td
    llvm/branches/R600/include/llvm/IntrinsicsARM.td
    llvm/branches/R600/include/llvm/MC/MCParser/MCAsmLexer.h
    llvm/branches/R600/include/llvm/Object/ELF.h
    llvm/branches/R600/include/llvm/Object/ObjectFile.h
    llvm/branches/R600/include/llvm/Support/Memory.h
    llvm/branches/R600/include/llvm/TableGen/Record.h
    llvm/branches/R600/include/llvm/Transforms/Utils/Cloning.h
    llvm/branches/R600/lib/Analysis/BasicAliasAnalysis.cpp
    llvm/branches/R600/lib/Analysis/InlineCost.cpp
    llvm/branches/R600/lib/AsmParser/LLParser.cpp
    llvm/branches/R600/lib/Bitcode/Reader/BitcodeReader.cpp
    llvm/branches/R600/lib/CodeGen/Analysis.cpp
    llvm/branches/R600/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
    llvm/branches/R600/lib/CodeGen/LiveIntervalAnalysis.cpp
    llvm/branches/R600/lib/CodeGen/LiveRegMatrix.h
    llvm/branches/R600/lib/CodeGen/MachineFunction.cpp
    llvm/branches/R600/lib/CodeGen/RegisterCoalescer.cpp
    llvm/branches/R600/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/branches/R600/lib/CodeGen/SelectionDAG/TargetLowering.cpp
    llvm/branches/R600/lib/MC/MCAssembler.cpp
    llvm/branches/R600/lib/MC/MCParser/AsmLexer.cpp
    llvm/branches/R600/lib/MC/MCParser/AsmParser.cpp
    llvm/branches/R600/lib/MC/MCParser/MCAsmLexer.cpp
    llvm/branches/R600/lib/MC/MachObjectWriter.cpp
    llvm/branches/R600/lib/Support/Memory.cpp
    llvm/branches/R600/lib/Support/Unix/Memory.inc
    llvm/branches/R600/lib/Support/Windows/Memory.inc
    llvm/branches/R600/lib/TableGen/Record.cpp
    llvm/branches/R600/lib/Target/ARM/ARMExpandPseudoInsts.cpp
    llvm/branches/R600/lib/Target/ARM/ARMFastISel.cpp
    llvm/branches/R600/lib/Target/ARM/ARMInstrInfo.td
    llvm/branches/R600/lib/Target/ARM/ARMInstrNEON.td
    llvm/branches/R600/lib/Target/ARM/ARMScheduleA9.td
    llvm/branches/R600/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
    llvm/branches/R600/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
    llvm/branches/R600/lib/Target/CppBackend/CPPBackend.cpp
    llvm/branches/R600/lib/Target/Mips/Mips16FrameLowering.cpp
    llvm/branches/R600/lib/Target/Mips/Mips16FrameLowering.h
    llvm/branches/R600/lib/Target/Mips/Mips16InstrInfo.td
    llvm/branches/R600/lib/Target/PowerPC/PPCISelLowering.cpp
    llvm/branches/R600/lib/Target/PowerPC/PPCISelLowering.h
    llvm/branches/R600/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
    llvm/branches/R600/lib/Target/X86/X86CodeEmitter.cpp
    llvm/branches/R600/lib/Target/X86/X86ISelDAGToDAG.cpp
    llvm/branches/R600/lib/Target/X86/X86ISelLowering.cpp
    llvm/branches/R600/lib/Target/X86/X86ISelLowering.h
    llvm/branches/R600/lib/Target/X86/X86InstrCompiler.td
    llvm/branches/R600/lib/Target/X86/X86InstrFMA.td
    llvm/branches/R600/lib/Target/X86/X86InstrInfo.cpp
    llvm/branches/R600/lib/Target/X86/X86InstrInfo.h
    llvm/branches/R600/lib/Target/X86/X86InstrInfo.td
    llvm/branches/R600/lib/Target/X86/X86InstrSSE.td
    llvm/branches/R600/lib/Target/X86/X86InstrXOP.td
    llvm/branches/R600/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
    llvm/branches/R600/lib/Transforms/Scalar/CodeGenPrepare.cpp
    llvm/branches/R600/lib/Transforms/Scalar/SROA.cpp
    llvm/branches/R600/lib/Transforms/Utils/CMakeLists.txt
    llvm/branches/R600/lib/Transforms/Utils/SimplifyCFG.cpp
    llvm/branches/R600/lib/VMCore/AsmWriter.cpp
    llvm/branches/R600/lib/VMCore/Attributes.cpp
    llvm/branches/R600/lib/VMCore/Core.cpp
    llvm/branches/R600/lib/VMCore/Type.cpp
    llvm/branches/R600/lib/VMCore/ValueTypes.cpp
    llvm/branches/R600/lib/VMCore/Verifier.cpp
    llvm/branches/R600/test/Bitcode/blockaddress.ll
    llvm/branches/R600/test/CodeGen/ARM/coalesce-subregs.ll
    llvm/branches/R600/test/CodeGen/ARM/fast-isel.ll
    llvm/branches/R600/test/CodeGen/ARM/reg_sequence.ll
    llvm/branches/R600/test/CodeGen/ARM/sub.ll
    llvm/branches/R600/test/CodeGen/ARM/twoaddrinstr.ll
    llvm/branches/R600/test/CodeGen/ARM/vbsl-constant.ll
    llvm/branches/R600/test/CodeGen/ARM/vbsl.ll
    llvm/branches/R600/test/CodeGen/PowerPC/ppc64-calls.ll
    llvm/branches/R600/test/CodeGen/X86/2010-01-08-Atomic64Bug.ll
    llvm/branches/R600/test/CodeGen/X86/atomic_add.ll
    llvm/branches/R600/test/CodeGen/X86/atomic_op.ll
    llvm/branches/R600/test/CodeGen/X86/avx2-shuffle.ll
    llvm/branches/R600/test/DebugInfo/X86/DW_AT_object_pointer.ll
    llvm/branches/R600/test/MC/AsmParser/macro-args.s
    llvm/branches/R600/test/MC/AsmParser/macro-rept-err1.s
    llvm/branches/R600/test/MC/AsmParser/macros.s
    llvm/branches/R600/test/MC/X86/x86_nop.s
    llvm/branches/R600/test/Object/nm-shared-object.test
    llvm/branches/R600/test/Object/objdump-symbol-table.test
    llvm/branches/R600/test/Other/lit.local.cfg
    llvm/branches/R600/test/Transforms/InstCombine/div-shift.ll
    llvm/branches/R600/test/Transforms/SROA/basictest.ll
    llvm/branches/R600/test/Transforms/SimplifyCFG/switch_to_lookup_table.ll
    llvm/branches/R600/tools/lli/lli.cpp
    llvm/branches/R600/tools/llvm-stress/llvm-stress.cpp
    llvm/branches/R600/unittests/Support/CMakeLists.txt
    llvm/branches/R600/utils/FileCheck/FileCheck.cpp
    llvm/branches/R600/utils/TableGen/AsmMatcherEmitter.cpp
    llvm/branches/R600/utils/TableGen/CodeGenDAGPatterns.cpp
    llvm/branches/R600/utils/TableGen/CodeGenDAGPatterns.h
    llvm/branches/R600/utils/TableGen/CodeGenSchedule.cpp
    llvm/branches/R600/utils/TableGen/CodeGenSchedule.h
    llvm/branches/R600/utils/TableGen/CodeGenTarget.cpp
    llvm/branches/R600/utils/TableGen/SubtargetEmitter.cpp
    llvm/branches/R600/utils/TableGen/X86RecognizableInstr.cpp
    llvm/branches/R600/utils/TableGen/X86RecognizableInstr.h

Modified: llvm/branches/R600/README.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/README.txt?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/README.txt (original)
+++ llvm/branches/R600/README.txt Fri Sep 21 12:29:50 2012
@@ -15,4 +15,3 @@
 suggestions.
 
 
-

Modified: llvm/branches/R600/docs/CodingStandards.rst
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/docs/CodingStandards.rst?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/docs/CodingStandards.rst (original)
+++ llvm/branches/R600/docs/CodingStandards.rst Fri Sep 21 12:29:50 2012
@@ -421,9 +421,9 @@
 
 .. code-block:: c++
 
-  Value *DoSomething(Instruction *I) {
+  Value *doSomething(Instruction *I) {
     if (!isa<TerminatorInst>(I) &&
-        I->hasOneUse() && SomeOtherThing(I)) {
+        I->hasOneUse() && doOtherThing(I)) {
       ... some long code ....
     }
 
@@ -445,7 +445,7 @@
 
 .. code-block:: c++
 
-  Value *DoSomething(Instruction *I) {
+  Value *doSomething(Instruction *I) {
     // Terminators never need 'something' done to them because ... 
     if (isa<TerminatorInst>(I))
       return 0;
@@ -456,7 +456,7 @@
       return 0;
 
     // This is really just here for example.
-    if (!SomeOtherThing(I))
+    if (!doOtherThing(I))
       return 0;
     
     ... some long code ....
@@ -601,9 +601,9 @@
 
 .. code-block:: c++
 
-  /// ListContainsFoo - Return true if the specified list has an element that is
+  /// containsFoo - Return true if the specified list has an element that is
   /// a foo.
-  static bool ListContainsFoo(const std::vector<Bar*> &List) {
+  static bool containsFoo(const std::vector<Bar*> &List) {
     for (unsigned i = 0, e = List.size(); i != e; ++i)
       if (List[i]->isFoo())
         return true;
@@ -611,7 +611,7 @@
   }
   ...
 
-  if (ListContainsFoo(BarList)) {
+  if (containsFoo(BarList)) {
     ...
   }
 
@@ -1120,7 +1120,7 @@
     };
   } // end anonymous namespace
 
-  static void Helper() { 
+  static void runHelper() { 
     ... 
   }
 
@@ -1140,7 +1140,7 @@
     bool operator<(const char *RHS) const;
   };
 
-  void Helper() { 
+  void runHelper() { 
     ... 
   }
 
@@ -1150,7 +1150,7 @@
 
   } // end anonymous namespace
 
-This is bad specifically because if you're looking at "``Helper``" in the middle
+This is bad specifically because if you're looking at "``runHelper``" in the middle
of a large C++ file, you have no immediate way to tell if it is local to
 the file.  When it is marked static explicitly, this is immediately obvious.
 Also, there is no reason to enclose the definition of "``operator<``" in the

Added: llvm/branches/R600/docs/DebuggingJITedCode.rst
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/docs/DebuggingJITedCode.rst?rev=164384&view=auto
==============================================================================
--- llvm/branches/R600/docs/DebuggingJITedCode.rst (added)
+++ llvm/branches/R600/docs/DebuggingJITedCode.rst Fri Sep 21 12:29:50 2012
@@ -0,0 +1,147 @@
+.. _debugging-jited-code:
+
+==============================
+Debugging JIT-ed Code With GDB
+==============================
+
+.. sectionauthor:: Reid Kleckner and Eli Bendersky
+
+Background
+==========
+
+Without special runtime support, debugging dynamically generated code with
+GDB (as with most debuggers) can be quite painful.  Debuggers generally
+read debug information from the object file of the code, but for JITed
+code, there is no such file to look for.
+
+In order to communicate the necessary debug info to GDB, an interface for
+registering JITed code with debuggers has been designed and implemented for
+GDB and LLVM MCJIT.  At a high level, whenever MCJIT generates new machine code,
+it does so in an in-memory object file that contains the debug information in
+DWARF format.  MCJIT then adds this in-memory object file to a global list of
+dynamically generated object files and calls a special function
+(``__jit_debug_register_code``) marked noinline that GDB knows about.  When
+GDB attaches to a process, it puts a breakpoint in this function and loads all
+of the object files in the global list.  When MCJIT calls the registration
+function, GDB catches the breakpoint signal, loads the new object file from
+the inferior's memory, and resumes execution.  In this way, GDB can get the
+necessary debug information.
+
+GDB Version
+===========
+
+In order to debug code JIT-ed by LLVM, you need GDB 7.0 or newer, which is
+available on most modern distributions of Linux.  The version of GDB that
+Apple ships with Xcode has been frozen at 6.3 for a while.  LLDB may be a
+better option for debugging JIT-ed code on Mac OS X.
+
+
+Debugging MCJIT-ed code
+=======================
+
+The emerging MCJIT component of LLVM allows full debugging of JIT-ed code with
+GDB.  This is due to MCJIT's ability to use the MC emitter to provide full
+DWARF debugging information to GDB.
+
+Note that lli has to be passed the ``-use-mcjit`` flag to JIT the code with
+MCJIT instead of the old JIT.
+
+Example
+-------
+
+Consider the following C code (with line numbers added to make the example
+easier to follow):
+
+..
+   FIXME:
+   Sphinx has the ability to automatically number these lines by adding
+   :linenos: on the line immediately following the `.. code-block:: c`, but
+   it looks like garbage; the line numbers don't even line up with the
+   lines. Is this a Sphinx bug, or is it a CSS problem?
+
+.. code-block:: c
+
+   1   int compute_factorial(int n)
+   2   {
+   3       if (n <= 1)
+   4           return 1;
+   5
+   6       int f = n;
+   7       while (--n > 1)
+   8           f *= n;
+   9       return f;
+   10  }
+   11
+   12
+   13  int main(int argc, char** argv)
+   14  {
+   15      if (argc < 2)
+   16          return -1;
+   17      char firstletter = argv[1][0];
+   18      int result = compute_factorial(firstletter - '0');
+   19
+   20      // Returned result is clipped at 255...
+   21      return result;
+   22  }
+
+Here is a sample command line session that shows how to build and run this
+code via ``lli`` inside GDB:
+
+.. code-block:: bash
+
+   $ $BINPATH/clang -cc1 -O0 -g -emit-llvm showdebug.c
+   $ gdb --quiet --args $BINPATH/lli -use-mcjit showdebug.ll 5
+   Reading symbols from $BINPATH/lli...done.
+   (gdb) b showdebug.c:6
+   No source file named showdebug.c.
+   Make breakpoint pending on future shared library load? (y or [n]) y
+   Breakpoint 1 (showdebug.c:6) pending.
+   (gdb) r
+   Starting program: $BINPATH/lli -use-mcjit showdebug.ll 5
+   [Thread debugging using libthread_db enabled]
+
+   Breakpoint 1, compute_factorial (n=5) at showdebug.c:6
+   6	    int f = n;
+   (gdb) p n
+   $1 = 5
+   (gdb) p f
+   $2 = 0
+   (gdb) n
+   7	    while (--n > 1)
+   (gdb) p f
+   $3 = 5
+   (gdb) b showdebug.c:9
+   Breakpoint 2 at 0x7ffff7ed404c: file showdebug.c, line 9.
+   (gdb) c
+   Continuing.
+
+   Breakpoint 2, compute_factorial (n=1) at showdebug.c:9
+   9	    return f;
+   (gdb) p f
+   $4 = 120
+   (gdb) bt
+   #0  compute_factorial (n=1) at showdebug.c:9
+   #1  0x00007ffff7ed40a9 in main (argc=2, argv=0x16677e0) at showdebug.c:18
+   #2  0x3500000001652748 in ?? ()
+   #3  0x00000000016677e0 in ?? ()
+   #4  0x0000000000000002 in ?? ()
+   #5  0x0000000000d953b3 in llvm::MCJIT::runFunction (this=0x16151f0, F=0x1603020, ArgValues=...) at /home/ebenders_test/llvm_svn_rw/lib/ExecutionEngine/MCJIT/MCJIT.cpp:161
+   #6  0x0000000000dc8872 in llvm::ExecutionEngine::runFunctionAsMain (this=0x16151f0, Fn=0x1603020, argv=..., envp=0x7fffffffe040)
+       at /home/ebenders_test/llvm_svn_rw/lib/ExecutionEngine/ExecutionEngine.cpp:397
+   #7  0x000000000059c583 in main (argc=4, argv=0x7fffffffe018, envp=0x7fffffffe040) at /home/ebenders_test/llvm_svn_rw/tools/lli/lli.cpp:324
+   (gdb) finish
+   Run till exit from #0  compute_factorial (n=1) at showdebug.c:9
+   0x00007ffff7ed40a9 in main (argc=2, argv=0x16677e0) at showdebug.c:18
+   18	    int result = compute_factorial(firstletter - '0');
+   Value returned is $5 = 120
+   (gdb) p result
+   $6 = 23406408
+   (gdb) n
+   21	    return result;
+   (gdb) p result
+   $7 = 120
+   (gdb) c
+   Continuing.
+
+   Program exited with code 0170.
+   (gdb)
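
For reference, the registration protocol described in the Background section of the new document is small enough to sketch. The C++ sketch below follows the structure layout documented in the GDB manual; it is an illustration of the interface only, not MCJIT's actual implementation (which lives under lib/ExecutionEngine):

   // Sketch of the GDB JIT registration interface, per the GDB manual.
   #include <stdint.h>

   typedef enum {
     JIT_NOACTION = 0,
     JIT_REGISTER_FN,
     JIT_UNREGISTER_FN
   } jit_actions_t;

   struct jit_code_entry {
     jit_code_entry *next_entry;
     jit_code_entry *prev_entry;
     const char *symfile_addr;     // start of the in-memory object file
     uint64_t symfile_size;        // its size; the DWARF lives inside it
   };

   struct jit_descriptor {
     uint32_t version;             // must be 1
     uint32_t action_flag;         // one of jit_actions_t
     jit_code_entry *relevant_entry;
     jit_code_entry *first_entry;  // head of the global object-file list
   };

   // GDB sets its breakpoint here, so the function must not be inlined
   // or optimized away.
   extern "C" void __attribute__((noinline)) __jit_debug_register_code() {
     asm volatile("" ::: "memory");
   }

   // GDB locates this symbol in the inferior and walks first_entry
   // whenever the breakpoint fires.
   extern "C" jit_descriptor __jit_debug_descriptor = { 1, JIT_NOACTION,
                                                        0, 0 };

To register new code, the JIT links a jit_code_entry into the list, sets action_flag to JIT_REGISTER_FN, and calls __jit_debug_register_code(); GDB then reads the new object file out of the process's memory, which is exactly what the session above demonstrates.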

Modified: llvm/branches/R600/docs/DeveloperPolicy.rst
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/docs/DeveloperPolicy.rst?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/docs/DeveloperPolicy.rst (original)
+++ llvm/branches/R600/docs/DeveloperPolicy.rst Fri Sep 21 12:29:50 2012
@@ -279,7 +279,7 @@
 #. You are granted *commit-after-approval* to all parts of LLVM.  To get
    approval, submit a `patch`_ to `llvm-commits
    <http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits>`_. When approved
-   you may commit it yourself.</li>
+   you may commit it yourself.
 
 #. You are allowed to commit patches without approval which you think are
    obvious. This is clearly a subjective decision --- we simply expect you to

Modified: llvm/branches/R600/docs/programming.rst
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/docs/programming.rst?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/docs/programming.rst (original)
+++ llvm/branches/R600/docs/programming.rst Fri Sep 21 12:29:50 2012
@@ -8,12 +8,17 @@
 
    CodingStandards
    CommandLine
+   Atomics
 
 * `LLVM Language Reference Manual <LangRef.html>`_
 
   Defines the LLVM intermediate representation and the assembly form of the
   different nodes.
 
+* :ref:`atomics`
+
+  Information about LLVM's concurrency model.
+
 * `The LLVM Programmers Manual <ProgrammersManual.html>`_
 
   Introduction to the general layout of the LLVM sourcebase, important classes

Modified: llvm/branches/R600/docs/subsystems.rst
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/docs/subsystems.rst?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/docs/subsystems.rst (original)
+++ llvm/branches/R600/docs/subsystems.rst Fri Sep 21 12:29:50 2012
@@ -15,6 +15,7 @@
    LinkTimeOptimization
    SegmentedStacks
    TableGenFundamentals
+   DebuggingJITedCode
 
 * `Writing an LLVM Pass <WritingAnLLVMPass.html>`_
     
@@ -78,7 +79,7 @@
     
    How to build your programs with link-time optimization on Linux.
     
-* `The GDB JIT interface <DebuggingJITedCode.html>`_
+* :ref:`debugging-jited-code`
     
    How to debug JITed code with GDB.
     

Modified: llvm/branches/R600/docs/userguides.rst
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/docs/userguides.rst?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/docs/userguides.rst (original)
+++ llvm/branches/R600/docs/userguides.rst Fri Sep 21 12:29:50 2012
@@ -14,6 +14,7 @@
    Lexicon
    Packaging
    HowToAddABuilder
+   yaml2obj
 
 * `The LLVM Getting Started Guide <GettingStarted.html>`_
     

Modified: llvm/branches/R600/include/llvm-c/Core.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/include/llvm-c/Core.h?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/include/llvm-c/Core.h (original)
+++ llvm/branches/R600/include/llvm-c/Core.h Fri Sep 21 12:29:50 2012
@@ -1869,6 +1869,27 @@
 const char  *LLVMGetMDString(LLVMValueRef V, unsigned* Len);
 
 /**
+ * Obtain the number of operands from an MDNode value.
+ *
+ * @param V MDNode to get number of operands from.
+ * @return Number of operands of the MDNode.
+ */
+unsigned LLVMGetMDNodeNumOperands(LLVMValueRef V);
+
+/**
+ * Obtain the given MDNode's operands.
+ *
+ * The passed LLVMValueRef pointer should point to enough memory to hold all of
+ * the operands of the given MDNode (see LLVMGetMDNodeNumOperands) as
+ * LLVMValueRefs. This memory will be populated with the LLVMValueRefs of the
+ * MDNode's operands.
+ *
+ * @param V MDNode to get the operands from.
+ * @param Dest Destination array for operands.
+ */
+void LLVMGetMDNodeOperands(LLVMValueRef V, LLVMValueRef *Dest);
+
+/**
  * @}
  */
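
The two accessors added above are meant to be used as a pair: query the operand count, size a buffer, then fetch. A minimal usage sketch (the function name is illustrative, and MD is assumed to already wrap an MDNode):

   // Illustrative use of the new MDNode accessors in the C API.
   #include "llvm-c/Core.h"
   #include <vector>

   void inspectMDNode(LLVMValueRef MD) {  // MD assumed to wrap an MDNode
     unsigned NumOps = LLVMGetMDNodeNumOperands(MD);
     std::vector<LLVMValueRef> Ops(NumOps);
     if (NumOps != 0)
       LLVMGetMDNodeOperands(MD, &Ops[0]);  // fills all NumOps slots
     // Each Ops[i] is now an ordinary LLVMValueRef.
   }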
 

Modified: llvm/branches/R600/include/llvm/ADT/MapVector.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/include/llvm/ADT/MapVector.h?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/include/llvm/ADT/MapVector.h (original)
+++ llvm/branches/R600/include/llvm/ADT/MapVector.h Fri Sep 21 12:29:50 2012
@@ -7,10 +7,10 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file implements a map that also provides access to all stored values
-// in a deterministic order via the getValues method. Note that the iteration
-// order itself is just the DenseMap order and not deterministic. The interface
-// is purposefully minimal.
+// This file implements a map that provides insertion order iteration. The
+// interface is purposefully minimal. The key is assumed to be cheap to copy,
+// and two copies are kept: one for indexing in a DenseMap and one for
+// iteration in a std::vector.
 //
 //===----------------------------------------------------------------------===//
 
@@ -29,111 +29,38 @@
 template<typename KeyT, typename ValueT>
 class MapVector {
   typedef llvm::DenseMap<KeyT, unsigned> MapType;
-  typedef std::vector<ValueT> VectorType;
+  typedef std::vector<std::pair<KeyT, ValueT> > VectorType;
   typedef typename VectorType::size_type SizeType;
 
   MapType Map;
   VectorType Vector;
 
 public:
-  // The keys and values are not stored close to each other, so the iterator
-  // operator->() cannot return a pointer to a std::pair like a DenseMap does.
-  // Instead it returns a FakePair that contains references to Key and Value.
-  // This lets code using this to look the same as if using a regular DenseMap.
-  template<bool IsConst>
-  struct FakePair {
-    typedef typename conditional<IsConst, const ValueT, ValueT>::type VT;
-    const KeyT &first;
-    VT &second;
-    FakePair(const KeyT &K, VT &V) : first(K), second(V) {
-    }
-    FakePair *operator->() {
-      return this;
-    }
-  };
-
-  template<bool IsConst>
-  class IteratorTemplate {
-    typedef typename MapType::const_iterator WrappedIteratorType;
-    WrappedIteratorType WrappedI;
-    typedef
-      typename conditional<IsConst, const VectorType, VectorType>::type VT;
-    VT &VecRef;
-    typedef FakePair<IsConst> PairType;
-    friend class IteratorTemplate<true>;
-
-  public:
-    IteratorTemplate(WrappedIteratorType I, VT &V) :
-      WrappedI(I), VecRef(V) {
-    }
-
-    // If IsConst is true this is a converting constructor from iterator to
-    // const_iterator and the default copy constructor is used.
-    // Otherwise this is a copy constructor for iterator.
-    IteratorTemplate(const IteratorTemplate<false>& I) :
-      WrappedI(I.WrappedI), VecRef(I.VecRef) {
-    }
-
-    bool operator!=(const IteratorTemplate &RHS) const {
-      return WrappedI != RHS.WrappedI;
-    }
-
-    IteratorTemplate &operator++() {  // Preincrement
-      ++WrappedI;
-      return *this;
-    }
-
-    PairType operator->() {
-      unsigned Pos = WrappedI->second;
-      PairType Ret(WrappedI->first, VecRef[Pos]);
-      return Ret;
-    }
-  };
-
-  typedef IteratorTemplate<false> iterator;
-  typedef IteratorTemplate<true> const_iterator;
+  typedef typename VectorType::iterator iterator;
+  typedef typename VectorType::const_iterator const_iterator;
 
   SizeType size() const {
     return Vector.size();
   }
 
   iterator begin() {
-    return iterator(Map.begin(), this->Vector);
+    return Vector.begin();
   }
 
   const_iterator begin() const {
-    return const_iterator(Map.begin(), this->Vector);
+    return Vector.begin();
   }
 
   iterator end() {
-    return iterator(Map.end(), this->Vector);
+    return Vector.end();
   }
 
   const_iterator end() const {
-    return const_iterator(Map.end(), this->Vector);
-  }
-
-  bool empty() const {
-    return Map.empty();
-  }
-
-  typedef typename VectorType::iterator value_iterator;
-  typedef typename VectorType::const_iterator const_value_iterator;
-
-  value_iterator value_begin() {
-    return Vector.begin();
-  }
-
-  const_value_iterator value_begin() const {
-    return Vector.begin();
-  }
-
-  value_iterator value_end() {
     return Vector.end();
   }
 
-  const_value_iterator value_end() const {
-    return Vector.end();
+  bool empty() const {
+    return Vector.empty();
   }
 
   ValueT &operator[](const KeyT &Key) {
@@ -141,10 +68,10 @@
     std::pair<typename MapType::iterator, bool> Result = Map.insert(Pair);
     unsigned &I = Result.first->second;
     if (Result.second) {
-      Vector.push_back(ValueT());
+      Vector.push_back(std::make_pair(Key, ValueT()));
       I = Vector.size() - 1;
     }
-    return Vector[I];
+    return Vector[I].second;
   }
 };
 

Modified: llvm/branches/R600/include/llvm/Analysis/InlineCost.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/include/llvm/Analysis/InlineCost.h?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/include/llvm/Analysis/InlineCost.h (original)
+++ llvm/branches/R600/include/llvm/Analysis/InlineCost.h Fri Sep 21 12:29:50 2012
@@ -36,6 +36,9 @@
     const int LastCallToStaticBonus = -15000;
     const int ColdccPenalty = 2000;
     const int NoreturnPenalty = 10000;
+    /// Do not inline functions which allocate this many bytes on the stack
+    /// when the caller is recursive.
+    const unsigned TotalAllocaSizeRecursiveCaller = 1024;
   }
 
   /// \brief Represents the cost of inlining a function.
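
The new constant bounds stack growth when inlining into a recursive caller. Conceptually the cost analysis performs a check of the following shape (a hedged sketch only; the real logic is in lib/Analysis/InlineCost.cpp, which this commit also modifies):

   // Hypothetical illustration of the check this constant feeds.
   #include "llvm/Analysis/InlineCost.h"
   #include <stdint.h>

   static bool blocksRecursiveInline(bool CallerIsRecursive,
                                     uint64_t TotalAllocaSize) {
     return CallerIsRecursive &&
            TotalAllocaSize >
                llvm::InlineConstants::TotalAllocaSizeRecursiveCaller;
   }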

Modified: llvm/branches/R600/include/llvm/Analysis/IntervalPartition.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/include/llvm/Analysis/IntervalPartition.h?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/include/llvm/Analysis/IntervalPartition.h (original)
+++ llvm/branches/R600/include/llvm/Analysis/IntervalPartition.h Fri Sep 21 12:29:50 2012
@@ -33,8 +33,8 @@
 //
 // IntervalPartition - This class builds and holds an "interval partition" for
 // a function.  This partition divides the control flow graph into a set of
-// maximal intervals, as defined with the properties above.  Intuitively, a
-// BasicBlock is a (possibly nonexistent) loop with a "tail" of non looping
+// maximal intervals, as defined with the properties above.  Intuitively, an
+// interval is a (possibly nonexistent) loop with a "tail" of non-looping
 // nodes following it.
 //
 class IntervalPartition : public FunctionPass {

Modified: llvm/branches/R600/include/llvm/Attributes.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/include/llvm/Attributes.h?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/include/llvm/Attributes.h (original)
+++ llvm/branches/R600/include/llvm/Attributes.h Fri Sep 21 12:29:50 2012
@@ -106,6 +106,147 @@
   Attributes() : Bits(0) { }
   explicit Attributes(uint64_t Val) : Bits(Val) { }
   /*implicit*/ Attributes(Attribute::AttrConst Val) : Bits(Val.v) { }
+
+  // Attribute query methods.
+  // FIXME: StackAlignment & Alignment attributes have no predicate methods.
+  bool hasAttributes() const {
+    return Bits != 0;
+  }
+  bool hasAttributes(const Attributes &A) const {
+    return Bits & A.Bits;
+  }
+
+  bool hasZExtAttr() const {
+    return Bits & Attribute::ZExt_i;
+  }
+  bool hasSExtAttr() const {
+    return Bits & Attribute::SExt_i;
+  }
+  bool hasNoReturnAttr() const {
+    return Bits & Attribute::NoReturn_i;
+  }
+  bool hasInRegAttr() const {
+    return Bits & Attribute::InReg_i;
+  }
+  bool hasStructRetAttr() const {
+    return Bits & Attribute::StructRet_i;
+  }
+  bool hasNoUnwindAttr() const {
+    return Bits & Attribute::NoUnwind_i;
+  }
+  bool hasNoAliasAttr() const {
+    return Bits & Attribute::NoAlias_i;
+  }
+  bool hasByValAttr() const {
+    return Bits & Attribute::ByVal_i;
+  }
+  bool hasNestAttr() const {
+    return Bits & Attribute::Nest_i;
+  }
+  bool hasReadNoneAttr() const {
+    return Bits & Attribute::ReadNone_i;
+  }
+  bool hasReadOnlyAttr() const {
+    return Bits & Attribute::ReadOnly_i;
+  }
+  bool hasNoInlineAttr() const {
+    return Bits & Attribute::NoInline_i;
+  }
+  bool hasAlwaysInlineAttr() const {
+    return Bits & Attribute::AlwaysInline_i;
+  }
+  bool hasOptimizeForSizeAttr() const {
+    return Bits & Attribute::OptimizeForSize_i;
+  }
+  bool hasStackProtectAttr() const {
+    return Bits & Attribute::StackProtect_i;
+  }
+  bool hasStackProtectReqAttr() const {
+    return Bits & Attribute::StackProtectReq_i;
+  }
+  bool hasAlignmentAttr() const {
+    return Bits & Attribute::Alignment_i;
+  }
+  bool hasNoCaptureAttr() const {
+    return Bits & Attribute::NoCapture_i;
+  }
+  bool hasNoRedZoneAttr() const {
+    return Bits & Attribute::NoRedZone_i;
+  }
+  bool hasNoImplicitFloatAttr() const {
+    return Bits & Attribute::NoImplicitFloat_i;
+  }
+  bool hasNakedAttr() const {
+    return Bits & Attribute::Naked_i;
+  }
+  bool hasInlineHintAttr() const {
+    return Bits & Attribute::InlineHint_i;
+  }
+  bool hasReturnsTwiceAttr() const {
+    return Bits & Attribute::ReturnsTwice_i;
+  }
+  bool hasStackAlignmentAttr() const {
+    return Bits & Attribute::StackAlignment_i;
+  }
+  bool hasUWTableAttr() const {
+    return Bits & Attribute::UWTable_i;
+  }
+  bool hasNonLazyBindAttr() const {
+    return Bits & Attribute::NonLazyBind_i;
+  }
+  bool hasAddressSafetyAttr() const {
+    return Bits & Attribute::AddressSafety_i;
+  }
+
+  uint64_t getRawAlignment() const {
+    return Bits & Attribute::Alignment_i;
+  }
+  uint64_t getRawStackAlignment() const {
+    return Bits & Attribute::StackAlignment_i;
+  }
+
+  /// This returns the alignment field of an attribute as a byte alignment
+  /// value.
+  unsigned getAlignment() const {
+    if (!hasAlignmentAttr())
+      return 0;
+
+    return 1U << ((getRawAlignment() >> 16) - 1);
+  }
+
+  /// This returns the stack alignment field of an attribute as a byte alignment
+  /// value.
+  unsigned getStackAlignment() const {
+    if (!hasStackAlignmentAttr())
+      return 0;
+
+    return 1U << ((getRawStackAlignment() >> 26) - 1);
+  }
+
+  /// This turns an int alignment (a power of 2, normally) into the form used
+  /// internally in Attributes.
+  static Attributes constructAlignmentFromInt(unsigned i) {
+    // Default alignment, allow the target to define how to align it.
+    if (i == 0)
+      return Attribute::None;
+
+    assert(isPowerOf2_32(i) && "Alignment must be a power of two.");
+    assert(i <= 0x40000000 && "Alignment too large.");
+    return Attributes((Log2_32(i)+1) << 16);
+  }
+
+  /// This turns an int stack alignment (which must be a power of 2) into the
+  /// form used internally in Attributes.
+  static Attributes constructStackAlignmentFromInt(unsigned i) {
+    // Default alignment, allow the target to define how to align it.
+    if (i == 0)
+      return Attribute::None;
+
+    assert(isPowerOf2_32(i) && "Alignment must be a power of two.");
+    assert(i <= 0x100 && "Alignment too large.");
+    return Attributes((Log2_32(i)+1) << 26);
+  }
+
   // This is a "safe bool() operator".
   operator const void *() const { return Bits ? this : 0; }
   bool isEmptyOrSingleton() const { return (Bits & (Bits - 1)) == 0; }
@@ -134,6 +275,12 @@
   }
   Attributes operator ~ () const { return Attributes(~Bits); }
   uint64_t Raw() const { return Bits; }
+
+  /// The set of Attributes set in Attributes is converted to a string of
+  /// equivalent mnemonics. This is, presumably, for writing out the mnemonics
+  /// for the assembly writer.
+  /// @brief Convert attribute bits to text
+  std::string getAsString() const;
 };
 
 namespace Attribute {
@@ -177,49 +324,6 @@
 /// @brief Which attributes cannot be applied to a type.
 Attributes typeIncompatible(Type *Ty);
 
-/// This turns an int alignment (a power of 2, normally) into the
-/// form used internally in Attributes.
-inline Attributes constructAlignmentFromInt(unsigned i) {
-  // Default alignment, allow the target to define how to align it.
-  if (i == 0)
-    return None;
-
-  assert(isPowerOf2_32(i) && "Alignment must be a power of two.");
-  assert(i <= 0x40000000 && "Alignment too large.");
-  return Attributes((Log2_32(i)+1) << 16);
-}
-
-/// This returns the alignment field of an attribute as a byte alignment value.
-inline unsigned getAlignmentFromAttrs(Attributes A) {
-  Attributes Align = A & Attribute::Alignment;
-  if (!Align)
-    return 0;
-
-  return 1U << ((Align.Raw() >> 16) - 1);
-}
-
-/// This turns an int stack alignment (which must be a power of 2) into
-/// the form used internally in Attributes.
-inline Attributes constructStackAlignmentFromInt(unsigned i) {
-  // Default alignment, allow the target to define how to align it.
-  if (i == 0)
-    return None;
-
-  assert(isPowerOf2_32(i) && "Alignment must be a power of two.");
-  assert(i <= 0x100 && "Alignment too large.");
-  return Attributes((Log2_32(i)+1) << 26);
-}
-
-/// This returns the stack alignment field of an attribute as a byte alignment
-/// value.
-inline unsigned getStackAlignmentFromAttrs(Attributes A) {
-  Attributes StackAlign = A & Attribute::StackAlignment;
-  if (!StackAlign)
-    return 0;
-
-  return 1U << ((StackAlign.Raw() >> 26) - 1);
-}
-
 /// This returns an integer containing an encoding of all the
 /// LLVM attributes found in the given attribute bitset.  Any
 /// change to this encoding is a breaking change to bitcode
@@ -237,9 +341,9 @@
   // 11 bits.
 
   uint64_t EncodedAttrs = Attrs.Raw() & 0xffff;
-  if (Attrs & Attribute::Alignment)
+  if (Attrs.hasAlignmentAttr())
     EncodedAttrs |= (1ull << 16) <<
-      (((Attrs & Attribute::Alignment).Raw()-1) >> 16);
+      ((Attrs.getRawAlignment() - 1) >> 16);
   EncodedAttrs |= (Attrs.Raw() & (0xfffull << 21)) << 11;
 
   return EncodedAttrs;
@@ -258,18 +362,12 @@
 
   Attributes Attrs(EncodedAttrs & 0xffff);
   if (Alignment)
-    Attrs |= Attribute::constructAlignmentFromInt(Alignment);
+    Attrs |= Attributes::constructAlignmentFromInt(Alignment);
   Attrs |= Attributes((EncodedAttrs & (0xfffull << 32)) >> 11);
 
   return Attrs;
 }
 
-
-/// The set of Attributes set in Attributes is converted to a
-/// string of equivalent mnemonics. This is, presumably, for writing out
-/// the mnemonics for the assembly writer.
-/// @brief Convert attribute bits to text
-std::string getAsString(Attributes Attrs);
 } // end namespace Attribute
 
 /// This is just a pair of values to associate a set of attributes
@@ -347,13 +445,13 @@
   /// paramHasAttr - Return true if the specified parameter index has the
   /// specified attribute set.
   bool paramHasAttr(unsigned Idx, Attributes Attr) const {
-    return getAttributes(Idx) & Attr;
+    return getAttributes(Idx).hasAttributes(Attr);
   }
 
   /// getParamAlignment - Return the alignment for the specified function
   /// parameter.
   unsigned getParamAlignment(unsigned Idx) const {
-    return Attribute::getAlignmentFromAttrs(getAttributes(Idx));
+    return getAttributes(Idx).getAlignment();
   }
 
   /// hasAttrSomewhere - Return true if the specified attribute is set for at
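
The alignment helpers that move into the class here keep the same compact encoding as before: log2(alignment) + 1, stored as a bitfield starting at bit 16 (bit 26 for stack alignment). A worked round trip for a 16-byte alignment (arithmetic illustration only, not code from the patch):

   // Worked example of the alignment bit-encoding.
   unsigned roundTripAlign16() {
     // constructAlignmentFromInt(16): Log2_32(16) == 4,
     // so the stored field is (4 + 1) << 16 == 0x50000.
     unsigned Encoded = (4 + 1) << 16;
     // getAlignment() undoes the bias: 1U << ((Encoded >> 16) - 1).
     unsigned Decoded = 1U << ((Encoded >> 16) - 1);
     return Decoded;  // == 16
   }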

Modified: llvm/branches/R600/include/llvm/CodeGen/ValueTypes.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/include/llvm/CodeGen/ValueTypes.h?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/include/llvm/CodeGen/ValueTypes.h (original)
+++ llvm/branches/R600/include/llvm/CodeGen/ValueTypes.h Fri Sep 21 12:29:50 2012
@@ -56,50 +56,56 @@
       FIRST_FP_VALUETYPE = f16,
       LAST_FP_VALUETYPE  = ppcf128,
 
-      v2i8           =  13,   //  2 x i8
-      v4i8           =  14,   //  4 x i8
-      v8i8           =  15,   //  8 x i8
-      v16i8          =  16,   // 16 x i8
-      v32i8          =  17,   // 32 x i8
-      v2i16          =  18,   //  2 x i16
-      v4i16          =  19,   //  4 x i16
-      v8i16          =  20,   //  8 x i16
-      v16i16         =  21,   // 16 x i16
-      v2i32          =  22,   //  2 x i32
-      v4i32          =  23,   //  4 x i32
-      v8i32          =  24,   //  8 x i32
-      v16i32         =  25,   // 16 x i32
-      v1i64          =  26,   //  1 x i64
-      v2i64          =  27,   //  2 x i64
-      v4i64          =  28,   //  4 x i64
-      v8i64          =  29,   //  8 x i64
-      v16i64         =  30,   // 16 x i64
-
-      v2f16          =  31,   //  2 x f16
-      v2f32          =  32,   //  2 x f32
-      v4f32          =  33,   //  4 x f32
-      v8f32          =  34,   //  8 x f32
-      v2f64          =  35,   //  2 x f64
-      v4f64          =  36,   //  4 x f64
+      v2i1           =  13,   //  2 x i1
+      v4i1           =  14,   //  4 x i1
+      v8i1           =  15,   //  8 x i1
+      v16i1          =  16,   // 16 x i1
+      v2i8           =  17,   //  2 x i8
+      v4i8           =  18,   //  4 x i8
+      v8i8           =  19,   //  8 x i8
+      v16i8          =  20,   // 16 x i8
+      v32i8          =  21,   // 32 x i8
+      v1i16          =  22,   //  1 x i16
+      v2i16          =  23,   //  2 x i16
+      v4i16          =  24,   //  4 x i16
+      v8i16          =  25,   //  8 x i16
+      v16i16         =  26,   // 16 x i16
+      v1i32          =  27,   //  1 x i32
+      v2i32          =  28,   //  2 x i32
+      v4i32          =  29,   //  4 x i32
+      v8i32          =  30,   //  8 x i32
+      v16i32         =  31,   // 16 x i32
+      v1i64          =  32,   //  1 x i64
+      v2i64          =  33,   //  2 x i64
+      v4i64          =  34,   //  4 x i64
+      v8i64          =  35,   //  8 x i64
+      v16i64         =  36,   // 16 x i64
+
+      v2f16          =  37,   //  2 x f16
+      v2f32          =  38,   //  2 x f32
+      v4f32          =  39,   //  4 x f32
+      v8f32          =  40,   //  8 x f32
+      v2f64          =  41,   //  2 x f64
+      v4f64          =  42,   //  4 x f64
 
-      FIRST_VECTOR_VALUETYPE = v2i8,
+      FIRST_VECTOR_VALUETYPE = v2i1,
       LAST_VECTOR_VALUETYPE  = v4f64,
-      FIRST_INTEGER_VECTOR_VALUETYPE = v2i8,
+      FIRST_INTEGER_VECTOR_VALUETYPE = v2i1,
       LAST_INTEGER_VECTOR_VALUETYPE = v16i64,
       FIRST_FP_VECTOR_VALUETYPE = v2f16,
       LAST_FP_VECTOR_VALUETYPE = v4f64,
 
-      x86mmx         =  37,   // This is an X86 MMX value
+      x86mmx         =  43,   // This is an X86 MMX value
 
-      Glue           =  38,   // This glues nodes together during pre-RA sched
+      Glue           =  44,   // This glues nodes together during pre-RA sched
 
-      isVoid         =  39,   // This has no value
+      isVoid         =  45,   // This has no value
 
-      Untyped        =  40,   // This value takes a register, but has
+      Untyped        =  46,   // This value takes a register, but has
                               // unspecified type.  The register class
                               // will be determined by the opcode.
 
-      LAST_VALUETYPE =  41,   // This always remains at the end of the list.
+      LAST_VALUETYPE =  47,   // This always remains at the end of the list.
 
       // This is the current maximum for LAST_VALUETYPE.
       // MVT::MAX_ALLOWED_VALUETYPE is used for asserts and to size bit vectors
@@ -233,15 +239,21 @@
       switch (SimpleTy) {
       default:
         llvm_unreachable("Not a vector MVT!");
+      case v2i1 :
+      case v4i1 :
+      case v8i1 :
+      case v16i1: return i1;
       case v2i8 :
       case v4i8 :
       case v8i8 :
       case v16i8:
       case v32i8: return i8;
+      case v1i16:
       case v2i16:
       case v4i16:
       case v8i16:
       case v16i16: return i16;
+      case v1i32:
       case v2i32:
       case v4i32:
       case v8i32:
@@ -265,21 +277,25 @@
       default:
         llvm_unreachable("Not a vector MVT!");
       case v32i8: return 32;
+      case v16i1:
       case v16i8:
       case v16i16:
       case v16i32:
       case v16i64:return 16;
+      case v8i1:
       case v8i8 :
       case v8i16:
       case v8i32:
       case v8i64:
       case v8f32: return 8;
+      case v4i1:
       case v4i8:
       case v4i16:
       case v4i32:
       case v4i64:
       case v4f32:
       case v4f64: return 4;
+      case v2i1:
       case v2i8:
       case v2i16:
       case v2i32:
@@ -287,6 +303,8 @@
       case v2f16:
       case v2f32:
       case v2f64: return 2;
+      case v1i16:
+      case v1i32:
       case v1i64: return 1;
       }
     }
@@ -302,15 +320,21 @@
       default:
         llvm_unreachable("getSizeInBits called on extended MVT.");
       case i1  :  return 1;
-      case i8  :  return 8;
+      case v2i1:  return 2;
+      case v4i1:  return 4;
+      case i8  :
+      case v8i1: return 8;
       case i16 :
       case f16:
-      case v2i8:  return 16;
+      case v16i1:
+      case v2i8:
+      case v1i16: return 16;
       case f32 :
       case i32 :
       case v4i8:
       case v2i16:
-      case v2f16: return 32;
+      case v2f16: 
+      case v1i32: return 32;
       case x86mmx:
       case f64 :
       case i64 :
@@ -393,6 +417,12 @@
       switch (VT.SimpleTy) {
       default:
         break;
+      case MVT::i1:
+        if (NumElements == 2)  return MVT::v2i1;
+        if (NumElements == 4)  return MVT::v4i1;
+        if (NumElements == 8)  return MVT::v8i1;
+        if (NumElements == 16) return MVT::v16i1;
+        break;
       case MVT::i8:
         if (NumElements == 2)  return MVT::v2i8;
         if (NumElements == 4)  return MVT::v4i8;
@@ -401,12 +431,14 @@
         if (NumElements == 32) return MVT::v32i8;
         break;
       case MVT::i16:
+        if (NumElements == 1)  return MVT::v1i16;
         if (NumElements == 2)  return MVT::v2i16;
         if (NumElements == 4)  return MVT::v4i16;
         if (NumElements == 8)  return MVT::v8i16;
         if (NumElements == 16) return MVT::v16i16;
         break;
       case MVT::i32:
+        if (NumElements == 1)  return MVT::v1i32;
         if (NumElements == 2)  return MVT::v2i32;
         if (NumElements == 4)  return MVT::v4i32;
         if (NumElements == 8)  return MVT::v8i32;
@@ -529,6 +561,23 @@
       return isSimple() ? V.isVector() : isExtendedVector();
     }
 
+    /// is16BitVector - Return true if this is a 16-bit vector type.
+    bool is16BitVector() const {
+      if (!isSimple())
+        return isExtended16BitVector();
+
+      return (V == MVT::v2i8 || V == MVT::v1i16 || V == MVT::v16i1);
+    }
+
+    /// is32BitVector - Return true if this is a 32-bit vector type.
+    bool is32BitVector() const {
+      if (!isSimple())
+        return isExtended32BitVector();
+
+      return (V == MVT::v4i8 || V == MVT::v2i16 || V == MVT::v1i32);
+    }
+
     /// is64BitVector - Return true if this is a 64-bit vector type.
     bool is64BitVector() const {
       return isSimple() ? V.is64BitVector() : isExtended64BitVector();
@@ -740,6 +789,8 @@
     bool isExtendedFloatingPoint() const;
     bool isExtendedInteger() const;
     bool isExtendedVector() const;
+    bool isExtended16BitVector() const;
+    bool isExtended32BitVector() const;
     bool isExtended64BitVector() const;
     bool isExtended128BitVector() const;
     bool isExtended256BitVector() const;
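
Putting the new i1-vector and single-element types together with the accessors shown above, the expected behavior looks like this (illustrative, relying only on helpers visible in this header):

   #include "llvm/CodeGen/ValueTypes.h"
   #include <cassert>

   void checkV8i1() {
     llvm::MVT VT = llvm::MVT::getVectorVT(llvm::MVT::i1, 8);  // v8i1
     assert(VT == llvm::MVT::v8i1);
     assert(VT.getVectorElementType() == llvm::MVT::i1);
     assert(VT.getVectorNumElements() == 8);
     assert(VT.getSizeInBits() == 8);  // eight 1-bit lanes
   }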

Modified: llvm/branches/R600/include/llvm/CodeGen/ValueTypes.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/include/llvm/CodeGen/ValueTypes.td?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/include/llvm/CodeGen/ValueTypes.td (original)
+++ llvm/branches/R600/include/llvm/CodeGen/ValueTypes.td Fri Sep 21 12:29:50 2012
@@ -33,36 +33,42 @@
 def f128   : ValueType<128, 11>;   // 128-bit floating point value
 def ppcf128: ValueType<128, 12>;   // PPC 128-bit floating point value
 
-def v2i8   : ValueType<16 , 13>;   //  2 x i8  vector value
-def v4i8   : ValueType<32 , 14>;   //  4 x i8  vector value
-def v8i8   : ValueType<64 , 15>;   //  8 x i8  vector value
-def v16i8  : ValueType<128, 16>;   // 16 x i8  vector value
-def v32i8  : ValueType<256, 17>;   // 32 x i8 vector value
-def v2i16  : ValueType<32 , 18>;   //  2 x i16 vector value
-def v4i16  : ValueType<64 , 19>;   //  4 x i16 vector value
-def v8i16  : ValueType<128, 20>;   //  8 x i16 vector value
-def v16i16 : ValueType<256, 21>;   // 16 x i16 vector value
-def v2i32  : ValueType<64 , 22>;   //  2 x i32 vector value
-def v4i32  : ValueType<128, 23>;   //  4 x i32 vector value
-def v8i32  : ValueType<256, 24>;   //  8 x i32 vector value
-def v16i32 : ValueType<512, 25>;   // 16 x i32 vector value
-def v1i64  : ValueType<64 , 26>;   //  1 x i64 vector value
-def v2i64  : ValueType<128, 27>;   //  2 x i64 vector value
-def v4i64  : ValueType<256, 28>;   //  4 x i64 vector value
-def v8i64  : ValueType<512, 29>;   //  8 x i64 vector value
-def v16i64 : ValueType<1024,30>;  // 16 x i64 vector value
-
-def v2f16  : ValueType<32 , 31>;   //  2 x f16 vector value
-def v2f32  : ValueType<64 , 32>;   //  2 x f32 vector value
-def v4f32  : ValueType<128, 33>;   //  4 x f32 vector value
-def v8f32  : ValueType<256, 34>;   //  8 x f32 vector value
-def v2f64  : ValueType<128, 35>;   //  2 x f64 vector value
-def v4f64  : ValueType<256, 36>;   //  4 x f64 vector value
-
-def x86mmx : ValueType<64 , 37>;   // X86 MMX value
-def FlagVT : ValueType<0  , 38>;   // Pre-RA sched glue
-def isVoid : ValueType<0  , 39>;   // Produces no value
-def untyped: ValueType<8  , 40>;   // Produces an untyped value
+def v2i1   : ValueType<2 ,  13>;   //  2 x i1  vector value
+def v4i1   : ValueType<4 ,  14>;   //  4 x i1  vector value
+def v8i1   : ValueType<8 ,  15>;   //  8 x i1  vector value
+def v16i1  : ValueType<16,  16>;   // 16 x i1  vector value
+def v2i8   : ValueType<16 , 17>;   //  2 x i8  vector value
+def v4i8   : ValueType<32 , 18>;   //  4 x i8  vector value
+def v8i8   : ValueType<64 , 19>;   //  8 x i8  vector value
+def v16i8  : ValueType<128, 20>;   // 16 x i8  vector value
+def v32i8  : ValueType<256, 21>;   // 32 x i8 vector value
+def v1i16  : ValueType<16 , 22>;   //  1 x i16 vector value
+def v2i16  : ValueType<32 , 23>;   //  2 x i16 vector value
+def v4i16  : ValueType<64 , 24>;   //  4 x i16 vector value
+def v8i16  : ValueType<128, 25>;   //  8 x i16 vector value
+def v16i16 : ValueType<256, 26>;   // 16 x i16 vector value
+def v1i32  : ValueType<32 , 27>;   //  1 x i32 vector value
+def v2i32  : ValueType<64 , 28>;   //  2 x i32 vector value
+def v4i32  : ValueType<128, 29>;   //  4 x i32 vector value
+def v8i32  : ValueType<256, 30>;   //  8 x i32 vector value
+def v16i32 : ValueType<512, 31>;   // 16 x i32 vector value
+def v1i64  : ValueType<64 , 32>;   //  1 x i64 vector value
+def v2i64  : ValueType<128, 33>;   //  2 x i64 vector value
+def v4i64  : ValueType<256, 34>;   //  4 x i64 vector value
+def v8i64  : ValueType<512, 35>;   //  8 x i64 vector value
+def v16i64 : ValueType<1024,36>;   // 16 x i64 vector value
+
+def v2f16  : ValueType<32 , 37>;   //  2 x f16 vector value
+def v2f32  : ValueType<64 , 38>;   //  2 x f32 vector value
+def v4f32  : ValueType<128, 39>;   //  4 x f32 vector value
+def v8f32  : ValueType<256, 40>;   //  8 x f32 vector value
+def v2f64  : ValueType<128, 41>;   //  2 x f64 vector value
+def v4f64  : ValueType<256, 42>;   //  4 x f64 vector value
+
+def x86mmx : ValueType<64 , 43>;   // X86 MMX value
+def FlagVT : ValueType<0  , 44>;   // Pre-RA sched glue
+def isVoid : ValueType<0  , 45>;   // Produces no value
+def untyped: ValueType<8  , 46>;   // Produces an untyped value
 
 def MetadataVT: ValueType<0, 250>; // Metadata
 

Modified: llvm/branches/R600/include/llvm/Intrinsics.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/include/llvm/Intrinsics.td?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/include/llvm/Intrinsics.td (original)
+++ llvm/branches/R600/include/llvm/Intrinsics.td Fri Sep 21 12:29:50 2012
@@ -121,15 +121,21 @@
 def llvm_x86mmx_ty     : LLVMType<x86mmx>;
 def llvm_ptrx86mmx_ty  : LLVMPointerType<llvm_x86mmx_ty>;         // <1 x i64>*
 
+def llvm_v2i1_ty       : LLVMType<v2i1>;     //  2 x i1
+def llvm_v4i1_ty       : LLVMType<v4i1>;     //  4 x i1
+def llvm_v8i1_ty       : LLVMType<v8i1>;     //  8 x i1
+def llvm_v16i1_ty      : LLVMType<v16i1>;    // 16 x i1
 def llvm_v2i8_ty       : LLVMType<v2i8>;     //  2 x i8
 def llvm_v4i8_ty       : LLVMType<v4i8>;     //  4 x i8
 def llvm_v8i8_ty       : LLVMType<v8i8>;     //  8 x i8
 def llvm_v16i8_ty      : LLVMType<v16i8>;    // 16 x i8
 def llvm_v32i8_ty      : LLVMType<v32i8>;    // 32 x i8
+def llvm_v1i16_ty      : LLVMType<v1i16>;    //  1 x i16
 def llvm_v2i16_ty      : LLVMType<v2i16>;    //  2 x i16
 def llvm_v4i16_ty      : LLVMType<v4i16>;    //  4 x i16
 def llvm_v8i16_ty      : LLVMType<v8i16>;    //  8 x i16
 def llvm_v16i16_ty     : LLVMType<v16i16>;   // 16 x i16
+def llvm_v1i32_ty      : LLVMType<v1i32>;    //  1 x i32
 def llvm_v2i32_ty      : LLVMType<v2i32>;    //  2 x i32
 def llvm_v4i32_ty      : LLVMType<v4i32>;    //  4 x i32
 def llvm_v8i32_ty      : LLVMType<v8i32>;    //  8 x i32

Modified: llvm/branches/R600/include/llvm/IntrinsicsARM.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/include/llvm/IntrinsicsARM.td?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/include/llvm/IntrinsicsARM.td (original)
+++ llvm/branches/R600/include/llvm/IntrinsicsARM.td Fri Sep 21 12:29:50 2012
@@ -16,147 +16,136 @@
 // TLS
 
 let TargetPrefix = "arm" in {  // All intrinsics start with "llvm.arm.".
-  def int_arm_thread_pointer : GCCBuiltin<"__builtin_thread_pointer">,
-              Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>;
-}
+
+def int_arm_thread_pointer : GCCBuiltin<"__builtin_thread_pointer">,
+            Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>;
 
 //===----------------------------------------------------------------------===//
 // Saturating Arithmetic
 
-let TargetPrefix = "arm" in {  // All intrinsics start with "llvm.arm.".
-  def int_arm_qadd : GCCBuiltin<"__builtin_arm_qadd">,
-              Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
-                        [IntrNoMem, Commutative]>;
-  def int_arm_qsub : GCCBuiltin<"__builtin_arm_qsub">,
-              Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-  def int_arm_ssat : GCCBuiltin<"__builtin_arm_ssat">,
-              Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-  def int_arm_usat : GCCBuiltin<"__builtin_arm_usat">,
-              Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-}
+def int_arm_qadd : GCCBuiltin<"__builtin_arm_qadd">,
+    Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
+    [IntrNoMem, Commutative]>;
+def int_arm_qsub : GCCBuiltin<"__builtin_arm_qsub">,
+    Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_arm_ssat : GCCBuiltin<"__builtin_arm_ssat">,
+    Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_arm_usat : GCCBuiltin<"__builtin_arm_usat">,
+    Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
 
 //===----------------------------------------------------------------------===//
 // Load and Store exclusive doubleword
 
-let TargetPrefix = "arm" in {  // All intrinsics start with "llvm.arm.".
-  def int_arm_strexd : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty,
-                                  llvm_ptr_ty], [IntrReadWriteArgMem]>;
-  def int_arm_ldrexd : Intrinsic<[llvm_i32_ty, llvm_i32_ty], [llvm_ptr_ty],
-                                 [IntrReadArgMem]>;
-}
+def int_arm_strexd : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty,
+    llvm_ptr_ty], [IntrReadWriteArgMem]>;
+def int_arm_ldrexd : Intrinsic<[llvm_i32_ty, llvm_i32_ty], [llvm_ptr_ty],
+    [IntrReadArgMem]>;
 
 //===----------------------------------------------------------------------===//
 // VFP
 
-let TargetPrefix = "arm" in {  // All intrinsics start with "llvm.arm.".
-  def int_arm_get_fpscr : GCCBuiltin<"__builtin_arm_get_fpscr">, 
-                         Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>;
-  def int_arm_set_fpscr : GCCBuiltin<"__builtin_arm_set_fpscr">, 
-                         Intrinsic<[], [llvm_i32_ty], []>;
-  def int_arm_vcvtr     : Intrinsic<[llvm_float_ty], [llvm_anyfloat_ty],
-                                    [IntrNoMem]>;
-  def int_arm_vcvtru    : Intrinsic<[llvm_float_ty], [llvm_anyfloat_ty],
-                                    [IntrNoMem]>;
-}
+def int_arm_get_fpscr : GCCBuiltin<"__builtin_arm_get_fpscr">,
+                       Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>;
+def int_arm_set_fpscr : GCCBuiltin<"__builtin_arm_set_fpscr">,
+                       Intrinsic<[], [llvm_i32_ty], []>;
+def int_arm_vcvtr     : Intrinsic<[llvm_float_ty], [llvm_anyfloat_ty],
+                                  [IntrNoMem]>;
+def int_arm_vcvtru    : Intrinsic<[llvm_float_ty], [llvm_anyfloat_ty],
+                                  [IntrNoMem]>;
 
 //===----------------------------------------------------------------------===//
 // Coprocessor
 
-let TargetPrefix = "arm" in {  // All intrinsics start with "llvm.arm.".
-  // Move to coprocessor
-  def int_arm_mcr : GCCBuiltin<"__builtin_arm_mcr">,
-     Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
-                    llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>;
-  def int_arm_mcr2 : GCCBuiltin<"__builtin_arm_mcr2">,
-     Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
-                    llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>;
-
-  // Move from coprocessor
-  def int_arm_mrc : GCCBuiltin<"__builtin_arm_mrc">,
-     Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
-                               llvm_i32_ty, llvm_i32_ty], []>;
-  def int_arm_mrc2 : GCCBuiltin<"__builtin_arm_mrc2">,
-     Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
-                               llvm_i32_ty, llvm_i32_ty], []>;
-
-  // Coprocessor data processing
-  def int_arm_cdp : GCCBuiltin<"__builtin_arm_cdp">,
-     Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
-                    llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>;
-  def int_arm_cdp2 : GCCBuiltin<"__builtin_arm_cdp2">,
-     Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
-                    llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>;
-
-  // Move from two registers to coprocessor
-  def int_arm_mcrr : GCCBuiltin<"__builtin_arm_mcrr">,
-     Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
-                    llvm_i32_ty, llvm_i32_ty], []>;
-  def int_arm_mcrr2 : GCCBuiltin<"__builtin_arm_mcrr2">,
-     Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
-                    llvm_i32_ty, llvm_i32_ty], []>;
-}
+// Move to coprocessor
+def int_arm_mcr : GCCBuiltin<"__builtin_arm_mcr">,
+   Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
+                  llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>;
+def int_arm_mcr2 : GCCBuiltin<"__builtin_arm_mcr2">,
+   Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
+                  llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>;
+
+// Move from coprocessor
+def int_arm_mrc : GCCBuiltin<"__builtin_arm_mrc">,
+   Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
+                             llvm_i32_ty, llvm_i32_ty], []>;
+def int_arm_mrc2 : GCCBuiltin<"__builtin_arm_mrc2">,
+   Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
+                             llvm_i32_ty, llvm_i32_ty], []>;
+
+// Coprocessor data processing
+def int_arm_cdp : GCCBuiltin<"__builtin_arm_cdp">,
+   Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
+                  llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>;
+def int_arm_cdp2 : GCCBuiltin<"__builtin_arm_cdp2">,
+   Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
+                  llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>;
+
+// Move from two registers to coprocessor
+def int_arm_mcrr : GCCBuiltin<"__builtin_arm_mcrr">,
+   Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
+                  llvm_i32_ty, llvm_i32_ty], []>;
+def int_arm_mcrr2 : GCCBuiltin<"__builtin_arm_mcrr2">,
+   Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
+                  llvm_i32_ty, llvm_i32_ty], []>;
 
 //===----------------------------------------------------------------------===//
 // Advanced SIMD (NEON)
 
-let TargetPrefix = "arm" in {  // All intrinsics start with "llvm.arm.".
-
-  // The following classes do not correspond directly to GCC builtins.
-  class Neon_1Arg_Intrinsic
-    : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>], [IntrNoMem]>;
-  class Neon_1Arg_Narrow_Intrinsic
-    : Intrinsic<[llvm_anyvector_ty],
-                [LLVMExtendedElementVectorType<0>], [IntrNoMem]>;
-  class Neon_2Arg_Intrinsic
-    : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
-                [IntrNoMem]>;
-  class Neon_2Arg_Narrow_Intrinsic
-    : Intrinsic<[llvm_anyvector_ty],
-                [LLVMExtendedElementVectorType<0>,
-                 LLVMExtendedElementVectorType<0>],
-                [IntrNoMem]>;
-  class Neon_2Arg_Long_Intrinsic
-    : Intrinsic<[llvm_anyvector_ty],
-                [LLVMTruncatedElementVectorType<0>,
-                 LLVMTruncatedElementVectorType<0>],
-                [IntrNoMem]>;
-  class Neon_3Arg_Intrinsic
-    : Intrinsic<[llvm_anyvector_ty],
-                [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
-                [IntrNoMem]>;
-  class Neon_3Arg_Long_Intrinsic
-    : Intrinsic<[llvm_anyvector_ty],
-                [LLVMMatchType<0>,
-                 LLVMTruncatedElementVectorType<0>,
-                 LLVMTruncatedElementVectorType<0>],
-                [IntrNoMem]>;
-  class Neon_CvtFxToFP_Intrinsic
-    : Intrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>;
-  class Neon_CvtFPToFx_Intrinsic
-    : Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty, llvm_i32_ty], [IntrNoMem]>;
-
-  // The table operands for VTBL and VTBX consist of 1 to 4 v8i8 vectors.
-  // Besides the table, VTBL has one other v8i8 argument and VTBX has two.
-  // Overall, the classes range from 2 to 6 v8i8 arguments.
-  class Neon_Tbl2Arg_Intrinsic
-    : Intrinsic<[llvm_v8i8_ty],
-                [llvm_v8i8_ty, llvm_v8i8_ty], [IntrNoMem]>;
-  class Neon_Tbl3Arg_Intrinsic
-    : Intrinsic<[llvm_v8i8_ty],
-                [llvm_v8i8_ty, llvm_v8i8_ty, llvm_v8i8_ty], [IntrNoMem]>;
-  class Neon_Tbl4Arg_Intrinsic
-    : Intrinsic<[llvm_v8i8_ty],
-                [llvm_v8i8_ty, llvm_v8i8_ty, llvm_v8i8_ty, llvm_v8i8_ty],
-                [IntrNoMem]>;
-  class Neon_Tbl5Arg_Intrinsic
-    : Intrinsic<[llvm_v8i8_ty],
-                [llvm_v8i8_ty, llvm_v8i8_ty, llvm_v8i8_ty, llvm_v8i8_ty,
-                 llvm_v8i8_ty], [IntrNoMem]>;
-  class Neon_Tbl6Arg_Intrinsic
-    : Intrinsic<[llvm_v8i8_ty],
-                [llvm_v8i8_ty, llvm_v8i8_ty, llvm_v8i8_ty, llvm_v8i8_ty,
-                 llvm_v8i8_ty, llvm_v8i8_ty], [IntrNoMem]>;
-}
+// The following classes do not correspond directly to GCC builtins.
+class Neon_1Arg_Intrinsic
+  : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>], [IntrNoMem]>;
+class Neon_1Arg_Narrow_Intrinsic
+  : Intrinsic<[llvm_anyvector_ty],
+              [LLVMExtendedElementVectorType<0>], [IntrNoMem]>;
+class Neon_2Arg_Intrinsic
+  : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
+              [IntrNoMem]>;
+class Neon_2Arg_Narrow_Intrinsic
+  : Intrinsic<[llvm_anyvector_ty],
+              [LLVMExtendedElementVectorType<0>,
+               LLVMExtendedElementVectorType<0>],
+              [IntrNoMem]>;
+class Neon_2Arg_Long_Intrinsic
+  : Intrinsic<[llvm_anyvector_ty],
+              [LLVMTruncatedElementVectorType<0>,
+               LLVMTruncatedElementVectorType<0>],
+              [IntrNoMem]>;
+class Neon_3Arg_Intrinsic
+  : Intrinsic<[llvm_anyvector_ty],
+              [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
+              [IntrNoMem]>;
+class Neon_3Arg_Long_Intrinsic
+  : Intrinsic<[llvm_anyvector_ty],
+              [LLVMMatchType<0>,
+               LLVMTruncatedElementVectorType<0>,
+               LLVMTruncatedElementVectorType<0>],
+              [IntrNoMem]>;
+class Neon_CvtFxToFP_Intrinsic
+  : Intrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>;
+class Neon_CvtFPToFx_Intrinsic
+  : Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty, llvm_i32_ty], [IntrNoMem]>;
+
+// The table operands for VTBL and VTBX consist of 1 to 4 v8i8 vectors.
+// Besides the table, VTBL has one other v8i8 argument and VTBX has two.
+// Overall, the classes range from 2 to 6 v8i8 arguments.
+class Neon_Tbl2Arg_Intrinsic
+  : Intrinsic<[llvm_v8i8_ty],
+              [llvm_v8i8_ty, llvm_v8i8_ty], [IntrNoMem]>;
+class Neon_Tbl3Arg_Intrinsic
+  : Intrinsic<[llvm_v8i8_ty],
+              [llvm_v8i8_ty, llvm_v8i8_ty, llvm_v8i8_ty], [IntrNoMem]>;
+class Neon_Tbl4Arg_Intrinsic
+  : Intrinsic<[llvm_v8i8_ty],
+              [llvm_v8i8_ty, llvm_v8i8_ty, llvm_v8i8_ty, llvm_v8i8_ty],
+              [IntrNoMem]>;
+class Neon_Tbl5Arg_Intrinsic
+  : Intrinsic<[llvm_v8i8_ty],
+              [llvm_v8i8_ty, llvm_v8i8_ty, llvm_v8i8_ty, llvm_v8i8_ty,
+               llvm_v8i8_ty], [IntrNoMem]>;
+class Neon_Tbl6Arg_Intrinsic
+  : Intrinsic<[llvm_v8i8_ty],
+              [llvm_v8i8_ty, llvm_v8i8_ty, llvm_v8i8_ty, llvm_v8i8_ty,
+               llvm_v8i8_ty, llvm_v8i8_ty], [IntrNoMem]>;
 
 // Arithmetic ops
 
@@ -209,20 +198,18 @@
 def int_arm_neon_vrsubhn : Neon_2Arg_Narrow_Intrinsic;
 
 // Vector Absolute Compare.
-let TargetPrefix = "arm" in {
-  def int_arm_neon_vacged : Intrinsic<[llvm_v2i32_ty],
-                                      [llvm_v2f32_ty, llvm_v2f32_ty],
-                                      [IntrNoMem]>;
-  def int_arm_neon_vacgeq : Intrinsic<[llvm_v4i32_ty],
-                                      [llvm_v4f32_ty, llvm_v4f32_ty],
-                                      [IntrNoMem]>;
-  def int_arm_neon_vacgtd : Intrinsic<[llvm_v2i32_ty],
-                                      [llvm_v2f32_ty, llvm_v2f32_ty],
-                                      [IntrNoMem]>;
-  def int_arm_neon_vacgtq : Intrinsic<[llvm_v4i32_ty],
-                                      [llvm_v4f32_ty, llvm_v4f32_ty],
-                                      [IntrNoMem]>;
-}
+def int_arm_neon_vacged : Intrinsic<[llvm_v2i32_ty],
+                                    [llvm_v2f32_ty, llvm_v2f32_ty],
+                                    [IntrNoMem]>;
+def int_arm_neon_vacgeq : Intrinsic<[llvm_v4i32_ty],
+                                    [llvm_v4f32_ty, llvm_v4f32_ty],
+                                    [IntrNoMem]>;
+def int_arm_neon_vacgtd : Intrinsic<[llvm_v2i32_ty],
+                                    [llvm_v2f32_ty, llvm_v2f32_ty],
+                                    [IntrNoMem]>;
+def int_arm_neon_vacgtq : Intrinsic<[llvm_v4i32_ty],
+                                    [llvm_v4f32_ty, llvm_v4f32_ty],
+                                    [IntrNoMem]>;
 
 // Vector Absolute Differences.
 def int_arm_neon_vabds : Neon_2Arg_Intrinsic;
@@ -235,24 +222,20 @@
 // Note: This is different than the other "long" NEON intrinsics because
 // the result vector has half as many elements as the source vector.
 // The source and destination vector types must be specified separately.
-let TargetPrefix = "arm" in {
-  def int_arm_neon_vpaddls : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty],
-                                       [IntrNoMem]>;
-  def int_arm_neon_vpaddlu : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty],
-                                       [IntrNoMem]>;
-}
+def int_arm_neon_vpaddls : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty],
+                                     [IntrNoMem]>;
+def int_arm_neon_vpaddlu : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty],
+                                     [IntrNoMem]>;
 
 // Vector Pairwise Add and Accumulate Long.
 // Note: This is similar to vpaddl but the destination vector also appears
 // as the first argument.
-let TargetPrefix = "arm" in {
-  def int_arm_neon_vpadals : Intrinsic<[llvm_anyvector_ty],
-                                       [LLVMMatchType<0>, llvm_anyvector_ty],
-                                       [IntrNoMem]>;
-  def int_arm_neon_vpadalu : Intrinsic<[llvm_anyvector_ty],
-                                       [LLVMMatchType<0>, llvm_anyvector_ty],
-                                       [IntrNoMem]>;
-}
+def int_arm_neon_vpadals : Intrinsic<[llvm_anyvector_ty],
+                                     [LLVMMatchType<0>, llvm_anyvector_ty],
+                                     [IntrNoMem]>;
+def int_arm_neon_vpadalu : Intrinsic<[llvm_anyvector_ty],
+                                     [LLVMMatchType<0>, llvm_anyvector_ty],
+                                     [IntrNoMem]>;
 
 // Vector Pairwise Maximum and Minimum.
 def int_arm_neon_vpmaxs : Neon_2Arg_Intrinsic;
@@ -364,79 +347,83 @@
 def int_arm_neon_vtbx3 : Neon_Tbl5Arg_Intrinsic;
 def int_arm_neon_vtbx4 : Neon_Tbl6Arg_Intrinsic;
 
-let TargetPrefix = "arm" in {
+// De-interleaving vector loads from N-element structures.
+// Source operands are the address and alignment.
+def int_arm_neon_vld1 : Intrinsic<[llvm_anyvector_ty],
+                                  [llvm_ptr_ty, llvm_i32_ty],
+                                  [IntrReadArgMem]>;
+def int_arm_neon_vld2 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
+                                  [llvm_ptr_ty, llvm_i32_ty],
+                                  [IntrReadArgMem]>;
+def int_arm_neon_vld3 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
+                                   LLVMMatchType<0>],
+                                  [llvm_ptr_ty, llvm_i32_ty],
+                                  [IntrReadArgMem]>;
+def int_arm_neon_vld4 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
+                                   LLVMMatchType<0>, LLVMMatchType<0>],
+                                  [llvm_ptr_ty, llvm_i32_ty],
+                                  [IntrReadArgMem]>;
+
+// Vector load N-element structure to one lane.
+// Source operands are: the address, the N input vectors (since only one
+// lane is assigned), the lane number, and the alignment.
+def int_arm_neon_vld2lane : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
+                                      [llvm_ptr_ty, LLVMMatchType<0>,
+                                       LLVMMatchType<0>, llvm_i32_ty,
+                                       llvm_i32_ty], [IntrReadArgMem]>;
+def int_arm_neon_vld3lane : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
+                                       LLVMMatchType<0>],
+                                      [llvm_ptr_ty, LLVMMatchType<0>,
+                                       LLVMMatchType<0>, LLVMMatchType<0>,
+                                       llvm_i32_ty, llvm_i32_ty],
+                                      [IntrReadArgMem]>;
+def int_arm_neon_vld4lane : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
+                                       LLVMMatchType<0>, LLVMMatchType<0>],
+                                      [llvm_ptr_ty, LLVMMatchType<0>,
+                                       LLVMMatchType<0>, LLVMMatchType<0>,
+                                       LLVMMatchType<0>, llvm_i32_ty,
+                                       llvm_i32_ty], [IntrReadArgMem]>;
+
+// Interleaving vector stores from N-element structures.
+// Source operands are: the address, the N vectors, and the alignment.
+def int_arm_neon_vst1 : Intrinsic<[],
+                                  [llvm_ptr_ty, llvm_anyvector_ty,
+                                   llvm_i32_ty], [IntrReadWriteArgMem]>;
+def int_arm_neon_vst2 : Intrinsic<[],
+                                  [llvm_ptr_ty, llvm_anyvector_ty,
+                                   LLVMMatchType<0>, llvm_i32_ty],
+                                  [IntrReadWriteArgMem]>;
+def int_arm_neon_vst3 : Intrinsic<[],
+                                  [llvm_ptr_ty, llvm_anyvector_ty,
+                                   LLVMMatchType<0>, LLVMMatchType<0>,
+                                   llvm_i32_ty], [IntrReadWriteArgMem]>;
+def int_arm_neon_vst4 : Intrinsic<[],
+                                  [llvm_ptr_ty, llvm_anyvector_ty,
+                                   LLVMMatchType<0>, LLVMMatchType<0>,
+                                   LLVMMatchType<0>, llvm_i32_ty],
+                                  [IntrReadWriteArgMem]>;
+
+// Vector store N-element structure from one lane.
+// Source operands are: the address, the N vectors, the lane number, and
+// the alignment.
+def int_arm_neon_vst2lane : Intrinsic<[],
+                                      [llvm_ptr_ty, llvm_anyvector_ty,
+                                       LLVMMatchType<0>, llvm_i32_ty,
+                                       llvm_i32_ty], [IntrReadWriteArgMem]>;
+def int_arm_neon_vst3lane : Intrinsic<[],
+                                      [llvm_ptr_ty, llvm_anyvector_ty,
+                                       LLVMMatchType<0>, LLVMMatchType<0>,
+                                       llvm_i32_ty, llvm_i32_ty],
+                                      [IntrReadWriteArgMem]>;
+def int_arm_neon_vst4lane : Intrinsic<[],
+                                      [llvm_ptr_ty, llvm_anyvector_ty,
+                                       LLVMMatchType<0>, LLVMMatchType<0>,
+                                       LLVMMatchType<0>, llvm_i32_ty,
+                                       llvm_i32_ty], [IntrReadWriteArgMem]>;
+
+// Vector bitwise select.
+def int_arm_neon_vbsl : Intrinsic<[llvm_anyvector_ty],
+                        [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
+                        [IntrNoMem]>;
 
-  // De-interleaving vector loads from N-element structures.
-  // Source operands are the address and alignment.
-  def int_arm_neon_vld1 : Intrinsic<[llvm_anyvector_ty],
-                                    [llvm_ptr_ty, llvm_i32_ty],
-                                    [IntrReadArgMem]>;
-  def int_arm_neon_vld2 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
-                                    [llvm_ptr_ty, llvm_i32_ty],
-                                    [IntrReadArgMem]>;
-  def int_arm_neon_vld3 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
-                                     LLVMMatchType<0>],
-                                    [llvm_ptr_ty, llvm_i32_ty],
-                                    [IntrReadArgMem]>;
-  def int_arm_neon_vld4 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
-                                     LLVMMatchType<0>, LLVMMatchType<0>],
-                                    [llvm_ptr_ty, llvm_i32_ty],
-                                    [IntrReadArgMem]>;
-
-  // Vector load N-element structure to one lane.
-  // Source operands are: the address, the N input vectors (since only one
-  // lane is assigned), the lane number, and the alignment.
-  def int_arm_neon_vld2lane : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
-                                        [llvm_ptr_ty, LLVMMatchType<0>,
-                                         LLVMMatchType<0>, llvm_i32_ty,
-                                         llvm_i32_ty], [IntrReadArgMem]>;
-  def int_arm_neon_vld3lane : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
-                                         LLVMMatchType<0>],
-                                        [llvm_ptr_ty, LLVMMatchType<0>,
-                                         LLVMMatchType<0>, LLVMMatchType<0>,
-                                         llvm_i32_ty, llvm_i32_ty],
-                                        [IntrReadArgMem]>;
-  def int_arm_neon_vld4lane : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
-                                         LLVMMatchType<0>, LLVMMatchType<0>],
-                                        [llvm_ptr_ty, LLVMMatchType<0>,
-                                         LLVMMatchType<0>, LLVMMatchType<0>,
-                                         LLVMMatchType<0>, llvm_i32_ty,
-                                         llvm_i32_ty], [IntrReadArgMem]>;
-
-  // Interleaving vector stores from N-element structures.
-  // Source operands are: the address, the N vectors, and the alignment.
-  def int_arm_neon_vst1 : Intrinsic<[],
-                                    [llvm_ptr_ty, llvm_anyvector_ty,
-                                     llvm_i32_ty], [IntrReadWriteArgMem]>;
-  def int_arm_neon_vst2 : Intrinsic<[],
-                                    [llvm_ptr_ty, llvm_anyvector_ty,
-                                     LLVMMatchType<0>, llvm_i32_ty],
-                                    [IntrReadWriteArgMem]>;
-  def int_arm_neon_vst3 : Intrinsic<[],
-                                    [llvm_ptr_ty, llvm_anyvector_ty,
-                                     LLVMMatchType<0>, LLVMMatchType<0>,
-                                     llvm_i32_ty], [IntrReadWriteArgMem]>;
-  def int_arm_neon_vst4 : Intrinsic<[],
-                                    [llvm_ptr_ty, llvm_anyvector_ty,
-                                     LLVMMatchType<0>, LLVMMatchType<0>,
-                                     LLVMMatchType<0>, llvm_i32_ty],
-                                    [IntrReadWriteArgMem]>;
-
-  // Vector store N-element structure from one lane.
-  // Source operands are: the address, the N vectors, the lane number, and
-  // the alignment.
-  def int_arm_neon_vst2lane : Intrinsic<[],
-                                        [llvm_ptr_ty, llvm_anyvector_ty,
-                                         LLVMMatchType<0>, llvm_i32_ty,
-                                         llvm_i32_ty], [IntrReadWriteArgMem]>;
-  def int_arm_neon_vst3lane : Intrinsic<[],
-                                        [llvm_ptr_ty, llvm_anyvector_ty,
-                                         LLVMMatchType<0>, LLVMMatchType<0>,
-                                         llvm_i32_ty, llvm_i32_ty],
-                                        [IntrReadWriteArgMem]>;
-  def int_arm_neon_vst4lane : Intrinsic<[],
-                                        [llvm_ptr_ty, llvm_anyvector_ty,
-                                         LLVMMatchType<0>, LLVMMatchType<0>,
-                                         LLVMMatchType<0>, llvm_i32_ty,
-                                         llvm_i32_ty], [IntrReadWriteArgMem]>;
-}
+} // end TargetPrefix
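
For illustration (not part of this patch): the vld/vst intrinsics above are
overloaded on their vector type and take the address plus an explicit
alignment operand. A minimal C++ sketch of how a frontend might emit one of
them through IRBuilder, assuming the 3.x-era headers; emitVld2 and its
callers are hypothetical:

// Hypothetical helper: emit llvm.arm.neon.vld2.v8i8. Addr is assumed to
// be an i8* value; the result is the aggregate {<8 x i8>, <8 x i8>}.
#include "llvm/DerivedTypes.h"
#include "llvm/IRBuilder.h"
#include "llvm/Intrinsics.h"
#include "llvm/Module.h"

using namespace llvm;

static Value *emitVld2(IRBuilder<> &Builder, Module *M, Value *Addr) {
  // The intrinsic is overloaded on its result vector type; the source
  // operands are the address and the alignment in bytes.
  Type *VecTy = VectorType::get(Builder.getInt8Ty(), 8);
  Function *Vld2 =
      Intrinsic::getDeclaration(M, Intrinsic::arm_neon_vld2, VecTy);
  return Builder.CreateCall2(Vld2, Addr, Builder.getInt32(8));
}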

Modified: llvm/branches/R600/include/llvm/MC/MCParser/MCAsmLexer.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/include/llvm/MC/MCParser/MCAsmLexer.h?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/include/llvm/MC/MCParser/MCAsmLexer.h (original)
+++ llvm/branches/R600/include/llvm/MC/MCParser/MCAsmLexer.h Fri Sep 21 12:29:50 2012
@@ -40,6 +40,7 @@
     // No-value.
     EndOfStatement,
     Colon,
+    Space,
     Plus, Minus, Tilde,
     Slash,    // '/'
     BackSlash, // '\'
@@ -126,6 +127,7 @@
   void operator=(const MCAsmLexer &) LLVM_DELETED_FUNCTION;
 protected: // Can only create subclasses.
   const char *TokStart;
+  bool SkipSpace;
 
   MCAsmLexer();
 
@@ -175,6 +177,9 @@
 
   /// isNot - Check if the current token does not have kind \p K.
   bool isNot(AsmToken::TokenKind K) const { return CurTok.isNot(K); }
+
+  /// setSkipSpace - Set whether spaces should be ignored by the lexer.
+  void setSkipSpace(bool val) { SkipSpace = val; }
 };
 
 } // End llvm namespace
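
For illustration (not part of this patch): the new Space token and SkipSpace
flag let a client see whitespace explicitly, which is what the new macro
tests exercise. A hypothetical sketch of client code:

// Hypothetical: with SkipSpace cleared, the lexer surfaces
// AsmToken::Space tokens instead of silently consuming them.
#include "llvm/MC/MCParser/MCAsmLexer.h"

using namespace llvm;

static unsigned countSpaces(MCAsmLexer &Lexer) {
  Lexer.setSkipSpace(false);     // surface AsmToken::Space tokens
  unsigned NumSpaces = 0;
  while (Lexer.getTok().isNot(AsmToken::EndOfStatement) &&
         Lexer.getTok().isNot(AsmToken::Eof)) {
    if (Lexer.getTok().is(AsmToken::Space))
      ++NumSpaces;               // e.g. to delimit macro arguments
    Lexer.Lex();
  }
  Lexer.setSkipSpace(true);      // restore the default behavior
  return NumSpaces;
}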

Modified: llvm/branches/R600/include/llvm/Object/ELF.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/include/llvm/Object/ELF.h?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/include/llvm/Object/ELF.h (original)
+++ llvm/branches/R600/include/llvm/Object/ELF.h Fri Sep 21 12:29:50 2012
@@ -951,7 +951,18 @@
   case ELF::STT_FUNC:
   case ELF::STT_OBJECT:
   case ELF::STT_NOTYPE:
-    Result = symb->st_value + (Section ? Section->sh_addr : 0);
+    bool IsRelocatable;
+    switch(Header->e_type) {
+    case ELF::ET_EXEC:
+    case ELF::ET_DYN:
+      IsRelocatable = false;
+      break;
+    default:
+      IsRelocatable = true;
+    }
+    Result = symb->st_value;
+    if (IsRelocatable && Section != 0)
+      Result += Section->sh_addr;
     return object_error::success;
   default:
     Result = UnknownAddressOrSize;
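
In effect, the section base address is now folded in only for relocatable
objects; executables and shared objects already carry final virtual
addresses in st_value. Restated as a standalone sketch (the helper function
is hypothetical, not code from this patch):

// ET_EXEC/ET_DYN images store final virtual addresses; other types
// (e.g. ET_REL .o files) store section-relative values that still need
// the section base added.
#include "llvm/Support/ELF.h"

static uint64_t symbolVA(uint16_t EType, uint64_t StValue, uint64_t ShAddr) {
  bool IsRelocatable =
      (EType != llvm::ELF::ET_EXEC && EType != llvm::ELF::ET_DYN);
  return IsRelocatable ? StValue + ShAddr : StValue;
}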

Modified: llvm/branches/R600/include/llvm/Object/ObjectFile.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/include/llvm/Object/ObjectFile.h?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/include/llvm/Object/ObjectFile.h (original)
+++ llvm/branches/R600/include/llvm/Object/ObjectFile.h Fri Sep 21 12:29:50 2012
@@ -212,6 +212,8 @@
   error_code getNext(SymbolRef &Result) const;
 
   error_code getName(StringRef &Result) const;
+  /// Returns the symbol virtual address (i.e. address at which it will be
+  /// mapped).
   error_code getAddress(uint64_t &Result) const;
   error_code getFileOffset(uint64_t &Result) const;
   error_code getSize(uint64_t &Result) const;
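
A hypothetical caller relying on the clarified getAddress contract (loading
and error handling are trimmed; dumpSymbolAddresses is illustrative only):

// Print each symbol's mapped (virtual) address.
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;
using namespace llvm::object;

static void dumpSymbolAddresses(ObjectFile *Obj) {
  error_code EC;
  for (symbol_iterator I = Obj->begin_symbols(), E = Obj->end_symbols();
       I != E; I.increment(EC)) {
    StringRef Name;
    uint64_t Addr;
    if (I->getName(Name) || I->getAddress(Addr))
      continue; // skip symbols we cannot read
    outs() << Name << " @ " << format("0x%" PRIx64, Addr) << '\n';
  }
}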

Modified: llvm/branches/R600/include/llvm/Support/Memory.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/include/llvm/Support/Memory.h?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/include/llvm/Support/Memory.h (original)
+++ llvm/branches/R600/include/llvm/Support/Memory.h Fri Sep 21 12:29:50 2012
@@ -15,6 +15,7 @@
 #define LLVM_SYSTEM_MEMORY_H
 
 #include "llvm/Support/DataTypes.h"
+#include "llvm/Support/system_error.h"
 #include <string>
 
 namespace llvm {
@@ -43,6 +44,70 @@
   /// @brief An abstraction for memory operations.
   class Memory {
   public:
+    enum ProtectionFlags {
+      MF_READ  = 0x1000000,
+      MF_WRITE = 0x2000000,
+      MF_EXEC  = 0x4000000
+    };
+
+    /// This method allocates a block of memory that is suitable for loading
+    /// dynamically generated code (e.g. JIT). An attempt to allocate
+    /// \p NumBytes bytes of virtual memory is made.
+    /// \p NearBlock may point to an existing allocation in which case
+    /// an attempt is made to allocate more memory near the existing block.
+    /// The actual allocated address is not guaranteed to be near the requested
+    /// address.
+    /// \p Flags is used to set the initial protection flags for the block
+    /// of the memory.
+    /// \p EC [out] returns an object describing any error that occurs.
+    ///
+    /// This method may allocate more than the number of bytes requested.  The
+    /// actual number of bytes allocated is indicated in the returned
+    /// MemoryBlock.
+    ///
+    /// The start of the allocated block must be aligned with the
+    /// system allocation granularity (64K on Windows, page size on Linux).
+    /// If the address following \p NearBlock is not so aligned, it will be
+    /// rounded up to the next allocation granularity boundary.
+    ///
+    /// \r a non-null MemoryBlock if the function was successful, 
+    /// otherwise a null MemoryBlock, with \p EC describing the error.
+    ///
+    /// @brief Allocate mapped memory.
+    static MemoryBlock allocateMappedMemory(size_t NumBytes,
+                                            const MemoryBlock *const NearBlock,
+                                            unsigned Flags,
+                                            error_code &EC);
+
+    /// This method releases a block of memory that was allocated with the
+    /// allocateMappedMemory method. It should not be used to release any
+    /// memory block allocated any other way.
+    /// \p Block describes the memory to be released.
+    ///
+    /// \r error_success if the function was successful, or an error_code
+    /// describing the failure if an error occurred.
+    /// 
+    /// @brief Release mapped memory.
+    static error_code releaseMappedMemory(MemoryBlock &Block);
+
+    /// This method sets the protection flags for a block of memory to the
+    /// state specified by \p Flags.  The behavior is not specified if the
+    /// memory was not allocated using the allocateMappedMemory method.
+    /// \p Block describes the memory block to be protected.
+    /// \p Flags specifies the new protection state to be assigned to the block.
+    ///
+    /// If \p Flags is MF_WRITE, the actual behavior varies
+    /// with the operating system (e.g. MF_READWRITE on Windows) and the
+    /// target architecture (e.g. MF_WRITE -> MF_READWRITE on i386).
+    ///
+    /// \r error_success if the function was successful, or an error_code
+    /// describing the failure if an error occurred.
+    ///
+    /// @brief Set memory protection state.
+    static error_code protectMappedMemory(const MemoryBlock &Block,
+                                          unsigned Flags);
+
     /// This method allocates a block of Read/Write/Execute memory that is
     /// suitable for executing dynamically generated code (e.g. JIT). An
     /// attempt to allocate \p NumBytes bytes of virtual memory is made.
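
A hypothetical end-to-end use of the three new entry points, in the
JIT-style allocate/copy/protect/release cycle they are designed for
(emitCode and its error handling are illustrative only):

#include "llvm/Support/Memory.h"
#include <cstring>

using namespace llvm;

static bool emitCode(const uint8_t *Code, size_t Size) {
  error_code EC;
  sys::MemoryBlock MB = sys::Memory::allocateMappedMemory(
      Size, 0, sys::Memory::MF_READ | sys::Memory::MF_WRITE, EC);
  if (EC)
    return false;
  std::memcpy(MB.base(), Code, Size);   // fill while still writable
  if (sys::Memory::protectMappedMemory( // then flip to read/execute
          MB, sys::Memory::MF_READ | sys::Memory::MF_EXEC))
    return false;
  // ... call into MB.base() here ...
  return !sys::Memory::releaseMappedMemory(MB);
}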

Modified: llvm/branches/R600/include/llvm/TableGen/Record.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/include/llvm/TableGen/Record.h?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/include/llvm/TableGen/Record.h (original)
+++ llvm/branches/R600/include/llvm/TableGen/Record.h Fri Sep 21 12:29:50 2012
@@ -1328,6 +1328,14 @@
     TrackedRecords(records), TheInit(0) {
     init();
   }
+
+  // When copy-constructing a Record, we must still guarantee a globally unique
+  // ID number.  All other fields can be copied normally.
+  Record(const Record &O) :
+    ID(LastID++), Name(O.Name), Locs(O.Locs), TemplateArgs(O.TemplateArgs),
+    Values(O.Values), SuperClasses(O.SuperClasses),
+    TrackedRecords(O.TrackedRecords), TheInit(O.TheInit) { }
+
   ~Record() {}
 
 
@@ -1609,6 +1617,16 @@
   }
 };
 
+/// LessRecordByID - Sorting predicate to sort record pointers by their
+/// unique ID. If you just need a deterministic order, use this, since it
+/// just compares two `unsigned`; the other sorting predicates require
+/// string manipulation.
+struct LessRecordByID {
+  bool operator()(const Record *LHS, const Record *RHS) const {
+    return LHS->getID() < RHS->getID();
+  }
+};
+
 /// LessRecordFieldName - Sorting predicate to sort record pointers by their
 /// name field.
 ///
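
A hypothetical use of the new LessRecordByID predicate for a deterministic
emission order (the Defs vector is illustrative):

// Order records by their unique ID: one unsigned comparison per pair,
// with no string manipulation.
#include "llvm/TableGen/Record.h"
#include <algorithm>
#include <vector>

static void sortByID(std::vector<llvm::Record*> &Defs) {
  std::sort(Defs.begin(), Defs.end(), llvm::LessRecordByID());
}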

Modified: llvm/branches/R600/include/llvm/Transforms/Utils/Cloning.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/include/llvm/Transforms/Utils/Cloning.h?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/include/llvm/Transforms/Utils/Cloning.h (original)
+++ llvm/branches/R600/include/llvm/Transforms/Utils/Cloning.h Fri Sep 21 12:29:50 2012
@@ -116,13 +116,6 @@
                         bool ModuleLevelChanges,
                         ClonedCodeInfo *CodeInfo = 0);
 
-/// CloneFunction - Version of the function that doesn't need the VMap.
-///
-inline Function *CloneFunction(const Function *F, ClonedCodeInfo *CodeInfo = 0){
-  ValueToValueMapTy VMap;
-  return CloneFunction(F, VMap, CodeInfo);
-}
-
 /// Clone OldFunc into NewFunc, transforming the old arguments into references
 /// to VMap values.  Note that if NewFunc already has basic blocks, the ones
 /// cloned into it will be added to the end of the function.  This function

Added: llvm/branches/R600/include/llvm/Transforms/Utils/IntegerDivision.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/include/llvm/Transforms/Utils/IntegerDivision.h?rev=164384&view=auto
==============================================================================
--- llvm/branches/R600/include/llvm/Transforms/Utils/IntegerDivision.h (added)
+++ llvm/branches/R600/include/llvm/Transforms/Utils/IntegerDivision.h Fri Sep 21 12:29:50 2012
@@ -0,0 +1,38 @@
+//===- llvm/Transforms/Utils/IntegerDivision.h ------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains an implementation of 32-bit integer division for targets
+// that don't have native support. It's largely derived from compiler-rt's
+// implementation of __udivsi3, but hand-tuned for targets that prefer less
+// control flow.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TRANSFORMS_UTILS_INTEGERDIVISION_H
+#define TRANSFORMS_UTILS_INTEGERDIVISION_H
+
+namespace llvm {
+  class BinaryOperator;
+}
+
+namespace llvm {
+
+  /// Generate code to divide two integers, replacing Div with the generated
+  /// code. This currently generates code similar to compiler-rt's
+  /// implementations, but future work includes generating more specialized
+  /// code when more information about the operands is known. Currently it
+  /// only implements 32-bit scalar division; removing this limitation is
+  /// future work.
+  ///
+  /// @brief Replace Div with generated code.
+  bool expandDivision(BinaryOperator* Div);
+
+} // End llvm namespace
+
+#endif
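
A hypothetical caller, e.g. from a lowering pass; the opcode and width
checks reflect the 32-bit scalar limitation documented above:

// Expand a single 32-bit division instruction in place.
#include "llvm/Instructions.h"
#include "llvm/Transforms/Utils/IntegerDivision.h"

using namespace llvm;

static bool lowerDiv(BinaryOperator *Div) {
  if (Div->getOpcode() != Instruction::UDiv &&
      Div->getOpcode() != Instruction::SDiv)
    return false;               // only integer division is handled
  if (!Div->getType()->isIntegerTy(32))
    return false;               // current implementation is 32-bit only
  return expandDivision(Div);   // replaces Div with the expanded sequence
}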

Modified: llvm/branches/R600/lib/Analysis/BasicAliasAnalysis.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Analysis/BasicAliasAnalysis.cpp?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/lib/Analysis/BasicAliasAnalysis.cpp (original)
+++ llvm/branches/R600/lib/Analysis/BasicAliasAnalysis.cpp Fri Sep 21 12:29:50 2012
@@ -1086,7 +1086,7 @@
       // We assume for the recursion that the phis (ptr_phi, ptr2_phi) do
       // not alias each other.
       bool ArePhisAssumedNoAlias = false;
-      AliasResult OrigAliasResult;
+      AliasResult OrigAliasResult = NoAlias;
       if (Alias == NoAlias) {
         // Pretend the phis do not alias.
         assert(AliasCache.count(Locs) &&

Modified: llvm/branches/R600/lib/Analysis/InlineCost.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Analysis/InlineCost.cpp?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/lib/Analysis/InlineCost.cpp (original)
+++ llvm/branches/R600/lib/Analysis/InlineCost.cpp Fri Sep 21 12:29:50 2012
@@ -51,9 +51,12 @@
   int Cost;
   const bool AlwaysInline;
 
-  bool IsRecursive;
+  bool IsCallerRecursive;
+  bool IsRecursiveCall;
   bool ExposesReturnsTwice;
   bool HasDynamicAlloca;
+  /// Number of bytes allocated statically by the callee.
+  uint64_t AllocatedSize;
   unsigned NumInstructions, NumVectorInstructions;
   int FiftyPercentVectorBonus, TenPercentVectorBonus;
   int VectorBonus;
@@ -126,7 +129,8 @@
   CallAnalyzer(const TargetData *TD, Function &Callee, int Threshold)
     : TD(TD), F(Callee), Threshold(Threshold), Cost(0),
       AlwaysInline(F.hasFnAttr(Attribute::AlwaysInline)),
-      IsRecursive(false), ExposesReturnsTwice(false), HasDynamicAlloca(false),
+      IsCallerRecursive(false), IsRecursiveCall(false),
+      ExposesReturnsTwice(false), HasDynamicAlloca(false), AllocatedSize(0),
       NumInstructions(0), NumVectorInstructions(0),
       FiftyPercentVectorBonus(0), TenPercentVectorBonus(0), VectorBonus(0),
       NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0),
@@ -269,6 +273,13 @@
   // FIXME: Check whether inlining will turn a dynamic alloca into a static
   // alloca, and handle that case.
 
+  // Accumulate the allocated size.
+  if (I.isStaticAlloca()) {
+    Type *Ty = I.getAllocatedType();
+    AllocatedSize += (TD ? TD->getTypeAllocSize(Ty) :
+                      Ty->getPrimitiveSizeInBits());
+  }
+
   // We will happily inline static alloca instructions or dynamic alloca
   // instructions in always-inline situations.
   if (AlwaysInline || I.isStaticAlloca())
@@ -625,7 +636,7 @@
     if (F == CS.getInstruction()->getParent()->getParent()) {
       // This flag will fully abort the analysis, so don't bother with anything
       // else.
-      IsRecursive = true;
+      IsRecursiveCall = true;
       return false;
     }
 
@@ -712,7 +723,14 @@
       Cost += InlineConstants::InstrCost;
 
     // If visiting this instruction detected an uninlinable pattern, abort.
-    if (IsRecursive || ExposesReturnsTwice || HasDynamicAlloca)
+    if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca)
+      return false;
+
+    // If the caller is a recursive function then we don't want to inline
+    // functions which allocate a lot of stack space because it would increase
+    // the caller stack usage dramatically.
+    if (IsCallerRecursive &&
+        AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller)
       return false;
 
     if (NumVectorInstructions > NumInstructions/2)
@@ -831,12 +849,14 @@
       Cost += InlineConstants::LastCallToStaticBonus;
 
     // If the instruction after the call, or if the normal destination of the
-    // invoke is an unreachable instruction, the function is noreturn.  As such,
-    // there is little point in inlining this unless there is literally zero cost.
-    if (InvokeInst *II = dyn_cast<InvokeInst>(CS.getInstruction())) {
+    // invoke is an unreachable instruction, the function is noreturn. As such,
+    // there is little point in inlining this unless there is literally zero
+    // cost.
+    Instruction *Instr = CS.getInstruction();
+    if (InvokeInst *II = dyn_cast<InvokeInst>(Instr)) {
       if (isa<UnreachableInst>(II->getNormalDest()->begin()))
         Threshold = 1;
-    } else if (isa<UnreachableInst>(++BasicBlock::iterator(CS.getInstruction())))
+    } else if (isa<UnreachableInst>(++BasicBlock::iterator(Instr)))
       Threshold = 1;
 
     // If this function uses the coldcc calling convention, prefer not to inline
@@ -852,6 +872,20 @@
   if (F.empty())
     return true;
 
+  Function *Caller = CS.getInstruction()->getParent()->getParent();
+  // Check if the caller function is recursive itself.
+  for (Value::use_iterator U = Caller->use_begin(), E = Caller->use_end();
+       U != E; ++U) {
+    CallSite Site(cast<Value>(*U));
+    if (!Site)
+      continue;
+    Instruction *I = Site.getInstruction();
+    if (I->getParent()->getParent() == Caller) {
+      IsCallerRecursive = true;
+      break;
+    }
+  }
+
   // Track whether we've seen a return instruction. The first return
   // instruction is free, as at least one will usually disappear in inlining.
   bool HasReturn = false;
@@ -908,9 +942,9 @@
 
     // We never want to inline functions that contain an indirectbr.  This is
     // incorrect because all the blockaddress's (in static global initializers
-    // for example) would be referring to the original function, and this indirect
-    // jump would jump from the inlined copy of the function into the original
-    // function which is extremely undefined behavior.
+    // for example) would be referring to the original function, and this
+    // indirect jump would jump from the inlined copy of the function into the 
+    // original function which is extremely undefined behavior.
     // FIXME: This logic isn't really right; we can safely inline functions
     // with indirectbr's as long as no other function or global references the
     // blockaddress of a block within the current function.  And as a QOI issue,
@@ -928,8 +962,16 @@
     // Analyze the cost of this block. If we blow through the threshold, this
     // returns false, and we can bail on out.
     if (!analyzeBlock(BB)) {
-      if (IsRecursive || ExposesReturnsTwice || HasDynamicAlloca)
+      if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca)
         return false;
+
+      // If the caller is a recursive function then we don't want to inline
+      // functions which allocate a lot of stack space because it would increase
+      // the caller stack usage dramatically.
+      if (IsCallerRecursive &&
+          AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller)
+        return false;
+
       break;
     }
 
@@ -955,7 +997,8 @@
 
     // If we're unable to select a particular successor, just count all of
     // them.
-    for (unsigned TIdx = 0, TSize = TI->getNumSuccessors(); TIdx != TSize; ++TIdx)
+    for (unsigned TIdx = 0, TSize = TI->getNumSuccessors(); TIdx != TSize;
+         ++TIdx)
       BBWorklist.insert(TI->getSuccessor(TIdx));
 
     // If we had any successors at this point, then post-inlining is likely to
@@ -1003,7 +1046,8 @@
       Callee->hasFnAttr(Attribute::NoInline) || CS.isNoInline())
     return llvm::InlineCost::getNever();
 
-  DEBUG(llvm::dbgs() << "      Analyzing call of " << Callee->getName() << "...\n");
+  DEBUG(llvm::dbgs() << "      Analyzing call of " << Callee->getName()
+        << "...\n");
 
   CallAnalyzer CA(TD, *Callee, Threshold);
   bool ShouldInline = CA.analyzeCall(CS);

Modified: llvm/branches/R600/lib/AsmParser/LLParser.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/AsmParser/LLParser.cpp?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/lib/AsmParser/LLParser.cpp (original)
+++ llvm/branches/R600/lib/AsmParser/LLParser.cpp Fri Sep 21 12:29:50 2012
@@ -967,7 +967,7 @@
       unsigned Alignment;
       if (ParseOptionalStackAlignment(Alignment))
         return true;
-      Attrs |= Attribute::constructStackAlignmentFromInt(Alignment);
+      Attrs |= Attributes::constructStackAlignmentFromInt(Alignment);
       continue;
     }
 
@@ -975,7 +975,7 @@
       unsigned Alignment;
       if (ParseOptionalAlignment(Alignment))
         return true;
-      Attrs |= Attribute::constructAlignmentFromInt(Alignment);
+      Attrs |= Attributes::constructAlignmentFromInt(Alignment);
       continue;
     }
 
@@ -2717,7 +2717,7 @@
 
   // If the alignment was parsed as an attribute, move to the alignment field.
   if (FuncAttrs & Attribute::Alignment) {
-    Alignment = Attribute::getAlignmentFromAttrs(FuncAttrs);
+    Alignment = FuncAttrs.getAlignment();
     FuncAttrs &= ~Attribute::Alignment;
   }
 
@@ -2726,16 +2726,16 @@
   std::vector<Type*> ParamTypeList;
   SmallVector<AttributeWithIndex, 8> Attrs;
 
-  if (RetAttrs != Attribute::None)
+  if (RetAttrs.hasAttributes())
     Attrs.push_back(AttributeWithIndex::get(0, RetAttrs));
 
   for (unsigned i = 0, e = ArgList.size(); i != e; ++i) {
     ParamTypeList.push_back(ArgList[i].Ty);
-    if (ArgList[i].Attrs != Attribute::None)
+    if (ArgList[i].Attrs.hasAttributes())
       Attrs.push_back(AttributeWithIndex::get(i+1, ArgList[i].Attrs));
   }
 
-  if (FuncAttrs != Attribute::None)
+  if (FuncAttrs.hasAttributes())
     Attrs.push_back(AttributeWithIndex::get(~0, FuncAttrs));
 
   AttrListPtr PAL = AttrListPtr::get(Attrs);
@@ -3253,7 +3253,7 @@
 
   // Set up the Attributes for the function.
   SmallVector<AttributeWithIndex, 8> Attrs;
-  if (RetAttrs != Attribute::None)
+  if (RetAttrs.hasAttributes())
     Attrs.push_back(AttributeWithIndex::get(0, RetAttrs));
 
   SmallVector<Value*, 8> Args;
@@ -3274,14 +3274,14 @@
       return Error(ArgList[i].Loc, "argument is not of expected type '" +
                    getTypeString(ExpectedTy) + "'");
     Args.push_back(ArgList[i].V);
-    if (ArgList[i].Attrs != Attribute::None)
+    if (ArgList[i].Attrs.hasAttributes())
       Attrs.push_back(AttributeWithIndex::get(i+1, ArgList[i].Attrs));
   }
 
   if (I != E)
     return Error(CallLoc, "not enough parameters specified for call");
 
-  if (FnAttrs != Attribute::None)
+  if (FnAttrs.hasAttributes())
     Attrs.push_back(AttributeWithIndex::get(~0, FnAttrs));
 
   // Finish off the Attributes and check them
@@ -3649,7 +3649,7 @@
 
   // Set up the Attributes for the function.
   SmallVector<AttributeWithIndex, 8> Attrs;
-  if (RetAttrs != Attribute::None)
+  if (RetAttrs.hasAttributes())
     Attrs.push_back(AttributeWithIndex::get(0, RetAttrs));
 
   SmallVector<Value*, 8> Args;
@@ -3670,14 +3670,14 @@
       return Error(ArgList[i].Loc, "argument is not of expected type '" +
                    getTypeString(ExpectedTy) + "'");
     Args.push_back(ArgList[i].V);
-    if (ArgList[i].Attrs != Attribute::None)
+    if (ArgList[i].Attrs.hasAttributes())
       Attrs.push_back(AttributeWithIndex::get(i+1, ArgList[i].Attrs));
   }
 
   if (I != E)
     return Error(CallLoc, "not enough parameters specified for call");
 
-  if (FnAttrs != Attribute::None)
+  if (FnAttrs.hasAttributes())
     Attrs.push_back(AttributeWithIndex::get(~0, FnAttrs));
 
   // Finish off the Attributes and check them

Modified: llvm/branches/R600/lib/Bitcode/Reader/BitcodeReader.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Bitcode/Reader/BitcodeReader.cpp?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/lib/Bitcode/Reader/BitcodeReader.cpp (original)
+++ llvm/branches/R600/lib/Bitcode/Reader/BitcodeReader.cpp Fri Sep 21 12:29:50 2012
@@ -52,6 +52,8 @@
   std::vector<Function*>().swap(FunctionsWithBodies);
   DeferredFunctionInfo.clear();
   MDKindMap.clear();
+
+  assert(BlockAddrFwdRefs.empty() && "Unresolved blockaddress fwd references");
 }
 
 //===----------------------------------------------------------------------===//
@@ -1300,13 +1302,27 @@
       Function *Fn =
         dyn_cast_or_null<Function>(ValueList.getConstantFwdRef(Record[1],FnTy));
       if (Fn == 0) return Error("Invalid CE_BLOCKADDRESS record");
-      
-      GlobalVariable *FwdRef = new GlobalVariable(*Fn->getParent(),
-                                                  Type::getInt8Ty(Context),
+
+      // If the function is already parsed we can insert the block address right
+      // away.
+      if (!Fn->empty()) {
+        Function::iterator BBI = Fn->begin(), BBE = Fn->end();
+        for (size_t I = 0, E = Record[2]; I != E; ++I) {
+          if (BBI == BBE)
+            return Error("Invalid blockaddress block #");
+          ++BBI;
+        }
+        V = BlockAddress::get(Fn, BBI);
+      } else {
+        // Otherwise insert a placeholder and remember it so it can be inserted
+        // when the function is parsed.
+        GlobalVariable *FwdRef = new GlobalVariable(*Fn->getParent(),
+                                                    Type::getInt8Ty(Context),
                                             false, GlobalValue::InternalLinkage,
-                                                  0, "");
-      BlockAddrFwdRefs[Fn].push_back(std::make_pair(Record[2], FwdRef));
-      V = FwdRef;
+                                                    0, "");
+        BlockAddrFwdRefs[Fn].push_back(std::make_pair(Record[2], FwdRef));
+        V = FwdRef;
+      }
       break;
     }  
     }

Modified: llvm/branches/R600/lib/CodeGen/Analysis.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/CodeGen/Analysis.cpp?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/lib/CodeGen/Analysis.cpp (original)
+++ llvm/branches/R600/lib/CodeGen/Analysis.cpp Fri Sep 21 12:29:50 2012
@@ -318,7 +318,7 @@
     return false;
 
   // It's not safe to eliminate the sign / zero extension of the return value.
-  if ((CallerRetAttr & Attribute::ZExt) || (CallerRetAttr & Attribute::SExt))
+  if (CallerRetAttr.hasZExtAttr() || CallerRetAttr.hasSExtAttr())
     return false;
 
   // Otherwise, make sure the unmodified return value of I is the return value.
@@ -358,7 +358,7 @@
     return false;
 
   // It's not safe to eliminate the sign / zero extension of the return value.
-  if ((CallerRetAttr & Attribute::ZExt) || (CallerRetAttr & Attribute::SExt))
+  if (CallerRetAttr.hasZExtAttr() || CallerRetAttr.hasSExtAttr())
     return false;
 
   // Check if the only use is a function return node.

Modified: llvm/branches/R600/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp (original)
+++ llvm/branches/R600/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp Fri Sep 21 12:29:50 2012
@@ -1109,9 +1109,6 @@
         addType(Arg, ATy);
         if (ATy.isArtificial())
           addFlag(Arg, dwarf::DW_AT_artificial);
-        if (ATy.isObjectPointer())
-          addDIEEntry(SPDie, dwarf::DW_AT_object_pointer, dwarf::DW_FORM_ref4,
-                      Arg);
         SPDie->addChild(Arg);
       }
   }

Modified: llvm/branches/R600/lib/CodeGen/LiveIntervalAnalysis.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/CodeGen/LiveIntervalAnalysis.cpp?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/lib/CodeGen/LiveIntervalAnalysis.cpp (original)
+++ llvm/branches/R600/lib/CodeGen/LiveIntervalAnalysis.cpp Fri Sep 21 12:29:50 2012
@@ -742,9 +742,6 @@
 void LiveIntervals::pruneValue(LiveInterval *LI, SlotIndex Kill,
                                SmallVectorImpl<SlotIndex> *EndPoints) {
   LiveRangeQuery LRQ(*LI, Kill);
-  assert (!LRQ.valueDefined() && "Can't prune value at the defining instr");
-
-  // Also can't prune a value that isn't there.
   VNInfo *VNI = LRQ.valueOut();
   if (!VNI)
     return;

Modified: llvm/branches/R600/lib/CodeGen/LiveRegMatrix.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/CodeGen/LiveRegMatrix.h?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/lib/CodeGen/LiveRegMatrix.h (original)
+++ llvm/branches/R600/lib/CodeGen/LiveRegMatrix.h Fri Sep 21 12:29:50 2012
@@ -15,7 +15,7 @@
 // Register units are defined in MCRegisterInfo.h, they represent the smallest
 // unit of interference when dealing with overlapping physical registers. The
 // LiveRegMatrix is represented as a LiveIntervalUnion per register unit. When
-// a virtual register is assigned to a physicval register, the live range for
+// a virtual register is assigned to a physical register, the live range for
 // the virtual register is inserted into the LiveIntervalUnion for each regunit
 // in the physreg.
 //

Modified: llvm/branches/R600/lib/CodeGen/MachineFunction.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/CodeGen/MachineFunction.cpp?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/lib/CodeGen/MachineFunction.cpp (original)
+++ llvm/branches/R600/lib/CodeGen/MachineFunction.cpp Fri Sep 21 12:29:50 2012
@@ -60,8 +60,8 @@
   MFInfo = 0;
   FrameInfo = new (Allocator) MachineFrameInfo(*TM.getFrameLowering());
   if (Fn->hasFnAttr(Attribute::StackAlignment))
-    FrameInfo->ensureMaxAlignment(Attribute::getStackAlignmentFromAttrs(
-        Fn->getAttributes().getFnAttributes()));
+    FrameInfo->ensureMaxAlignment(Fn->getAttributes().
+                                  getFnAttributes().getStackAlignment());
   ConstantPool = new (Allocator) MachineConstantPool(TM.getTargetData());
   Alignment = TM.getTargetLowering()->getMinFunctionAlignment();
   // FIXME: Shouldn't use pref alignment if explicit alignment is set on Fn.

Modified: llvm/branches/R600/lib/CodeGen/RegisterCoalescer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/CodeGen/RegisterCoalescer.cpp?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/lib/CodeGen/RegisterCoalescer.cpp (original)
+++ llvm/branches/R600/lib/CodeGen/RegisterCoalescer.cpp Fri Sep 21 12:29:50 2012
@@ -55,6 +55,8 @@
 STATISTIC(numExtends  , "Number of copies extended");
 STATISTIC(NumReMats   , "Number of instructions re-materialized");
 STATISTIC(NumInflated , "Number of register classes inflated");
+STATISTIC(NumLaneConflicts, "Number of dead lane conflicts tested");
+STATISTIC(NumLaneResolves,  "Number of dead lane conflicts resolved");
 
 static cl::opt<bool>
 EnableJoining("join-liveintervals",
@@ -1244,8 +1246,15 @@
     // Value in the other live range that overlaps this def, if any.
     VNInfo *OtherVNI;
 
+    // True when the live range of this value will be pruned because of an
+    // overlapping CR_Replace value in the other live range.
+    bool Pruned;
+
+    // True once Pruned above has been computed.
+    bool PrunedComputed;
+
     Val() : Resolution(CR_Keep), WriteLanes(0), ValidLanes(0),
-            RedefVNI(0), OtherVNI(0) {}
+            RedefVNI(0), OtherVNI(0), Pruned(false), PrunedComputed(false) {}
 
     bool isAnalyzed() const { return WriteLanes != 0; }
   };
@@ -1257,6 +1266,10 @@
   VNInfo *stripCopies(VNInfo *VNI);
   ConflictResolution analyzeValue(unsigned ValNo, JoinVals &Other);
   void computeAssignment(unsigned ValNo, JoinVals &Other);
+  bool taintExtent(unsigned, unsigned, JoinVals&,
+                   SmallVectorImpl<std::pair<SlotIndex, unsigned> >&);
+  bool usesLanes(MachineInstr *MI, unsigned, unsigned, unsigned);
+  bool isPrunedValue(unsigned ValNo, JoinVals &Other);
 
 public:
   JoinVals(LiveInterval &li, unsigned subIdx,
@@ -1389,7 +1402,6 @@
   // values should be merged into one, but not into any preceding value.
   // The first value defined or visited gets CR_Keep, the other gets CR_Merge.
   if (VNInfo *OtherVNI = OtherLRQ.valueDefined()) {
-    DEBUG(dbgs() << "\t\tDouble def: " << VNI->def << '\n');
     assert(SlotIndex::isSameInstr(VNI->def, OtherVNI->def) && "Broken LRQ");
 
     // One value stays, the other is merged. Keep the earlier one, or the first
@@ -1407,10 +1419,14 @@
     // Keep this value, check for conflicts when analyzing OtherVNI.
     if (!OtherV.isAnalyzed())
       return CR_Keep;
-    // Both sides have been analyzed now. Do they conflict?
+    // Both sides have been analyzed now.
+    // Allow overlapping PHI values. Any real interference would show up in a
+    // predecessor, the PHI itself can't introduce any conflicts.
+    if (VNI->isPHIDef())
+      return CR_Merge;
     if (V.ValidLanes & OtherV.ValidLanes)
-      // Overlapping lanes can't be resolved now, maybe later.
-      return CR_Unresolved;
+      // Overlapping lanes can't be resolved.
+      return CR_Impossible;
     else
       return CR_Merge;
   }
@@ -1428,9 +1444,10 @@
   Other.computeAssignment(V.OtherVNI->id, *this);
   const Val &OtherV = Other.Vals[V.OtherVNI->id];
 
-  // Don't attempt resolving PHI values for now.
+  // Allow overlapping PHI values. Any real interference would show up in a
+  // predecessor, the PHI itself can't introduce any conflicts.
   if (VNI->isPHIDef())
-    return CR_Impossible;
+    return CR_Replace;
 
   // Check for simple erasable conflicts.
   if (DefMI->isImplicitDef())
@@ -1474,9 +1491,28 @@
   if ((V.WriteLanes & OtherV.ValidLanes) == 0)
     return CR_Replace;
 
-  // FIXME: Identify CR_Replace opportunities where the clobbered lanes are
-  // dead.
-  return CR_Impossible;
+  // VNI is clobbering live lanes in OtherVNI, but there is still the
+  // possibility that no instructions actually read the clobbered lanes.
+  // If we're clobbering all the lanes in OtherVNI, at least one must be read.
+  // Otherwise Other.LI wouldn't be live here.
+  if ((TRI->getSubRegIndexLaneMask(Other.SubIdx) & ~V.WriteLanes) == 0)
+    return CR_Impossible;
+
+  // We need to verify that no instructions are reading the clobbered lanes. To
+  // save compile time, we'll only check that locally. Don't allow the tainted
+  // value to escape the basic block.
+  MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def);
+  if (OtherLRQ.endPoint() >= Indexes->getMBBEndIdx(MBB))
+    return CR_Impossible;
+
+  // There are still some things that could go wrong besides clobbered lanes
+  // being read, for example OtherVNI may be only partially redefined in MBB,
+  // and some clobbered lanes could escape the block. Save this analysis for
+  // resolveConflicts() when all values have been mapped. We need to know
+  // RedefVNI and WriteLanes for any later defs in MBB, and we can't compute
+  // that now - the recursive analyzeValue() calls must go upwards in the
+  // dominator tree.
+  return CR_Unresolved;
 }
 
 /// Compute the value assignment for ValNo in LI.
@@ -1503,6 +1539,12 @@
                  << V.OtherVNI->def << " --> @"
                  << NewVNInfo[Assignments[ValNo]]->def << '\n');
     break;
+  case CR_Replace:
+  case CR_Unresolved:
+    // The other value is going to be pruned if this join is successful.
+    assert(V.OtherVNI && "OtherVNI not assigned, can't prune");
+    Other.Vals[V.OtherVNI->id].Pruned = true;
+    // Fall through.
   default:
     // This value number needs to go in the final joined live range.
     Assignments[ValNo] = NewVNInfo.size();
@@ -1523,28 +1565,192 @@
   return true;
 }
 
+/// Assuming ValNo is going to clobber some valid lanes in Other.LI, compute
+/// the extent of the tainted lanes in the block.
+///
+/// Multiple values in Other.LI can be affected since partial redefinitions can
+/// preserve previously tainted lanes.
+///
+///   1 %dst = VLOAD           <-- Define all lanes in %dst
+///   2 %src = FOO             <-- ValNo to be joined with %dst:ssub0
+///   3 %dst:ssub1 = BAR       <-- Partial redef doesn't clear taint in ssub0
+///   4 %dst:ssub0 = COPY %src <-- Conflict resolved, ssub0 wasn't read
+///
+/// For each ValNo in Other that is affected, add an (EndIndex, TaintedLanes)
+/// entry to TaintedVals.
+///
+/// Returns false if the tainted lanes extend beyond the basic block.
+bool JoinVals::
+taintExtent(unsigned ValNo, unsigned TaintedLanes, JoinVals &Other,
+            SmallVectorImpl<std::pair<SlotIndex, unsigned> > &TaintExtent) {
+  VNInfo *VNI = LI.getValNumInfo(ValNo);
+  MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def);
+  SlotIndex MBBEnd = Indexes->getMBBEndIdx(MBB);
+
+  // Scan Other.LI from VNI.def to MBBEnd.
+  LiveInterval::iterator OtherI = Other.LI.find(VNI->def);
+  assert(OtherI != Other.LI.end() && "No conflict?");
+  do {
+    // OtherI is pointing to a tainted value. Abort the join if the tainted
+    // lanes escape the block.
+    SlotIndex End = OtherI->end;
+    if (End >= MBBEnd) {
+      DEBUG(dbgs() << "\t\ttaints global " << PrintReg(Other.LI.reg) << ':'
+                   << OtherI->valno->id << '@' << OtherI->start << '\n');
+      return false;
+    }
+    DEBUG(dbgs() << "\t\ttaints local " << PrintReg(Other.LI.reg) << ':'
+                 << OtherI->valno->id << '@' << OtherI->start
+                 << " to " << End << '\n');
+    // A dead def is not a problem.
+    if (End.isDead())
+      break;
+    TaintExtent.push_back(std::make_pair(End, TaintedLanes));
+
+    // Check for another def in the MBB.
+    if (++OtherI == Other.LI.end() || OtherI->start >= MBBEnd)
+      break;
+
+    // Lanes written by the new def are no longer tainted.
+    const Val &OV = Other.Vals[OtherI->valno->id];
+    TaintedLanes &= ~OV.WriteLanes;
+    if (!OV.RedefVNI)
+      break;
+  } while (TaintedLanes);
+  return true;
+}
+
+/// Return true if MI uses any of the given Lanes from Reg.
+/// This does not include partial redefinitions of Reg.
+bool JoinVals::usesLanes(MachineInstr *MI, unsigned Reg, unsigned SubIdx,
+                         unsigned Lanes) {
+  if (MI->isDebugValue())
+    return false;
+  for (ConstMIOperands MO(MI); MO.isValid(); ++MO) {
+    if (!MO->isReg() || MO->isDef() || MO->getReg() != Reg)
+      continue;
+    if (!MO->readsReg())
+      continue;
+    if (Lanes &
+        TRI->getSubRegIndexLaneMask(compose(*TRI, SubIdx, MO->getSubReg())))
+      return true;
+  }
+  return false;
+}
+
 bool JoinVals::resolveConflicts(JoinVals &Other) {
   for (unsigned i = 0, e = LI.getNumValNums(); i != e; ++i) {
-    assert (Vals[i].Resolution != CR_Impossible && "Unresolvable conflict");
-    if (Vals[i].Resolution != CR_Unresolved)
+    Val &V = Vals[i];
+    assert(V.Resolution != CR_Impossible && "Unresolvable conflict");
+    if (V.Resolution != CR_Unresolved)
       continue;
-    // FIXME: Actually resolve dead lane conflicts.
     DEBUG(dbgs() << "\t\tconflict at " << PrintReg(LI.reg) << ':' << i
                  << '@' << LI.getValNumInfo(i)->def << '\n');
-    return false;
+    ++NumLaneConflicts;
+    assert(V.OtherVNI && "Inconsistent conflict resolution.");
+    VNInfo *VNI = LI.getValNumInfo(i);
+    const Val &OtherV = Other.Vals[V.OtherVNI->id];
+
+    // VNI is known to clobber some lanes in OtherVNI. If we go ahead with the
+    // join, those lanes will be tainted with a wrong value. Get the extent of
+    // the tainted lanes.
+    unsigned TaintedLanes = V.WriteLanes & OtherV.ValidLanes;
+    SmallVector<std::pair<SlotIndex, unsigned>, 8> TaintExtent;
+    if (!taintExtent(i, TaintedLanes, Other, TaintExtent))
+      // Tainted lanes would extend beyond the basic block.
+      return false;
+
+    assert(!TaintExtent.empty() && "There should be at least one conflict.");
+
+    // Now look at the instructions from VNI->def to TaintExtent (inclusive).
+    MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def);
+    MachineBasicBlock::iterator MI = MBB->begin();
+    if (!VNI->isPHIDef()) {
+      MI = Indexes->getInstructionFromIndex(VNI->def);
+      // No need to check the instruction defining VNI for reads.
+      ++MI;
+    }
+    assert(!SlotIndex::isSameInstr(VNI->def, TaintExtent.front().first) &&
+           "Interference ends on VNI->def. Should have been handled earlier");
+    MachineInstr *LastMI =
+      Indexes->getInstructionFromIndex(TaintExtent.front().first);
+    assert(LastMI && "Range must end at a proper instruction");
+    unsigned TaintNum = 0;
+    for (;;) {
+      assert(MI != MBB->end() && "Bad LastMI");
+      if (usesLanes(MI, Other.LI.reg, Other.SubIdx, TaintedLanes)) {
+        DEBUG(dbgs() << "\t\ttainted lanes used by: " << *MI);
+        return false;
+      }
+      // LastMI is the last instruction to use the current value.
+      if (&*MI == LastMI) {
+        if (++TaintNum == TaintExtent.size())
+          break;
+        LastMI = Indexes->getInstructionFromIndex(TaintExtent[TaintNum].first);
+        assert(LastMI && "Range must end at a proper instruction");
+        TaintedLanes = TaintExtent[TaintNum].second;
+      }
+      ++MI;
+    }
+
+    // The tainted lanes are unused.
+    V.Resolution = CR_Replace;
+    ++NumLaneResolves;
   }
   return true;
 }
 
+// Determine if ValNo is a copy of a value number in LI or Other.LI that will
+// be pruned:
+//
+//   %dst = COPY %src
+//   %src = COPY %dst  <-- This value is to be pruned.
+//   %dst = COPY %src  <-- This value is a copy of a pruned value.
+//
+bool JoinVals::isPrunedValue(unsigned ValNo, JoinVals &Other) {
+  Val &V = Vals[ValNo];
+  if (V.Pruned || V.PrunedComputed)
+    return V.Pruned;
+
+  if (V.Resolution != CR_Erase && V.Resolution != CR_Merge)
+    return V.Pruned;
+
+  // Follow copies up the dominator tree and check if any intermediate value
+  // has been pruned.
+  V.PrunedComputed = true;
+  V.Pruned = Other.isPrunedValue(V.OtherVNI->id, *this);
+  return V.Pruned;
+}
+
 void JoinVals::pruneValues(JoinVals &Other,
                            SmallVectorImpl<SlotIndex> &EndPoints) {
   for (unsigned i = 0, e = LI.getNumValNums(); i != e; ++i) {
-    if (Vals[i].Resolution != CR_Replace)
-      continue;
     SlotIndex Def = LI.getValNumInfo(i)->def;
-    LIS->pruneValue(&Other.LI, Def, &EndPoints);
-    DEBUG(dbgs() << "\t\tpruned " << PrintReg(Other.LI.reg) << " at " << Def
-                 << ": " << Other.LI << '\n');
+    switch (Vals[i].Resolution) {
+    case CR_Keep:
+      break;
+    case CR_Replace:
+      // This value takes precedence over the value in Other.LI.
+      LIS->pruneValue(&Other.LI, Def, &EndPoints);
+      DEBUG(dbgs() << "\t\tpruned " << PrintReg(Other.LI.reg) << " at " << Def
+                   << ": " << Other.LI << '\n');
+      break;
+    case CR_Erase:
+    case CR_Merge:
+      if (isPrunedValue(i, Other)) {
+        // This value is ultimately a copy of a pruned value in LI or Other.LI.
+        // We can no longer trust the value mapping computed by
+        // computeAssignment(); the value that was originally copied could have
+        // been replaced.
+        LIS->pruneValue(&LI, Def, &EndPoints);
+        DEBUG(dbgs() << "\t\tpruned all of " << PrintReg(LI.reg) << " at "
+                     << Def << ": " << LI << '\n');
+      }
+      break;
+    case CR_Unresolved:
+    case CR_Impossible:
+      llvm_unreachable("Unresolved conflicts");
+    }
   }
 }
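
A note on the lane-mask arithmetic in taintExtent() above: WriteLanes and ValidLanes are bitmasks with one bit per subregister lane, and each later partial redefinition in the block clears the lanes it rewrites from the tainted set. The following standalone sketch (simplified types and indices, not the LLVM API) models that narrowing loop:

  #include <cstdint>
  #include <cstdio>
  #include <utility>
  #include <vector>

  struct PartialDef {
    unsigned EndIndex;   // slot index where this value's range ends
    uint32_t WriteLanes; // lane mask (re)written by this def
    bool IsDead;         // a dead def ends the scan early
  };

  typedef std::vector<std::pair<unsigned, uint32_t> > TaintList;

  TaintList taintExtentSketch(uint32_t TaintedLanes,
                              const std::vector<PartialDef> &Defs) {
    TaintList Extent;
    for (size_t i = 0; i < Defs.size() && TaintedLanes; ++i) {
      if (Defs[i].IsDead)
        break;                             // a dead def is not a problem
      Extent.push_back(std::make_pair(Defs[i].EndIndex, TaintedLanes));
      TaintedLanes &= ~Defs[i].WriteLanes; // freshly written lanes are clean
    }
    return Extent;
  }

  int main() {
    // ssub0 = bit 0, ssub1 = bit 1; the conflicting def taints ssub0 only,
    // matching the VLOAD/BAR/COPY example in the comment above.
    PartialDef Defs[] = {{30, 0x2, false},  // BAR redefines ssub1 only
                         {40, 0x1, false}}; // COPY finally rewrites ssub0
    std::vector<PartialDef> V(Defs, Defs + 2);
    TaintList Extent = taintExtentSketch(/*TaintedLanes=*/0x1, V);
    for (size_t i = 0; i < Extent.size(); ++i)
      std::printf("tainted through %u, lanes 0x%x\n",
                  Extent[i].first, Extent[i].second);
    return 0;
  }

The printed extents mirror TaintExtent in the patch: the ssub0 taint survives the BAR redef of ssub1 and only dies at the def that actually rewrites ssub0.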
 

Modified: llvm/branches/R600/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/branches/R600/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Fri Sep 21 12:29:50 2012
@@ -1644,7 +1644,8 @@
     return N0.getOperand(0);
   // fold C2-(A+C1) -> (C2-C1)-A
   if (N1.getOpcode() == ISD::ADD && N0C && N1C1) {
-    SDValue NewC = DAG.getConstant((N0C->getAPIntValue() - N1C1->getAPIntValue()), VT);
+    SDValue NewC = DAG.getConstant(N0C->getAPIntValue() - N1C1->getAPIntValue(),
+                                   VT);
     return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, NewC,
                        N1.getOperand(0));
   }
@@ -2346,16 +2347,19 @@
   // we don't want to undo this promotion.
   // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
   // on scalars.
-  if ((N0.getOpcode() == ISD::BITCAST || N0.getOpcode() == ISD::SCALAR_TO_VECTOR)
-      && Level == AfterLegalizeTypes) {
+  if ((N0.getOpcode() == ISD::BITCAST ||
+       N0.getOpcode() == ISD::SCALAR_TO_VECTOR) &&
+      Level == AfterLegalizeTypes) {
     SDValue In0 = N0.getOperand(0);
     SDValue In1 = N1.getOperand(0);
     EVT In0Ty = In0.getValueType();
     EVT In1Ty = In1.getValueType();
-    // If both incoming values are integers, and the original types are the same.
+    DebugLoc DL = N->getDebugLoc();
+    // If both incoming values are integers, and the original types are the
+    // same.
     if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) {
-      SDValue Op = DAG.getNode(N->getOpcode(), N->getDebugLoc(), In0Ty, In0, In1);
-      SDValue BC = DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, Op);
+      SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1);
+      SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op);
       AddToWorkList(Op.getNode());
       return BC;
     }
@@ -4057,7 +4061,8 @@
   if (VT.isInteger() &&
       (VT0 == MVT::i1 ||
        (VT0.isInteger() &&
-        TLI.getBooleanContents(false) == TargetLowering::ZeroOrOneBooleanContent)) &&
+        TLI.getBooleanContents(false) ==
+        TargetLowering::ZeroOrOneBooleanContent)) &&
       N1C && N2C && N1C->isNullValue() && N2C->getAPIntValue() == 1) {
     SDValue XORNode;
     if (VT == VT0)
@@ -5676,12 +5681,12 @@
     return N0;
   // fold (fadd A, (fneg B)) -> (fsub A, B)
   if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
-      isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options) == 2)
+    isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options) == 2)
     return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0,
                        GetNegatedExpression(N1, DAG, LegalOperations));
   // fold (fadd (fneg A), B) -> (fsub B, A)
   if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
-      isNegatibleForFree(N0, LegalOperations, TLI, &DAG.getTarget().Options) == 2)
+    isNegatibleForFree(N0, LegalOperations, TLI, &DAG.getTarget().Options) == 2)
     return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N1,
                        GetNegatedExpression(N0, DAG, LegalOperations));
 
@@ -7711,9 +7716,9 @@
 
   // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
   // We only perform this optimization before the op legalization phase because
-  // we may introduce new vector instructions which are not backed by TD patterns.
-  // For example on AVX, extracting elements from a wide vector without using
-  // extract_subvector.
+  // we may introduce new vector instructions which are not backed by TD
+  // patterns. For example on AVX, extracting elements from a wide vector
+  // without using extract_subvector.
   if (InVec.getOpcode() == ISD::VECTOR_SHUFFLE
       && ConstEltNo && !LegalOperations) {
     int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
@@ -8097,7 +8102,8 @@
     // If VecIn2 is unused then change it to undef.
     VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
 
-    // Check that we were able to transform all incoming values to the same type.
+    // Check that we were able to transform all incoming values to the same
+    // type.
     if (VecIn2.getValueType() != VecIn1.getValueType() ||
         VecIn1.getValueType() != VT)
           return SDValue();
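
Most of the DAGCombiner changes above are line reflow, but the first hunk touches the fold C2-(A+C1) -> (C2-C1)-A. That rewrite is plain modular arithmetic, so here is a quick standalone check (ordinary C++, not SelectionDAG) of the identity it relies on:

  // C2 - (A + C1) == (C2 - C1) - A holds modulo 2^32, i.e. under the
  // two's-complement wraparound that APInt subtraction also uses.
  #include <cassert>

  int main() {
    const unsigned Samples[] = {0u, 1u, 7u, 12345u, 0x80000000u, 0xFFFFFFFFu};
    for (unsigned A = 0; A < 6; ++A)
      for (unsigned C1 = 0; C1 < 6; ++C1)
        for (unsigned C2 = 0; C2 < 6; ++C2)
          assert(Samples[C2] - (Samples[A] + Samples[C1]) ==
                 (Samples[C2] - Samples[C1]) - Samples[A]);
    return 0;
  }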

Modified: llvm/branches/R600/lib/CodeGen/SelectionDAG/TargetLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/CodeGen/SelectionDAG/TargetLowering.cpp?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/lib/CodeGen/SelectionDAG/TargetLowering.cpp (original)
+++ llvm/branches/R600/lib/CodeGen/SelectionDAG/TargetLowering.cpp Fri Sep 21 12:29:50 2012
@@ -996,9 +996,9 @@
     EVT VT = ValueVTs[j];
     ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
 
-    if (attr & Attribute::SExt)
+    if (attr.hasSExtAttr())
       ExtendKind = ISD::SIGN_EXTEND;
-    else if (attr & Attribute::ZExt)
+    else if (attr.hasZExtAttr())
       ExtendKind = ISD::ZERO_EXTEND;
 
     // FIXME: C calling convention requires the return type to be promoted to
@@ -1016,18 +1016,17 @@
 
     // 'inreg' on function refers to return value
     ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
-    if (attr & Attribute::InReg)
+    if (attr.hasInRegAttr())
       Flags.setInReg();
 
     // Propagate extension type if any
-    if (attr & Attribute::SExt)
+    if (attr.hasSExtAttr())
       Flags.setSExt();
-    else if (attr & Attribute::ZExt)
+    else if (attr.hasZExtAttr())
       Flags.setZExt();
 
-    for (unsigned i = 0; i < NumParts; ++i) {
+    for (unsigned i = 0; i < NumParts; ++i)
       Outs.push_back(ISD::OutputArg(Flags, PartVT, /*isFixed=*/true));
-    }
   }
 }
 

Modified: llvm/branches/R600/lib/MC/MCAssembler.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/MC/MCAssembler.cpp?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/lib/MC/MCAssembler.cpp (original)
+++ llvm/branches/R600/lib/MC/MCAssembler.cpp Fri Sep 21 12:29:50 2012
@@ -199,8 +199,7 @@
                          MCCodeEmitter &Emitter_, MCObjectWriter &Writer_,
                          raw_ostream &OS_)
   : Context(Context_), Backend(Backend_), Emitter(Emitter_), Writer(Writer_),
-    OS(OS_), RelaxAll(false), NoExecStack(false), SubsectionsViaSymbols(false)
-{
+    OS(OS_), RelaxAll(false), NoExecStack(false), SubsectionsViaSymbols(false) {
 }
 
 MCAssembler::~MCAssembler() {
@@ -533,7 +532,7 @@
   }
 
   uint64_t Start = getWriter().getStream().tell();
-  (void) Start;
+  (void)Start;
 
   for (MCSectionData::const_iterator it = SD->begin(),
          ie = SD->end(); it != ie; ++it)

Modified: llvm/branches/R600/lib/MC/MCParser/AsmLexer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/MC/MCParser/AsmLexer.cpp?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/lib/MC/MCParser/AsmLexer.cpp (original)
+++ llvm/branches/R600/lib/MC/MCParser/AsmLexer.cpp Fri Sep 21 12:29:50 2012
@@ -396,8 +396,17 @@
   case 0:
   case ' ':
   case '\t':
-    // Ignore whitespace.
-    return LexToken();
+    if (SkipSpace) {
+      // Ignore whitespace.
+      return LexToken();
+    } else {
+      int len = 1;
+      while (*CurPtr == ' ' || *CurPtr == '\t') {
+        CurPtr++;
+        len++;
+      }
+      return AsmToken(AsmToken::Space, StringRef(TokStart, len));
+    }
   case '\n': // FALL THROUGH.
   case '\r':
     isAtStartOfLine = true;
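
This lexer change is what enables the gas-style macro argument parsing later in the patch: with SkipSpace disabled, a run of blanks comes back as a single AsmToken::Space token instead of being discarded. A minimal model of the two behaviors (hypothetical types, not the MC lexer):

  #include <cstdio>
  #include <string>
  #include <vector>

  struct Tok { const char *Kind; std::string Text; };

  std::vector<Tok> lex(const std::string &S, bool SkipSpace) {
    std::vector<Tok> Out;
    size_t i = 0;
    while (i < S.size()) {
      size_t Start = i;
      if (S[i] == ' ' || S[i] == '\t') {
        while (i < S.size() && (S[i] == ' ' || S[i] == '\t'))
          ++i;                                  // coalesce the whole run
        if (!SkipSpace)
          Out.push_back({"space", S.substr(Start, i - Start)});
      } else {
        while (i < S.size() && S[i] != ' ' && S[i] != '\t')
          ++i;
        Out.push_back({"ident", S.substr(Start, i - Start)});
      }
    }
    return Out;
  }

  int main() {
    for (int Skip = 1; Skip >= 0; --Skip) {
      std::printf("SkipSpace=%d:", Skip);
      for (const Tok &T : lex("foo  bar", Skip != 0))
        std::printf(" %s('%s')", T.Kind, T.Text.c_str());
      std::printf("\n");
    }
    return 0;
  }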

Modified: llvm/branches/R600/lib/MC/MCParser/AsmParser.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/MC/MCParser/AsmParser.cpp?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/lib/MC/MCParser/AsmParser.cpp (original)
+++ llvm/branches/R600/lib/MC/MCParser/AsmParser.cpp Fri Sep 21 12:29:50 2012
@@ -47,7 +47,7 @@
 /// \brief Helper class for tracking macro definitions.
 typedef std::vector<AsmToken> MacroArgument;
 typedef std::vector<MacroArgument> MacroArguments;
-typedef StringRef MacroParameter;
+typedef std::pair<StringRef, MacroArgument> MacroParameter;
 typedef std::vector<MacroParameter> MacroParameters;
 
 struct Macro {
@@ -130,6 +130,9 @@
   /// AssemblerDialect. ~0U means unset; use the value provided by MAI.
   unsigned AssemblerDialect;
 
+  /// IsDarwin - is Darwin compatibility enabled?
+  bool IsDarwin;
+
 public:
   AsmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
             const MCAsmInfo &MAI);
@@ -209,7 +212,8 @@
 
   virtual void EatToEndOfStatement();
 
-  bool ParseMacroArgument(MacroArgument &MA);
+  bool ParseMacroArgument(MacroArgument &MA,
+                          AsmToken::TokenKind &ArgumentDelimiter);
   bool ParseMacroArguments(const Macro *M, MacroArguments &A);
 
   /// \brief Parse up to the end of statement and return the contents from the
@@ -407,8 +411,8 @@
                      MCStreamer &_Out, const MCAsmInfo &_MAI)
   : Lexer(_MAI), Ctx(_Ctx), Out(_Out), MAI(_MAI), SrcMgr(_SM),
     GenericParser(new GenericAsmParser), PlatformParser(0),
-    CurBuffer(0), MacrosEnabled(true), CppHashLineNumber(0), 
-    AssemblerDialect(~0U) {
+    CurBuffer(0), MacrosEnabled(true), CppHashLineNumber(0),
+    AssemblerDialect(~0U), IsDarwin(false) {
   // Save the old handler.
   SavedDiagHandler = SrcMgr.getDiagHandler();
   SavedDiagContext = SrcMgr.getDiagContext();
@@ -429,6 +433,7 @@
   } else if (_MAI.hasSubsectionsViaSymbols()) {
     PlatformParser = createDarwinAsmParser();
     PlatformParser->Initialize(*this);
+    IsDarwin = true;
   } else {
     PlatformParser = createELFAsmParser();
     PlatformParser->Initialize(*this);
@@ -1471,6 +1476,8 @@
   if (NParameters != 0 && NParameters != A.size())
     return Error(L, "Wrong number of arguments");
 
+  // A macro without parameters is handled differently on Darwin:
+  // gas accepts no arguments and does no substitutions
   while (!Body.empty()) {
     // Scan for the next substitution.
     std::size_t End = Body.size(), Pos = 0;
@@ -1534,18 +1541,26 @@
       StringRef Argument(Begin, I - (Pos +1));
       unsigned Index = 0;
       for (; Index < NParameters; ++Index)
-        if (Parameters[Index] == Argument)
+        if (Parameters[Index].first == Argument)
           break;
 
-      // FIXME: We should error at the macro definition.
-      if (Index == NParameters)
-        return Error(L, "Parameter not found");
-
-      for (MacroArgument::const_iterator it = A[Index].begin(),
-             ie = A[Index].end(); it != ie; ++it)
-        OS << it->getString();
+      if (Index == NParameters) {
+        if (Body[Pos+1] == '(' && Body[Pos+2] == ')')
+          Pos += 3;
+        else {
+          OS << '\\' << Argument;
+          Pos = I;
+        }
+      } else {
+        for (MacroArgument::const_iterator it = A[Index].begin(),
+               ie = A[Index].end(); it != ie; ++it)
+          if (it->getKind() == AsmToken::String)
+            OS << it->getStringContents();
+          else
+            OS << it->getString();
 
-      Pos += 1 + Argument.size();
+        Pos += 1 + Argument.size();
+      }
     }
     // Update the scan point.
     Body = Body.substr(Pos);
@@ -1560,22 +1575,97 @@
 {
 }
 
+static bool IsOperator(AsmToken::TokenKind kind) {
+  switch (kind) {
+    default:
+      return false;
+    case AsmToken::Plus:
+    case AsmToken::Minus:
+    case AsmToken::Tilde:
+    case AsmToken::Slash:
+    case AsmToken::Star:
+    case AsmToken::Dot:
+    case AsmToken::Equal:
+    case AsmToken::EqualEqual:
+    case AsmToken::Pipe:
+    case AsmToken::PipePipe:
+    case AsmToken::Caret:
+    case AsmToken::Amp:
+    case AsmToken::AmpAmp:
+    case AsmToken::Exclaim:
+    case AsmToken::ExclaimEqual:
+    case AsmToken::Percent:
+    case AsmToken::Less:
+    case AsmToken::LessEqual:
+    case AsmToken::LessLess:
+    case AsmToken::LessGreater:
+    case AsmToken::Greater:
+    case AsmToken::GreaterEqual:
+    case AsmToken::GreaterGreater:
+      return true;
+  }
+}
+
 /// ParseMacroArgument - Extract AsmTokens for a macro argument.
 /// This is used for both default macro parameter values and the
 /// arguments in macro invocations
-bool AsmParser::ParseMacroArgument(MacroArgument &MA) {
+bool AsmParser::ParseMacroArgument(MacroArgument &MA,
+                                   AsmToken::TokenKind &ArgumentDelimiter) {
   unsigned ParenLevel = 0;
+  unsigned AddTokens = 0;
+
+  // gas accepts arguments separated by whitespace, except on Darwin
+  if (!IsDarwin)
+    Lexer.setSkipSpace(false);
 
   for (;;) {
-    if (Lexer.is(AsmToken::Eof) || Lexer.is(AsmToken::Equal))
+    if (Lexer.is(AsmToken::Eof) || Lexer.is(AsmToken::Equal)) {
+      Lexer.setSkipSpace(true);
       return TokError("unexpected token in macro instantiation");
+    }
+
+    if (ParenLevel == 0 && Lexer.is(AsmToken::Comma)) {
+      // Spaces and commas cannot be mixed to delimit parameters
+      if (ArgumentDelimiter == AsmToken::Eof)
+        ArgumentDelimiter = AsmToken::Comma;
+      else if (ArgumentDelimiter != AsmToken::Comma) {
+        Lexer.setSkipSpace(true);
+        return TokError("expected ' ' for macro argument separator");
+      }
+      break;
+    }
+
+    if (Lexer.is(AsmToken::Space)) {
+      Lex(); // Eat spaces
+
+      // Spaces can delimit parameters, but could also be part of an expression.
+      // If the token after a space is an operator, add the token and the next
+      // one into this argument
+      if (ArgumentDelimiter == AsmToken::Space ||
+          ArgumentDelimiter == AsmToken::Eof) {
+        if (IsOperator(Lexer.getKind())) {
+          // Check to see whether the token is used as an operator,
+          // or part of an identifier
+          const char *NextChar = getTok().getEndLoc().getPointer() + 1;
+          if (*NextChar == ' ')
+            AddTokens = 2;
+        }
+
+        if (!AddTokens && ParenLevel == 0) {
+          if (ArgumentDelimiter == AsmToken::Eof &&
+              !IsOperator(Lexer.getKind()))
+            ArgumentDelimiter = AsmToken::Space;
+          break;
+        }
+      }
+    }
 
     // HandleMacroEntry relies on not advancing the lexer here
     // to be able to fill in the remaining default parameter values
     if (Lexer.is(AsmToken::EndOfStatement))
       break;
-    if (ParenLevel == 0 && Lexer.is(AsmToken::Comma))
-      break;
 
     // Adjust the current parentheses level.
     if (Lexer.is(AsmToken::LParen))
@@ -1585,8 +1675,12 @@
 
     // Append the token to the current argument list.
     MA.push_back(getTok());
+    if (AddTokens)
+      AddTokens--;
     Lex();
   }
+
+  Lexer.setSkipSpace(true);
   if (ParenLevel != 0)
     return TokError("unbalanced parentheses in macro argument");
   return false;
@@ -1595,6 +1689,9 @@
 // Parse the macro instantiation arguments.
 bool AsmParser::ParseMacroArguments(const Macro *M, MacroArguments &A) {
   const unsigned NParameters = M ? M->Parameters.size() : 0;
+  // Argument delimiter is initially unknown. It will be set by
+  // ParseMacroArgument()
+  AsmToken::TokenKind ArgumentDelimiter = AsmToken::Eof;
 
   // Parse two kinds of macro invocations:
   // - macros defined without any parameters accept an arbitrary number of them
@@ -1603,13 +1700,30 @@
        ++Parameter) {
     MacroArgument MA;
 
-    if (ParseMacroArgument(MA))
+    if (ParseMacroArgument(MA, ArgumentDelimiter))
       return true;
 
-    A.push_back(MA);
+    if (!MA.empty() || !NParameters)
+      A.push_back(MA);
+    else if (NParameters) {
+      if (!M->Parameters[Parameter].second.empty())
+        A.push_back(M->Parameters[Parameter].second);
+    }
 
-    if (Lexer.is(AsmToken::EndOfStatement))
+    // At the end of the statement, fill in remaining arguments that have
+    // default values. If there aren't any, then the next argument is
+    // required but missing
+    if (Lexer.is(AsmToken::EndOfStatement)) {
+      if (NParameters && Parameter < NParameters - 1) {
+        if (M->Parameters[Parameter + 1].second.empty())
+          return TokError("macro argument '" +
+                          Twine(M->Parameters[Parameter + 1].first) +
+                          "' is missing");
+        else
+          continue;
+      }
       return false;
+    }
 
     if (Lexer.is(AsmToken::Comma))
       Lex();
@@ -3088,22 +3202,30 @@
     return TokError("expected identifier in '.macro' directive");
 
   MacroParameters Parameters;
+  // Argument delimiter is initially unknown. It will be set by
+  // ParseMacroArgument()
+  AsmToken::TokenKind ArgumentDelimiter = AsmToken::Eof;
   if (getLexer().isNot(AsmToken::EndOfStatement)) {
     for (;;) {
       MacroParameter Parameter;
-      if (getParser().ParseIdentifier(Parameter))
+      if (getParser().ParseIdentifier(Parameter.first))
         return TokError("expected identifier in '.macro' directive");
+
+      if (getLexer().is(AsmToken::Equal)) {
+        Lex();
+        if (getParser().ParseMacroArgument(Parameter.second, ArgumentDelimiter))
+          return true;
+      }
+
       Parameters.push_back(Parameter);
 
-      if (getLexer().isNot(AsmToken::Comma))
+      if (getLexer().is(AsmToken::Comma))
+        Lex();
+      else if (getLexer().is(AsmToken::EndOfStatement))
         break;
-      Lex();
     }
   }
 
-  if (getLexer().isNot(AsmToken::EndOfStatement))
-    return TokError("unexpected token in '.macro' directive");
-
   // Eat the end of statement.
   Lex();
 
@@ -3308,7 +3430,7 @@
   MacroParameters Parameters;
   MacroParameter Parameter;
 
-  if (ParseIdentifier(Parameter))
+  if (ParseIdentifier(Parameter.first))
     return TokError("expected identifier in '.irp' directive");
 
   Parameters.push_back(Parameter);
@@ -3354,7 +3476,7 @@
   MacroParameters Parameters;
   MacroParameter Parameter;
 
-  if (ParseIdentifier(Parameter))
+  if (ParseIdentifier(Parameter.first))
     return TokError("expected identifier in '.irpc' directive");
 
   Parameters.push_back(Parameter);
@@ -3404,7 +3526,7 @@
 
 bool AsmParser::ParseDirectiveEndr(SMLoc DirectiveLoc) {
   if (ActiveMacros.empty())
-    return TokError("unexpected '.endr' directive, no current .rept");
+    return TokError("unmatched '.endr' directive");
 
   // The only .rept that should get here are the ones created by
   // InstantiateMacroLikeBody.
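
Taken together, the AsmParser changes above give .macro gas-compatible default parameter values and space-separated arguments. A sketch of the fill-in rule (simplified to std::string instead of MacroArgument token lists; the .macro example in the comment is illustrative, not from the test suite):

  //   .macro inc reg, amount=1
  //       add \reg, \reg, #\amount
  //   .endm
  //   inc r0          @ expands with amount = 1 (default)
  //   inc r0, 4       @ expands with amount = 4
  #include <cstdio>
  #include <string>
  #include <vector>

  struct Param { std::string Name, Default; }; // mirrors pair<StringRef, MacroArgument>

  // Fill Args with explicit values first, then defaults; report an argument
  // that is missing and has no default, as ParseMacroArguments() now does.
  bool fillArguments(const std::vector<Param> &Params,
                     std::vector<std::string> Given,
                     std::vector<std::string> &Args) {
    for (size_t i = 0; i < Params.size(); ++i) {
      if (i < Given.size() && !Given[i].empty())
        Args.push_back(Given[i]);
      else if (!Params[i].Default.empty())
        Args.push_back(Params[i].Default);
      else {
        std::fprintf(stderr, "macro argument '%s' is missing\n",
                     Params[i].Name.c_str());
        return true; // error
      }
    }
    return false;
  }

  int main() {
    std::vector<Param> Params = {{"reg", ""}, {"amount", "1"}};
    std::vector<std::string> Args;
    if (!fillArguments(Params, {"r0"}, Args))
      std::printf("inc expands with reg=%s amount=%s\n",
                  Args[0].c_str(), Args[1].c_str());
    return 0;
  }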

Modified: llvm/branches/R600/lib/MC/MCParser/MCAsmLexer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/MC/MCParser/MCAsmLexer.cpp?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/lib/MC/MCParser/MCAsmLexer.cpp (original)
+++ llvm/branches/R600/lib/MC/MCParser/MCAsmLexer.cpp Fri Sep 21 12:29:50 2012
@@ -12,7 +12,8 @@
 
 using namespace llvm;
 
-MCAsmLexer::MCAsmLexer() : CurTok(AsmToken::Error, StringRef()), TokStart(0) {
+MCAsmLexer::MCAsmLexer() : CurTok(AsmToken::Error, StringRef()),
+                           TokStart(0), SkipSpace(true) {
 }
 
 MCAsmLexer::~MCAsmLexer() {

Modified: llvm/branches/R600/lib/MC/MachObjectWriter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/MC/MachObjectWriter.cpp?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/lib/MC/MachObjectWriter.cpp (original)
+++ llvm/branches/R600/lib/MC/MachObjectWriter.cpp Fri Sep 21 12:29:50 2012
@@ -820,8 +820,12 @@
          it = Asm.data_region_begin(), ie = Asm.data_region_end();
          it != ie; ++it) {
     const DataRegionData *Data = &(*it);
-    uint64_t Start = getSymbolAddress(&Layout.getAssembler().getSymbolData(*Data->Start), Layout);
-    uint64_t End = getSymbolAddress(&Layout.getAssembler().getSymbolData(*Data->End), Layout);
+    uint64_t Start =
+      getSymbolAddress(&Layout.getAssembler().getSymbolData(*Data->Start),
+                       Layout);
+    uint64_t End =
+      getSymbolAddress(&Layout.getAssembler().getSymbolData(*Data->End),
+                       Layout);
     DEBUG(dbgs() << "data in code region-- kind: " << Data->Kind
                  << "  start: " << Start << "(" << Data->Start->getName() << ")"
                  << "  end: " << End << "(" << Data->End->getName() << ")"

Modified: llvm/branches/R600/lib/Support/Memory.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Support/Memory.cpp?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/lib/Support/Memory.cpp (original)
+++ llvm/branches/R600/lib/Support/Memory.cpp Fri Sep 21 12:29:50 2012
@@ -16,10 +16,6 @@
 #include "llvm/Support/Valgrind.h"
 #include "llvm/Config/config.h"
 
-namespace llvm {
-using namespace sys;
-}
-
 // Include the platform-specific parts of this class.
 #ifdef LLVM_ON_UNIX
 #include "Unix/Memory.inc"
@@ -27,51 +23,3 @@
 #ifdef LLVM_ON_WIN32
 #include "Windows/Memory.inc"
 #endif
-
-extern "C" void sys_icache_invalidate(const void *Addr, size_t len);
-
-/// InvalidateInstructionCache - Before the JIT can run a block of code
-/// that has been emitted it must invalidate the instruction cache on some
-/// platforms.
-void llvm::sys::Memory::InvalidateInstructionCache(const void *Addr,
-                                                   size_t Len) {
-
-// icache invalidation for PPC and ARM.
-#if defined(__APPLE__)
-
-#  if (defined(__POWERPC__) || defined (__ppc__) || \
-     defined(_POWER) || defined(_ARCH_PPC)) || defined(__arm__)
-  sys_icache_invalidate(const_cast<void *>(Addr), Len);
-#  endif
-
-#else
-
-#  if (defined(__POWERPC__) || defined (__ppc__) || \
-       defined(_POWER) || defined(_ARCH_PPC)) && defined(__GNUC__)
-  const size_t LineSize = 32;
-
-  const intptr_t Mask = ~(LineSize - 1);
-  const intptr_t StartLine = ((intptr_t) Addr) & Mask;
-  const intptr_t EndLine = ((intptr_t) Addr + Len + LineSize - 1) & Mask;
-
-  for (intptr_t Line = StartLine; Line < EndLine; Line += LineSize)
-    asm volatile("dcbf 0, %0" : : "r"(Line));
-  asm volatile("sync");
-
-  for (intptr_t Line = StartLine; Line < EndLine; Line += LineSize)
-    asm volatile("icbi 0, %0" : : "r"(Line));
-  asm volatile("isync");
-#  elif defined(__arm__) && defined(__GNUC__)
-  // FIXME: Can we safely always call this for __GNUC__ everywhere?
-  const char *Start = static_cast<const char *>(Addr);
-  const char *End = Start + Len;
-  __clear_cache(const_cast<char *>(Start), const_cast<char *>(End));
-#  elif defined(__mips__)
-  const char *Start = static_cast<const char *>(Addr);
-  cacheflush(const_cast<char *>(Start), Len, BCACHE);
-#  endif
-
-#endif  // end apple
-
-  ValgrindDiscardTranslations(Addr, Len);
-}

Modified: llvm/branches/R600/lib/Support/Unix/Memory.inc
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Support/Unix/Memory.inc?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/lib/Support/Unix/Memory.inc (original)
+++ llvm/branches/R600/lib/Support/Unix/Memory.inc Fri Sep 21 12:29:50 2012
@@ -13,6 +13,7 @@
 
 #include "Unix.h"
 #include "llvm/Support/DataTypes.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Process.h"
 
 #ifdef HAVE_SYS_MMAN_H
@@ -31,14 +32,138 @@
 #  endif
 #endif
 
+extern "C" void sys_icache_invalidate(const void *Addr, size_t len);
+
+namespace {
+
+int getPosixProtectionFlags(unsigned Flags) {
+  switch (Flags) {
+  case llvm::sys::Memory::MF_READ:
+    return PROT_READ;
+  case llvm::sys::Memory::MF_WRITE:
+    return PROT_WRITE;
+  case llvm::sys::Memory::MF_READ|llvm::sys::Memory::MF_WRITE:
+    return PROT_READ | PROT_WRITE;
+  case llvm::sys::Memory::MF_READ|llvm::sys::Memory::MF_EXEC:
+    return PROT_READ | PROT_EXEC;
+  case llvm::sys::Memory::MF_READ |
+       llvm::sys::Memory::MF_WRITE |
+       llvm::sys::Memory::MF_EXEC:
+    return PROT_READ | PROT_WRITE | PROT_EXEC;
+  case llvm::sys::Memory::MF_EXEC:
+    return PROT_EXEC;
+  default:
+    llvm_unreachable("Illegal memory protection flag specified!");
+  }
+  // Provide a default return value as required by some compilers.
+  return PROT_NONE;
+}
+
+} // namespace
+
+namespace llvm {
+namespace sys {
+
+MemoryBlock
+Memory::allocateMappedMemory(size_t NumBytes,
+                             const MemoryBlock *const NearBlock,
+                             unsigned PFlags,
+                             error_code &EC) {
+  EC = error_code::success();
+  if (NumBytes == 0)
+    return MemoryBlock();
+
+  static const size_t PageSize = Process::GetPageSize();
+  const size_t NumPages = (NumBytes+PageSize-1)/PageSize;
+
+  int fd = -1;
+#ifdef NEED_DEV_ZERO_FOR_MMAP
+  static int zero_fd = open("/dev/zero", O_RDWR);
+  if (zero_fd == -1) {
+    EC = error_code(errno, system_category());
+    return MemoryBlock();
+  }
+  fd = zero_fd;
+#endif
+
+  int MMFlags = MAP_PRIVATE |
+#ifdef HAVE_MMAP_ANONYMOUS
+  MAP_ANONYMOUS
+#else
+  MAP_ANON
+#endif
+  ; // Ends statement above
+
+  int Protect = getPosixProtectionFlags(PFlags);
+
+  // Use any near hint and the page size to set a page-aligned starting address
+  uintptr_t Start = NearBlock ? reinterpret_cast<uintptr_t>(NearBlock->base()) +
+                                      NearBlock->size() : 0;
+  if (Start && Start % PageSize)
+    Start += PageSize - Start % PageSize;
+
+  void *Addr = ::mmap(reinterpret_cast<void*>(Start), PageSize*NumPages,
+                      Protect, MMFlags, fd, 0);
+  if (Addr == MAP_FAILED) {
+    if (NearBlock) //Try again without a near hint
+      return allocateMappedMemory(NumBytes, 0, PFlags, EC);
+
+    EC = error_code(errno, system_category());
+    return MemoryBlock();
+  }
+
+  MemoryBlock Result;
+  Result.Address = Addr;
+  Result.Size = NumPages*PageSize;
+
+  if (PFlags & MF_EXEC)
+    Memory::InvalidateInstructionCache(Result.Address, Result.Size);
+
+  return Result;
+}
+
+error_code
+Memory::releaseMappedMemory(MemoryBlock &M) {
+  if (M.Address == 0 || M.Size == 0)
+    return error_code::success();
+
+  if (0 != ::munmap(M.Address, M.Size))
+    return error_code(errno, system_category());
+
+  M.Address = 0;
+  M.Size = 0;
+
+  return error_code::success();
+}
+
+error_code
+Memory::protectMappedMemory(const MemoryBlock &M, unsigned Flags) {
+  if (M.Address == 0 || M.Size == 0)
+    return error_code::success();
+
+  if (!Flags)
+    return error_code(EINVAL, generic_category());
+
+  int Protect = getPosixProtectionFlags(Flags);
+
+  int Result = ::mprotect(M.Address, M.Size, Protect);
+  if (Result != 0)
+    return error_code(errno, system_category());
+
+  if (Flags & MF_EXEC)
+    Memory::InvalidateInstructionCache(M.Address, M.Size);
+
+  return error_code::success();
+}
+
 /// AllocateRWX - Allocate a slab of memory with read/write/execute
 /// permissions.  This is typically used for JIT applications where we want
 /// to emit code to the memory then jump to it.  Getting this type of memory
 /// is very OS specific.
 ///
-llvm::sys::MemoryBlock
-llvm::sys::Memory::AllocateRWX(size_t NumBytes, const MemoryBlock* NearBlock,
-                               std::string *ErrMsg) {
+MemoryBlock
+Memory::AllocateRWX(size_t NumBytes, const MemoryBlock* NearBlock,
+                    std::string *ErrMsg) {
   if (NumBytes == 0) return MemoryBlock();
 
   size_t pageSize = Process::GetPageSize();
@@ -86,7 +211,7 @@
                                 VM_PROT_READ | VM_PROT_EXECUTE | VM_PROT_COPY);
   if (KERN_SUCCESS != kr) {
     MakeErrMsg(ErrMsg, "vm_protect max RX failed");
-    return sys::MemoryBlock();
+    return MemoryBlock();
   }
 
   kr = vm_protect(mach_task_self(), (vm_address_t)pa,
@@ -94,7 +219,7 @@
                   VM_PROT_READ | VM_PROT_WRITE);
   if (KERN_SUCCESS != kr) {
     MakeErrMsg(ErrMsg, "vm_protect RW failed");
-    return sys::MemoryBlock();
+    return MemoryBlock();
   }
 #endif
 
@@ -105,17 +230,17 @@
   return result;
 }
 
-bool llvm::sys::Memory::ReleaseRWX(MemoryBlock &M, std::string *ErrMsg) {
+bool Memory::ReleaseRWX(MemoryBlock &M, std::string *ErrMsg) {
   if (M.Address == 0 || M.Size == 0) return false;
   if (0 != ::munmap(M.Address, M.Size))
     return MakeErrMsg(ErrMsg, "Can't release RWX Memory");
   return false;
 }
 
-bool llvm::sys::Memory::setWritable (MemoryBlock &M, std::string *ErrMsg) {
+bool Memory::setWritable (MemoryBlock &M, std::string *ErrMsg) {
 #if defined(__APPLE__) && defined(__arm__)
   if (M.Address == 0 || M.Size == 0) return false;
-  sys::Memory::InvalidateInstructionCache(M.Address, M.Size);
+  Memory::InvalidateInstructionCache(M.Address, M.Size);
   kern_return_t kr = vm_protect(mach_task_self(), (vm_address_t)M.Address,
     (vm_size_t)M.Size, 0, VM_PROT_READ | VM_PROT_WRITE);
   return KERN_SUCCESS == kr;
@@ -124,10 +249,10 @@
 #endif
 }
 
-bool llvm::sys::Memory::setExecutable (MemoryBlock &M, std::string *ErrMsg) {
+bool Memory::setExecutable (MemoryBlock &M, std::string *ErrMsg) {
 #if defined(__APPLE__) && defined(__arm__)
   if (M.Address == 0 || M.Size == 0) return false;
-  sys::Memory::InvalidateInstructionCache(M.Address, M.Size);
+  Memory::InvalidateInstructionCache(M.Address, M.Size);
   kern_return_t kr = vm_protect(mach_task_self(), (vm_address_t)M.Address,
     (vm_size_t)M.Size, 0, VM_PROT_READ | VM_PROT_EXECUTE | VM_PROT_COPY);
   return KERN_SUCCESS == kr;
@@ -136,7 +261,7 @@
 #endif
 }
 
-bool llvm::sys::Memory::setRangeWritable(const void *Addr, size_t Size) {
+bool Memory::setRangeWritable(const void *Addr, size_t Size) {
 #if defined(__APPLE__) && defined(__arm__)
   kern_return_t kr = vm_protect(mach_task_self(), (vm_address_t)Addr,
                                 (vm_size_t)Size, 0,
@@ -147,7 +272,7 @@
 #endif
 }
 
-bool llvm::sys::Memory::setRangeExecutable(const void *Addr, size_t Size) {
+bool Memory::setRangeExecutable(const void *Addr, size_t Size) {
 #if defined(__APPLE__) && defined(__arm__)
   kern_return_t kr = vm_protect(mach_task_self(), (vm_address_t)Addr,
                                 (vm_size_t)Size, 0,
@@ -157,3 +282,52 @@
   return true;
 #endif
 }
+
+/// InvalidateInstructionCache - Before the JIT can run a block of code
+/// that has been emitted it must invalidate the instruction cache on some
+/// platforms.
+void Memory::InvalidateInstructionCache(const void *Addr,
+                                        size_t Len) {
+
+// icache invalidation for PPC and ARM.
+#if defined(__APPLE__)
+
+#  if (defined(__POWERPC__) || defined (__ppc__) || \
+     defined(_POWER) || defined(_ARCH_PPC)) || defined(__arm__)
+  sys_icache_invalidate(const_cast<void *>(Addr), Len);
+#  endif
+
+#else
+
+#  if (defined(__POWERPC__) || defined (__ppc__) || \
+       defined(_POWER) || defined(_ARCH_PPC)) && defined(__GNUC__)
+  const size_t LineSize = 32;
+
+  const intptr_t Mask = ~(LineSize - 1);
+  const intptr_t StartLine = ((intptr_t) Addr) & Mask;
+  const intptr_t EndLine = ((intptr_t) Addr + Len + LineSize - 1) & Mask;
+
+  for (intptr_t Line = StartLine; Line < EndLine; Line += LineSize)
+    asm volatile("dcbf 0, %0" : : "r"(Line));
+  asm volatile("sync");
+
+  for (intptr_t Line = StartLine; Line < EndLine; Line += LineSize)
+    asm volatile("icbi 0, %0" : : "r"(Line));
+  asm volatile("isync");
+#  elif defined(__arm__) && defined(__GNUC__)
+  // FIXME: Can we safely always call this for __GNUC__ everywhere?
+  const char *Start = static_cast<const char *>(Addr);
+  const char *End = Start + Len;
+  __clear_cache(const_cast<char *>(Start), const_cast<char *>(End));
+#  elif defined(__mips__)
+  const char *Start = static_cast<const char *>(Addr);
+  cacheflush(const_cast<char *>(Start), Len, BCACHE);
+#  endif
+
+#endif  // end apple
+
+  ValgrindDiscardTranslations(Addr, Len);
+}
+
+} // namespace sys
+} // namespace llvm
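
The new allocate/protect/release entry points separate mapping from permission changes, which is the usual W^X pattern for a JIT. A hedged usage sketch against the interface added in this merge (the emitAndProtect wrapper is ours, and error handling is abbreviated):

  #include "llvm/Support/Memory.h"
  #include "llvm/Support/system_error.h"
  #include <cstring>

  using namespace llvm;
  using namespace llvm::sys;

  bool emitAndProtect(const void *Code, size_t Len) {
    error_code EC;
    MemoryBlock MB = Memory::allocateMappedMemory(
        Len, /*NearBlock=*/0, Memory::MF_READ | Memory::MF_WRITE, EC);
    if (EC != error_code::success())
      return false;

    std::memcpy(MB.base(), Code, Len);  // write while the pages are RW

    // Dropping write and adding exec also invalidates the instruction
    // cache, per the MF_EXEC check in protectMappedMemory() above.
    EC = Memory::protectMappedMemory(MB, Memory::MF_READ | Memory::MF_EXEC);
    if (EC != error_code::success())
      return false;

    // ... call into MB.base() here ...

    return Memory::releaseMappedMemory(MB) == error_code::success();
  }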

Modified: llvm/branches/R600/lib/Support/Windows/Memory.inc
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Support/Windows/Memory.inc?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/lib/Support/Windows/Memory.inc (original)
+++ llvm/branches/R600/lib/Support/Windows/Memory.inc Fri Sep 21 12:29:50 2012
@@ -12,51 +12,163 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "Windows.h"
 #include "llvm/Support/DataTypes.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Process.h"
+#include "Windows.h"
+
+namespace {
+
+DWORD getWindowsProtectionFlags(unsigned Flags) {
+  switch (Flags) {
+  // Contrary to what you might expect, the Windows page protection flags
+  // are not a bitwise combination of RWX values
+  case llvm::sys::Memory::MF_READ:
+    return PAGE_READONLY;
+  case llvm::sys::Memory::MF_WRITE:
+    // Note: PAGE_WRITE is not supported by VirtualProtect
+    return PAGE_READWRITE;
+  case llvm::sys::Memory::MF_READ|llvm::sys::Memory::MF_WRITE:
+    return PAGE_READWRITE;
+  case llvm::sys::Memory::MF_READ|llvm::sys::Memory::MF_EXEC:
+    return PAGE_EXECUTE_READ;
+  case llvm::sys::Memory::MF_READ |
+         llvm::sys::Memory::MF_WRITE |
+         llvm::sys::Memory::MF_EXEC:
+    return PAGE_EXECUTE_READWRITE;
+  case llvm::sys::Memory::MF_EXEC:
+    return PAGE_EXECUTE;
+  default:
+    llvm_unreachable("Illegal memory protection flag specified!");
+  }
+  // Provide a default return value as required by some compilers.
+  return PAGE_NOACCESS;
+}
+
+size_t getAllocationGranularity() {
+  SYSTEM_INFO Info;
+  ::GetSystemInfo(&Info);
+  if (Info.dwPageSize > Info.dwAllocationGranularity)
+    return Info.dwPageSize;
+  else
+    return Info.dwAllocationGranularity;
+}
+
+} // namespace
 
 namespace llvm {
-using namespace sys;
+namespace sys {
 
 //===----------------------------------------------------------------------===//
 //=== WARNING: Implementation here must contain only Win32 specific code
 //===          and must not be UNIX code
 //===----------------------------------------------------------------------===//
 
-MemoryBlock Memory::AllocateRWX(size_t NumBytes,
-                                const MemoryBlock *NearBlock,
-                                std::string *ErrMsg) {
-  if (NumBytes == 0) return MemoryBlock();
-
-  static const size_t pageSize = Process::GetPageSize();
-  size_t NumPages = (NumBytes+pageSize-1)/pageSize;
-
-  PVOID start = NearBlock ? static_cast<unsigned char *>(NearBlock->base()) +
-                                NearBlock->size() : NULL;
+MemoryBlock Memory::allocateMappedMemory(size_t NumBytes,
+                                         const MemoryBlock *const NearBlock,
+                                         unsigned Flags,
+                                         error_code &EC) {
+  EC = error_code::success();
+  if (NumBytes == 0)
+    return MemoryBlock();
 
-  void *pa = VirtualAlloc(start, NumPages*pageSize, MEM_RESERVE | MEM_COMMIT,
-                  PAGE_EXECUTE_READWRITE);
-  if (pa == NULL) {
+  // While we'd be happy to allocate single pages, the Windows allocation
+  // granularity may be larger than a single page (in practice, it is 64K),
+  // so mapping less than that will create an unreachable fragment of memory.
+  static const size_t Granularity = getAllocationGranularity();
+  const size_t NumBlocks = (NumBytes+Granularity-1)/Granularity;
+
+  uintptr_t Start = NearBlock ? reinterpret_cast<uintptr_t>(NearBlock->base()) +
+                                NearBlock->size()
+                              : 0;
+
+  // If the requested address is not aligned to the allocation granularity,
+  // round up to get beyond NearBlock. VirtualAlloc would have rounded down.
+  if (Start && Start % Granularity != 0)
+    Start += Granularity - Start % Granularity;
+
+  DWORD Protect = getWindowsProtectionFlags(Flags);
+
+  void *PA = ::VirtualAlloc(reinterpret_cast<void*>(Start),
+                            NumBlocks*Granularity,
+                            MEM_RESERVE | MEM_COMMIT, Protect);
+  if (PA == NULL) {
     if (NearBlock) {
       // Try again without the NearBlock hint
-      return AllocateRWX(NumBytes, NULL, ErrMsg);
+      return allocateMappedMemory(NumBytes, NULL, Flags, EC);
     }
-    MakeErrMsg(ErrMsg, "Can't allocate RWX Memory: ");
+    EC = error_code(::GetLastError(), system_category());
     return MemoryBlock();
   }
 
-  MemoryBlock result;
-  result.Address = pa;
-  result.Size = NumPages*pageSize;
-  return result;
+  MemoryBlock Result;
+  Result.Address = PA;
+  Result.Size = NumBlocks*Granularity;
+  if (Flags & MF_EXEC)
+    Memory::InvalidateInstructionCache(Result.Address, Result.Size);
+
+  return Result;
 }
 
-bool Memory::ReleaseRWX(MemoryBlock &M, std::string *ErrMsg) {
-  if (M.Address == 0 || M.Size == 0) return false;
+error_code Memory::releaseMappedMemory(MemoryBlock &M) {
+  if (M.Address == 0 || M.Size == 0)
+    return error_code::success();
+
   if (!VirtualFree(M.Address, 0, MEM_RELEASE))
-    return MakeErrMsg(ErrMsg, "Can't release RWX Memory: ");
-  return false;
+    return error_code(::GetLastError(), system_category());
+
+  M.Address = 0;
+  M.Size = 0;
+
+  return error_code::success();
+}
+
+error_code Memory::protectMappedMemory(const MemoryBlock &M,
+                                       unsigned Flags) {
+  if (M.Address == 0 || M.Size == 0)
+    return error_code::success();
+
+  DWORD Protect = getWindowsProtectionFlags(Flags);
+
+  DWORD OldFlags;
+  if (!VirtualProtect(M.Address, M.Size, Protect, &OldFlags))
+    return error_code(::GetLastError(), system_category());
+
+  if (Flags & MF_EXEC)
+    Memory::InvalidateInstructionCache(M.Address, M.Size);
+
+  return error_code::success();
+}
+
+/// InvalidateInstructionCache - Before the JIT can run a block of code
+/// that has been emitted it must invalidate the instruction cache on some
+/// platforms.
+void Memory::InvalidateInstructionCache(const void *Addr, size_t Len) {
+  FlushInstructionCache(GetCurrentProcess(), Addr, Len);
+}
+
+MemoryBlock Memory::AllocateRWX(size_t NumBytes,
+                                const MemoryBlock *NearBlock,
+                                std::string *ErrMsg) {
+  MemoryBlock MB;
+  error_code EC;
+  MB = allocateMappedMemory(NumBytes, NearBlock,
+                            MF_READ|MF_WRITE|MF_EXEC, EC);
+  if (EC != error_code::success() && ErrMsg) {
+    MakeErrMsg(ErrMsg, EC.message());
+  }
+  return MB;
+}
+
+bool Memory::ReleaseRWX(MemoryBlock &M, std::string *ErrMsg) {
+  error_code EC = releaseMappedMemory(M);
+  if (EC == error_code::success())
+    return false;
+  MakeErrMsg(ErrMsg, EC.message());
+  return true;
 }
 
 static DWORD getProtection(const void *addr) {
@@ -93,7 +205,7 @@
   }
 
   DWORD oldProt;
-  sys::Memory::InvalidateInstructionCache(Addr, Size);
+  Memory::InvalidateInstructionCache(Addr, Size);
   return ::VirtualProtect(const_cast<LPVOID>(Addr), Size, prot, &oldProt)
             == TRUE;
 }
@@ -112,9 +224,10 @@
   }
 
   DWORD oldProt;
-  sys::Memory::InvalidateInstructionCache(Addr, Size);
+  Memory::InvalidateInstructionCache(Addr, Size);
   return ::VirtualProtect(const_cast<LPVOID>(Addr), Size, prot, &oldProt)
             == TRUE;
 }
 
-}
+} // namespace sys
+} // namespace llvm
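
One Windows-specific subtlety above: VirtualAlloc rounds a requested address down to the allocation granularity, so the near-block hint has to be rounded up by hand or the mapping could land on top of NearBlock. The rounding in isolation (a standalone check, not the Windows API):

  #include <cassert>
  #include <stdint.h>

  // Bump Start up to the next multiple of Granularity (commonly 0x10000,
  // i.e. 64 KiB, on Windows); already-aligned addresses are left alone.
  uintptr_t roundUpToGranularity(uintptr_t Start, uintptr_t Granularity) {
    if (Start % Granularity != 0)
      Start += Granularity - Start % Granularity;
    return Start;
  }

  int main() {
    const uintptr_t G = 0x10000;
    assert(roundUpToGranularity(0x30000, G) == 0x30000); // already aligned
    assert(roundUpToGranularity(0x30001, G) == 0x40000); // rounded up
    assert(roundUpToGranularity(0x3FFFF, G) == 0x40000);
    return 0;
  }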

Modified: llvm/branches/R600/lib/TableGen/Record.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/TableGen/Record.cpp?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/lib/TableGen/Record.cpp (original)
+++ llvm/branches/R600/lib/TableGen/Record.cpp Fri Sep 21 12:29:50 2012
@@ -344,57 +344,53 @@
   return false;
 }
 
-
 /// resolveTypes - Find a common type that T1 and T2 convert to.
 /// Return 0 if no such type exists.
 ///
 RecTy *llvm::resolveTypes(RecTy *T1, RecTy *T2) {
-  if (!T1->typeIsConvertibleTo(T2)) {
-    if (!T2->typeIsConvertibleTo(T1)) {
-      // If one is a Record type, check superclasses
-      RecordRecTy *RecTy1 = dynamic_cast<RecordRecTy*>(T1);
-      if (RecTy1) {
-        // See if T2 inherits from a type T1 also inherits from
-        const std::vector<Record *> &T1SuperClasses =
-          RecTy1->getRecord()->getSuperClasses();
-        for(std::vector<Record *>::const_iterator i = T1SuperClasses.begin(),
-              iend = T1SuperClasses.end();
-            i != iend;
-            ++i) {
-          RecordRecTy *SuperRecTy1 = RecordRecTy::get(*i);
-          RecTy *NewType1 = resolveTypes(SuperRecTy1, T2);
-          if (NewType1 != 0) {
-            if (NewType1 != SuperRecTy1) {
-              delete SuperRecTy1;
-            }
-            return NewType1;
-          }
+  if (T1->typeIsConvertibleTo(T2))
+    return T2;
+  if (T2->typeIsConvertibleTo(T1))
+    return T1;
+
+  // If one is a Record type, check superclasses
+  if (RecordRecTy *RecTy1 = dynamic_cast<RecordRecTy*>(T1)) {
+    // See if T2 inherits from a type T1 also inherits from
+    const std::vector<Record *> &T1SuperClasses =
+      RecTy1->getRecord()->getSuperClasses();
+    for (std::vector<Record *>::const_iterator i = T1SuperClasses.begin(),
+          iend = T1SuperClasses.end();
+        i != iend;
+        ++i) {
+      RecordRecTy *SuperRecTy1 = RecordRecTy::get(*i);
+      RecTy *NewType1 = resolveTypes(SuperRecTy1, T2);
+      if (NewType1 != 0) {
+        if (NewType1 != SuperRecTy1) {
+          delete SuperRecTy1;
         }
+        return NewType1;
       }
-      RecordRecTy *RecTy2 = dynamic_cast<RecordRecTy*>(T2);
-      if (RecTy2) {
-        // See if T1 inherits from a type T2 also inherits from
-        const std::vector<Record *> &T2SuperClasses =
-          RecTy2->getRecord()->getSuperClasses();
-        for (std::vector<Record *>::const_iterator i = T2SuperClasses.begin(),
-              iend = T2SuperClasses.end();
-            i != iend;
-            ++i) {
-          RecordRecTy *SuperRecTy2 = RecordRecTy::get(*i);
-          RecTy *NewType2 = resolveTypes(T1, SuperRecTy2);
-          if (NewType2 != 0) {
-            if (NewType2 != SuperRecTy2) {
-              delete SuperRecTy2;
-            }
-            return NewType2;
-          }
+    }
+  }
+  if (RecordRecTy *RecTy2 = dynamic_cast<RecordRecTy*>(T2)) {
+    // See if T1 inherits from a type T2 also inherits from
+    const std::vector<Record *> &T2SuperClasses =
+      RecTy2->getRecord()->getSuperClasses();
+    for (std::vector<Record *>::const_iterator i = T2SuperClasses.begin(),
+          iend = T2SuperClasses.end();
+        i != iend;
+        ++i) {
+      RecordRecTy *SuperRecTy2 = RecordRecTy::get(*i);
+      RecTy *NewType2 = resolveTypes(T1, SuperRecTy2);
+      if (NewType2 != 0) {
+        if (NewType2 != SuperRecTy2) {
+          delete SuperRecTy2;
         }
+        return NewType2;
       }
-      return 0;
     }
-    return T2;
   }
-  return T1;
+  return 0;
 }
 
 

Modified: llvm/branches/R600/lib/Target/ARM/ARMExpandPseudoInsts.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/ARM/ARMExpandPseudoInsts.cpp?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/ARM/ARMExpandPseudoInsts.cpp (original)
+++ llvm/branches/R600/lib/Target/ARM/ARMExpandPseudoInsts.cpp Fri Sep 21 12:29:50 2012
@@ -103,9 +103,9 @@
     bool IsLoad;
     bool isUpdating;
     bool hasWritebackOperand;
-    NEONRegSpacing RegSpacing;
-    unsigned char NumRegs; // D registers loaded or stored
-    unsigned char RegElts; // elements per D register; used for lane ops
+    uint8_t RegSpacing; // A NEONRegSpacing value
+    uint8_t NumRegs; // D registers loaded or stored
+    uint8_t RegElts; // elements per D register; used for lane ops
     // FIXME: Temporary flag to denote whether the real instruction takes
     // a single register (like the encoding) or all of the registers in
     // the list (like the asm syntax and the isel DAG). When all definitions
@@ -377,7 +377,7 @@
 
   const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode());
   assert(TableEntry && TableEntry->IsLoad && "NEONLdStTable lookup failed");
-  NEONRegSpacing RegSpc = TableEntry->RegSpacing;
+  NEONRegSpacing RegSpc = (NEONRegSpacing)TableEntry->RegSpacing;
   unsigned NumRegs = TableEntry->NumRegs;
 
   MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
@@ -442,7 +442,7 @@
 
   const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode());
   assert(TableEntry && !TableEntry->IsLoad && "NEONLdStTable lookup failed");
-  NEONRegSpacing RegSpc = TableEntry->RegSpacing;
+  NEONRegSpacing RegSpc = (NEONRegSpacing)TableEntry->RegSpacing;
   unsigned NumRegs = TableEntry->NumRegs;
 
   MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
@@ -493,7 +493,7 @@
 
   const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode());
   assert(TableEntry && "NEONLdStTable lookup failed");
-  NEONRegSpacing RegSpc = TableEntry->RegSpacing;
+  NEONRegSpacing RegSpc = (NEONRegSpacing)TableEntry->RegSpacing;
   unsigned NumRegs = TableEntry->NumRegs;
   unsigned RegElts = TableEntry->RegElts;
 

Modified: llvm/branches/R600/lib/Target/ARM/ARMFastISel.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/ARM/ARMFastISel.cpp?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/ARM/ARMFastISel.cpp (original)
+++ llvm/branches/R600/lib/Target/ARM/ARMFastISel.cpp Fri Sep 21 12:29:50 2012
@@ -1021,6 +1021,9 @@
       RC = &ARM::GPRRegClass;
       break;
     case MVT::i16:
+      if (Alignment && Alignment < 2 && !Subtarget->allowsUnalignedMem())
+        return false;
+
       if (isThumb2) {
         if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
           Opc = isZExt ? ARM::t2LDRHi8 : ARM::t2LDRSHi8;
@@ -1139,6 +1142,9 @@
       }
       break;
     case MVT::i16:
+      if (Alignment && Alignment < 2 && !Subtarget->allowsUnalignedMem())
+        return false;
+
       if (isThumb2) {
         if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
           StrOpc = ARM::t2STRHi8;

Modified: llvm/branches/R600/lib/Target/ARM/ARMInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/ARM/ARMInstrInfo.td?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/ARM/ARMInstrInfo.td (original)
+++ llvm/branches/R600/lib/Target/ARM/ARMInstrInfo.td Fri Sep 21 12:29:50 2012
@@ -3092,9 +3092,11 @@
              (SUBSri  GPR:$src, so_imm_neg:$imm)>;
 
 def : ARMPat<(add     GPR:$src, imm0_65535_neg:$imm),
-             (SUBrr   GPR:$src, (MOVi16 (imm_neg_XFORM imm:$imm)))>;
+             (SUBrr   GPR:$src, (MOVi16 (imm_neg_XFORM imm:$imm)))>,
+             Requires<[IsARM, HasV6T2]>;
 def : ARMPat<(ARMaddc GPR:$src, imm0_65535_neg:$imm),
-             (SUBSrr  GPR:$src, (MOVi16 (imm_neg_XFORM imm:$imm)))>;
+             (SUBSrr  GPR:$src, (MOVi16 (imm_neg_XFORM imm:$imm)))>,
+             Requires<[IsARM, HasV6T2]>;
 
 // The with-carry-in form matches bitwise not instead of the negation.
 // Effectively, the inverse interpretation of the carry flag already accounts

Modified: llvm/branches/R600/lib/Target/ARM/ARMInstrNEON.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/ARM/ARMInstrNEON.td?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/ARM/ARMInstrNEON.td (original)
+++ llvm/branches/R600/lib/Target/ARM/ARMInstrNEON.td Fri Sep 21 12:29:50 2012
@@ -4488,10 +4488,23 @@
                      "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                      [(set DPR:$Vd,
                            (v2i32 (NEONvbsl DPR:$src1, DPR:$Vn, DPR:$Vm)))]>;
+def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 DPR:$src1),
+                                   (v8i8 DPR:$Vn), (v8i8 DPR:$Vm))),
+          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
+        Requires<[HasNEON]>;
+def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 DPR:$src1),
+                                    (v4i16 DPR:$Vn), (v4i16 DPR:$Vm))),
+          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
+        Requires<[HasNEON]>;
+def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 DPR:$src1),
+                                    (v2i32 DPR:$Vn), (v2i32 DPR:$Vm))),
+          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
+        Requires<[HasNEON]>;
 
 def : Pat<(v2i32 (or (and DPR:$Vn, DPR:$Vd),
                      (and DPR:$Vm, (vnotd DPR:$Vd)))),
-          (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>;
+          (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>,
+        Requires<[HasNEON]>;
 
 def  VBSLq    : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
                      (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
@@ -4500,9 +4513,23 @@
                      [(set QPR:$Vd,
                            (v4i32 (NEONvbsl QPR:$src1, QPR:$Vn, QPR:$Vm)))]>;
 
+def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 QPR:$src1),
+                                   (v16i8 QPR:$Vn), (v16i8 QPR:$Vm))),
+          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
+        Requires<[HasNEON]>;
+def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 QPR:$src1),
+                                    (v8i16 QPR:$Vn), (v8i16 QPR:$Vm))),
+          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
+        Requires<[HasNEON]>;
+def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 QPR:$src1),
+                                    (v4i32 QPR:$Vn), (v4i32 QPR:$Vm))),
+          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
+        Requires<[HasNEON]>;
+
 def : Pat<(v4i32 (or (and QPR:$Vn, QPR:$Vd),
                      (and QPR:$Vm, (vnotq QPR:$Vd)))),
-          (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>;
+          (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>,
+        Requires<[HasNEON]>;
 
 //   VBIF     : Vector Bitwise Insert if False
 //              like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst",

Modified: llvm/branches/R600/lib/Target/ARM/ARMScheduleA9.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/ARM/ARMScheduleA9.td?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/ARM/ARMScheduleA9.td (original)
+++ llvm/branches/R600/lib/Target/ARM/ARMScheduleA9.td Fri Sep 21 12:29:50 2012
@@ -1938,13 +1938,13 @@
 def A9WriteFMulD  : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 6; }
 def A9WriteFMAS   : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 8; }
 def A9WriteFMAD   : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 9; }
-def A9WriteFDivS  : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 10; }
-def A9WriteFDivD  : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 20; }
-def A9WriteFSqrtS : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 13; }
-def A9WriteFSqrtD : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 28; }
+def A9WriteFDivS  : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 15; }
+def A9WriteFDivD  : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 25; }
+def A9WriteFSqrtS : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 17; }
+def A9WriteFSqrtD : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 32; }
 
 // NEON has an odd mix of latencies. Simply name the write types by latency.
-def A9WriteV1 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 2; }
+def A9WriteV1 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 1; }
 def A9WriteV2 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 2; }
 def A9WriteV3 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 3; }
 def A9WriteV4 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 4; }

Modified: llvm/branches/R600/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/ARM/AsmParser/ARMAsmParser.cpp?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/ARM/AsmParser/ARMAsmParser.cpp (original)
+++ llvm/branches/R600/lib/Target/ARM/AsmParser/ARMAsmParser.cpp Fri Sep 21 12:29:50 2012
@@ -4616,7 +4616,7 @@
       return true;
 
     const MCExpr *ExprVal = ARMMCExpr::Create(RefKind, SubExprVal,
-                                                   getContext());
+                                              getContext());
     E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
     Operands.push_back(ARMOperand::CreateImm(ExprVal, S, E));
     return false;

Modified: llvm/branches/R600/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h (original)
+++ llvm/branches/R600/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h Fri Sep 21 12:29:50 2012
@@ -28,7 +28,7 @@
 
   explicit ARMMCExpr(VariantKind _Kind, const MCExpr *_Expr)
     : Kind(_Kind), Expr(_Expr) {}
-  
+
 public:
   /// @name Construction
   /// @{
@@ -67,7 +67,7 @@
   static bool classof(const MCExpr *E) {
     return E->getKind() == MCExpr::Target;
   }
-  
+
   static bool classof(const ARMMCExpr *) { return true; }
 
 };

Modified: llvm/branches/R600/lib/Target/CppBackend/CPPBackend.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/CppBackend/CPPBackend.cpp?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/CppBackend/CPPBackend.cpp (original)
+++ llvm/branches/R600/lib/Target/CppBackend/CPPBackend.cpp Fri Sep 21 12:29:50 2012
@@ -508,7 +508,7 @@
 #undef HANDLE_ATTR
       if (attrs & Attribute::StackAlignment)
         Out << " | Attribute::constructStackAlignmentFromInt("
-            << Attribute::getStackAlignmentFromAttrs(attrs)
+            << attrs.getStackAlignment()
             << ")"; 
       attrs &= ~Attribute::StackAlignment;
       assert(attrs == 0 && "Unhandled attribute!");

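This hunk follows the Attributes API change merged elsewhere in this commit: stack alignment is now queried through a member accessor instead of the old static helper. A minimal before/after sketch, using only the names visible in this diff:

    // Before: static helper taking the attribute word.
    unsigned Align = Attribute::getStackAlignmentFromAttrs(attrs);

    // After: accessor on the Attributes object itself.
    unsigned Align = attrs.getStackAlignment();
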
Modified: llvm/branches/R600/lib/Target/Mips/Mips16FrameLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/Mips/Mips16FrameLowering.cpp?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/Mips/Mips16FrameLowering.cpp (original)
+++ llvm/branches/R600/lib/Target/Mips/Mips16FrameLowering.cpp Fri Sep 21 12:29:50 2012
@@ -66,7 +66,41 @@
                           MachineBasicBlock::iterator MI,
                           const std::vector<CalleeSavedInfo> &CSI,
                           const TargetRegisterInfo *TRI) const {
-  // FIXME: implement.
+  MachineFunction *MF = MBB.getParent();
+  MachineBasicBlock *EntryBlock = MF->begin();
+
+  //
+  // Registers RA, S0 and S1 are the callee-saved registers and
+  // they will be saved by the "save" instruction
+  // during emitPrologue.
+  //
+  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+    // Add the callee-saved register as live-in. Do not add if the register is
+    // RA and return address is taken, because it has already been added in
+    // method MipsTargetLowering::LowerRETURNADDR.
+    // It's killed at the spill, unless the register is RA and return address
+    // is taken.
+    unsigned Reg = CSI[i].getReg();
+    bool IsRAAndRetAddrIsTaken = (Reg == Mips::RA)
+      && MF->getFrameInfo()->isReturnAddressTaken();
+    if (!IsRAAndRetAddrIsTaken)
+      EntryBlock->addLiveIn(Reg);
+  }
+
+  return true;
+}
+
+bool Mips16FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                          MachineBasicBlock::iterator MI,
+                                       const std::vector<CalleeSavedInfo> &CSI,
+                                       const TargetRegisterInfo *TRI) const {
+  //
+  // Registers RA, S0 and S1 are the callee-saved registers and they will be
+  // restored by the "restore" instruction during emitEpilogue.
+  // We need to override this virtual function; otherwise llvm will try to
+  // restore the registers on its own from the stack.
+  //
+
   return true;
 }
 
@@ -79,6 +113,9 @@
 void Mips16FrameLowering::
 processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
                                      RegScavenger *RS) const {
+  MF.getRegInfo().setPhysRegUsed(Mips::RA);
+  MF.getRegInfo().setPhysRegUsed(Mips::S0);
+  MF.getRegInfo().setPhysRegUsed(Mips::S1);
 }
 
 const MipsFrameLowering *

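For context: returning true from the spill/restore hooks tells the generic prologue/epilogue inserter that the target has already handled the callee-saved registers, which Mips16 does by folding RA, S0 and S1 into its SAVE/RESTORE instructions; processFunctionBeforeCalleeSavedScan force-marks those registers used so they always appear in the CSI list. A simplified sketch of the generic caller's side of that contract (an assumption about PrologEpilogInserter, not code from this patch):

    // Generic PEI logic, simplified: only spill manually when the
    // target's hook declines to handle the registers itself.
    if (!TFI->spillCalleeSavedRegisters(MBB, I, CSI, TRI)) {
      for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
        const TargetRegisterClass *RC =
            TRI->getMinimalPhysRegClass(CSI[i].getReg());
        TII.storeRegToStackSlot(MBB, I, CSI[i].getReg(), true,
                                CSI[i].getFrameIdx(), RC, TRI);
      }
    }
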
Modified: llvm/branches/R600/lib/Target/Mips/Mips16FrameLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/Mips/Mips16FrameLowering.h?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/Mips/Mips16FrameLowering.h (original)
+++ llvm/branches/R600/lib/Target/Mips/Mips16FrameLowering.h Fri Sep 21 12:29:50 2012
@@ -32,6 +32,11 @@
                                  const std::vector<CalleeSavedInfo> &CSI,
                                  const TargetRegisterInfo *TRI) const;
 
+  bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                   MachineBasicBlock::iterator MI,
+                                   const std::vector<CalleeSavedInfo> &CSI,
+                                   const TargetRegisterInfo *TRI) const;
+
   bool hasReservedCallFrame(const MachineFunction &MF) const;
 
   void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,

Modified: llvm/branches/R600/lib/Target/Mips/Mips16InstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/Mips/Mips16InstrInfo.td?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/Mips/Mips16InstrInfo.td (original)
+++ llvm/branches/R600/lib/Target/Mips/Mips16InstrInfo.td Fri Sep 21 12:29:50 2012
@@ -254,10 +254,12 @@
 // for direct object emitter, encoding needs to be adjusted for the
 // frame size
 //
-let ra=1, s=0,s0=0,s1=0 in
+let ra=1, s=0,s0=1,s1=1 in
 def RestoreRaF16:
   FI8_SVRS16<0b1, (outs), (ins uimm16:$frame_size),
-             "restore \t$$ra, $frame_size", [], IILoad >;
+             "restore \t$$ra,  $$s0, $$s1, $frame_size", [], IILoad > {
+  let isCodeGenOnly = 1;
+}
 
 //
 // Format: SAVE {ra,}{s0/s1/s0-1,}{framesize} (All arguments are optional)
@@ -266,11 +268,12 @@
 // To set up a stack frame on entry to a subroutine,
 // saving return address and static registers, and adjusting stack
 //
-let ra=1, s=1,s0=0,s1=0 in
+let ra=1, s=1,s0=1,s1=1 in
 def SaveRaF16:
   FI8_SVRS16<0b1, (outs), (ins uimm16:$frame_size),
-             "save \t$$ra, $frame_size", [], IILoad >;
-
+             "save \t$$ra, $$s0, $$s1, $frame_size", [], IILoad > {
+  let isCodeGenOnly = 1;
+}
 //
 // Format: SB ry, offset(rx) MIPS16e
 // Purpose: Store Byte (Extended)

Modified: llvm/branches/R600/lib/Target/PowerPC/PPCISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/PowerPC/PPCISelLowering.cpp?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/PowerPC/PPCISelLowering.cpp (original)
+++ llvm/branches/R600/lib/Target/PowerPC/PPCISelLowering.cpp Fri Sep 21 12:29:50 2012
@@ -1717,16 +1717,16 @@
                                         SmallVectorImpl<SDValue> &InVals)
                                           const {
   if (PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64()) {
-    return LowerFormalArguments_SVR4(Chain, CallConv, isVarArg, Ins,
-                                     dl, DAG, InVals);
-  } else {
-    return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins,
+    return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins,
                                        dl, DAG, InVals);
+  } else {
+    return LowerFormalArguments_Darwin_Or_64SVR4(Chain, CallConv, isVarArg, Ins,
+                                                 dl, DAG, InVals);
   }
 }
 
 SDValue
-PPCTargetLowering::LowerFormalArguments_SVR4(
+PPCTargetLowering::LowerFormalArguments_32SVR4(
                                       SDValue Chain,
                                       CallingConv::ID CallConv, bool isVarArg,
                                       const SmallVectorImpl<ISD::InputArg>
@@ -1944,7 +1944,7 @@
 }
 
 SDValue
-PPCTargetLowering::LowerFormalArguments_Darwin(
+PPCTargetLowering::LowerFormalArguments_Darwin_Or_64SVR4(
                                       SDValue Chain,
                                       CallingConv::ID CallConv, bool isVarArg,
                                       const SmallVectorImpl<ISD::InputArg>
@@ -1959,6 +1959,7 @@
 
   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
   bool isPPC64 = PtrVT == MVT::i64;
+  bool isSVR4ABI = PPCSubTarget.isSVR4ABI();
   // Potential tail calls could cause overwriting of argument stack slots.
   bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
                        (CallConv == CallingConv::Fast));
@@ -2019,10 +2020,12 @@
       default: llvm_unreachable("Unhandled argument type!");
       case MVT::i32:
       case MVT::f32:
-        VecArgOffset += isPPC64 ? 8 : 4;
+        VecArgOffset += 4;
         break;
       case MVT::i64:  // PPC64
       case MVT::f64:
+        // FIXME: We are guaranteed to be !isPPC64 at this point.
+        // Does MVT::i64 apply?
         VecArgOffset += 8;
         break;
       case MVT::v4f32:
@@ -2076,8 +2079,11 @@
       // ObjSize is the true size, ArgSize rounded up to multiple of registers.
       ObjSize = Flags.getByValSize();
       ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
-      // Objects of size 1 and 2 are right justified, everything else is
-      // left justified.  This means the memory address is adjusted forwards.
+      // FOR DARWIN: Objects of size 1 and 2 are right justified, everything
+      // else is left justified.  This means the memory address is adjusted
+      // forwards.
+      // FOR 64-BIT SVR4: All aggregates smaller than 8 bytes must be passed
+      // right-justified.
       if (ObjSize==1 || ObjSize==2) {
         CurArgOffset = CurArgOffset + (4 - ObjSize);
       }
@@ -2085,7 +2091,8 @@
       int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, true);
       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
       InVals.push_back(FIN);
-      if (ObjSize==1 || ObjSize==2) {
+      if (ObjSize==1 || ObjSize==2 ||
+          (ObjSize==4 && isSVR4ABI)) {
         if (GPR_idx != Num_GPR_Regs) {
           unsigned VReg;
           if (isPPC64)
@@ -2093,10 +2100,11 @@
           else
             VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
           SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
+          EVT ObjType = (ObjSize == 1 ? MVT::i8 :
+                         (ObjSize == 2 ? MVT::i16 : MVT::i32));
           SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
                                             MachinePointerInfo(),
-                                            ObjSize==1 ? MVT::i8 : MVT::i16,
-                                            false, false, 0);
+                                            ObjType, false, false, 0);
           MemOps.push_back(Store);
           ++GPR_idx;
         }
@@ -2107,8 +2115,8 @@
       }
       for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
         // Store whatever pieces of the object are in registers
-        // to memory.  ArgVal will be address of the beginning of
-        // the object.
+        // to memory.  ArgOffset will be the address of the beginning
+        // of the object.
         if (GPR_idx != Num_GPR_Regs) {
           unsigned VReg;
           if (isPPC64)
@@ -2118,7 +2126,16 @@
           int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
           SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
           SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
-          SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
+          SDValue Shifted = Val;
+
+          // For 64-bit SVR4, small structs come in right-adjusted.
+          // Shift them left so the following logic works as expected.
+          if (ObjSize < 8 && isSVR4ABI) {
+            SDValue ShiftAmt = DAG.getConstant(64 - 8 * ObjSize, PtrVT);
+            Shifted = DAG.getNode(ISD::SHL, dl, PtrVT, Val, ShiftAmt);
+          }
+
+          SDValue Store = DAG.getStore(Val.getValue(1), dl, Shifted, FIN,
                                        MachinePointerInfo(),
                                        false, false, 0);
           MemOps.push_back(Store);
@@ -2308,8 +2325,8 @@
   return Chain;
 }
 
-/// CalculateParameterAndLinkageAreaSize - Get the size of the paramter plus
-/// linkage area for the Darwin ABI.
+/// CalculateParameterAndLinkageAreaSize - Get the size of the parameter plus
+/// linkage area for the Darwin ABI, or the 64-bit SVR4 ABI.
 static unsigned
 CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG,
                                      bool isPPC64,
@@ -2718,7 +2735,7 @@
       // Thus for a call through a function pointer, the following actions need
       // to be performed:
       //   1. Save the TOC of the caller in the TOC save area of its stack
-      //      frame (this is done in LowerCall_Darwin()).
+      //      frame (this is done in LowerCall_Darwin_Or_64SVR4()).
       //   2. Load the address of the function entry point from the function
       //      descriptor.
       //   3. Load the TOC of the callee from the function descriptor into r2.
@@ -2969,25 +2986,25 @@
                                                    Ins, DAG);
 
   if (PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64())
-    return LowerCall_SVR4(Chain, Callee, CallConv, isVarArg,
-                          isTailCall, Outs, OutVals, Ins,
-                          dl, DAG, InVals);
-
-  return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg,
-                          isTailCall, Outs, OutVals, Ins,
-                          dl, DAG, InVals);
+    return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg,
+                            isTailCall, Outs, OutVals, Ins,
+                            dl, DAG, InVals);
+
+  return LowerCall_Darwin_Or_64SVR4(Chain, Callee, CallConv, isVarArg,
+                                    isTailCall, Outs, OutVals, Ins,
+                                    dl, DAG, InVals);
 }
 
 SDValue
-PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
-                                  CallingConv::ID CallConv, bool isVarArg,
-                                  bool isTailCall,
-                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
-                                  const SmallVectorImpl<SDValue> &OutVals,
-                                  const SmallVectorImpl<ISD::InputArg> &Ins,
-                                  DebugLoc dl, SelectionDAG &DAG,
-                                  SmallVectorImpl<SDValue> &InVals) const {
-  // See PPCTargetLowering::LowerFormalArguments_SVR4() for a description
+PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
+                                    CallingConv::ID CallConv, bool isVarArg,
+                                    bool isTailCall,
+                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
+                                    const SmallVectorImpl<SDValue> &OutVals,
+                                    const SmallVectorImpl<ISD::InputArg> &Ins,
+                                    DebugLoc dl, SelectionDAG &DAG,
+                                    SmallVectorImpl<SDValue> &InVals) const {
+  // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
   // of the 32-bit SVR4 ABI stack frame layout.
 
   assert((CallConv == CallingConv::C ||
@@ -3192,7 +3209,7 @@
 }
 
 SDValue
-PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
+PPCTargetLowering::LowerCall_Darwin_Or_64SVR4(SDValue Chain, SDValue Callee,
                                     CallingConv::ID CallConv, bool isVarArg,
                                     bool isTailCall,
                                     const SmallVectorImpl<ISD::OutputArg> &Outs,
@@ -3201,6 +3218,8 @@
                                     DebugLoc dl, SelectionDAG &DAG,
                                     SmallVectorImpl<SDValue> &InVals) const {
 
+  bool isSVR4ABI = PPCSubTarget.isSVR4ABI();
+
   unsigned NumOps  = Outs.size();
 
   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
@@ -3308,12 +3327,22 @@
     }
 
     // FIXME memcpy is used way more than necessary.  Correctness first.
+    // Note: "by value" is code for passing a structure by value, not
+    // basic types.
     if (Flags.isByVal()) {
+      // Note: Size includes alignment padding, so
+      //   struct x { short a; char b; }
+      // will have Size = 4.  With #pragma pack(1), it will have Size = 3.
+      // These are the proper values we need for right-justifying the
+      // aggregate in a parameter register for 64-bit SVR4.
       unsigned Size = Flags.getByValSize();
-      if (Size==1 || Size==2) {
-        // Very small objects are passed right-justified.
-        // Everything else is passed left-justified.
-        EVT VT = (Size==1) ? MVT::i8 : MVT::i16;
+      // FOR DARWIN ONLY:  Very small objects are passed right-justified.
+      // Everything else is passed left-justified.
+      // FOR 64-BIT SVR4:  All aggregates smaller than 8 bytes must
+      // be passed right-justified.
+      if (Size==1 || Size==2 ||
+          (Size==4 && isSVR4ABI)) {
+        EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
         if (GPR_idx != NumGPRs) {
           SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
                                         MachinePointerInfo(), VT,
@@ -3341,15 +3370,67 @@
       // Copy entire object into memory.  There are cases where gcc-generated
       // code assumes it is there, even if it could be put entirely into
       // registers.  (This is not what the doc says.)
-      SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
-                            CallSeqStart.getNode()->getOperand(0),
-                            Flags, DAG, dl);
-      // This must go outside the CALLSEQ_START..END.
-      SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
-                           CallSeqStart.getNode()->getOperand(1));
-      DAG.ReplaceAllUsesWith(CallSeqStart.getNode(), NewCallSeqStart.getNode());
-      Chain = CallSeqStart = NewCallSeqStart;
-      // And copy the pieces of it that fit into registers.
+
+      // FIXME: The above statement is likely due to a misunderstanding of the
+      // documents.  At least for 64-bit SVR4, all arguments must be copied
+      // into the parameter area BY THE CALLEE in the event that the callee
+      // takes the address of any formal argument.  That has not yet been
+      // implemented.  However, it is reasonable to use the stack area as a
+      // staging area for the register load.
+
+      // Skip this for small aggregates under 64-bit SVR4, as we will use
+      // the same slot for a right-justified copy, below.
+      if (Size >= 8 || !isSVR4ABI) {
+        SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
+                              CallSeqStart.getNode()->getOperand(0),
+                              Flags, DAG, dl);
+        // This must go outside the CALLSEQ_START..END.
+        SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
+                                   CallSeqStart.getNode()->getOperand(1));
+        DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
+                               NewCallSeqStart.getNode());
+        Chain = CallSeqStart = NewCallSeqStart;
+      }
+
+      // FOR 64-BIT SVR4:  When a register is available, pass the
+      // aggregate right-justified.
+      if (isSVR4ABI && Size < 8 && GPR_idx != NumGPRs) {
+        // The easiest way to get this right-justified in a register
+        // is to copy the structure into the rightmost portion of a
+        // local variable slot, then load the whole slot into the
+        // register.
+        // FIXME: The memcpy seems to produce pretty awful code for
+        // small aggregates, particularly for packed ones.
+        // FIXME: It would be preferable to use the slot in the 
+        // parameter save area instead of a new local variable.
+        SDValue Const = DAG.getConstant(8 - Size, PtrOff.getValueType());
+        SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
+        SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, AddPtr,
+                              CallSeqStart.getNode()->getOperand(0),
+                              Flags, DAG, dl);
+
+        // Place the memcpy outside the CALLSEQ_START..END.
+        SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
+                                   CallSeqStart.getNode()->getOperand(1));
+        DAG.ReplaceAllUsesWith(CallSeqStart.getNode(), 
+                               NewCallSeqStart.getNode());
+        Chain = CallSeqStart = NewCallSeqStart;
+
+        // Load the slot into the register.
+        SDValue Load = DAG.getLoad(PtrVT, dl, Chain, PtrOff,
+                                   MachinePointerInfo(),
+                                   false, false, false, 0);
+        MemOpChains.push_back(Load.getValue(1));
+        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+
+        // Done with this argument.
+        ArgOffset += PtrByteSize;
+        continue;
+      }
+
+      // For small aggregates (Darwin only) and aggregates >= PtrByteSize,
+      // copy the pieces of the object that fit into registers from the
+      // parameter save area.
       for (unsigned j=0; j<Size; j+=PtrByteSize) {
         SDValue Const = DAG.getConstant(j, PtrOff.getValueType());
         SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);

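To make the byval size comment concrete: Flags.getByValSize() includes tail padding, so the two layouts mentioned above really do differ, and both fall under the new small-aggregate path. A standalone sketch (not from the patch):

    #include <cstdio>

    struct x  { short a; char b; };   // padded to 4 bytes

    #pragma pack(push, 1)
    struct xp { short a; char b; };   // packed: 3 bytes
    #pragma pack(pop)

    int main() {
      // Both are under 8 bytes, so for 64-bit SVR4 each is passed
      // right-justified within a single parameter register.
      std::printf("%zu %zu\n", sizeof(struct x), sizeof(struct xp)); // 4 3
      return 0;
    }
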
Modified: llvm/branches/R600/lib/Target/PowerPC/PPCISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/PowerPC/PPCISelLowering.h?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/PowerPC/PPCISelLowering.h (original)
+++ llvm/branches/R600/lib/Target/PowerPC/PPCISelLowering.h Fri Sep 21 12:29:50 2012
@@ -467,20 +467,21 @@
                   DebugLoc dl, SelectionDAG &DAG) const;
 
     SDValue
-      LowerFormalArguments_Darwin(SDValue Chain,
+      LowerFormalArguments_Darwin_Or_64SVR4(SDValue Chain,
                                   CallingConv::ID CallConv, bool isVarArg,
                                   const SmallVectorImpl<ISD::InputArg> &Ins,
                                   DebugLoc dl, SelectionDAG &DAG,
                                   SmallVectorImpl<SDValue> &InVals) const;
     SDValue
-      LowerFormalArguments_SVR4(SDValue Chain,
-                                CallingConv::ID CallConv, bool isVarArg,
-                                const SmallVectorImpl<ISD::InputArg> &Ins,
-                                DebugLoc dl, SelectionDAG &DAG,
-                                SmallVectorImpl<SDValue> &InVals) const;
+      LowerFormalArguments_32SVR4(SDValue Chain,
+                                  CallingConv::ID CallConv, bool isVarArg,
+                                  const SmallVectorImpl<ISD::InputArg> &Ins,
+                                  DebugLoc dl, SelectionDAG &DAG,
+                                  SmallVectorImpl<SDValue> &InVals) const;
 
     SDValue
-      LowerCall_Darwin(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
+      LowerCall_Darwin_Or_64SVR4(SDValue Chain, SDValue Callee,
+                       CallingConv::ID CallConv,
                        bool isVarArg, bool isTailCall,
                        const SmallVectorImpl<ISD::OutputArg> &Outs,
                        const SmallVectorImpl<SDValue> &OutVals,
@@ -488,13 +489,13 @@
                        DebugLoc dl, SelectionDAG &DAG,
                        SmallVectorImpl<SDValue> &InVals) const;
     SDValue
-    LowerCall_SVR4(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
-                   bool isVarArg, bool isTailCall,
-                   const SmallVectorImpl<ISD::OutputArg> &Outs,
-                   const SmallVectorImpl<SDValue> &OutVals,
-                   const SmallVectorImpl<ISD::InputArg> &Ins,
-                   DebugLoc dl, SelectionDAG &DAG,
-                   SmallVectorImpl<SDValue> &InVals) const;
+    LowerCall_32SVR4(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
+                     bool isVarArg, bool isTailCall,
+                     const SmallVectorImpl<ISD::OutputArg> &Outs,
+                     const SmallVectorImpl<SDValue> &OutVals,
+                     const SmallVectorImpl<ISD::InputArg> &Ins,
+                     DebugLoc dl, SelectionDAG &DAG,
+                     SmallVectorImpl<SDValue> &InVals) const;
   };
 }
 

Modified: llvm/branches/R600/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp (original)
+++ llvm/branches/R600/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp Fri Sep 21 12:29:50 2012
@@ -560,15 +560,6 @@
   }
 
 
-  // Set the vector length to 256-bit if YMM0-YMM15 is used
-  for (unsigned i = 0; i != MI.getNumOperands(); ++i) {
-    if (!MI.getOperand(i).isReg())
-      continue;
-    unsigned SrcReg = MI.getOperand(i).getReg();
-    if (SrcReg >= X86::YMM0 && SrcReg <= X86::YMM15)
-      VEX_L = 1;
-  }
-
   // Classify VEX_B, VEX_4V, VEX_R, VEX_X
   unsigned NumOps = Desc.getNumOperands();
   unsigned CurOp = 0;

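The deleted loop derived VEX_L by scanning the operands for YMM registers. After this change the 256-bit vector-length bit is presumably taken only from the instruction's own encoding bits; a check along the following lines elsewhere in the emitter (an assumption, not shown in this hunk) would be the remaining source of VEX_L:

    // VEX_L comes from the instruction description (TSFlags), not from
    // inspecting the register operands.
    if (TSFlags & X86II::VEX_L)
      VEX_L = 1;
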
Modified: llvm/branches/R600/lib/Target/X86/X86CodeEmitter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/X86/X86CodeEmitter.cpp?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/X86/X86CodeEmitter.cpp (original)
+++ llvm/branches/R600/lib/Target/X86/X86CodeEmitter.cpp Fri Sep 21 12:29:50 2012
@@ -921,17 +921,6 @@
   }
 
 
-  // Set the vector length to 256-bit if YMM0-YMM15 is used
-  for (unsigned i = 0; i != MI.getNumOperands(); ++i) {
-    if (!MI.getOperand(i).isReg())
-      continue;
-    if (MI.getOperand(i).isImplicit())
-      continue;
-    unsigned SrcReg = MI.getOperand(i).getReg();
-    if (SrcReg >= X86::YMM0 && SrcReg <= X86::YMM15)
-      VEX_L = 1;
-  }
-
   // Classify VEX_B, VEX_4V, VEX_R, VEX_X
   unsigned NumOps = Desc->getNumOperands();
   unsigned CurOp = 0;

Modified: llvm/branches/R600/lib/Target/X86/X86ISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/X86/X86ISelDAGToDAG.cpp?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/X86/X86ISelDAGToDAG.cpp (original)
+++ llvm/branches/R600/lib/Target/X86/X86ISelDAGToDAG.cpp Fri Sep 21 12:29:50 2012
@@ -191,7 +191,6 @@
     SDNode *Select(SDNode *N);
     SDNode *SelectGather(SDNode *N, unsigned Opc);
     SDNode *SelectAtomic64(SDNode *Node, unsigned Opc);
-    SDNode *SelectAtomicLoadAdd(SDNode *Node, EVT NVT);
     SDNode *SelectAtomicLoadArith(SDNode *Node, EVT NVT);
 
     bool FoldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM);
@@ -1480,6 +1479,7 @@
   SDValue In1 = Node->getOperand(1);
   SDValue In2L = Node->getOperand(2);
   SDValue In2H = Node->getOperand(3);
+
   SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
   if (!SelectAddr(Node, In1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
     return NULL;
@@ -1493,159 +1493,13 @@
   return ResNode;
 }
 
-// FIXME: Figure out some way to unify this with the 'or' and other code
-// below.
-SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) {
-  if (Node->hasAnyUseOfValue(0))
-    return 0;
-
-  // Optimize common patterns for __sync_add_and_fetch and
-  // __sync_sub_and_fetch where the result is not used. This allows us
-  // to use "lock" version of add, sub, inc, dec instructions.
-  // FIXME: Do not use special instructions but instead add the "lock"
-  // prefix to the target node somehow. The extra information will then be
-  // transferred to machine instruction and it denotes the prefix.
-  SDValue Chain = Node->getOperand(0);
-  SDValue Ptr = Node->getOperand(1);
-  SDValue Val = Node->getOperand(2);
-  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
-  if (!SelectAddr(Node, Ptr, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
-    return 0;
-
-  bool isInc = false, isDec = false, isSub = false, isCN = false;
-  ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val);
-  if (CN && CN->getSExtValue() == (int32_t)CN->getSExtValue()) {
-    isCN = true;
-    int64_t CNVal = CN->getSExtValue();
-    if (CNVal == 1)
-      isInc = true;
-    else if (CNVal == -1)
-      isDec = true;
-    else if (CNVal >= 0)
-      Val = CurDAG->getTargetConstant(CNVal, NVT);
-    else {
-      isSub = true;
-      Val = CurDAG->getTargetConstant(-CNVal, NVT);
-    }
-  } else if (Val.hasOneUse() &&
-             Val.getOpcode() == ISD::SUB &&
-             X86::isZeroNode(Val.getOperand(0))) {
-    isSub = true;
-    Val = Val.getOperand(1);
-  }
-
-  DebugLoc dl = Node->getDebugLoc();
-  unsigned Opc = 0;
-  switch (NVT.getSimpleVT().SimpleTy) {
-  default: return 0;
-  case MVT::i8:
-    if (isInc)
-      Opc = X86::LOCK_INC8m;
-    else if (isDec)
-      Opc = X86::LOCK_DEC8m;
-    else if (isSub) {
-      if (isCN)
-        Opc = X86::LOCK_SUB8mi;
-      else
-        Opc = X86::LOCK_SUB8mr;
-    } else {
-      if (isCN)
-        Opc = X86::LOCK_ADD8mi;
-      else
-        Opc = X86::LOCK_ADD8mr;
-    }
-    break;
-  case MVT::i16:
-    if (isInc)
-      Opc = X86::LOCK_INC16m;
-    else if (isDec)
-      Opc = X86::LOCK_DEC16m;
-    else if (isSub) {
-      if (isCN) {
-        if (immSext8(Val.getNode()))
-          Opc = X86::LOCK_SUB16mi8;
-        else
-          Opc = X86::LOCK_SUB16mi;
-      } else
-        Opc = X86::LOCK_SUB16mr;
-    } else {
-      if (isCN) {
-        if (immSext8(Val.getNode()))
-          Opc = X86::LOCK_ADD16mi8;
-        else
-          Opc = X86::LOCK_ADD16mi;
-      } else
-        Opc = X86::LOCK_ADD16mr;
-    }
-    break;
-  case MVT::i32:
-    if (isInc)
-      Opc = X86::LOCK_INC32m;
-    else if (isDec)
-      Opc = X86::LOCK_DEC32m;
-    else if (isSub) {
-      if (isCN) {
-        if (immSext8(Val.getNode()))
-          Opc = X86::LOCK_SUB32mi8;
-        else
-          Opc = X86::LOCK_SUB32mi;
-      } else
-        Opc = X86::LOCK_SUB32mr;
-    } else {
-      if (isCN) {
-        if (immSext8(Val.getNode()))
-          Opc = X86::LOCK_ADD32mi8;
-        else
-          Opc = X86::LOCK_ADD32mi;
-      } else
-        Opc = X86::LOCK_ADD32mr;
-    }
-    break;
-  case MVT::i64:
-    if (isInc)
-      Opc = X86::LOCK_INC64m;
-    else if (isDec)
-      Opc = X86::LOCK_DEC64m;
-    else if (isSub) {
-      Opc = X86::LOCK_SUB64mr;
-      if (isCN) {
-        if (immSext8(Val.getNode()))
-          Opc = X86::LOCK_SUB64mi8;
-        else if (i64immSExt32(Val.getNode()))
-          Opc = X86::LOCK_SUB64mi32;
-      }
-    } else {
-      Opc = X86::LOCK_ADD64mr;
-      if (isCN) {
-        if (immSext8(Val.getNode()))
-          Opc = X86::LOCK_ADD64mi8;
-        else if (i64immSExt32(Val.getNode()))
-          Opc = X86::LOCK_ADD64mi32;
-      }
-    }
-    break;
-  }
-
-  SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
-                                                 dl, NVT), 0);
-  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
-  MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
-  if (isInc || isDec) {
-    SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Chain };
-    SDValue Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 6), 0);
-    cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1);
-    SDValue RetVals[] = { Undef, Ret };
-    return CurDAG->getMergeValues(RetVals, 2, dl).getNode();
-  } else {
-    SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Val, Chain };
-    SDValue Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 7), 0);
-    cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1);
-    SDValue RetVals[] = { Undef, Ret };
-    return CurDAG->getMergeValues(RetVals, 2, dl).getNode();
-  }
-}
-
+/// Atomic opcode table
+///
 enum AtomicOpc {
+  ADD,
+  SUB,
+  INC,
+  DEC,
   OR,
   AND,
   XOR,
@@ -1669,6 +1523,58 @@
 
 static const uint16_t AtomicOpcTbl[AtomicOpcEnd][AtomicSzEnd] = {
   {
+    X86::LOCK_ADD8mi,
+    X86::LOCK_ADD8mr,
+    X86::LOCK_ADD16mi8,
+    X86::LOCK_ADD16mi,
+    X86::LOCK_ADD16mr,
+    X86::LOCK_ADD32mi8,
+    X86::LOCK_ADD32mi,
+    X86::LOCK_ADD32mr,
+    X86::LOCK_ADD64mi8,
+    X86::LOCK_ADD64mi32,
+    X86::LOCK_ADD64mr,
+  },
+  {
+    X86::LOCK_SUB8mi,
+    X86::LOCK_SUB8mr,
+    X86::LOCK_SUB16mi8,
+    X86::LOCK_SUB16mi,
+    X86::LOCK_SUB16mr,
+    X86::LOCK_SUB32mi8,
+    X86::LOCK_SUB32mi,
+    X86::LOCK_SUB32mr,
+    X86::LOCK_SUB64mi8,
+    X86::LOCK_SUB64mi32,
+    X86::LOCK_SUB64mr,
+  },
+  {
+    0,
+    X86::LOCK_INC8m,
+    0,
+    0,
+    X86::LOCK_INC16m,
+    0,
+    0,
+    X86::LOCK_INC32m,
+    0,
+    0,
+    X86::LOCK_INC64m,
+  },
+  {
+    0,
+    X86::LOCK_DEC8m,
+    0,
+    0,
+    X86::LOCK_DEC16m,
+    0,
+    0,
+    X86::LOCK_DEC32m,
+    0,
+    0,
+    X86::LOCK_DEC64m,
+  },
+  {
     X86::LOCK_OR8mi,
     X86::LOCK_OR8mr,
     X86::LOCK_OR16mi8,
@@ -1679,7 +1585,7 @@
     X86::LOCK_OR32mr,
     X86::LOCK_OR64mi8,
     X86::LOCK_OR64mi32,
-    X86::LOCK_OR64mr
+    X86::LOCK_OR64mr,
   },
   {
     X86::LOCK_AND8mi,
@@ -1692,7 +1598,7 @@
     X86::LOCK_AND32mr,
     X86::LOCK_AND64mi8,
     X86::LOCK_AND64mi32,
-    X86::LOCK_AND64mr
+    X86::LOCK_AND64mr,
   },
   {
     X86::LOCK_XOR8mi,
@@ -1705,18 +1611,74 @@
     X86::LOCK_XOR32mr,
     X86::LOCK_XOR64mi8,
     X86::LOCK_XOR64mi32,
-    X86::LOCK_XOR64mr
+    X86::LOCK_XOR64mr,
   }
 };
 
+// Return the target constant operand for atomic-load-op and do simple
+// translations, such as from atomic-load-add to lock-sub. The return value is
+// one of the following 3 cases:
+// + target-constant, the operand can be encoded as a target constant.
+// + empty, the operand is no longer needed once the new op is selected.
+// + non-empty, otherwise.
+static SDValue getAtomicLoadArithTargetConstant(SelectionDAG *CurDAG,
+                                                DebugLoc dl,
+                                                enum AtomicOpc &Op, EVT NVT,
+                                                SDValue Val) {
+  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val)) {
+    int64_t CNVal = CN->getSExtValue();
+    // Quit if not 32-bit imm.
+    if ((int32_t)CNVal != CNVal)
+      return Val;
+    // For atomic-load-add, we could do some optimizations.
+    if (Op == ADD) {
+      // Translate to INC/DEC if ADD by 1 or -1.
+      if ((CNVal == 1) || (CNVal == -1)) {
+        Op = (CNVal == 1) ? INC : DEC;
+        // No more constant operand after being translated into INC/DEC.
+        return SDValue();
+      }
+      // Translate to SUB if ADD by negative value.
+      if (CNVal < 0) {
+        Op = SUB;
+        CNVal = -CNVal;
+      }
+    }
+    return CurDAG->getTargetConstant(CNVal, NVT);
+  }
+
+  // If the value operand is single-used, try to optimize it.
+  if (Op == ADD && Val.hasOneUse()) {
+    // Translate (atomic-load-add ptr (sub 0 x)) back to (lock-sub x).
+    if (Val.getOpcode() == ISD::SUB && X86::isZeroNode(Val.getOperand(0))) {
+      Op = SUB;
+      return Val.getOperand(1);
+    }
+    // A special case for i16, which needs truncating as, in most cases, it's
+    // promoted to i32. We will translate
+    // (atomic-load-add (truncate (sub 0 x))) to (lock-sub (EXTRACT_SUBREG x))
+    if (Val.getOpcode() == ISD::TRUNCATE && NVT == MVT::i16 &&
+        Val.getOperand(0).getOpcode() == ISD::SUB &&
+        X86::isZeroNode(Val.getOperand(0).getOperand(0))) {
+      Op = SUB;
+      Val = Val.getOperand(0);
+      return CurDAG->getTargetExtractSubreg(X86::sub_16bit, dl, NVT,
+                                            Val.getOperand(1));
+    }
+  }
+
+  return Val;
+}
+
 SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) {
   if (Node->hasAnyUseOfValue(0))
     return 0;
 
+  DebugLoc dl = Node->getDebugLoc();
+
   // Optimize common patterns for __sync_or_and_fetch and similar arith
   // operations where the result is not used. This allows us to use the "lock"
   // version of the arithmetic instruction.
-  // FIXME: Same as for 'add' and 'sub', try to merge those down here.
   SDValue Chain = Node->getOperand(0);
   SDValue Ptr = Node->getOperand(1);
   SDValue Val = Node->getOperand(2);
@@ -1727,6 +1689,8 @@
   // Which index into the table.
   enum AtomicOpc Op;
   switch (Node->getOpcode()) {
+    default:
+      return 0;
     case ISD::ATOMIC_LOAD_OR:
       Op = OR;
       break;
@@ -1736,16 +1700,14 @@
     case ISD::ATOMIC_LOAD_XOR:
       Op = XOR;
       break;
-    default:
-      return 0;
-  }
-
-  bool isCN = false;
-  ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val);
-  if (CN && (int32_t)CN->getSExtValue() == CN->getSExtValue()) {
-    isCN = true;
-    Val = CurDAG->getTargetConstant(CN->getSExtValue(), NVT);
+    case ISD::ATOMIC_LOAD_ADD:
+      Op = ADD;
+      break;
   }
+  
+  Val = getAtomicLoadArithTargetConstant(CurDAG, dl, Op, NVT, Val);
+  bool isUnOp = !Val.getNode();
+  bool isCN = Val.getNode() && (Val.getOpcode() == ISD::TargetConstant);
 
   unsigned Opc = 0;
   switch (NVT.getSimpleVT().SimpleTy) {
@@ -1787,13 +1749,20 @@
 
   assert(Opc != 0 && "Invalid arith lock transform!");
 
-  DebugLoc dl = Node->getDebugLoc();
+  SDValue Ret;
   SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
                                                  dl, NVT), 0);
   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
   MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
-  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Val, Chain };
-  SDValue Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 7), 0);
+  if (isUnOp) {
+    SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Chain };
+    Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops,
+                                         array_lengthof(Ops)), 0);
+  } else {
+    SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Val, Chain };
+    Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops,
+                                         array_lengthof(Ops)), 0);
+  }
   cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1);
   SDValue RetVals[] = { Undef, Ret };
   return CurDAG->getMergeValues(RetVals, 2, dl).getNode();
@@ -2089,15 +2058,10 @@
     break;
   }
 
-  case ISD::ATOMIC_LOAD_ADD: {
-    SDNode *RetVal = SelectAtomicLoadAdd(Node, NVT);
-    if (RetVal)
-      return RetVal;
-    break;
-  }
   case ISD::ATOMIC_LOAD_XOR:
   case ISD::ATOMIC_LOAD_AND:
-  case ISD::ATOMIC_LOAD_OR: {
+  case ISD::ATOMIC_LOAD_OR:
+  case ISD::ATOMIC_LOAD_ADD: {
     SDNode *RetVal = SelectAtomicLoadArith(Node, NVT);
     if (RetVal)
       return RetVal;

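Net effect: ATOMIC_LOAD_ADD now flows through the same table-driven path as OR/AND/XOR, and the unused-result patterns named in the comment still collapse to single locked instructions. A sketch of the kind of source the new atomic*.ll tests presumably exercise:

    // With the fetched value unused, these select straight to locked
    // RMW memory instructions rather than a cmpxchg loop (x86-64).
    void bump(volatile long *p) {
      __sync_fetch_and_add(p, 1);    // -> lock incq (%rdi)
      __sync_fetch_and_add(p, -16);  // -> lock subq $16, (%rdi)
      __sync_fetch_and_or(p, 4);     // -> lock orq $4, (%rdi)
    }
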
Modified: llvm/branches/R600/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/X86/X86ISelLowering.cpp?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/branches/R600/lib/Target/X86/X86ISelLowering.cpp Fri Sep 21 12:29:50 2012
@@ -11911,385 +11911,534 @@
 //===----------------------------------------------------------------------===//
 
 // private utility function
+
+// Get CMPXCHG opcode for the specified data type.
+static unsigned getCmpXChgOpcode(EVT VT) {
+  switch (VT.getSimpleVT().SimpleTy) {
+  case MVT::i8:  return X86::LCMPXCHG8;
+  case MVT::i16: return X86::LCMPXCHG16;
+  case MVT::i32: return X86::LCMPXCHG32;
+  case MVT::i64: return X86::LCMPXCHG64;
+  default:
+    break;
+  }
+  llvm_unreachable("Invalid operand size!");
+}
+
+// Get LOAD opcode for the specified data type.
+static unsigned getLoadOpcode(EVT VT) {
+  switch (VT.getSimpleVT().SimpleTy) {
+  case MVT::i8:  return X86::MOV8rm;
+  case MVT::i16: return X86::MOV16rm;
+  case MVT::i32: return X86::MOV32rm;
+  case MVT::i64: return X86::MOV64rm;
+  default:
+    break;
+  }
+  llvm_unreachable("Invalid operand size!");
+}
+
+// Get opcode of the non-atomic one from the specified atomic instruction.
+static unsigned getNonAtomicOpcode(unsigned Opc) {
+  switch (Opc) {
+  case X86::ATOMAND8:  return X86::AND8rr;
+  case X86::ATOMAND16: return X86::AND16rr;
+  case X86::ATOMAND32: return X86::AND32rr;
+  case X86::ATOMAND64: return X86::AND64rr;
+  case X86::ATOMOR8:   return X86::OR8rr;
+  case X86::ATOMOR16:  return X86::OR16rr;
+  case X86::ATOMOR32:  return X86::OR32rr;
+  case X86::ATOMOR64:  return X86::OR64rr;
+  case X86::ATOMXOR8:  return X86::XOR8rr;
+  case X86::ATOMXOR16: return X86::XOR16rr;
+  case X86::ATOMXOR32: return X86::XOR32rr;
+  case X86::ATOMXOR64: return X86::XOR64rr;
+  }
+  llvm_unreachable("Unhandled atomic-load-op opcode!");
+}
+
+// Get opcode of the non-atomic one from the specified atomic instruction with
+// extra opcode.
+static unsigned getNonAtomicOpcodeWithExtraOpc(unsigned Opc,
+                                               unsigned &ExtraOpc) {
+  switch (Opc) {
+  case X86::ATOMNAND8:  ExtraOpc = X86::NOT8r;   return X86::AND8rr;
+  case X86::ATOMNAND16: ExtraOpc = X86::NOT16r;  return X86::AND16rr;
+  case X86::ATOMNAND32: ExtraOpc = X86::NOT32r;  return X86::AND32rr;
+  case X86::ATOMNAND64: ExtraOpc = X86::NOT64r;  return X86::AND64rr;
+  case X86::ATOMMAX8:   ExtraOpc = X86::CMP8rr;  return X86::CMOVL32rr;
+  case X86::ATOMMAX16:  ExtraOpc = X86::CMP16rr; return X86::CMOVL16rr;
+  case X86::ATOMMAX32:  ExtraOpc = X86::CMP32rr; return X86::CMOVL32rr;
+  case X86::ATOMMAX64:  ExtraOpc = X86::CMP64rr; return X86::CMOVL64rr;
+  case X86::ATOMMIN8:   ExtraOpc = X86::CMP8rr;  return X86::CMOVG32rr;
+  case X86::ATOMMIN16:  ExtraOpc = X86::CMP16rr; return X86::CMOVG16rr;
+  case X86::ATOMMIN32:  ExtraOpc = X86::CMP32rr; return X86::CMOVG32rr;
+  case X86::ATOMMIN64:  ExtraOpc = X86::CMP64rr; return X86::CMOVG64rr;
+  case X86::ATOMUMAX8:  ExtraOpc = X86::CMP8rr;  return X86::CMOVB32rr;
+  case X86::ATOMUMAX16: ExtraOpc = X86::CMP16rr; return X86::CMOVB16rr;
+  case X86::ATOMUMAX32: ExtraOpc = X86::CMP32rr; return X86::CMOVB32rr;
+  case X86::ATOMUMAX64: ExtraOpc = X86::CMP64rr; return X86::CMOVB64rr;
+  case X86::ATOMUMIN8:  ExtraOpc = X86::CMP8rr;  return X86::CMOVA32rr;
+  case X86::ATOMUMIN16: ExtraOpc = X86::CMP16rr; return X86::CMOVA16rr;
+  case X86::ATOMUMIN32: ExtraOpc = X86::CMP32rr; return X86::CMOVA32rr;
+  case X86::ATOMUMIN64: ExtraOpc = X86::CMP64rr; return X86::CMOVA64rr;
+  }
+  llvm_unreachable("Unhandled atomic-load-op opcode!");
+}
+
+// Get opcode of the non-atomic one from the specified atomic instruction for
+// 64-bit data type on 32-bit target.
+static unsigned getNonAtomic6432Opcode(unsigned Opc, unsigned &HiOpc) {
+  switch (Opc) {
+  case X86::ATOMAND6432:  HiOpc = X86::AND32rr; return X86::AND32rr;
+  case X86::ATOMOR6432:   HiOpc = X86::OR32rr;  return X86::OR32rr;
+  case X86::ATOMXOR6432:  HiOpc = X86::XOR32rr; return X86::XOR32rr;
+  case X86::ATOMADD6432:  HiOpc = X86::ADC32rr; return X86::ADD32rr;
+  case X86::ATOMSUB6432:  HiOpc = X86::SBB32rr; return X86::SUB32rr;
+  case X86::ATOMSWAP6432: HiOpc = X86::MOV32rr; return X86::MOV32rr;
+  }
+  llvm_unreachable("Unhandled atomic-load-op opcode!");
+}
+
+// Get opcode of the non-atomic one from the specified atomic instruction for
+// 64-bit data type on 32-bit target with extra opcode.
+static unsigned getNonAtomic6432OpcodeWithExtraOpc(unsigned Opc,
+                                                   unsigned &HiOpc,
+                                                   unsigned &ExtraOpc) {
+  switch (Opc) {
+  case X86::ATOMNAND6432:
+    ExtraOpc = X86::NOT32r;
+    HiOpc = X86::AND32rr;
+    return X86::AND32rr;
+  }
+  llvm_unreachable("Unhandled atomic-load-op opcode!");
+}
+
+// Get pseudo CMOV opcode from the specified data type.
+static unsigned getPseudoCMOVOpc(EVT VT) {
+  switch (VT.getSimpleVT().SimpleTy) {
+  case MVT::i8:  return X86::CMOV_GR8;
+  case MVT::i16: return X86::CMOV_GR16;
+  case MVT::i32: return X86::CMOV_GR32;
+  default:
+    break;
+  }
+  llvm_unreachable("Unknown CMOV opcode!");
+}
+
+// EmitAtomicLoadArith - emit the code sequence for pseudo atomic instructions.
+// They will be translated into a spin-loop or compare-exchange loop from
+//
+//    ...
+//    dst = atomic-fetch-op MI.addr, MI.val
+//    ...
+//
+// to
+//
+//    ...
+//    EAX = LOAD MI.addr
+// loop:
+//    t1 = OP MI.val, EAX
+//    LCMPXCHG [MI.addr], t1, [EAX is implicitly used & defined]
+//    JNE loop
+// sink:
+//    dst = EAX
+//    ...
 MachineBasicBlock *
-X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr,
-                                                       MachineBasicBlock *MBB,
-                                                       unsigned regOpc,
-                                                       unsigned immOpc,
-                                                       unsigned LoadOpc,
-                                                       unsigned CXchgOpc,
-                                                       unsigned notOpc,
-                                                       unsigned EAXreg,
-                                                 const TargetRegisterClass *RC,
-                                                       bool Invert) const {
-  // For the atomic bitwise operator, we generate
-  //   thisMBB:
-  //   newMBB:
-  //     ld  t1 = [bitinstr.addr]
-  //     op  t2 = t1, [bitinstr.val]
-  //     not t3 = t2  (if Invert)
-  //     mov EAX = t1
-  //     lcs dest = [bitinstr.addr], t3  [EAX is implicit]
-  //     bz  newMBB
-  //     fallthrough -->nextMBB
+X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI,
+                                       MachineBasicBlock *MBB) const {
   const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
-  const BasicBlock *LLVM_BB = MBB->getBasicBlock();
-  MachineFunction::iterator MBBIter = MBB;
-  ++MBBIter;
+  DebugLoc DL = MI->getDebugLoc();
 
-  /// First build the CFG
-  MachineFunction *F = MBB->getParent();
-  MachineBasicBlock *thisMBB = MBB;
-  MachineBasicBlock *newMBB = F->CreateMachineBasicBlock(LLVM_BB);
-  MachineBasicBlock *nextMBB = F->CreateMachineBasicBlock(LLVM_BB);
-  F->insert(MBBIter, newMBB);
-  F->insert(MBBIter, nextMBB);
-
-  // Transfer the remainder of thisMBB and its successor edges to nextMBB.
-  nextMBB->splice(nextMBB->begin(), thisMBB,
-                  llvm::next(MachineBasicBlock::iterator(bInstr)),
-                  thisMBB->end());
-  nextMBB->transferSuccessorsAndUpdatePHIs(thisMBB);
-
-  // Update thisMBB to fall through to newMBB
-  thisMBB->addSuccessor(newMBB);
-
-  // newMBB jumps to itself and fall through to nextMBB
-  newMBB->addSuccessor(nextMBB);
-  newMBB->addSuccessor(newMBB);
-
-  // Insert instructions into newMBB based on incoming instruction
-  assert(bInstr->getNumOperands() < X86::AddrNumOperands + 4 &&
-         "unexpected number of operands");
-  DebugLoc dl = bInstr->getDebugLoc();
-  MachineOperand& destOper = bInstr->getOperand(0);
-  MachineOperand* argOpers[2 + X86::AddrNumOperands];
-  int numArgs = bInstr->getNumOperands() - 1;
-  for (int i=0; i < numArgs; ++i)
-    argOpers[i] = &bInstr->getOperand(i+1);
-
-  // x86 address has 4 operands: base, index, scale, and displacement
-  int lastAddrIndx = X86::AddrNumOperands - 1; // [0,3]
-  int valArgIndx = lastAddrIndx + 1;
-
-  unsigned t1 = F->getRegInfo().createVirtualRegister(RC);
-  MachineInstrBuilder MIB = BuildMI(newMBB, dl, TII->get(LoadOpc), t1);
-  for (int i=0; i <= lastAddrIndx; ++i)
-    (*MIB).addOperand(*argOpers[i]);
-
-  unsigned t2 = F->getRegInfo().createVirtualRegister(RC);
-  assert((argOpers[valArgIndx]->isReg() ||
-          argOpers[valArgIndx]->isImm()) &&
-         "invalid operand");
-  if (argOpers[valArgIndx]->isReg())
-    MIB = BuildMI(newMBB, dl, TII->get(regOpc), t2);
-  else
-    MIB = BuildMI(newMBB, dl, TII->get(immOpc), t2);
-  MIB.addReg(t1);
-  (*MIB).addOperand(*argOpers[valArgIndx]);
+  MachineFunction *MF = MBB->getParent();
+  MachineRegisterInfo &MRI = MF->getRegInfo();
 
-  unsigned t3 = F->getRegInfo().createVirtualRegister(RC);
-  if (Invert) {
-    MIB = BuildMI(newMBB, dl, TII->get(notOpc), t3).addReg(t2);
-  }
-  else
-    t3 = t2;
+  const BasicBlock *BB = MBB->getBasicBlock();
+  MachineFunction::iterator I = MBB;
+  ++I;
 
-  MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), EAXreg);
-  MIB.addReg(t1);
+  assert(MI->getNumOperands() <= X86::AddrNumOperands + 2 &&
+         "Unexpected number of operands");
+
+  assert(MI->hasOneMemOperand() &&
+         "Expected atomic-load-op to have one memoperand");
+
+  // Memory Reference
+  MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
+  MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
 
-  MIB = BuildMI(newMBB, dl, TII->get(CXchgOpc));
-  for (int i=0; i <= lastAddrIndx; ++i)
-    (*MIB).addOperand(*argOpers[i]);
-  MIB.addReg(t3);
-  assert(bInstr->hasOneMemOperand() && "Unexpected number of memoperand");
-  (*MIB).setMemRefs(bInstr->memoperands_begin(),
-                    bInstr->memoperands_end());
+  unsigned DstReg, SrcReg;
+  unsigned MemOpndSlot;
 
-  MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), destOper.getReg());
-  MIB.addReg(EAXreg);
+  unsigned CurOp = 0;
 
-  // insert branch
-  BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB);
+  DstReg = MI->getOperand(CurOp++).getReg();
+  MemOpndSlot = CurOp;
+  CurOp += X86::AddrNumOperands;
+  SrcReg = MI->getOperand(CurOp++).getReg();
 
-  bInstr->eraseFromParent();   // The pseudo instruction is gone now.
-  return nextMBB;
-}
+  const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
+  EVT VT = *RC->vt_begin();
+  unsigned AccPhyReg = getX86SubSuperRegister(X86::EAX, VT);
 
-// private utility function:  64 bit atomics on 32 bit host.
-MachineBasicBlock *
-X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr,
-                                                       MachineBasicBlock *MBB,
-                                                       unsigned regOpcL,
-                                                       unsigned regOpcH,
-                                                       unsigned immOpcL,
-                                                       unsigned immOpcH,
-                                                       bool Invert) const {
-  // For the atomic bitwise operator, we generate
-  //   thisMBB (instructions are in pairs, except cmpxchg8b)
-  //     ld t1,t2 = [bitinstr.addr]
-  //   newMBB:
-  //     out1, out2 = phi (thisMBB, t1/t2) (newMBB, t3/t4)
-  //     op  t5, t6 <- out1, out2, [bitinstr.val]
-  //      (for SWAP, substitute:  mov t5, t6 <- [bitinstr.val])
-  //     neg t7, t8 < t5, t6  (if Invert)
-  //     mov ECX, EBX <- t5, t6
-  //     mov EAX, EDX <- t1, t2
-  //     cmpxchg8b [bitinstr.addr]  [EAX, EDX, EBX, ECX implicit]
-  //     mov t3, t4 <- EAX, EDX
-  //     bz  newMBB
-  //     result in out1, out2
-  //     fallthrough -->nextMBB
+  unsigned LCMPXCHGOpc = getCmpXChgOpcode(VT);
+  unsigned LOADOpc = getLoadOpcode(VT);
 
-  const TargetRegisterClass *RC = &X86::GR32RegClass;
-  const unsigned LoadOpc = X86::MOV32rm;
-  const unsigned NotOpc = X86::NOT32r;
-  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
-  const BasicBlock *LLVM_BB = MBB->getBasicBlock();
-  MachineFunction::iterator MBBIter = MBB;
-  ++MBBIter;
+  // For the atomic load-arith operator, we generate
+  //
+  //  thisMBB:
+  //    EAX = LOAD [MI.addr]
+  //  mainMBB:
+  //    t1 = OP MI.val, EAX
+  //    LCMPXCHG [MI.addr], t1, [EAX is implicitly used & defined]
+  //    JNE mainMBB
+  //  sinkMBB:
 
-  /// First build the CFG
-  MachineFunction *F = MBB->getParent();
   MachineBasicBlock *thisMBB = MBB;
-  MachineBasicBlock *newMBB = F->CreateMachineBasicBlock(LLVM_BB);
-  MachineBasicBlock *nextMBB = F->CreateMachineBasicBlock(LLVM_BB);
-  F->insert(MBBIter, newMBB);
-  F->insert(MBBIter, nextMBB);
-
-  // Transfer the remainder of thisMBB and its successor edges to nextMBB.
-  nextMBB->splice(nextMBB->begin(), thisMBB,
-                  llvm::next(MachineBasicBlock::iterator(bInstr)),
-                  thisMBB->end());
-  nextMBB->transferSuccessorsAndUpdatePHIs(thisMBB);
-
-  // Update thisMBB to fall through to newMBB
-  thisMBB->addSuccessor(newMBB);
-
-  // newMBB jumps to itself and fall through to nextMBB
-  newMBB->addSuccessor(nextMBB);
-  newMBB->addSuccessor(newMBB);
-
-  DebugLoc dl = bInstr->getDebugLoc();
-  // Insert instructions into newMBB based on incoming instruction
-  // There are 8 "real" operands plus 9 implicit def/uses, ignored here.
-  assert(bInstr->getNumOperands() < X86::AddrNumOperands + 14 &&
-         "unexpected number of operands");
-  MachineOperand& dest1Oper = bInstr->getOperand(0);
-  MachineOperand& dest2Oper = bInstr->getOperand(1);
-  MachineOperand* argOpers[2 + X86::AddrNumOperands];
-  for (int i=0; i < 2 + X86::AddrNumOperands; ++i) {
-    argOpers[i] = &bInstr->getOperand(i+2);
-
-    // We use some of the operands multiple times, so conservatively just
-    // clear any kill flags that might be present.
-    if (argOpers[i]->isReg() && argOpers[i]->isUse())
-      argOpers[i]->setIsKill(false);
-  }
-
-  // x86 address has 5 operands: base, index, scale, displacement, and segment.
-  int lastAddrIndx = X86::AddrNumOperands - 1; // [0,3]
-
-  unsigned t1 = F->getRegInfo().createVirtualRegister(RC);
-  MachineInstrBuilder MIB = BuildMI(thisMBB, dl, TII->get(LoadOpc), t1);
-  for (int i=0; i <= lastAddrIndx; ++i)
-    (*MIB).addOperand(*argOpers[i]);
-  unsigned t2 = F->getRegInfo().createVirtualRegister(RC);
-  MIB = BuildMI(thisMBB, dl, TII->get(LoadOpc), t2);
-  // add 4 to displacement.
-  for (int i=0; i <= lastAddrIndx-2; ++i)
-    (*MIB).addOperand(*argOpers[i]);
-  MachineOperand newOp3 = *(argOpers[3]);
-  if (newOp3.isImm())
-    newOp3.setImm(newOp3.getImm()+4);
-  else
-    newOp3.setOffset(newOp3.getOffset()+4);
-  (*MIB).addOperand(newOp3);
-  (*MIB).addOperand(*argOpers[lastAddrIndx]);
-
-  // t3/4 are defined later, at the bottom of the loop
-  unsigned t3 = F->getRegInfo().createVirtualRegister(RC);
-  unsigned t4 = F->getRegInfo().createVirtualRegister(RC);
-  BuildMI(newMBB, dl, TII->get(X86::PHI), dest1Oper.getReg())
-    .addReg(t1).addMBB(thisMBB).addReg(t3).addMBB(newMBB);
-  BuildMI(newMBB, dl, TII->get(X86::PHI), dest2Oper.getReg())
-    .addReg(t2).addMBB(thisMBB).addReg(t4).addMBB(newMBB);
-
-  // The subsequent operations should be using the destination registers of
-  // the PHI instructions.
-  t1 = dest1Oper.getReg();
-  t2 = dest2Oper.getReg();
-
-  int valArgIndx = lastAddrIndx + 1;
-  assert((argOpers[valArgIndx]->isReg() ||
-          argOpers[valArgIndx]->isImm()) &&
-         "invalid operand");
-  unsigned t5 = F->getRegInfo().createVirtualRegister(RC);
-  unsigned t6 = F->getRegInfo().createVirtualRegister(RC);
-  if (argOpers[valArgIndx]->isReg())
-    MIB = BuildMI(newMBB, dl, TII->get(regOpcL), t5);
-  else
-    MIB = BuildMI(newMBB, dl, TII->get(immOpcL), t5);
-  if (regOpcL != X86::MOV32rr)
-    MIB.addReg(t1);
-  (*MIB).addOperand(*argOpers[valArgIndx]);
-  assert(argOpers[valArgIndx + 1]->isReg() ==
-         argOpers[valArgIndx]->isReg());
-  assert(argOpers[valArgIndx + 1]->isImm() ==
-         argOpers[valArgIndx]->isImm());
-  if (argOpers[valArgIndx + 1]->isReg())
-    MIB = BuildMI(newMBB, dl, TII->get(regOpcH), t6);
-  else
-    MIB = BuildMI(newMBB, dl, TII->get(immOpcH), t6);
-  if (regOpcH != X86::MOV32rr)
-    MIB.addReg(t2);
-  (*MIB).addOperand(*argOpers[valArgIndx + 1]);
-
-  unsigned t7, t8;
-  if (Invert) {
-    t7 = F->getRegInfo().createVirtualRegister(RC);
-    t8 = F->getRegInfo().createVirtualRegister(RC);
-    MIB = BuildMI(newMBB, dl, TII->get(NotOpc), t7).addReg(t5);
-    MIB = BuildMI(newMBB, dl, TII->get(NotOpc), t8).addReg(t6);
-  } else {
-    t7 = t5;
-    t8 = t6;
+  MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
+  MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
+  MF->insert(I, mainMBB);
+  MF->insert(I, sinkMBB);
+
+  MachineInstrBuilder MIB;
+
+  // Transfer the remainder of BB and its successor edges to sinkMBB.
+  sinkMBB->splice(sinkMBB->begin(), MBB,
+                  llvm::next(MachineBasicBlock::iterator(MI)), MBB->end());
+  sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
+
+  // thisMBB:
+  MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), AccPhyReg);
+  for (unsigned i = 0; i < X86::AddrNumOperands; ++i)
+    MIB.addOperand(MI->getOperand(MemOpndSlot + i));
+  MIB.setMemRefs(MMOBegin, MMOEnd);
+
+  thisMBB->addSuccessor(mainMBB);
+
+  // mainMBB:
+  MachineBasicBlock *origMainMBB = mainMBB;
+  mainMBB->addLiveIn(AccPhyReg);
+
+  // Copy AccPhyReg as it is used more than once.
+  unsigned AccReg = MRI.createVirtualRegister(RC);
+  BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), AccReg)
+    .addReg(AccPhyReg);
+
+  unsigned t1 = MRI.createVirtualRegister(RC);
+  unsigned Opc = MI->getOpcode();
+  switch (Opc) {
+  default:
+    llvm_unreachable("Unhandled atomic-load-op opcode!");
+  case X86::ATOMAND8:
+  case X86::ATOMAND16:
+  case X86::ATOMAND32:
+  case X86::ATOMAND64:
+  case X86::ATOMOR8:
+  case X86::ATOMOR16:
+  case X86::ATOMOR32:
+  case X86::ATOMOR64:
+  case X86::ATOMXOR8:
+  case X86::ATOMXOR16:
+  case X86::ATOMXOR32:
+  case X86::ATOMXOR64: {
+    unsigned ARITHOpc = getNonAtomicOpcode(Opc);
+    BuildMI(mainMBB, DL, TII->get(ARITHOpc), t1).addReg(SrcReg)
+      .addReg(AccReg);
+    break;
+  }
+  case X86::ATOMNAND8:
+  case X86::ATOMNAND16:
+  case X86::ATOMNAND32:
+  case X86::ATOMNAND64: {
+    unsigned t2 = MRI.createVirtualRegister(RC);
+    unsigned NOTOpc;
+    unsigned ANDOpc = getNonAtomicOpcodeWithExtraOpc(Opc, NOTOpc);
+    BuildMI(mainMBB, DL, TII->get(ANDOpc), t2).addReg(SrcReg)
+      .addReg(AccReg);
+    BuildMI(mainMBB, DL, TII->get(NOTOpc), t1).addReg(t2);
+    break;
+  }
+  case X86::ATOMMAX8:
+  case X86::ATOMMAX16:
+  case X86::ATOMMAX32:
+  case X86::ATOMMAX64:
+  case X86::ATOMMIN8:
+  case X86::ATOMMIN16:
+  case X86::ATOMMIN32:
+  case X86::ATOMMIN64:
+  case X86::ATOMUMAX8:
+  case X86::ATOMUMAX16:
+  case X86::ATOMUMAX32:
+  case X86::ATOMUMAX64:
+  case X86::ATOMUMIN8:
+  case X86::ATOMUMIN16:
+  case X86::ATOMUMIN32:
+  case X86::ATOMUMIN64: {
+    unsigned CMPOpc;
+    unsigned CMOVOpc = getNonAtomicOpcodeWithExtraOpc(Opc, CMPOpc);
+
+    BuildMI(mainMBB, DL, TII->get(CMPOpc))
+      .addReg(SrcReg)
+      .addReg(AccReg);
+
+    if (Subtarget->hasCMov()) {
+      if (VT != MVT::i8) {
+        // Native support
+        BuildMI(mainMBB, DL, TII->get(CMOVOpc), t1)
+          .addReg(SrcReg)
+          .addReg(AccReg);
+      } else {
+        // Promote i8 to i32 to use CMOV32
+        const TargetRegisterClass *RC32 = getRegClassFor(MVT::i32);
+        unsigned SrcReg32 = MRI.createVirtualRegister(RC32);
+        unsigned AccReg32 = MRI.createVirtualRegister(RC32);
+        unsigned t2 = MRI.createVirtualRegister(RC32);
+
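+        // Build 32-bit values whose low bytes hold the i8 operands: start
+        // from an IMPLICIT_DEF and INSERT_SUBREG each byte into sub_8bit.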
+        unsigned Undef = MRI.createVirtualRegister(RC32);
+        BuildMI(mainMBB, DL, TII->get(TargetOpcode::IMPLICIT_DEF), Undef);
+
+        BuildMI(mainMBB, DL, TII->get(TargetOpcode::INSERT_SUBREG), SrcReg32)
+          .addReg(Undef)
+          .addReg(SrcReg)
+          .addImm(X86::sub_8bit);
+        BuildMI(mainMBB, DL, TII->get(TargetOpcode::INSERT_SUBREG), AccReg32)
+          .addReg(Undef)
+          .addReg(AccReg)
+          .addImm(X86::sub_8bit);
+
+        BuildMI(mainMBB, DL, TII->get(CMOVOpc), t2)
+          .addReg(SrcReg32)
+          .addReg(AccReg32);
+
+        BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), t1)
+          .addReg(t2, 0, X86::sub_8bit);
+      }
+    } else {
+      // The target has no native CMOV: emit a pseudo select and lower it
+      // via EmitLoweredSelect below.
+      assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
+             "Invalid atomic-load-op transformation!");
+      unsigned SelOpc = getPseudoCMOVOpc(VT);
+      X86::CondCode CC = X86::getCondFromCMovOpc(CMOVOpc);
+      assert(CC != X86::COND_INVALID &&
+             "Invalid atomic-load-op transformation!");
+      MIB = BuildMI(mainMBB, DL, TII->get(SelOpc), t1)
+              .addReg(SrcReg).addReg(AccReg)
+              .addImm(CC);
+      mainMBB = EmitLoweredSelect(MIB, mainMBB);
+    }
+    break;
+  }
   }
 
-  MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EAX);
+  // Copy AccPhyReg back from virtual register.
+  BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), AccPhyReg)
+    .addReg(AccReg);
+
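+  // Attempt the exchange: LCMPXCHG stores t1 only if memory still equals the
+  // accumulator; otherwise it reloads the accumulator and clears ZF, and the
+  // JNE below retries the loop.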
+  MIB = BuildMI(mainMBB, DL, TII->get(LCMPXCHGOpc));
+  for (unsigned i = 0; i < X86::AddrNumOperands; ++i)
+    MIB.addOperand(MI->getOperand(MemOpndSlot + i));
   MIB.addReg(t1);
-  MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EDX);
-  MIB.addReg(t2);
+  MIB.setMemRefs(MMOBegin, MMOEnd);
+
+  BuildMI(mainMBB, DL, TII->get(X86::JNE_4)).addMBB(origMainMBB);
+
+  mainMBB->addSuccessor(origMainMBB);
+  mainMBB->addSuccessor(sinkMBB);
 
-  MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EBX);
-  MIB.addReg(t7);
-  MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::ECX);
-  MIB.addReg(t8);
-
-  MIB = BuildMI(newMBB, dl, TII->get(X86::LCMPXCHG8B));
-  for (int i=0; i <= lastAddrIndx; ++i)
-    (*MIB).addOperand(*argOpers[i]);
-
-  assert(bInstr->hasOneMemOperand() && "Unexpected number of memoperand");
-  (*MIB).setMemRefs(bInstr->memoperands_begin(),
-                    bInstr->memoperands_end());
-
-  MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), t3);
-  MIB.addReg(X86::EAX);
-  MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), t4);
-  MIB.addReg(X86::EDX);
+  // sinkMBB:
+  sinkMBB->addLiveIn(AccPhyReg);
 
-  // insert branch
-  BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB);
+  BuildMI(*sinkMBB, sinkMBB->begin(), DL,
+          TII->get(TargetOpcode::COPY), DstReg)
+    .addReg(AccPhyReg);
 
-  bInstr->eraseFromParent();   // The pseudo instruction is gone now.
-  return nextMBB;
+  MI->eraseFromParent();
+  return sinkMBB;
 }
 
-// private utility function
+// EmitAtomicLoadArith6432 - emit the code sequence for 64-bit pseudo atomic
+// instructions on a 32-bit target. They are translated into a
+// compare-exchange spin loop, from
+//
+//    ...
+//    dst = atomic-fetch-op MI.addr, MI.val
+//    ...
+//
+// to
+//
+//    ...
+//    EAX = LOAD [MI.addr + 0]
+//    EDX = LOAD [MI.addr + 4]
+// loop:
+//    EBX = OP MI.val.lo, EAX
+//    ECX = OP MI.val.hi, EDX
+//    LCMPXCHG8B [MI.addr], [ECX:EBX & EDX:EAX are implicitly used,
+//                           EDX:EAX is implicitly defined]
+//    JNE loop
+// sink:
+//    dst = EDX:EAX
+//    ...
 MachineBasicBlock *
-X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr,
-                                                      MachineBasicBlock *MBB,
-                                                      unsigned cmovOpc) const {
-  // For the atomic min/max operator, we generate
-  //   thisMBB:
-  //   newMBB:
-  //     ld t1 = [min/max.addr]
-  //     mov t2 = [min/max.val]
-  //     cmp  t1, t2
-  //     cmov[cond] t2 = t1
-  //     mov EAX = t1
-  //     lcs dest = [bitinstr.addr], t2  [EAX is implicit]
-  //     bz   newMBB
-  //     fallthrough -->nextMBB
-  //
+X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI,
+                                           MachineBasicBlock *MBB) const {
   const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
-  const BasicBlock *LLVM_BB = MBB->getBasicBlock();
-  MachineFunction::iterator MBBIter = MBB;
-  ++MBBIter;
+  DebugLoc DL = MI->getDebugLoc();
+
+  MachineFunction *MF = MBB->getParent();
+  MachineRegisterInfo &MRI = MF->getRegInfo();
+
+  const BasicBlock *BB = MBB->getBasicBlock();
+  MachineFunction::iterator I = MBB;
+  ++I;
+
+  assert(MI->getNumOperands() <= X86::AddrNumOperands + 4 &&
+         "Unexpected number of operands");
+
+  assert(MI->hasOneMemOperand() &&
+         "Expected atomic-load-op6432 to have one memoperand");
+
+  // Memory Reference
+  MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
+  MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
+
+  unsigned DstLoReg, DstHiReg;
+  unsigned SrcLoReg, SrcHiReg;
+  unsigned MemOpndSlot;
+
+  unsigned CurOp = 0;
+
+  DstLoReg = MI->getOperand(CurOp++).getReg();
+  DstHiReg = MI->getOperand(CurOp++).getReg();
+  MemOpndSlot = CurOp;
+  CurOp += X86::AddrNumOperands;
+  SrcLoReg = MI->getOperand(CurOp++).getReg();
+  SrcHiReg = MI->getOperand(CurOp++).getReg();
+
+  const TargetRegisterClass *RC = &X86::GR32RegClass;
+
+  unsigned LCMPXCHGOpc = X86::LCMPXCHG8B;
+  unsigned LOADOpc = X86::MOV32rm;
+
+  // For the 64-bit atomic load-arith operator, we generate
+  //
+  //  thisMBB:
+  //    EAX = LOAD [MI.addr + 0]
+  //    EDX = LOAD [MI.addr + 4]
+  //  mainMBB:
+  //    EBX = OP MI.val.lo, EAX
+  //    ECX = OP MI.val.hi, EDX
+  //    LCMPXCHG8B [MI.addr], [ECX:EBX & EDX:EAX are implicitly used,
+  //                           EDX:EAX is implicitly defined]
+  //    JNE mainMBB
+  //  sinkMBB:
 
-  /// First build the CFG
-  MachineFunction *F = MBB->getParent();
   MachineBasicBlock *thisMBB = MBB;
-  MachineBasicBlock *newMBB = F->CreateMachineBasicBlock(LLVM_BB);
-  MachineBasicBlock *nextMBB = F->CreateMachineBasicBlock(LLVM_BB);
-  F->insert(MBBIter, newMBB);
-  F->insert(MBBIter, nextMBB);
-
-  // Transfer the remainder of thisMBB and its successor edges to nextMBB.
-  nextMBB->splice(nextMBB->begin(), thisMBB,
-                  llvm::next(MachineBasicBlock::iterator(mInstr)),
-                  thisMBB->end());
-  nextMBB->transferSuccessorsAndUpdatePHIs(thisMBB);
-
-  // Update thisMBB to fall through to newMBB
-  thisMBB->addSuccessor(newMBB);
-
-  // newMBB jumps to newMBB and fall through to nextMBB
-  newMBB->addSuccessor(nextMBB);
-  newMBB->addSuccessor(newMBB);
-
-  DebugLoc dl = mInstr->getDebugLoc();
-  // Insert instructions into newMBB based on incoming instruction
-  assert(mInstr->getNumOperands() < X86::AddrNumOperands + 4 &&
-         "unexpected number of operands");
-  MachineOperand& destOper = mInstr->getOperand(0);
-  MachineOperand* argOpers[2 + X86::AddrNumOperands];
-  int numArgs = mInstr->getNumOperands() - 1;
-  for (int i=0; i < numArgs; ++i)
-    argOpers[i] = &mInstr->getOperand(i+1);
-
-  // x86 address has 4 operands: base, index, scale, and displacement
-  int lastAddrIndx = X86::AddrNumOperands - 1; // [0,3]
-  int valArgIndx = lastAddrIndx + 1;
-
-  unsigned t1 = F->getRegInfo().createVirtualRegister(&X86::GR32RegClass);
-  MachineInstrBuilder MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rm), t1);
-  for (int i=0; i <= lastAddrIndx; ++i)
-    (*MIB).addOperand(*argOpers[i]);
-
-  // We only support register and immediate values
-  assert((argOpers[valArgIndx]->isReg() ||
-          argOpers[valArgIndx]->isImm()) &&
-         "invalid operand");
-
-  unsigned t2 = F->getRegInfo().createVirtualRegister(&X86::GR32RegClass);
-  if (argOpers[valArgIndx]->isReg())
-    MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), t2);
-  else
-    MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rr), t2);
-  (*MIB).addOperand(*argOpers[valArgIndx]);
+  MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
+  MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
+  MF->insert(I, mainMBB);
+  MF->insert(I, sinkMBB);
 
-  MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EAX);
-  MIB.addReg(t1);
+  MachineInstrBuilder MIB;
 
-  MIB = BuildMI(newMBB, dl, TII->get(X86::CMP32rr));
-  MIB.addReg(t1);
-  MIB.addReg(t2);
+  // Transfer the remainder of BB and its successor edges to sinkMBB.
+  sinkMBB->splice(sinkMBB->begin(), MBB,
+                  llvm::next(MachineBasicBlock::iterator(MI)), MBB->end());
+  sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
+
+  // thisMBB:
+  // Lo
+  MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), X86::EAX);
+  for (unsigned i = 0; i < X86::AddrNumOperands; ++i)
+    MIB.addOperand(MI->getOperand(MemOpndSlot + i));
+  MIB.setMemRefs(MMOBegin, MMOEnd);
+  // Hi
+  MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), X86::EDX);
+  for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
+    if (i == X86::AddrDisp)
+      MIB.addDisp(MI->getOperand(MemOpndSlot + i), 4); // 4 == sizeof(i32)
+    else
+      MIB.addOperand(MI->getOperand(MemOpndSlot + i));
+  }
+  MIB.setMemRefs(MMOBegin, MMOEnd);
 
-  // Generate movc
-  unsigned t3 = F->getRegInfo().createVirtualRegister(&X86::GR32RegClass);
-  MIB = BuildMI(newMBB, dl, TII->get(cmovOpc),t3);
-  MIB.addReg(t2);
-  MIB.addReg(t1);
+  thisMBB->addSuccessor(mainMBB);
 
-  // Cmp and exchange if none has modified the memory location
-  MIB = BuildMI(newMBB, dl, TII->get(X86::LCMPXCHG32));
-  for (int i=0; i <= lastAddrIndx; ++i)
-    (*MIB).addOperand(*argOpers[i]);
-  MIB.addReg(t3);
-  assert(mInstr->hasOneMemOperand() && "Unexpected number of memoperand");
-  (*MIB).setMemRefs(mInstr->memoperands_begin(),
-                    mInstr->memoperands_end());
+  // mainMBB:
+  MachineBasicBlock *origMainMBB = mainMBB;
+  mainMBB->addLiveIn(X86::EAX);
+  mainMBB->addLiveIn(X86::EDX);
 
-  MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), destOper.getReg());
-  MIB.addReg(X86::EAX);
+  // Copy EDX:EAX as they are used more than once.
+  unsigned LoReg = MRI.createVirtualRegister(RC);
+  unsigned HiReg = MRI.createVirtualRegister(RC);
+  BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), LoReg).addReg(X86::EAX);
+  BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), HiReg).addReg(X86::EDX);
 
-  // insert branch
-  BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB);
+  unsigned t1L = MRI.createVirtualRegister(RC);
+  unsigned t1H = MRI.createVirtualRegister(RC);
 
-  mInstr->eraseFromParent();   // The pseudo instruction is gone now.
-  return nextMBB;
+  unsigned Opc = MI->getOpcode();
+  switch (Opc) {
+  default:
+    llvm_unreachable("Unhandled atomic-load-op6432 opcode!");
+  case X86::ATOMAND6432:
+  case X86::ATOMOR6432:
+  case X86::ATOMXOR6432:
+  case X86::ATOMADD6432:
+  case X86::ATOMSUB6432: {
+    unsigned HiOpc;
+    unsigned LoOpc = getNonAtomic6432Opcode(Opc, HiOpc);
+    BuildMI(mainMBB, DL, TII->get(LoOpc), t1L).addReg(SrcLoReg).addReg(LoReg);
+    BuildMI(mainMBB, DL, TII->get(HiOpc), t1H).addReg(SrcHiReg).addReg(HiReg);
+    break;
+  }
+  case X86::ATOMNAND6432: {
+    unsigned HiOpc, NOTOpc;
+    unsigned LoOpc = getNonAtomic6432OpcodeWithExtraOpc(Opc, HiOpc, NOTOpc);
+    unsigned t2L = MRI.createVirtualRegister(RC);
+    unsigned t2H = MRI.createVirtualRegister(RC);
+    BuildMI(mainMBB, DL, TII->get(LoOpc), t2L).addReg(SrcLoReg).addReg(LoReg);
+    BuildMI(mainMBB, DL, TII->get(HiOpc), t2H).addReg(SrcHiReg).addReg(HiReg);
+    BuildMI(mainMBB, DL, TII->get(NOTOpc), t1L).addReg(t2L);
+    BuildMI(mainMBB, DL, TII->get(NOTOpc), t1H).addReg(t2H);
+    break;
+  }
+  case X86::ATOMSWAP6432: {
+    unsigned HiOpc;
+    unsigned LoOpc = getNonAtomic6432Opcode(Opc, HiOpc);
+    BuildMI(mainMBB, DL, TII->get(LoOpc), t1L).addReg(SrcLoReg);
+    BuildMI(mainMBB, DL, TII->get(HiOpc), t1H).addReg(SrcHiReg);
+    break;
+  }
+  }
+
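+  // For example, ATOMADD6432 maps to
+  //   t1L = ADD32rr MI.val.lo, LoReg  (sets CF)
+  //   t1H = ADC32rr MI.val.hi, HiReg  (consumes CF)
+  // so the 64-bit add is stitched from two 32-bit ops via the carry flag.
+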
+  // Copy EDX:EAX back from HiReg:LoReg
+  BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EAX).addReg(LoReg);
+  BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EDX).addReg(HiReg);
+  // Copy ECX:EBX from t1H:t1L
+  BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EBX).addReg(t1L);
+  BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::ECX).addReg(t1H);
+
+  MIB = BuildMI(mainMBB, DL, TII->get(LCMPXCHGOpc));
+  for (unsigned i = 0; i < X86::AddrNumOperands; ++i)
+    MIB.addOperand(MI->getOperand(MemOpndSlot + i));
+  MIB.setMemRefs(MMOBegin, MMOEnd);
+
+  BuildMI(mainMBB, DL, TII->get(X86::JNE_4)).addMBB(origMainMBB);
+
+  mainMBB->addSuccessor(origMainMBB);
+  mainMBB->addSuccessor(sinkMBB);
+
+  // sinkMBB:
+  sinkMBB->addLiveIn(X86::EAX);
+  sinkMBB->addLiveIn(X86::EDX);
+
+  BuildMI(*sinkMBB, sinkMBB->begin(), DL,
+          TII->get(TargetOpcode::COPY), DstLoReg)
+    .addReg(X86::EAX);
+  BuildMI(*sinkMBB, sinkMBB->begin(), DL,
+          TII->get(TargetOpcode::COPY), DstHiReg)
+    .addReg(X86::EDX);
+
+  MI->eraseFromParent();
+  return sinkMBB;
 }
 
 // FIXME: When we get size specific XMM0 registers, i.e. XMM0_V16I8
@@ -13176,130 +13325,46 @@
     return EmitMonitor(MI, BB);
 
     // Atomic Lowering.
-  case X86::ATOMMIN32:
-  case X86::ATOMMAX32:
-  case X86::ATOMUMIN32:
-  case X86::ATOMUMAX32:
-  case X86::ATOMMIN16:
-  case X86::ATOMMAX16:
-  case X86::ATOMUMIN16:
-  case X86::ATOMUMAX16:
-  case X86::ATOMMIN64:
-  case X86::ATOMMAX64:
-  case X86::ATOMUMIN64:
-  case X86::ATOMUMAX64: {
-    unsigned Opc;
-    switch (MI->getOpcode()) {
-    default: llvm_unreachable("illegal opcode!");
-    case X86::ATOMMIN32:  Opc = X86::CMOVL32rr; break;
-    case X86::ATOMMAX32:  Opc = X86::CMOVG32rr; break;
-    case X86::ATOMUMIN32: Opc = X86::CMOVB32rr; break;
-    case X86::ATOMUMAX32: Opc = X86::CMOVA32rr; break;
-    case X86::ATOMMIN16:  Opc = X86::CMOVL16rr; break;
-    case X86::ATOMMAX16:  Opc = X86::CMOVG16rr; break;
-    case X86::ATOMUMIN16: Opc = X86::CMOVB16rr; break;
-    case X86::ATOMUMAX16: Opc = X86::CMOVA16rr; break;
-    case X86::ATOMMIN64:  Opc = X86::CMOVL64rr; break;
-    case X86::ATOMMAX64:  Opc = X86::CMOVG64rr; break;
-    case X86::ATOMUMIN64: Opc = X86::CMOVB64rr; break;
-    case X86::ATOMUMAX64: Opc = X86::CMOVA64rr; break;
-    // FIXME: There are no CMOV8 instructions; MIN/MAX need some other way.
-    }
-    return EmitAtomicMinMaxWithCustomInserter(MI, BB, Opc);
-  }
-
-  case X86::ATOMAND32:
-  case X86::ATOMOR32:
-  case X86::ATOMXOR32:
-  case X86::ATOMNAND32: {
-    bool Invert = false;
-    unsigned RegOpc, ImmOpc;
-    switch (MI->getOpcode()) {
-    default: llvm_unreachable("illegal opcode!");
-    case X86::ATOMAND32:
-      RegOpc = X86::AND32rr; ImmOpc = X86::AND32ri; break;
-    case X86::ATOMOR32:
-      RegOpc = X86::OR32rr;  ImmOpc = X86::OR32ri; break;
-    case X86::ATOMXOR32:
-      RegOpc = X86::XOR32rr; ImmOpc = X86::XOR32ri; break;
-    case X86::ATOMNAND32:
-      RegOpc = X86::AND32rr; ImmOpc = X86::AND32ri; Invert = true; break;
-    }
-    return EmitAtomicBitwiseWithCustomInserter(MI, BB, RegOpc, ImmOpc,
-                                               X86::MOV32rm, X86::LCMPXCHG32,
-                                               X86::NOT32r, X86::EAX,
-                                               &X86::GR32RegClass, Invert);
-  }
-
+  case X86::ATOMAND8:
   case X86::ATOMAND16:
+  case X86::ATOMAND32:
+  case X86::ATOMAND64:
+    // Fall through
+  case X86::ATOMOR8:
   case X86::ATOMOR16:
+  case X86::ATOMOR32:
+  case X86::ATOMOR64:
+    // Fall through
   case X86::ATOMXOR16:
-  case X86::ATOMNAND16: {
-    bool Invert = false;
-    unsigned RegOpc, ImmOpc;
-    switch (MI->getOpcode()) {
-    default: llvm_unreachable("illegal opcode!");
-    case X86::ATOMAND16:
-      RegOpc = X86::AND16rr; ImmOpc = X86::AND16ri; break;
-    case X86::ATOMOR16:
-      RegOpc = X86::OR16rr;  ImmOpc = X86::OR16ri; break;
-    case X86::ATOMXOR16:
-      RegOpc = X86::XOR16rr; ImmOpc = X86::XOR16ri; break;
-    case X86::ATOMNAND16:
-      RegOpc = X86::AND16rr; ImmOpc = X86::AND16ri; Invert = true; break;
-    }
-    return EmitAtomicBitwiseWithCustomInserter(MI, BB, RegOpc, ImmOpc,
-                                               X86::MOV16rm, X86::LCMPXCHG16,
-                                               X86::NOT16r, X86::AX,
-                                               &X86::GR16RegClass, Invert);
-  }
-
-  case X86::ATOMAND8:
-  case X86::ATOMOR8:
   case X86::ATOMXOR8:
-  case X86::ATOMNAND8: {
-    bool Invert = false;
-    unsigned RegOpc, ImmOpc;
-    switch (MI->getOpcode()) {
-    default: llvm_unreachable("illegal opcode!");
-    case X86::ATOMAND8:
-      RegOpc = X86::AND8rr; ImmOpc = X86::AND8ri; break;
-    case X86::ATOMOR8:
-      RegOpc = X86::OR8rr;  ImmOpc = X86::OR8ri; break;
-    case X86::ATOMXOR8:
-      RegOpc = X86::XOR8rr; ImmOpc = X86::XOR8ri; break;
-    case X86::ATOMNAND8:
-      RegOpc = X86::AND8rr; ImmOpc = X86::AND8ri; Invert = true; break;
-    }
-    return EmitAtomicBitwiseWithCustomInserter(MI, BB, RegOpc, ImmOpc,
-                                               X86::MOV8rm, X86::LCMPXCHG8,
-                                               X86::NOT8r, X86::AL,
-                                               &X86::GR8RegClass, Invert);
-  }
-
-  // This group is for 64-bit host.
-  case X86::ATOMAND64:
-  case X86::ATOMOR64:
+  case X86::ATOMXOR32:
   case X86::ATOMXOR64:
-  case X86::ATOMNAND64: {
-    bool Invert = false;
-    unsigned RegOpc, ImmOpc;
-    switch (MI->getOpcode()) {
-    default: llvm_unreachable("illegal opcode!");
-    case X86::ATOMAND64:
-      RegOpc = X86::AND64rr; ImmOpc = X86::AND64ri32; break;
-    case X86::ATOMOR64:
-      RegOpc = X86::OR64rr;  ImmOpc = X86::OR64ri32; break;
-    case X86::ATOMXOR64:
-      RegOpc = X86::XOR64rr; ImmOpc = X86::XOR64ri32; break;
-    case X86::ATOMNAND64:
-      RegOpc = X86::AND64rr; ImmOpc = X86::AND64ri32; Invert = true; break;
-    }
-    return EmitAtomicBitwiseWithCustomInserter(MI, BB, RegOpc, ImmOpc,
-                                               X86::MOV64rm, X86::LCMPXCHG64,
-                                               X86::NOT64r, X86::RAX,
-                                               &X86::GR64RegClass, Invert);
-  }
+    // Fall through
+  case X86::ATOMNAND8:
+  case X86::ATOMNAND16:
+  case X86::ATOMNAND32:
+  case X86::ATOMNAND64:
+    // Fall through
+  case X86::ATOMMAX8:
+  case X86::ATOMMAX16:
+  case X86::ATOMMAX32:
+  case X86::ATOMMAX64:
+    // Fall through
+  case X86::ATOMMIN8:
+  case X86::ATOMMIN16:
+  case X86::ATOMMIN32:
+  case X86::ATOMMIN64:
+    // Fall through
+  case X86::ATOMUMAX8:
+  case X86::ATOMUMAX16:
+  case X86::ATOMUMAX32:
+  case X86::ATOMUMAX64:
+    // Fall through
+  case X86::ATOMUMIN8:
+  case X86::ATOMUMIN16:
+  case X86::ATOMUMIN32:
+  case X86::ATOMUMIN64:
+    return EmitAtomicLoadArith(MI, BB);
 
   // This group does 64-bit operations on a 32-bit host.
   case X86::ATOMAND6432:
@@ -13308,44 +13373,8 @@
   case X86::ATOMNAND6432:
   case X86::ATOMADD6432:
   case X86::ATOMSUB6432:
-  case X86::ATOMSWAP6432: {
-    bool Invert = false;
-    unsigned RegOpcL, RegOpcH, ImmOpcL, ImmOpcH;
-    switch (MI->getOpcode()) {
-    default: llvm_unreachable("illegal opcode!");
-    case X86::ATOMAND6432:
-      RegOpcL = RegOpcH = X86::AND32rr;
-      ImmOpcL = ImmOpcH = X86::AND32ri;
-      break;
-    case X86::ATOMOR6432:
-      RegOpcL = RegOpcH = X86::OR32rr;
-      ImmOpcL = ImmOpcH = X86::OR32ri;
-      break;
-    case X86::ATOMXOR6432:
-      RegOpcL = RegOpcH = X86::XOR32rr;
-      ImmOpcL = ImmOpcH = X86::XOR32ri;
-      break;
-    case X86::ATOMNAND6432:
-      RegOpcL = RegOpcH = X86::AND32rr;
-      ImmOpcL = ImmOpcH = X86::AND32ri;
-      Invert = true;
-      break;
-    case X86::ATOMADD6432:
-      RegOpcL = X86::ADD32rr; RegOpcH = X86::ADC32rr;
-      ImmOpcL = X86::ADD32ri; ImmOpcH = X86::ADC32ri;
-      break;
-    case X86::ATOMSUB6432:
-      RegOpcL = X86::SUB32rr; RegOpcH = X86::SBB32rr;
-      ImmOpcL = X86::SUB32ri; ImmOpcH = X86::SBB32ri;
-      break;
-    case X86::ATOMSWAP6432:
-      RegOpcL = RegOpcH = X86::MOV32rr;
-      ImmOpcL = ImmOpcH = X86::MOV32ri;
-      break;
-    }
-    return EmitAtomicBit6432WithCustomInserter(MI, BB, RegOpcL, RegOpcH,
-                                               ImmOpcL, ImmOpcH, Invert);
-  }
+  case X86::ATOMSWAP6432:
+    return EmitAtomicLoadArith6432(MI, BB);
 
   case X86::VASTART_SAVE_XMM_REGS:
     return EmitVAStartSaveXMMRegsWithCustomInserter(MI, BB);

Modified: llvm/branches/R600/lib/Target/X86/X86ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/X86/X86ISelLowering.h?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/X86/X86ISelLowering.h (original)
+++ llvm/branches/R600/lib/Target/X86/X86ISelLowering.h Fri Sep 21 12:29:50 2012
@@ -861,36 +861,17 @@
                                    MachineBasicBlock *BB) const;
     MachineBasicBlock *EmitMwait(MachineInstr *MI, MachineBasicBlock *BB) const;
 
-    /// Utility function to emit atomic bitwise operations (and, or, xor).
-    /// It takes the bitwise instruction to expand, the associated machine basic
-    /// block, and the associated X86 opcodes for reg/reg and reg/imm.
-    MachineBasicBlock *EmitAtomicBitwiseWithCustomInserter(
-                                                    MachineInstr *BInstr,
-                                                    MachineBasicBlock *BB,
-                                                    unsigned regOpc,
-                                                    unsigned immOpc,
-                                                    unsigned loadOpc,
-                                                    unsigned cxchgOpc,
-                                                    unsigned notOpc,
-                                                    unsigned EAXreg,
-                                              const TargetRegisterClass *RC,
-                                                    bool Invert = false) const;
-
-    MachineBasicBlock *EmitAtomicBit6432WithCustomInserter(
-                                                    MachineInstr *BInstr,
-                                                    MachineBasicBlock *BB,
-                                                    unsigned regOpcL,
-                                                    unsigned regOpcH,
-                                                    unsigned immOpcL,
-                                                    unsigned immOpcH,
-                                                    bool Invert = false) const;
-
-    /// Utility function to emit atomic min and max.  It takes the min/max
-    /// instruction to expand, the associated basic block, and the associated
-    /// cmov opcode for moving the min or max value.
-    MachineBasicBlock *EmitAtomicMinMaxWithCustomInserter(MachineInstr *BInstr,
-                                                          MachineBasicBlock *BB,
-                                                        unsigned cmovOpc) const;
+    /// Utility function to emit atomic-load-arith operations (and, or, xor,
+    /// nand, max, min, umax, umin). It takes the corresponding pseudo
+    /// instruction to expand and the associated machine basic block.
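+    ///
+    /// For example, an IR 'atomicrmw and i32* %p, i32 %v seq_cst' is selected
+    /// to the ATOMAND32 pseudo, whose usesCustomInserter flag routes it
+    /// through EmitInstrWithCustomInserter to this function.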
+    MachineBasicBlock *EmitAtomicLoadArith(MachineInstr *MI,
+                                           MachineBasicBlock *MBB) const;
+
+    /// Utility function to emit atomic-load-arith operations (and, or, xor,
+    /// nand, add, sub, swap) for 64-bit operands on 32-bit targets.
+    MachineBasicBlock *EmitAtomicLoadArith6432(MachineInstr *MI,
+                                               MachineBasicBlock *MBB) const;
 
     // Utility function to emit the low-level va_arg code for X86-64.
     MachineBasicBlock *EmitVAARG64WithCustomInserter(

Modified: llvm/branches/R600/lib/Target/X86/X86InstrCompiler.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/X86/X86InstrCompiler.td?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/X86/X86InstrCompiler.td (original)
+++ llvm/branches/R600/lib/Target/X86/X86InstrCompiler.td Fri Sep 21 12:29:50 2012
@@ -482,130 +482,74 @@
 // Atomic Instruction Pseudo Instructions
 //===----------------------------------------------------------------------===//
 
-// Atomic exchange, and, or, xor
-let Constraints = "$val = $dst", Defs = [EFLAGS],
-                  usesCustomInserter = 1 in {
+// Pseudo atomic instructions
+
+multiclass PSEUDO_ATOMIC_LOAD_BINOP<string mnemonic> {
+  let usesCustomInserter = 1, mayLoad = 1, mayStore = 1 in {
+    def #NAME#8  : I<0, Pseudo, (outs GR8:$dst),
+                     (ins i8mem:$ptr, GR8:$val),
+                     !strconcat(mnemonic, "8 PSEUDO!"), []>;
+    def #NAME#16 : I<0, Pseudo, (outs GR16:$dst),
+                     (ins i16mem:$ptr, GR16:$val),
+                     !strconcat(mnemonic, "16 PSEUDO!"), []>;
+    def #NAME#32 : I<0, Pseudo, (outs GR32:$dst),
+                     (ins i32mem:$ptr, GR32:$val),
+                     !strconcat(mnemonic, "32 PSEUDO!"), []>;
+    def #NAME#64 : I<0, Pseudo, (outs GR64:$dst),
+                     (ins i64mem:$ptr, GR64:$val),
+                     !strconcat(mnemonic, "64 PSEUDO!"), []>;
+  }
+}
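+
+// As a sketch of the expansion: 'defm ATOMAND : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMAND">'
+// defines ATOMAND8/16/32/64, e.g.
+//   def ATOMAND32 : I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$ptr, GR32:$val),
+//                     "#ATOMAND32 PSEUDO!", []>;
+// Selection patterns are attached separately via PSEUDO_ATOMIC_LOAD_BINOP_PATS.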
 
-def ATOMAND8 : I<0, Pseudo, (outs GR8:$dst),(ins i8mem:$ptr, GR8:$val),
-               "#ATOMAND8 PSEUDO!",
-               [(set GR8:$dst, (atomic_load_and_8 addr:$ptr, GR8:$val))]>;
-def ATOMOR8 : I<0, Pseudo, (outs GR8:$dst),(ins i8mem:$ptr, GR8:$val),
-               "#ATOMOR8 PSEUDO!",
-               [(set GR8:$dst, (atomic_load_or_8 addr:$ptr, GR8:$val))]>;
-def ATOMXOR8 : I<0, Pseudo,(outs GR8:$dst),(ins i8mem:$ptr, GR8:$val),
-               "#ATOMXOR8 PSEUDO!",
-               [(set GR8:$dst, (atomic_load_xor_8 addr:$ptr, GR8:$val))]>;
-def ATOMNAND8 : I<0, Pseudo,(outs GR8:$dst),(ins i8mem:$ptr, GR8:$val),
-               "#ATOMNAND8 PSEUDO!",
-               [(set GR8:$dst, (atomic_load_nand_8 addr:$ptr, GR8:$val))]>;
-
-def ATOMAND16 : I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
-               "#ATOMAND16 PSEUDO!",
-               [(set GR16:$dst, (atomic_load_and_16 addr:$ptr, GR16:$val))]>;
-def ATOMOR16 : I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
-               "#ATOMOR16 PSEUDO!",
-               [(set GR16:$dst, (atomic_load_or_16 addr:$ptr, GR16:$val))]>;
-def ATOMXOR16 : I<0, Pseudo,(outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
-               "#ATOMXOR16 PSEUDO!",
-               [(set GR16:$dst, (atomic_load_xor_16 addr:$ptr, GR16:$val))]>;
-def ATOMNAND16 : I<0, Pseudo,(outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
-               "#ATOMNAND16 PSEUDO!",
-               [(set GR16:$dst, (atomic_load_nand_16 addr:$ptr, GR16:$val))]>;
-def ATOMMIN16: I<0, Pseudo, (outs GR16:$dst), (ins i16mem:$ptr, GR16:$val),
-               "#ATOMMIN16 PSEUDO!",
-               [(set GR16:$dst, (atomic_load_min_16 addr:$ptr, GR16:$val))]>;
-def ATOMMAX16: I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
-               "#ATOMMAX16 PSEUDO!",
-               [(set GR16:$dst, (atomic_load_max_16 addr:$ptr, GR16:$val))]>;
-def ATOMUMIN16: I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
-               "#ATOMUMIN16 PSEUDO!",
-               [(set GR16:$dst, (atomic_load_umin_16 addr:$ptr, GR16:$val))]>;
-def ATOMUMAX16: I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
-               "#ATOMUMAX16 PSEUDO!",
-               [(set GR16:$dst, (atomic_load_umax_16 addr:$ptr, GR16:$val))]>;
-
-
-def ATOMAND32 : I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
-               "#ATOMAND32 PSEUDO!",
-               [(set GR32:$dst, (atomic_load_and_32 addr:$ptr, GR32:$val))]>;
-def ATOMOR32 : I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
-               "#ATOMOR32 PSEUDO!",
-               [(set GR32:$dst, (atomic_load_or_32 addr:$ptr, GR32:$val))]>;
-def ATOMXOR32 : I<0, Pseudo,(outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
-               "#ATOMXOR32 PSEUDO!",
-               [(set GR32:$dst, (atomic_load_xor_32 addr:$ptr, GR32:$val))]>;
-def ATOMNAND32 : I<0, Pseudo,(outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
-               "#ATOMNAND32 PSEUDO!",
-               [(set GR32:$dst, (atomic_load_nand_32 addr:$ptr, GR32:$val))]>;
-def ATOMMIN32: I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$ptr, GR32:$val),
-               "#ATOMMIN32 PSEUDO!",
-               [(set GR32:$dst, (atomic_load_min_32 addr:$ptr, GR32:$val))]>;
-def ATOMMAX32: I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
-               "#ATOMMAX32 PSEUDO!",
-               [(set GR32:$dst, (atomic_load_max_32 addr:$ptr, GR32:$val))]>;
-def ATOMUMIN32: I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
-               "#ATOMUMIN32 PSEUDO!",
-               [(set GR32:$dst, (atomic_load_umin_32 addr:$ptr, GR32:$val))]>;
-def ATOMUMAX32: I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
-               "#ATOMUMAX32 PSEUDO!",
-               [(set GR32:$dst, (atomic_load_umax_32 addr:$ptr, GR32:$val))]>;
-
-
-
-def ATOMAND64 : I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
-               "#ATOMAND64 PSEUDO!",
-               [(set GR64:$dst, (atomic_load_and_64 addr:$ptr, GR64:$val))]>;
-def ATOMOR64 : I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
-               "#ATOMOR64 PSEUDO!",
-               [(set GR64:$dst, (atomic_load_or_64 addr:$ptr, GR64:$val))]>;
-def ATOMXOR64 : I<0, Pseudo,(outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
-               "#ATOMXOR64 PSEUDO!",
-               [(set GR64:$dst, (atomic_load_xor_64 addr:$ptr, GR64:$val))]>;
-def ATOMNAND64 : I<0, Pseudo,(outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
-               "#ATOMNAND64 PSEUDO!",
-               [(set GR64:$dst, (atomic_load_nand_64 addr:$ptr, GR64:$val))]>;
-def ATOMMIN64: I<0, Pseudo, (outs GR64:$dst), (ins i64mem:$ptr, GR64:$val),
-               "#ATOMMIN64 PSEUDO!",
-               [(set GR64:$dst, (atomic_load_min_64 addr:$ptr, GR64:$val))]>;
-def ATOMMAX64: I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
-               "#ATOMMAX64 PSEUDO!",
-               [(set GR64:$dst, (atomic_load_max_64 addr:$ptr, GR64:$val))]>;
-def ATOMUMIN64: I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
-               "#ATOMUMIN64 PSEUDO!",
-               [(set GR64:$dst, (atomic_load_umin_64 addr:$ptr, GR64:$val))]>;
-def ATOMUMAX64: I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
-               "#ATOMUMAX64 PSEUDO!",
-               [(set GR64:$dst, (atomic_load_umax_64 addr:$ptr, GR64:$val))]>;
+multiclass PSEUDO_ATOMIC_LOAD_BINOP_PATS<string name, string frag> {
+  def : Pat<(!cast<PatFrag>(frag # "_8") addr:$ptr, GR8:$val),
+            (!cast<Instruction>(name # "8") addr:$ptr, GR8:$val)>;
+  def : Pat<(!cast<PatFrag>(frag # "_16") addr:$ptr, GR16:$val),
+            (!cast<Instruction>(name # "16") addr:$ptr, GR16:$val)>;
+  def : Pat<(!cast<PatFrag>(frag # "_32") addr:$ptr, GR32:$val),
+            (!cast<Instruction>(name # "32") addr:$ptr, GR32:$val)>;
+  def : Pat<(!cast<PatFrag>(frag # "_64") addr:$ptr, GR64:$val),
+            (!cast<Instruction>(name # "64") addr:$ptr, GR64:$val)>;
 }
 
-let Constraints = "$val1 = $dst1, $val2 = $dst2",
-                  Defs = [EFLAGS, EAX, EBX, ECX, EDX],
-                  Uses = [EAX, EBX, ECX, EDX],
-                  mayLoad = 1, mayStore = 1,
-                  usesCustomInserter = 1 in {
-def ATOMAND6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
-                               (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
-               "#ATOMAND6432 PSEUDO!", []>;
-def ATOMOR6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
-                               (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
-               "#ATOMOR6432 PSEUDO!", []>;
-def ATOMXOR6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
-                               (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
-               "#ATOMXOR6432 PSEUDO!", []>;
-def ATOMNAND6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
-                               (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
-               "#ATOMNAND6432 PSEUDO!", []>;
-def ATOMADD6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
-                               (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
-               "#ATOMADD6432 PSEUDO!", []>;
-def ATOMSUB6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
-                               (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
-               "#ATOMSUB6432 PSEUDO!", []>;
-def ATOMSWAP6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
-                               (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
-               "#ATOMSWAP6432 PSEUDO!", []>;
+// Atomic exchange, and, or, xor
+defm ATOMAND  : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMAND">;
+defm ATOMOR   : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMOR">;
+defm ATOMXOR  : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMXOR">;
+defm ATOMNAND : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMNAND">;
+defm ATOMMAX  : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMMAX">;
+defm ATOMMIN  : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMMIN">;
+defm ATOMUMAX : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMUMAX">;
+defm ATOMUMIN : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMUMIN">;
+
+defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMAND",  "atomic_load_and">;
+defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMOR",   "atomic_load_or">;
+defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMXOR",  "atomic_load_xor">;
+defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMNAND", "atomic_load_nand">;
+defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMMAX",  "atomic_load_max">;
+defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMMIN",  "atomic_load_min">;
+defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMUMAX", "atomic_load_umax">;
+defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMUMIN", "atomic_load_umin">;
+
+multiclass PSEUDO_ATOMIC_LOAD_BINOP6432<string mnemonic> {
+  let usesCustomInserter = 1, mayLoad = 1, mayStore = 1 in
+    def #NAME#6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
+                       (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
+                       !strconcat(mnemonic, "6432 PSEUDO!"), []>;
 }
 
+defm ATOMAND  : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMAND">;
+defm ATOMOR   : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMOR">;
+defm ATOMXOR  : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMXOR">;
+defm ATOMNAND : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMNAND">;
+defm ATOMADD  : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMADD">;
+defm ATOMSUB  : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMSUB">;
+defm ATOMMAX  : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMMAX">;
+defm ATOMMIN  : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMMIN">;
+defm ATOMUMAX : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMUMAX">;
+defm ATOMUMIN : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMUMIN">;
+defm ATOMSWAP : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMSWAP">;
+
 //===----------------------------------------------------------------------===//
 // Normal-Instructions-With-Lock-Prefix Pseudo Instructions
 //===----------------------------------------------------------------------===//
@@ -717,107 +661,125 @@
 defm LOCK_XOR : LOCK_ArithBinOp<0x30, 0x80, 0x83, MRM6m, "xor">;
 
 // Optimized codegen when the non-memory output is not used.
+multiclass LOCK_ArithUnOp<bits<8> Opc8, bits<8> Opc, Format Form,
+                          string mnemonic> {
 let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1 in {
 
-def LOCK_INC8m  : I<0xFE, MRM0m, (outs), (ins i8mem :$dst),
-                    "lock\n\t"
-                    "inc{b}\t$dst", [], IIC_UNARY_MEM>, LOCK;
-def LOCK_INC16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst),
-                    "lock\n\t"
-                    "inc{w}\t$dst", [], IIC_UNARY_MEM>, OpSize, LOCK;
-def LOCK_INC32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst),
-                    "lock\n\t"
-                    "inc{l}\t$dst", [], IIC_UNARY_MEM>, LOCK;
-def LOCK_INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst),
-                     "lock\n\t"
-                     "inc{q}\t$dst", [], IIC_UNARY_MEM>, LOCK;
-
-def LOCK_DEC8m  : I<0xFE, MRM1m, (outs), (ins i8mem :$dst),
-                    "lock\n\t"
-                    "dec{b}\t$dst", [], IIC_UNARY_MEM>, LOCK;
-def LOCK_DEC16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst),
-                    "lock\n\t"
-                    "dec{w}\t$dst", [], IIC_UNARY_MEM>, OpSize, LOCK;
-def LOCK_DEC32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst),
-                    "lock\n\t"
-                    "dec{l}\t$dst", [], IIC_UNARY_MEM>, LOCK;
-def LOCK_DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst),
-                      "lock\n\t"
-                      "dec{q}\t$dst", [], IIC_UNARY_MEM>, LOCK;
+def #NAME#8m  : I<Opc8, Form, (outs), (ins i8mem :$dst),
+                  !strconcat("lock\n\t", mnemonic, "{b}\t$dst"),
+                  [], IIC_UNARY_MEM>, LOCK;
+def #NAME#16m : I<Opc, Form, (outs), (ins i16mem:$dst),
+                  !strconcat("lock\n\t", mnemonic, "{w}\t$dst"),
+                  [], IIC_UNARY_MEM>, OpSize, LOCK;
+def #NAME#32m : I<Opc, Form, (outs), (ins i32mem:$dst),
+                  !strconcat("lock\n\t", mnemonic, "{l}\t$dst"),
+                  [], IIC_UNARY_MEM>, LOCK;
+def #NAME#64m : RI<Opc, Form, (outs), (ins i64mem:$dst),
+                   !strconcat("lock\n\t", mnemonic, "{q}\t$dst"),
+                   [], IIC_UNARY_MEM>, LOCK;
+}
 }
 
-// Atomic compare and swap.
-let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX],
-    isCodeGenOnly = 1 in
-def LCMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$ptr),
-               "lock\n\t"
-               "cmpxchg8b\t$ptr",
-               [(X86cas8 addr:$ptr)], IIC_CMPX_LOCK_8B>, TB, LOCK;
+defm LOCK_INC    : LOCK_ArithUnOp<0xFE, 0xFF, MRM0m, "inc">;
+defm LOCK_DEC    : LOCK_ArithUnOp<0xFE, 0xFF, MRM1m, "dec">;
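+
+// For instance, LOCK_INC32m carries the asm string "lock\n\tinc{l}\t$dst" and
+// is used when the register result of an atomic increment is not needed.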
 
-let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX],
-    isCodeGenOnly = 1 in
-def LCMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$ptr),
-                    "lock\n\t"
-                    "cmpxchg16b\t$ptr",
-                    [(X86cas16 addr:$ptr)], IIC_CMPX_LOCK_16B>, TB, LOCK,
-                    Requires<[HasCmpxchg16b]>;
-
-let Defs = [AL, EFLAGS], Uses = [AL], isCodeGenOnly = 1 in {
-def LCMPXCHG8 : I<0xB0, MRMDestMem, (outs), (ins i8mem:$ptr, GR8:$swap),
-               "lock\n\t"
-               "cmpxchg{b}\t{$swap, $ptr|$ptr, $swap}",
-               [(X86cas addr:$ptr, GR8:$swap, 1)], IIC_CMPX_LOCK_8>, TB, LOCK;
+// Atomic compare and swap.
+multiclass LCMPXCHG_UnOp<bits<8> Opc, Format Form, string mnemonic,
+                         SDPatternOperator frag, X86MemOperand x86memop,
+                         InstrItinClass itin> {
+let isCodeGenOnly = 1 in {
+  def #NAME# : I<Opc, Form, (outs), (ins x86memop:$ptr),
+                 !strconcat("lock\n\t", mnemonic, "\t$ptr"),
+                 [(frag addr:$ptr)], itin>, TB, LOCK;
+}
 }
 
-let Defs = [AX, EFLAGS], Uses = [AX], isCodeGenOnly = 1 in {
-def LCMPXCHG16 : I<0xB1, MRMDestMem, (outs), (ins i16mem:$ptr, GR16:$swap),
-               "lock\n\t"
-               "cmpxchg{w}\t{$swap, $ptr|$ptr, $swap}",
-               [(X86cas addr:$ptr, GR16:$swap, 2)], IIC_CMPX_LOCK>, TB, OpSize, LOCK;
+multiclass LCMPXCHG_BinOp<bits<8> Opc8, bits<8> Opc, Format Form,
+                          string mnemonic, SDPatternOperator frag,
+                          InstrItinClass itin8, InstrItinClass itin> {
+let isCodeGenOnly = 1 in {
+  let Defs = [AL, EFLAGS], Uses = [AL] in
+  def #NAME#8  : I<Opc8, Form, (outs), (ins i8mem:$ptr, GR8:$swap),
+                   !strconcat("lock\n\t", mnemonic,
+                              "{b}\t{$swap, $ptr|$ptr, $swap}"),
+                   [(frag addr:$ptr, GR8:$swap, 1)], itin8>, TB, LOCK;
+  let Defs = [AX, EFLAGS], Uses = [AX] in
+  def #NAME#16 : I<Opc, Form, (outs), (ins i16mem:$ptr, GR16:$swap),
+                   !strconcat("lock\n\t", mnemonic,
+                              "{w}\t{$swap, $ptr|$ptr, $swap}"),
+                   [(frag addr:$ptr, GR16:$swap, 2)], itin>, TB, OpSize, LOCK;
+  let Defs = [EAX, EFLAGS], Uses = [EAX] in
+  def #NAME#32 : I<Opc, Form, (outs), (ins i32mem:$ptr, GR32:$swap),
+                   !strconcat("lock\n\t", mnemonic,
+                              "{l}\t{$swap, $ptr|$ptr, $swap}"),
+                   [(frag addr:$ptr, GR32:$swap, 4)], itin>, TB, LOCK;
+  let Defs = [RAX, EFLAGS], Uses = [RAX] in
+  def #NAME#64 : RI<Opc, Form, (outs), (ins i64mem:$ptr, GR64:$swap),
+                    !strconcat("lock\n\t", mnemonic,
+                               "{q}\t{$swap, $ptr|$ptr, $swap}"),
+                    [(frag addr:$ptr, GR64:$swap, 8)], itin>, TB, LOCK;
+}
 }
 
-let Defs = [EAX, EFLAGS], Uses = [EAX], isCodeGenOnly = 1 in {
-def LCMPXCHG32 : I<0xB1, MRMDestMem, (outs), (ins i32mem:$ptr, GR32:$swap),
-               "lock\n\t"
-               "cmpxchg{l}\t{$swap, $ptr|$ptr, $swap}",
-               [(X86cas addr:$ptr, GR32:$swap, 4)], IIC_CMPX_LOCK>, TB, LOCK;
+let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX] in {
+defm LCMPXCHG8B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg8b",
+                                X86cas8, i64mem,
+                                IIC_CMPX_LOCK_8B>;
 }
 
-let Defs = [RAX, EFLAGS], Uses = [RAX], isCodeGenOnly = 1 in {
-def LCMPXCHG64 : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$ptr, GR64:$swap),
-               "lock\n\t"
-               "cmpxchg{q}\t{$swap, $ptr|$ptr, $swap}",
-               [(X86cas addr:$ptr, GR64:$swap, 8)], IIC_CMPX_LOCK>, TB, LOCK;
+let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX],
+    Predicates = [HasCmpxchg16b] in {
+defm LCMPXCHG16B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg16b",
+                                 X86cas16, i128mem,
+                                 IIC_CMPX_LOCK_16B>, REX_W;
 }
 
+defm LCMPXCHG : LCMPXCHG_BinOp<0xB0, 0xB1, MRMDestMem, "cmpxchg",
+                               X86cas, IIC_CMPX_LOCK_8, IIC_CMPX_LOCK>;
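+
+// Illustrative expansion: LCMPXCHG32 is equivalent to the old definition
+//   def LCMPXCHG32 : I<0xB1, MRMDestMem, (outs), (ins i32mem:$ptr, GR32:$swap),
+//                      "lock\n\tcmpxchg{l}\t{$swap, $ptr|$ptr, $swap}",
+//                      [(X86cas addr:$ptr, GR32:$swap, 4)], IIC_CMPX_LOCK>,
+//                      TB, LOCK;
+// with EAX implicitly used and defined.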
+
 // Atomic exchange and add
-let Constraints = "$val = $dst", Defs = [EFLAGS], isCodeGenOnly = 1 in {
-def LXADD8  : I<0xC0, MRMSrcMem, (outs GR8:$dst), (ins GR8:$val, i8mem:$ptr),
-               "lock\n\t"
-               "xadd{b}\t{$val, $ptr|$ptr, $val}",
-               [(set GR8:$dst, (atomic_load_add_8 addr:$ptr, GR8:$val))],
-                IIC_XADD_LOCK_MEM8>,
-                TB, LOCK;
-def LXADD16 : I<0xC1, MRMSrcMem, (outs GR16:$dst), (ins GR16:$val, i16mem:$ptr),
-               "lock\n\t"
-               "xadd{w}\t{$val, $ptr|$ptr, $val}",
-               [(set GR16:$dst, (atomic_load_add_16 addr:$ptr, GR16:$val))],
-                IIC_XADD_LOCK_MEM>,
-                TB, OpSize, LOCK;
-def LXADD32 : I<0xC1, MRMSrcMem, (outs GR32:$dst), (ins GR32:$val, i32mem:$ptr),
-               "lock\n\t"
-               "xadd{l}\t{$val, $ptr|$ptr, $val}",
-               [(set GR32:$dst, (atomic_load_add_32 addr:$ptr, GR32:$val))],
-                IIC_XADD_LOCK_MEM>,
-                TB, LOCK;
-def LXADD64 : RI<0xC1, MRMSrcMem, (outs GR64:$dst), (ins GR64:$val,i64mem:$ptr),
-               "lock\n\t"
-               "xadd{q}\t{$val, $ptr|$ptr, $val}",
-               [(set GR64:$dst, (atomic_load_add_64 addr:$ptr, GR64:$val))],
-                IIC_XADD_LOCK_MEM>,
-                TB, LOCK;
+multiclass ATOMIC_LOAD_BINOP<bits<8> opc8, bits<8> opc, string mnemonic,
+                             string frag,
+                             InstrItinClass itin8, InstrItinClass itin> {
+  let Constraints = "$val = $dst", Defs = [EFLAGS], isCodeGenOnly = 1 in {
+    def #NAME#8  : I<opc8, MRMSrcMem, (outs GR8:$dst),
+                     (ins GR8:$val, i8mem:$ptr),
+                     !strconcat("lock\n\t", mnemonic,
+                                "{b}\t{$val, $ptr|$ptr, $val}"),
+                     [(set GR8:$dst,
+                           (!cast<PatFrag>(frag # "_8") addr:$ptr, GR8:$val))],
+                     itin8>;
+    def #NAME#16 : I<opc, MRMSrcMem, (outs GR16:$dst),
+                     (ins GR16:$val, i16mem:$ptr),
+                     !strconcat("lock\n\t", mnemonic,
+                                "{w}\t{$val, $ptr|$ptr, $val}"),
+                     [(set
+                        GR16:$dst,
+                        (!cast<PatFrag>(frag # "_16") addr:$ptr, GR16:$val))],
+                     itin>;
+    def #NAME#32 : I<opc, MRMSrcMem, (outs GR32:$dst),
+                     (ins GR32:$val, i32mem:$ptr),
+                     !strconcat("lock\n\t", mnemonic,
+                                "{l}\t{$val, $ptr|$ptr, $val}"),
+                     [(set
+                        GR32:$dst,
+                        (!cast<PatFrag>(frag # "_32") addr:$ptr, GR32:$val))],
+                     itin>;
+    def #NAME#64 : RI<opc, MRMSrcMem, (outs GR64:$dst),
+                      (ins GR64:$val, i64mem:$ptr),
+                      !strconcat("lock\n\t", mnemonic,
+                                 "{q}\t{$val, $ptr|$ptr, $val}"),
+                      [(set
+                         GR64:$dst,
+                         (!cast<PatFrag>(frag # "_64") addr:$ptr, GR64:$val))],
+                      itin>;
+  }
 }
 
+defm LXADD : ATOMIC_LOAD_BINOP<0xc0, 0xc1, "xadd", "atomic_load_add",
+                               IIC_XADD_LOCK_MEM8, IIC_XADD_LOCK_MEM>,
+             TB, LOCK;
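+
+// Illustrative expansion: LXADD32 matches the old definition
+//   def LXADD32 : I<0xC1, MRMSrcMem, (outs GR32:$dst),
+//                   (ins GR32:$val, i32mem:$ptr),
+//                   "lock\n\txadd{l}\t{$val, $ptr|$ptr, $val}",
+//                   [(set GR32:$dst, (atomic_load_add_32 addr:$ptr, GR32:$val))],
+//                   IIC_XADD_LOCK_MEM>, TB, LOCK;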
+
 def ACQUIRE_MOV8rm  : I<0, Pseudo, (outs GR8 :$dst), (ins i8mem :$src),
                       "#ACQUIRE_MOV PSEUDO!",
                       [(set GR8:$dst,  (atomic_load_8  addr:$src))]>;

Modified: llvm/branches/R600/lib/Target/X86/X86InstrFMA.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/X86/X86InstrFMA.td?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/X86/X86InstrFMA.td (original)
+++ llvm/branches/R600/lib/Target/X86/X86InstrFMA.td Fri Sep 21 12:29:50 2012
@@ -42,7 +42,7 @@
                    !strconcat(OpcodeStr,
                               "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                    [(set VR256:$dst, (OpVT256 (Op VR256:$src2, VR256:$src1,
-                                               VR256:$src3)))]>;
+                                               VR256:$src3)))]>, VEX_L;
 
   let mayLoad = 1 in
   def mY    : FMA3<opc, MRMSrcMem, (outs VR256:$dst),
@@ -51,7 +51,7 @@
                               "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                    [(set VR256:$dst,
                      (OpVT256 (Op VR256:$src2, VR256:$src1,
-                               (MemFrag256 addr:$src3))))]>;
+                               (MemFrag256 addr:$src3))))]>, VEX_L;
 }
 } // Constraints = "$src1 = $dst"
 
@@ -280,19 +280,19 @@
            "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
            [(set VR256:$dst,
              (OpVT256 (OpNode VR256:$src1, VR256:$src2, VR256:$src3)))]>,
-           VEX_W, MemOp4;
+           VEX_W, MemOp4, VEX_L;
   def rmY : FMA4<opc, MRMSrcMem, (outs VR256:$dst),
            (ins VR256:$src1, VR256:$src2, f256mem:$src3),
            !strconcat(OpcodeStr,
            "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
            [(set VR256:$dst, (OpNode VR256:$src1, VR256:$src2,
-                              (ld_frag256 addr:$src3)))]>, VEX_W, MemOp4;
+                              (ld_frag256 addr:$src3)))]>, VEX_W, MemOp4, VEX_L;
   def mrY : FMA4<opc, MRMSrcMem, (outs VR256:$dst),
            (ins VR256:$src1, f256mem:$src2, VR256:$src3),
            !strconcat(OpcodeStr,
            "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
-           [(set VR256:$dst,
-             (OpNode VR256:$src1, (ld_frag256 addr:$src2), VR256:$src3))]>;
+           [(set VR256:$dst, (OpNode VR256:$src1,
+                              (ld_frag256 addr:$src2), VR256:$src3))]>, VEX_L;
 // For disassembler
 let isCodeGenOnly = 1 in {
   def rr_REV : FMA4<opc, MRMSrcReg, (outs VR128:$dst),
@@ -302,7 +302,8 @@
   def rrY_REV : FMA4<opc, MRMSrcReg, (outs VR256:$dst),
                 (ins VR256:$src1, VR256:$src2, VR256:$src3),
                 !strconcat(OpcodeStr,
-                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>;
+                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>,
+                VEX_L;
 } // isCodeGenOnly = 1
 }
 

Modified: llvm/branches/R600/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/X86/X86InstrInfo.cpp?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/branches/R600/lib/Target/X86/X86InstrInfo.cpp Fri Sep 21 12:29:50 2012
@@ -2266,7 +2266,7 @@
 }
 
-/// getCondFromCmovOpc - return condition code of a CMov opcode.
+/// getCondFromCMovOpc - return condition code of a CMov opcode.
-static X86::CondCode getCondFromCMovOpc(unsigned Opc) {
+X86::CondCode X86::getCondFromCMovOpc(unsigned Opc) {
   switch (Opc) {
   default: return X86::COND_INVALID;
   case X86::CMOVA16rm:  case X86::CMOVA16rr:  case X86::CMOVA32rm:
@@ -3314,7 +3314,7 @@
         if (OldCC != X86::COND_INVALID)
           OpcIsSET = true;
         else
-          OldCC = getCondFromCMovOpc(Instr.getOpcode());
+          OldCC = X86::getCondFromCMovOpc(Instr.getOpcode());
       }
       if (OldCC == X86::COND_INVALID) return false;
     }

Modified: llvm/branches/R600/lib/Target/X86/X86InstrInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/X86/X86InstrInfo.h?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/X86/X86InstrInfo.h (original)
+++ llvm/branches/R600/lib/Target/X86/X86InstrInfo.h Fri Sep 21 12:29:50 2012
@@ -61,6 +61,9 @@
   // Turn condition code into conditional branch opcode.
   unsigned GetCondBranchFromCond(CondCode CC);
 
+  // Turn CMov opcode into condition code.
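+  // For example, getCondFromCMovOpc(X86::CMOVA32rr) returns X86::COND_A.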
+  CondCode getCondFromCMovOpc(unsigned Opc);
+
   /// GetOppositeBranchCondition - Return the inverse of the specified cond,
   /// e.g. turning COND_E to COND_NE.
   CondCode GetOppositeBranchCondition(X86::CondCode CC);

Modified: llvm/branches/R600/lib/Target/X86/X86InstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/X86/X86InstrInfo.td?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/X86/X86InstrInfo.td (original)
+++ llvm/branches/R600/lib/Target/X86/X86InstrInfo.td Fri Sep 21 12:29:50 2012
@@ -1266,28 +1266,46 @@
 // Atomic support
 //
 
-
 // Atomic swap. These are just normal xchg instructions. But since a memory
 // operand is referenced, the atomicity is ensured.
-let Constraints = "$val = $dst" in {
-def XCHG8rm  : I<0x86, MRMSrcMem, (outs GR8:$dst), (ins GR8:$val, i8mem:$ptr),
-               "xchg{b}\t{$val, $ptr|$ptr, $val}",
-               [(set GR8:$dst, (atomic_swap_8 addr:$ptr, GR8:$val))],
-               IIC_XCHG_MEM>;
-def XCHG16rm : I<0x87, MRMSrcMem, (outs GR16:$dst),(ins GR16:$val, i16mem:$ptr),
-               "xchg{w}\t{$val, $ptr|$ptr, $val}",
-               [(set GR16:$dst, (atomic_swap_16 addr:$ptr, GR16:$val))],
-               IIC_XCHG_MEM>,
-                OpSize;
-def XCHG32rm : I<0x87, MRMSrcMem, (outs GR32:$dst),(ins GR32:$val, i32mem:$ptr),
-               "xchg{l}\t{$val, $ptr|$ptr, $val}",
-               [(set GR32:$dst, (atomic_swap_32 addr:$ptr, GR32:$val))],
-               IIC_XCHG_MEM>;
-def XCHG64rm : RI<0x87, MRMSrcMem, (outs GR64:$dst),(ins GR64:$val,i64mem:$ptr),
-                  "xchg{q}\t{$val, $ptr|$ptr, $val}",
-                  [(set GR64:$dst, (atomic_swap_64 addr:$ptr, GR64:$val))],
-                  IIC_XCHG_MEM>;
+multiclass ATOMIC_SWAP<bits<8> opc8, bits<8> opc, string mnemonic, string frag,
+                       InstrItinClass itin> {
+  let Constraints = "$val = $dst" in {
+    def #NAME#8rm  : I<opc8, MRMSrcMem, (outs GR8:$dst),
+                       (ins GR8:$val, i8mem:$ptr),
+                       !strconcat(mnemonic, "{b}\t{$val, $ptr|$ptr, $val}"),
+                       [(set
+                          GR8:$dst,
+                          (!cast<PatFrag>(frag # "_8") addr:$ptr, GR8:$val))],
+                       itin>;
+    def #NAME#16rm : I<opc, MRMSrcMem, (outs GR16:$dst),
+                       (ins GR16:$val, i16mem:$ptr),
+                       !strconcat(mnemonic, "{w}\t{$val, $ptr|$ptr, $val}"),
+                       [(set
+                          GR16:$dst,
+                          (!cast<PatFrag>(frag # "_16") addr:$ptr, GR16:$val))],
+                       itin>, OpSize;
+    def #NAME#32rm : I<opc, MRMSrcMem, (outs GR32:$dst),
+                       (ins GR32:$val, i32mem:$ptr),
+                       !strconcat(mnemonic, "{l}\t{$val, $ptr|$ptr, $val}"),
+                       [(set
+                          GR32:$dst,
+                          (!cast<PatFrag>(frag # "_32") addr:$ptr, GR32:$val))],
+                       itin>;
+    def #NAME#64rm : RI<opc, MRMSrcMem, (outs GR64:$dst),
+                        (ins GR64:$val, i64mem:$ptr),
+                        !strconcat(mnemonic, "{q}\t{$val, $ptr|$ptr, $val}"),
+                        [(set
+                          GR64:$dst,
+                          (!cast<PatFrag>(frag # "_64") addr:$ptr, GR64:$val))],
+                        itin>;
+  }
+}
 
+defm XCHG    : ATOMIC_SWAP<0x86, 0x87, "xchg", "atomic_swap", IIC_XCHG_MEM>;
+
+// Swap between registers.
+let Constraints = "$val = $dst" in {
 def XCHG8rr : I<0x86, MRMSrcReg, (outs GR8:$dst), (ins GR8:$val, GR8:$src),
                 "xchg{b}\t{$val, $src|$src, $val}", [], IIC_XCHG_REG>;
 def XCHG16rr : I<0x87, MRMSrcReg, (outs GR16:$dst), (ins GR16:$val, GR16:$src),
@@ -1298,6 +1316,7 @@
                   "xchg{q}\t{$val, $src|$src, $val}", [], IIC_XCHG_REG>;
 }
 
+// Swap between EAX and other registers.
 def XCHG16ar : I<0x90, AddRegFrm, (outs), (ins GR16:$src),
                   "xchg{w}\t{$src, %ax|AX, $src}", [], IIC_XCHG_REG>, OpSize;
 def XCHG32ar : I<0x90, AddRegFrm, (outs), (ins GR32:$src),
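
The ATOMIC_SWAP multiclass above is a pure refactor: the single
"defm XCHG : ATOMIC_SWAP<...>" expands through the #NAME# paste operator into
the same XCHG8rm/XCHG16rm/XCHG32rm/XCHG64rm records that the deleted block
spelled out by hand. The comment about atomicity holds because an XCHG with a
memory operand is implicitly locked on x86, so these patterns can select
atomic swaps directly. A minimal C++ sketch of code that ends up selecting
one of them:

    #include <atomic>

    int swapIn(std::atomic<int> &slot, int v) {
      // A seq_cst exchange on x86-64 typically compiles to a bare
      // "xchgl %esi, (%rdi)"; no LOCK prefix is needed because XCHG
      // with a memory operand is implicitly locked.
      return slot.exchange(v);
    }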

Modified: llvm/branches/R600/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/X86/X86InstrSSE.td?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/branches/R600/lib/Target/X86/X86InstrSSE.td Fri Sep 21 12:29:50 2012
@@ -813,16 +813,16 @@
 
 defm VMOVAPSY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv8f32,
                               "movaps", SSEPackedSingle, SSE_MOVA_ITINS>,
-                              TB, VEX;
+                              TB, VEX, VEX_L;
 defm VMOVAPDY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv4f64,
                               "movapd", SSEPackedDouble, SSE_MOVA_ITINS>,
-                              TB, OpSize, VEX;
+                              TB, OpSize, VEX, VEX_L;
 defm VMOVUPSY : sse12_mov_packed<0x10, VR256, f256mem, loadv8f32,
                               "movups", SSEPackedSingle, SSE_MOVU_ITINS>,
-                              TB, VEX;
+                              TB, VEX, VEX_L;
 defm VMOVUPDY : sse12_mov_packed<0x10, VR256, f256mem, loadv4f64,
                               "movupd", SSEPackedDouble, SSE_MOVU_ITINS, 0>,
-                              TB, OpSize, VEX;
+                              TB, OpSize, VEX, VEX_L;
 defm MOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32,
                               "movaps", SSEPackedSingle, SSE_MOVA_ITINS>,
                               TB;
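
The X86InstrSSE.td changes from here on are mechanical: every 256-bit
(Y-suffixed) AVX/AVX2 definition gains the VEX_L adjunct so the encoder sets
the VEX.L bit, which is what distinguishes the YMM form of an opcode from its
XMM form. (VEX_4V, by contrast, marks instructions that encode an extra
source register in the VEX.vvvv field.) A standalone sketch of where L lives
in the two-byte VEX prefix; this models the encoding, it is not the LLVM
encoder:

    #include <cstdint>
    #include <cstdio>

    // Second byte of a two-byte (0xC5) VEX prefix: ~R, ~vvvv, L, pp.
    static uint8_t vexByte1(unsigned R, unsigned vvvv, unsigned L,
                            unsigned pp) {
      return uint8_t(((~R & 1) << 7) | ((~vvvv & 0xF) << 3) |
                     ((L & 1) << 2) | (pp & 3));
    }

    int main() {
      // "vmovaps %xmm1, %xmm0" encodes as C5 F8 28 C1; the YMM form
      // (C5 FC 28 C1) differs only in the L bit.
      std::printf("L=0: %02x  L=1: %02x\n", vexByte1(0, 0, 0, 0),
                  vexByte1(0, 0, 1, 0));  // prints "L=0: f8  L=1: fc"
    }
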
@@ -855,19 +855,19 @@
 def VMOVAPSYmr : VPSI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
                    "movaps\t{$src, $dst|$dst, $src}",
                    [(alignedstore256 (v8f32 VR256:$src), addr:$dst)],
-                   IIC_SSE_MOVA_P_MR>, VEX;
+                   IIC_SSE_MOVA_P_MR>, VEX, VEX_L;
 def VMOVAPDYmr : VPDI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
                    "movapd\t{$src, $dst|$dst, $src}",
                    [(alignedstore256 (v4f64 VR256:$src), addr:$dst)],
-                   IIC_SSE_MOVA_P_MR>, VEX;
+                   IIC_SSE_MOVA_P_MR>, VEX, VEX_L;
 def VMOVUPSYmr : VPSI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
                    "movups\t{$src, $dst|$dst, $src}",
                    [(store (v8f32 VR256:$src), addr:$dst)],
-                   IIC_SSE_MOVU_P_MR>, VEX;
+                   IIC_SSE_MOVU_P_MR>, VEX, VEX_L;
 def VMOVUPDYmr : VPDI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
                    "movupd\t{$src, $dst|$dst, $src}",
                    [(store (v4f64 VR256:$src), addr:$dst)],
-                   IIC_SSE_MOVU_P_MR>, VEX;
+                   IIC_SSE_MOVU_P_MR>, VEX, VEX_L;
 
 // For disassembler
 let isCodeGenOnly = 1 in {
@@ -890,19 +890,19 @@
   def VMOVAPSYrr_REV : VPSI<0x29, MRMDestReg, (outs VR256:$dst),
                             (ins VR256:$src),
                             "movaps\t{$src, $dst|$dst, $src}", [],
-                            IIC_SSE_MOVA_P_RR>, VEX;
+                            IIC_SSE_MOVA_P_RR>, VEX, VEX_L;
   def VMOVAPDYrr_REV : VPDI<0x29, MRMDestReg, (outs VR256:$dst),
                             (ins VR256:$src),
                             "movapd\t{$src, $dst|$dst, $src}", [],
-                            IIC_SSE_MOVA_P_RR>, VEX;
+                            IIC_SSE_MOVA_P_RR>, VEX, VEX_L;
   def VMOVUPSYrr_REV : VPSI<0x11, MRMDestReg, (outs VR256:$dst),
                             (ins VR256:$src),
                             "movups\t{$src, $dst|$dst, $src}", [],
-                            IIC_SSE_MOVU_P_RR>, VEX;
+                            IIC_SSE_MOVU_P_RR>, VEX, VEX_L;
   def VMOVUPDYrr_REV : VPDI<0x11, MRMDestReg, (outs VR256:$dst),
                             (ins VR256:$src),
                             "movupd\t{$src, $dst|$dst, $src}", [],
-                            IIC_SSE_MOVU_P_RR>, VEX;
+                            IIC_SSE_MOVU_P_RR>, VEX, VEX_L;
 }
 
 let Predicates = [HasAVX] in {
@@ -1659,7 +1659,7 @@
 defm VCVTDQ2PSY  : sse12_cvt_p<0x5B, VR256, VR256, i256mem,
                                "vcvtdq2ps\t{$src, $dst|$dst, $src}",
                                SSEPackedSingle, SSE_CVT_PS>,
-                               TB, VEX, Requires<[HasAVX]>;
+                               TB, VEX, VEX_L, Requires<[HasAVX]>;
 
 defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, i128mem,
                             "cvtdq2ps\t{$src, $dst|$dst, $src}",
@@ -1814,12 +1814,12 @@
                         "cvtps2dq\t{$src, $dst|$dst, $src}",
                         [(set VR256:$dst,
                           (int_x86_avx_cvt_ps2dq_256 VR256:$src))],
-                        IIC_SSE_CVT_PS_RR>, VEX;
+                        IIC_SSE_CVT_PS_RR>, VEX, VEX_L;
 def VCVTPS2DQYrm : VPDI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
                         "cvtps2dq\t{$src, $dst|$dst, $src}",
                         [(set VR256:$dst,
                           (int_x86_avx_cvt_ps2dq_256 (memopv8f32 addr:$src)))],
-                        IIC_SSE_CVT_PS_RM>, VEX;
+                        IIC_SSE_CVT_PS_RM>, VEX, VEX_L;
 def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                      "cvtps2dq\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))],
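
VCVTPS2DQYrr/rm above are the 256-bit vcvtps2dq and likewise gain VEX_L. A
short sketch of the AVX intrinsic that maps to this instruction:

    #include <immintrin.h>

    __m256i roundToInt32(__m256 v) {
      // vcvtps2dq (YMM form): 8 floats -> 8 int32s, rounded per the
      // current MXCSR rounding mode (round-to-nearest-even by default).
      return _mm256_cvtps_epi32(v);
    }
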
@@ -1853,7 +1853,7 @@
 def VCVTPD2DQYrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
                        "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
-                         (int_x86_avx_cvt_pd2dq_256 VR256:$src))]>, VEX;
+                         (int_x86_avx_cvt_pd2dq_256 VR256:$src))]>, VEX, VEX_L;
 def VCVTPD2DQYrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
                        "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
@@ -1889,12 +1889,12 @@
                           "cvttps2dq\t{$src, $dst|$dst, $src}",
                           [(set VR256:$dst,
                             (int_x86_avx_cvtt_ps2dq_256 VR256:$src))],
-                          IIC_SSE_CVT_PS_RR>, VEX;
+                          IIC_SSE_CVT_PS_RR>, VEX, VEX_L;
 def VCVTTPS2DQYrm : VS2SI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
                           "cvttps2dq\t{$src, $dst|$dst, $src}",
                           [(set VR256:$dst, (int_x86_avx_cvtt_ps2dq_256
                                              (memopv8f32 addr:$src)))],
-                          IIC_SSE_CVT_PS_RM>, VEX;
+                          IIC_SSE_CVT_PS_RM>, VEX, VEX_L;
 
 def CVTTPS2DQrr : S2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                        "cvttps2dq\t{$src, $dst|$dst, $src}",
@@ -1974,7 +1974,7 @@
                          "cvttpd2dq{y}\t{$src, $dst|$dst, $src}",
                          [(set VR128:$dst,
                            (int_x86_avx_cvtt_pd2dq_256 VR256:$src))],
-                         IIC_SSE_CVT_PD_RR>, VEX;
+                         IIC_SSE_CVT_PD_RR>, VEX, VEX_L;
 def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
                          "cvttpd2dq{y}\t{$src, $dst|$dst, $src}",
                          [(set VR128:$dst,
@@ -2015,12 +2015,12 @@
                      "vcvtps2pd\t{$src, $dst|$dst, $src}",
                      [(set VR256:$dst,
                        (int_x86_avx_cvt_ps2_pd_256 VR128:$src))],
-                     IIC_SSE_CVT_PD_RR>, TB, VEX;
+                     IIC_SSE_CVT_PD_RR>, TB, VEX, VEX_L;
 def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
                      "vcvtps2pd\t{$src, $dst|$dst, $src}",
                      [(set VR256:$dst,
                        (int_x86_avx_cvt_ps2_pd_256 (memopv4f32 addr:$src)))],
-                     IIC_SSE_CVT_PD_RM>, TB, VEX;
+                     IIC_SSE_CVT_PD_RM>, TB, VEX, VEX_L;
 }
 
 let Predicates = [UseSSE2] in {
@@ -2048,11 +2048,11 @@
                      "vcvtdq2pd\t{$src, $dst|$dst, $src}",
                      [(set VR256:$dst,
                        (int_x86_avx_cvtdq2_pd_256
-                        (bitconvert (memopv2i64 addr:$src))))]>, VEX;
+                        (bitconvert (memopv2i64 addr:$src))))]>, VEX, VEX_L;
 def VCVTDQ2PDYrr  : S2SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
                      "vcvtdq2pd\t{$src, $dst|$dst, $src}",
                      [(set VR256:$dst,
-                       (int_x86_avx_cvtdq2_pd_256 VR128:$src))]>, VEX;
+                       (int_x86_avx_cvtdq2_pd_256 VR128:$src))]>, VEX, VEX_L;
 }
 
 let neverHasSideEffects = 1, mayLoad = 1 in
@@ -2095,7 +2095,7 @@
                         "cvtpd2ps{y}\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst,
                           (int_x86_avx_cvt_pd2_ps_256 VR256:$src))],
-                        IIC_SSE_CVT_PD_RR>, VEX;
+                        IIC_SSE_CVT_PD_RR>, VEX, VEX_L;
 def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
                         "cvtpd2ps{y}\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst,
@@ -2329,11 +2329,11 @@
 defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, AVXCC, int_x86_avx_cmp_ps_256,
                "cmp${cc}ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                "cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
-               SSEPackedSingle>, TB, VEX_4V;
+               SSEPackedSingle>, TB, VEX_4V, VEX_L;
 defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, AVXCC, int_x86_avx_cmp_pd_256,
                "cmp${cc}pd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                "cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
-               SSEPackedDouble>, TB, OpSize, VEX_4V;
+               SSEPackedDouble>, TB, OpSize, VEX_4V, VEX_L;
 let Constraints = "$src1 = $dst" in {
   defm CMPPS : sse12_cmp_packed<VR128, f128mem, SSECC, int_x86_sse_cmp_ps,
                  "cmp${cc}ps\t{$src2, $dst|$dst, $src2}",
@@ -2403,13 +2403,13 @@
            memopv4f32, SSEPackedSingle>, TB, VEX_4V;
 defm VSHUFPSY : sse12_shuffle<VR256, f256mem, v8f32,
            "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
-           memopv8f32, SSEPackedSingle>, TB, VEX_4V;
+           memopv8f32, SSEPackedSingle>, TB, VEX_4V, VEX_L;
 defm VSHUFPD  : sse12_shuffle<VR128, f128mem, v2f64,
            "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src2, $src2, $src3}",
            memopv2f64, SSEPackedDouble>, TB, OpSize, VEX_4V;
 defm VSHUFPDY : sse12_shuffle<VR256, f256mem, v4f64,
            "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src2, $src2, $src3}",
-           memopv4f64, SSEPackedDouble>, TB, OpSize, VEX_4V;
+           memopv4f64, SSEPackedDouble>, TB, OpSize, VEX_4V, VEX_L;
 
 let Constraints = "$src1 = $dst" in {
   defm SHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
@@ -2503,16 +2503,16 @@
 
 defm VUNPCKHPSY: sse12_unpack_interleave<0x15, X86Unpckh, v8f32, memopv8f32,
       VR256, f256mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                     SSEPackedSingle>, TB, VEX_4V;
+                     SSEPackedSingle>, TB, VEX_4V, VEX_L;
 defm VUNPCKHPDY: sse12_unpack_interleave<0x15, X86Unpckh, v4f64, memopv4f64,
       VR256, f256mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                     SSEPackedDouble>, TB, OpSize, VEX_4V;
+                     SSEPackedDouble>, TB, OpSize, VEX_4V, VEX_L;
 defm VUNPCKLPSY: sse12_unpack_interleave<0x14, X86Unpckl, v8f32, memopv8f32,
       VR256, f256mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                     SSEPackedSingle>, TB, VEX_4V;
+                     SSEPackedSingle>, TB, VEX_4V, VEX_L;
 defm VUNPCKLPDY: sse12_unpack_interleave<0x14, X86Unpckl, v4f64, memopv4f64,
       VR256, f256mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                     SSEPackedDouble>, TB, OpSize, VEX_4V;
+                     SSEPackedDouble>, TB, OpSize, VEX_4V, VEX_L;
 
 let Constraints = "$src1 = $dst" in {
   defm UNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, memopv4f32,
@@ -2589,10 +2589,11 @@
                                         "movmskpd", SSEPackedDouble>, TB,
                                         OpSize, VEX;
   defm VMOVMSKPSY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_ps_256,
-                                        "movmskps", SSEPackedSingle>, TB, VEX;
+                                        "movmskps", SSEPackedSingle>, TB,
+                                        VEX, VEX_L;
   defm VMOVMSKPDY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_pd_256,
                                         "movmskpd", SSEPackedDouble>, TB,
-                                        OpSize, VEX;
+                                        OpSize, VEX, VEX_L;
 
   def : Pat<(i32 (X86fgetsign FR32:$src)),
             (VMOVMSKPSrr32 (COPY_TO_REGCLASS FR32:$src, VR128))>;
@@ -2613,11 +2614,11 @@
              OpSize, VEX;
   def VMOVMSKPSYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
              "movmskps\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK,
-             SSEPackedSingle>, TB, VEX;
+             SSEPackedSingle>, TB, VEX, VEX_L;
   def VMOVMSKPDYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
              "movmskpd\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK,
              SSEPackedDouble>, TB,
-             OpSize, VEX;
+             OpSize, VEX, VEX_L;
 }
 
 defm MOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps, "movmskps",
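
vmovmskps/vmovmskpd pack the per-element sign bits into a general-purpose
register; the Y variants above now set VEX.L as well. A small usage sketch
via the AVX intrinsics (function name illustrative):

    #include <immintrin.h>

    bool anyNegative(const float *p) {
      __m256 v = _mm256_loadu_ps(p);        // vmovups, YMM form (VEX.L = 1)
      return _mm256_movemask_ps(v) != 0;    // vmovmskps: 8 sign bits -> int
    }
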
@@ -2695,13 +2696,13 @@
 
 let Predicates = [HasAVX2] in {
 defm VPANDY : PDI_binop_rm<0xDB, "vpand", and, v4i64, VR256, memopv4i64,
-                           i256mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V;
+                           i256mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V, VEX_L;
 defm VPORY  : PDI_binop_rm<0xEB, "vpor", or, v4i64, VR256, memopv4i64,
-                           i256mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V;
+                           i256mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V, VEX_L;
 defm VPXORY : PDI_binop_rm<0xEF, "vpxor", xor, v4i64, VR256, memopv4i64,
-                           i256mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V;
+                           i256mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V, VEX_L;
 defm VPANDNY : PDI_binop_rm<0xDF, "vpandn", X86andnp, v4i64, VR256, memopv4i64,
-                            i256mem, SSE_BIT_ITINS_P, 0, 0>, VEX_4V;
+                            i256mem, SSE_BIT_ITINS_P, 0, 0>, VEX_4V, VEX_L;
 }
 
 //===----------------------------------------------------------------------===//
@@ -2787,7 +2788,7 @@
           !strconcat(OpcodeStr, "ps"), f256mem,
           [(set VR256:$dst, (v4i64 (OpNode VR256:$src1, VR256:$src2)))],
           [(set VR256:$dst, (OpNode (bc_v4i64 (v8f32 VR256:$src1)),
-                                    (memopv4i64 addr:$src2)))], 0>, TB, VEX_4V;
+                             (memopv4i64 addr:$src2)))], 0>, TB, VEX_4V, VEX_L;
 
     defm PDY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedDouble,
           !strconcat(OpcodeStr, "pd"), f256mem,
@@ -2795,7 +2796,7 @@
                                     (bc_v4i64 (v4f64 VR256:$src2))))],
           [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)),
                                     (memopv4i64 addr:$src2)))], 0>,
-                                    TB, OpSize, VEX_4V;
+                                    TB, OpSize, VEX_4V, VEX_L;
 }
 
 // AVX 256-bit packed logical ops forms
@@ -2854,10 +2855,10 @@
                                     SizeItins itins> {
   defm PSY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR256,
                 v8f32, f256mem, memopv8f32, SSEPackedSingle, itins.s, 0>,
-                TB;
+                TB, VEX_L;
   defm PDY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR256,
                 v4f64, f256mem, memopv4f64, SSEPackedDouble, itins.d, 0>,
-                TB, OpSize;
+                TB, OpSize, VEX_L;
 }
 
 multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr,
@@ -2889,11 +2890,11 @@
                                         SizeItins itins> {
   defm PSY : sse12_fp_packed_int<opc, OpcodeStr, VR256,
      !strconcat(OpcodeStr, "ps"), "avx", "_ps_256", f256mem, memopv8f32,
-      SSEPackedSingle, itins.s, 0>, TB;
+      SSEPackedSingle, itins.s, 0>, TB, VEX_L;
 
   defm PDY : sse12_fp_packed_int<opc, OpcodeStr, VR256,
      !strconcat(OpcodeStr, "pd"), "avx", "_pd_256", f256mem, memopv4f64,
-      SSEPackedDouble, itins.d, 0>, TB, OpSize;
+      SSEPackedDouble, itins.d, 0>, TB, OpSize, VEX_L;
 }
 
 // Binary Arithmetic instructions
@@ -3063,11 +3064,11 @@
   def PSYr : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
               !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
               [(set VR256:$dst, (v8f32 (OpNode VR256:$src)))],
-              itins.rr>;
+              itins.rr>, VEX_L;
   def PSYm : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
                 !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
                 [(set VR256:$dst, (OpNode (memopv8f32 addr:$src)))],
-                itins.rm>;
+                itins.rm>, VEX_L;
 }
 
 /// sse1_fp_unop_p_int - SSE1 intrinsics unops in packed forms.
@@ -3089,11 +3090,11 @@
   def PSYr_Int : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                     !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
                     [(set VR256:$dst, (V4F32Int VR256:$src))],
-                    itins.rr>;
+                    itins.rr>, VEX_L;
   def PSYm_Int : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
                     !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
                     [(set VR256:$dst, (V4F32Int (memopv8f32 addr:$src)))],
-                    itins.rm>;
+                    itins.rm>, VEX_L;
 }
 
 /// sse2_fp_unop_s - SSE2 unops in scalar form.
@@ -3149,11 +3150,11 @@
   def PDYr : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
               !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
               [(set VR256:$dst, (v4f64 (OpNode VR256:$src)))],
-              itins.rr>;
+              itins.rr>, VEX_L;
   def PDYm : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
                 !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
                 [(set VR256:$dst, (OpNode (memopv4f64 addr:$src)))],
-                itins.rm>;
+                itins.rm>, VEX_L;
 }
 
 /// sse2_fp_unop_p_int - SSE2 intrinsic unops in vector forms.
@@ -3175,11 +3176,11 @@
   def PDYr_Int : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                     !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
                     [(set VR256:$dst, (V2F64Int VR256:$src))],
-                    itins.rr>;
+                    itins.rr>, VEX_L;
   def PDYm_Int : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
                     !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
                     [(set VR256:$dst, (V2F64Int (memopv4f64 addr:$src)))],
-                    itins.rm>;
+                    itins.rm>, VEX_L;
 }
 
 let Predicates = [HasAVX] in {
@@ -3331,20 +3332,20 @@
                        "movntps\t{$src, $dst|$dst, $src}",
                        [(alignednontemporalstore (v8f32 VR256:$src),
                                                  addr:$dst)],
-                                                 IIC_SSE_MOVNT>, VEX;
+                                                 IIC_SSE_MOVNT>, VEX, VEX_L;
   def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs),
                        (ins f256mem:$dst, VR256:$src),
                        "movntpd\t{$src, $dst|$dst, $src}",
                        [(alignednontemporalstore (v4f64 VR256:$src),
                                                  addr:$dst)],
-                                                 IIC_SSE_MOVNT>, VEX;
+                                                 IIC_SSE_MOVNT>, VEX, VEX_L;
   let ExeDomain = SSEPackedInt in
   def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs),
                       (ins f256mem:$dst, VR256:$src),
                       "movntdq\t{$src, $dst|$dst, $src}",
                       [(alignednontemporalstore (v4i64 VR256:$src),
                                                 addr:$dst)],
-                                                IIC_SSE_MOVNT>, VEX;
+                                                IIC_SSE_MOVNT>, VEX, VEX_L;
 }
 
 let AddedComplexity = 400 in { // Prefer non-temporal versions
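
The movnt* defs above are the non-temporal store forms, hence the
alignednontemporalstore patterns. A sketch of the matching AVX intrinsic,
assuming the destination is 32-byte aligned as the instruction requires:

    #include <immintrin.h>

    void streamStore(float *dst /* 32-byte aligned */, __m256 v) {
      _mm256_stream_ps(dst, v);  // vmovntps: non-temporal, bypasses the cache
    }
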
@@ -3453,14 +3454,14 @@
                     VEX;
 def VMOVDQAYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                     "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>,
-                    VEX;
+                    VEX, VEX_L;
 }
 def VMOVDQUrr  : VSSI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                     "movdqu\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVU_P_RR>,
                     VEX;
 def VMOVDQUYrr : VSSI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                     "movdqu\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVU_P_RR>,
-                    VEX;
+                    VEX, VEX_L;
 
 // For Disassembler
 let isCodeGenOnly = 1 in {
@@ -3470,16 +3471,14 @@
                         VEX;
 def VMOVDQAYrr_REV : VPDI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src),
                         "movdqa\t{$src, $dst|$dst, $src}", [],
-                        IIC_SSE_MOVA_P_RR>,
-                        VEX;
+                        IIC_SSE_MOVA_P_RR>, VEX, VEX_L;
 def VMOVDQUrr_REV  : VSSI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                         "movdqu\t{$src, $dst|$dst, $src}", [],
                         IIC_SSE_MOVU_P_RR>,
                         VEX;
 def VMOVDQUYrr_REV : VSSI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src),
                         "movdqu\t{$src, $dst|$dst, $src}", [],
-                        IIC_SSE_MOVU_P_RR>,
-                        VEX;
+                        IIC_SSE_MOVU_P_RR>, VEX, VEX_L;
 }
 
 let canFoldAsLoad = 1, mayLoad = 1 in {
@@ -3488,14 +3487,14 @@
                    VEX;
 def VMOVDQAYrm : VPDI<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
                    "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RM>,
-                   VEX;
+                   VEX, VEX_L;
 let Predicates = [HasAVX] in {
   def VMOVDQUrm  : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                     "vmovdqu\t{$src, $dst|$dst, $src}",[], IIC_SSE_MOVU_P_RM>,
                     XS, VEX;
   def VMOVDQUYrm : I<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
                     "vmovdqu\t{$src, $dst|$dst, $src}",[], IIC_SSE_MOVU_P_RM>,
-                    XS, VEX;
+                    XS, VEX, VEX_L;
 }
 }
 
@@ -3507,14 +3506,14 @@
 def VMOVDQAYmr : VPDI<0x7F, MRMDestMem, (outs),
                      (ins i256mem:$dst, VR256:$src),
                      "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_MR>,
-                     VEX;
+                     VEX, VEX_L;
 let Predicates = [HasAVX] in {
 def VMOVDQUmr  : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
                   "vmovdqu\t{$src, $dst|$dst, $src}",[], IIC_SSE_MOVU_P_MR>,
                   XS, VEX;
 def VMOVDQUYmr : I<0x7F, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src),
                   "vmovdqu\t{$src, $dst|$dst, $src}",[], IIC_SSE_MOVU_P_MR>,
-                  XS, VEX;
+                  XS, VEX, VEX_L;
 }
 }
 
@@ -3750,82 +3749,82 @@
 
 let Predicates = [HasAVX2] in {
 defm VPADDBY  : PDI_binop_rm<0xFC, "vpaddb", add, v32i8, VR256, memopv4i64,
-                             i256mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+                             i256mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
 defm VPADDWY  : PDI_binop_rm<0xFD, "vpaddw", add, v16i16, VR256, memopv4i64,
-                             i256mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+                             i256mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
 defm VPADDDY  : PDI_binop_rm<0xFE, "vpaddd", add, v8i32, VR256, memopv4i64,
-                             i256mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+                             i256mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
 defm VPADDQY  : PDI_binop_rm<0xD4, "vpaddq", add, v4i64, VR256, memopv4i64,
-                             i256mem, SSE_INTALUQ_ITINS_P, 1, 0>, VEX_4V;
+                             i256mem, SSE_INTALUQ_ITINS_P, 1, 0>, VEX_4V, VEX_L;
 defm VPMULLWY : PDI_binop_rm<0xD5, "vpmullw", mul, v16i16, VR256, memopv4i64,
-                             i256mem, SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V;
+                             i256mem, SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V, VEX_L;
 defm VPSUBBY  : PDI_binop_rm<0xF8, "vpsubb", sub, v32i8, VR256, memopv4i64,
-                             i256mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
+                             i256mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
 defm VPSUBWY  : PDI_binop_rm<0xF9, "vpsubw", sub, v16i16,VR256, memopv4i64,
-                             i256mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
+                             i256mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
 defm VPSUBDY  : PDI_binop_rm<0xFA, "vpsubd", sub, v8i32, VR256, memopv4i64,
-                             i256mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
+                             i256mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
 defm VPSUBQY  : PDI_binop_rm<0xFB, "vpsubq", sub, v4i64, VR256, memopv4i64,
-                             i256mem, SSE_INTALUQ_ITINS_P, 0, 0>, VEX_4V;
+                             i256mem, SSE_INTALUQ_ITINS_P, 0, 0>, VEX_4V, VEX_L;
 defm VPMULUDQY : PDI_binop_rm2<0xF4, "vpmuludq", X86pmuludq, v4i64, v8i32,
                                VR256, memopv4i64, i256mem,
-                               SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V;
+                               SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V, VEX_L;
 
 // Intrinsic forms
 defm VPSUBSBY  : PDI_binop_rm_int<0xE8, "vpsubsb" , int_x86_avx2_psubs_b,
                                   VR256, memopv4i64, i256mem,
-                                  SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
+                                  SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
 defm VPSUBSWY  : PDI_binop_rm_int<0xE9, "vpsubsw" , int_x86_avx2_psubs_w,
                                   VR256, memopv4i64, i256mem,
-                                  SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
+                                  SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
 defm VPSUBUSBY : PDI_binop_rm_int<0xD8, "vpsubusb", int_x86_avx2_psubus_b,
                                   VR256, memopv4i64, i256mem,
-                                  SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
+                                  SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
 defm VPSUBUSWY : PDI_binop_rm_int<0xD9, "vpsubusw", int_x86_avx2_psubus_w,
                                   VR256, memopv4i64, i256mem,
-                                  SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
+                                  SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
 defm VPADDSBY  : PDI_binop_rm_int<0xEC, "vpaddsb" , int_x86_avx2_padds_b,
                                   VR256, memopv4i64, i256mem,
-                                  SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+                                  SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
 defm VPADDSWY  : PDI_binop_rm_int<0xED, "vpaddsw" , int_x86_avx2_padds_w,
                                   VR256, memopv4i64, i256mem,
-                                  SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+                                  SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
 defm VPADDUSBY : PDI_binop_rm_int<0xDC, "vpaddusb", int_x86_avx2_paddus_b,
                                   VR256, memopv4i64, i256mem,
-                                  SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+                                  SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
 defm VPADDUSWY : PDI_binop_rm_int<0xDD, "vpaddusw", int_x86_avx2_paddus_w,
                                   VR256, memopv4i64, i256mem,
-                                  SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+                                  SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
 defm VPMULHUWY : PDI_binop_rm_int<0xE4, "vpmulhuw", int_x86_avx2_pmulhu_w,
                                   VR256, memopv4i64, i256mem,
-                                  SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V;
+                                  SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V, VEX_L;
 defm VPMULHWY  : PDI_binop_rm_int<0xE5, "vpmulhw" , int_x86_avx2_pmulh_w,
                                   VR256, memopv4i64, i256mem,
-                                  SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V;
+                                  SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V, VEX_L;
 defm VPMADDWDY : PDI_binop_rm_int<0xF5, "vpmaddwd", int_x86_avx2_pmadd_wd,
                                   VR256, memopv4i64, i256mem,
-                                  SSE_PMADD, 1, 0>, VEX_4V;
+                                  SSE_PMADD, 1, 0>, VEX_4V, VEX_L;
 defm VPAVGBY   : PDI_binop_rm_int<0xE0, "vpavgb", int_x86_avx2_pavg_b,
                                   VR256, memopv4i64, i256mem,
-                                  SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+                                  SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
 defm VPAVGWY   : PDI_binop_rm_int<0xE3, "vpavgw", int_x86_avx2_pavg_w,
                                   VR256, memopv4i64, i256mem,
-                                  SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+                                  SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
 defm VPMINUBY  : PDI_binop_rm_int<0xDA, "vpminub", int_x86_avx2_pminu_b,
                                   VR256, memopv4i64, i256mem,
-                                  SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+                                  SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
 defm VPMINSWY  : PDI_binop_rm_int<0xEA, "vpminsw", int_x86_avx2_pmins_w,
                                   VR256, memopv4i64, i256mem,
-                                  SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+                                  SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
 defm VPMAXUBY  : PDI_binop_rm_int<0xDE, "vpmaxub", int_x86_avx2_pmaxu_b,
                                   VR256, memopv4i64, i256mem,
-                                  SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+                                  SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
 defm VPMAXSWY  : PDI_binop_rm_int<0xEE, "vpmaxsw", int_x86_avx2_pmaxs_w,
                                   VR256, memopv4i64, i256mem,
-                                  SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+                                  SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
 defm VPSADBWY  : PDI_binop_rm_int<0xF6, "vpsadbw", int_x86_avx2_psad_bw,
                                   VR256, memopv4i64, i256mem,
-                                  SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+                                  SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
 }
 
 let Constraints = "$src1 = $dst" in {
@@ -3961,30 +3960,30 @@
 let Predicates = [HasAVX2] in {
 defm VPSLLWY : PDI_binop_rmi<0xF1, 0x71, MRM6r, "vpsllw", X86vshl, X86vshli,
                              VR256, v16i16, v8i16, bc_v8i16,
-                             SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
+                             SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
 defm VPSLLDY : PDI_binop_rmi<0xF2, 0x72, MRM6r, "vpslld", X86vshl, X86vshli,
                              VR256, v8i32, v4i32, bc_v4i32,
-                             SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
+                             SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
 defm VPSLLQY : PDI_binop_rmi<0xF3, 0x73, MRM6r, "vpsllq", X86vshl, X86vshli,
                              VR256, v4i64, v2i64, bc_v2i64,
-                             SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
+                             SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
 
 defm VPSRLWY : PDI_binop_rmi<0xD1, 0x71, MRM2r, "vpsrlw", X86vsrl, X86vsrli,
                              VR256, v16i16, v8i16, bc_v8i16,
-                             SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
+                             SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
 defm VPSRLDY : PDI_binop_rmi<0xD2, 0x72, MRM2r, "vpsrld", X86vsrl, X86vsrli,
                              VR256, v8i32, v4i32, bc_v4i32,
-                             SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
+                             SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
 defm VPSRLQY : PDI_binop_rmi<0xD3, 0x73, MRM2r, "vpsrlq", X86vsrl, X86vsrli,
                              VR256, v4i64, v2i64, bc_v2i64,
-                             SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
+                             SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
 
 defm VPSRAWY : PDI_binop_rmi<0xE1, 0x71, MRM4r, "vpsraw", X86vsra, X86vsrai,
                              VR256, v16i16, v8i16, bc_v8i16,
-                             SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
+                             SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
 defm VPSRADY : PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai,
                              VR256, v8i32, v4i32, bc_v4i32,
-                             SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
+                             SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
 
 let ExeDomain = SSEPackedInt in {
   // 256-bit logical shifts.
@@ -3993,13 +3992,13 @@
                     "vpslldq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     [(set VR256:$dst,
                       (int_x86_avx2_psll_dq_bs VR256:$src1, imm:$src2))]>,
-                    VEX_4V;
+                    VEX_4V, VEX_L;
   def VPSRLDQYri : PDIi8<0x73, MRM3r,
                     (outs VR256:$dst), (ins VR256:$src1, i32i8imm:$src2),
                     "vpsrldq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     [(set VR256:$dst,
                       (int_x86_avx2_psrl_dq_bs VR256:$src1, imm:$src2))]>,
-                    VEX_4V;
+                    VEX_4V, VEX_L;
   // PSRADQYri doesn't exist in SSE[1-3].
 }
 } // Predicates = [HasAVX2]
@@ -4113,22 +4112,22 @@
 let Predicates = [HasAVX2] in {
   defm VPCMPEQBY : PDI_binop_rm<0x74, "vpcmpeqb", X86pcmpeq, v32i8,
                                 VR256, memopv4i64, i256mem,
-                                SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+                                SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
   defm VPCMPEQWY : PDI_binop_rm<0x75, "vpcmpeqw", X86pcmpeq, v16i16,
                                 VR256, memopv4i64, i256mem,
-                                SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+                                SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
   defm VPCMPEQDY : PDI_binop_rm<0x76, "vpcmpeqd", X86pcmpeq, v8i32,
                                 VR256, memopv4i64, i256mem,
-                                SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+                                SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
   defm VPCMPGTBY : PDI_binop_rm<0x64, "vpcmpgtb", X86pcmpgt, v32i8,
                                 VR256, memopv4i64, i256mem,
-                                SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
+                                SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
   defm VPCMPGTWY : PDI_binop_rm<0x65, "vpcmpgtw", X86pcmpgt, v16i16,
                                 VR256, memopv4i64, i256mem,
-                                SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
+                                SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
   defm VPCMPGTDY : PDI_binop_rm<0x66, "vpcmpgtd", X86pcmpgt, v8i32,
                                 VR256, memopv4i64, i256mem,
-                                SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
+                                SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
 }
 
 let Constraints = "$src1 = $dst" in {
@@ -4171,13 +4170,13 @@
 let Predicates = [HasAVX2] in {
 defm VPACKSSWBY : PDI_binop_rm_int<0x63, "vpacksswb", int_x86_avx2_packsswb,
                                    VR256, memopv4i64, i256mem,
-                                   SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
+                                   SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
 defm VPACKSSDWY : PDI_binop_rm_int<0x6B, "vpackssdw", int_x86_avx2_packssdw,
                                    VR256, memopv4i64, i256mem,
-                                   SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
+                                   SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
 defm VPACKUSWBY : PDI_binop_rm_int<0x67, "vpackuswb", int_x86_avx2_packuswb,
                                    VR256, memopv4i64, i256mem,
-                                   SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
+                                   SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
 }
 
 let Constraints = "$src1 = $dst" in {
@@ -4247,9 +4246,12 @@
 }
 
 let Predicates = [HasAVX2] in {
-  defm VPSHUFD : sse2_pshuffle_y<"vpshufd", v8i32, X86PShufd>, TB, OpSize, VEX;
-  defm VPSHUFHW : sse2_pshuffle_y<"vpshufhw", v16i16, X86PShufhw>, XS, VEX;
-  defm VPSHUFLW : sse2_pshuffle_y<"vpshuflw", v16i16, X86PShuflw>, XD, VEX;
+  defm VPSHUFD : sse2_pshuffle_y<"vpshufd", v8i32, X86PShufd>,
+                                TB, OpSize, VEX,VEX_L;
+  defm VPSHUFHW : sse2_pshuffle_y<"vpshufhw", v16i16, X86PShufhw>,
+                                  XS, VEX, VEX_L;
+  defm VPSHUFLW : sse2_pshuffle_y<"vpshuflw", v16i16, X86PShuflw>,
+                                  XD, VEX, VEX_L;
 }
 
 let Predicates = [UseSSE2] in {
@@ -4328,22 +4330,22 @@
 
 let Predicates = [HasAVX2] in {
   defm VPUNPCKLBW  : sse2_unpack_y<0x60, "vpunpcklbw", v32i8, X86Unpckl,
-                                   bc_v32i8>, VEX_4V;
+                                   bc_v32i8>, VEX_4V, VEX_L;
   defm VPUNPCKLWD  : sse2_unpack_y<0x61, "vpunpcklwd", v16i16, X86Unpckl,
-                                   bc_v16i16>, VEX_4V;
+                                   bc_v16i16>, VEX_4V, VEX_L;
   defm VPUNPCKLDQ  : sse2_unpack_y<0x62, "vpunpckldq", v8i32, X86Unpckl,
-                                   bc_v8i32>, VEX_4V;
+                                   bc_v8i32>, VEX_4V, VEX_L;
   defm VPUNPCKLQDQ : sse2_unpack_y<0x6C, "vpunpcklqdq", v4i64, X86Unpckl,
-                                   bc_v4i64>, VEX_4V;
+                                   bc_v4i64>, VEX_4V, VEX_L;
 
   defm VPUNPCKHBW  : sse2_unpack_y<0x68, "vpunpckhbw", v32i8, X86Unpckh,
-                                   bc_v32i8>, VEX_4V;
+                                   bc_v32i8>, VEX_4V, VEX_L;
   defm VPUNPCKHWD  : sse2_unpack_y<0x69, "vpunpckhwd", v16i16, X86Unpckh,
-                                   bc_v16i16>, VEX_4V;
+                                   bc_v16i16>, VEX_4V, VEX_L;
   defm VPUNPCKHDQ  : sse2_unpack_y<0x6A, "vpunpckhdq", v8i32, X86Unpckh,
-                                   bc_v8i32>, VEX_4V;
+                                   bc_v8i32>, VEX_4V, VEX_L;
   defm VPUNPCKHQDQ : sse2_unpack_y<0x6D, "vpunpckhqdq", v4i64, X86Unpckh,
-                                   bc_v4i64>, VEX_4V;
+                                   bc_v4i64>, VEX_4V, VEX_L;
 }
 
 let Constraints = "$src1 = $dst" in {
@@ -4435,9 +4437,9 @@
 let Predicates = [HasAVX2] in {
 def VPMOVMSKBYrr  : VPDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR256:$src),
            "pmovmskb\t{$src, $dst|$dst, $src}",
-           [(set GR32:$dst, (int_x86_avx2_pmovmskb VR256:$src))]>, VEX;
+           [(set GR32:$dst, (int_x86_avx2_pmovmskb VR256:$src))]>, VEX, VEX_L;
 def VPMOVMSKBYr64r : VPDI<0xD7, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
-           "pmovmskb\t{$src, $dst|$dst, $src}", []>, VEX;
+           "pmovmskb\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L;
 }
 
 def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
@@ -4900,9 +4902,9 @@
   defm VMOVSLDUP  : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup",
                                        v4f32, VR128, memopv4f32, f128mem>, VEX;
   defm VMOVSHDUPY : sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup",
-                                       v8f32, VR256, memopv8f32, f256mem>, VEX;
+                                 v8f32, VR256, memopv8f32, f256mem>, VEX, VEX_L;
   defm VMOVSLDUPY : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup",
-                                       v8f32, VR256, memopv8f32, f256mem>, VEX;
+                                 v8f32, VR256, memopv8f32, f256mem>, VEX, VEX_L;
 }
 defm MOVSHDUP : sse3_replicate_sfp<0x16, X86Movshdup, "movshdup", v4f32, VR128,
                                    memopv4f32, f128mem>;
@@ -4970,7 +4972,7 @@
 
 let Predicates = [HasAVX] in {
   defm VMOVDDUP  : sse3_replicate_dfp<"vmovddup">, VEX;
-  defm VMOVDDUPY : sse3_replicate_dfp_y<"vmovddup">, VEX;
+  defm VMOVDDUPY : sse3_replicate_dfp_y<"vmovddup">, VEX, VEX_L;
 }
 
 defm MOVDDUP : sse3_replicate_dfp<"movddup">;
@@ -5019,7 +5021,8 @@
                    [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>, VEX;
   def VLDDQUYrm : S3DI<0xF0, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
                    "vlddqu\t{$src, $dst|$dst, $src}",
-                   [(set VR256:$dst, (int_x86_avx_ldu_dq_256 addr:$src))]>, VEX;
+                   [(set VR256:$dst, (int_x86_avx_ldu_dq_256 addr:$src))]>,
+                   VEX, VEX_L;
 }
 def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                    "lddqu\t{$src, $dst|$dst, $src}",
@@ -5052,13 +5055,13 @@
     defm VADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "vaddsubps", VR128,
                                  f128mem, SSE_ALU_F32P, 0>, TB, XD, VEX_4V;
     defm VADDSUBPSY : sse3_addsub<int_x86_avx_addsub_ps_256, "vaddsubps", VR256,
-                                 f256mem, SSE_ALU_F32P, 0>, TB, XD, VEX_4V;
+                               f256mem, SSE_ALU_F32P, 0>, TB, XD, VEX_4V, VEX_L;
   }
   let ExeDomain = SSEPackedDouble in {
     defm VADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "vaddsubpd", VR128,
                                  f128mem, SSE_ALU_F64P, 0>, TB, OpSize, VEX_4V;
     defm VADDSUBPDY : sse3_addsub<int_x86_avx_addsub_pd_256, "vaddsubpd", VR256,
-                                 f256mem, SSE_ALU_F64P, 0>, TB, OpSize, VEX_4V;
+                           f256mem, SSE_ALU_F64P, 0>, TB, OpSize, VEX_4V, VEX_L;
   }
 }
 let Constraints = "$src1 = $dst", Predicates = [UseSSE3] in {
@@ -5113,9 +5116,9 @@
     defm VHSUBPS  : S3D_Int<0x7D, "vhsubps", v4f32, VR128, f128mem,
                             X86fhsub, 0>, VEX_4V;
     defm VHADDPSY : S3D_Int<0x7C, "vhaddps", v8f32, VR256, f256mem,
-                            X86fhadd, 0>, VEX_4V;
+                            X86fhadd, 0>, VEX_4V, VEX_L;
     defm VHSUBPSY : S3D_Int<0x7D, "vhsubps", v8f32, VR256, f256mem,
-                            X86fhsub, 0>, VEX_4V;
+                            X86fhsub, 0>, VEX_4V, VEX_L;
   }
   let ExeDomain = SSEPackedDouble in {
     defm VHADDPD  : S3_Int <0x7C, "vhaddpd", v2f64, VR128, f128mem,
@@ -5123,9 +5126,9 @@
     defm VHSUBPD  : S3_Int <0x7D, "vhsubpd", v2f64, VR128, f128mem,
                             X86fhsub, 0>, VEX_4V;
     defm VHADDPDY : S3_Int <0x7C, "vhaddpd", v4f64, VR256, f256mem,
-                            X86fhadd, 0>, VEX_4V;
+                            X86fhadd, 0>, VEX_4V, VEX_L;
     defm VHSUBPDY : S3_Int <0x7D, "vhsubpd", v4f64, VR256, f256mem,
-                            X86fhsub, 0>, VEX_4V;
+                            X86fhsub, 0>, VEX_4V, VEX_L;
   }
 }
 
@@ -5191,11 +5194,11 @@
 
 let Predicates = [HasAVX2] in {
   defm VPABSB  : SS3I_unop_rm_int_y<0x1C, "vpabsb",
-                                    int_x86_avx2_pabs_b>, VEX;
+                                    int_x86_avx2_pabs_b>, VEX, VEX_L;
   defm VPABSW  : SS3I_unop_rm_int_y<0x1D, "vpabsw",
-                                    int_x86_avx2_pabs_w>, VEX;
+                                    int_x86_avx2_pabs_w>, VEX, VEX_L;
   defm VPABSD  : SS3I_unop_rm_int_y<0x1E, "vpabsd",
-                                    int_x86_avx2_pabs_d>, VEX;
+                                    int_x86_avx2_pabs_d>, VEX, VEX_L;
 }
 
 defm PABSB : SS3I_unop_rm_int<0x1C, "pabsb",
@@ -5334,37 +5337,37 @@
 let isCommutable = 0 in {
   defm VPHADDWY   : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v16i16, VR256,
                                   memopv4i64, i256mem,
-                                  SSE_PHADDSUBW, 0>, VEX_4V;
+                                  SSE_PHADDSUBW, 0>, VEX_4V, VEX_L;
   defm VPHADDDY   : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v8i32, VR256,
                                   memopv4i64, i256mem,
-                                  SSE_PHADDSUBW, 0>, VEX_4V;
+                                  SSE_PHADDSUBW, 0>, VEX_4V, VEX_L;
   defm VPHSUBWY   : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v16i16, VR256,
                                   memopv4i64, i256mem,
-                                  SSE_PHADDSUBW, 0>, VEX_4V;
+                                  SSE_PHADDSUBW, 0>, VEX_4V, VEX_L;
   defm VPHSUBDY   : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v8i32, VR256,
                                   memopv4i64, i256mem,
-                                  SSE_PHADDSUBW, 0>, VEX_4V;
+                                  SSE_PHADDSUBW, 0>, VEX_4V, VEX_L;
   defm VPSIGNBY   : SS3I_binop_rm<0x08, "vpsignb", X86psign, v32i8, VR256,
                                   memopv4i64, i256mem,
-                                  SSE_PHADDSUBW, 0>, VEX_4V;
+                                  SSE_PHADDSUBW, 0>, VEX_4V, VEX_L;
   defm VPSIGNWY   : SS3I_binop_rm<0x09, "vpsignw", X86psign, v16i16, VR256,
                                   memopv4i64, i256mem,
-                                  SSE_PHADDSUBW, 0>, VEX_4V;
+                                  SSE_PHADDSUBW, 0>, VEX_4V, VEX_L;
   defm VPSIGNDY   : SS3I_binop_rm<0x0A, "vpsignd", X86psign, v8i32, VR256,
                                   memopv4i64, i256mem,
-                                  SSE_PHADDSUBW, 0>, VEX_4V;
+                                  SSE_PHADDSUBW, 0>, VEX_4V, VEX_L;
   defm VPSHUFBY   : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v32i8, VR256,
                                   memopv4i64, i256mem,
-                                  SSE_PHADDSUBW, 0>, VEX_4V;
+                                  SSE_PHADDSUBW, 0>, VEX_4V, VEX_L;
   defm VPHADDSW   : SS3I_binop_rm_int_y<0x03, "vphaddsw",
-                                        int_x86_avx2_phadd_sw>, VEX_4V;
+                                        int_x86_avx2_phadd_sw>, VEX_4V, VEX_L;
   defm VPHSUBSW   : SS3I_binop_rm_int_y<0x07, "vphsubsw",
-                                        int_x86_avx2_phsub_sw>, VEX_4V;
+                                        int_x86_avx2_phsub_sw>, VEX_4V, VEX_L;
   defm VPMADDUBSW : SS3I_binop_rm_int_y<0x04, "vpmaddubsw",
-                                        int_x86_avx2_pmadd_ub_sw>, VEX_4V;
+                                       int_x86_avx2_pmadd_ub_sw>, VEX_4V, VEX_L;
 }
 defm VPMULHRSW    : SS3I_binop_rm_int_y<0x0B, "vpmulhrsw",
-                                        int_x86_avx2_pmul_hr_sw>, VEX_4V;
+                                        int_x86_avx2_pmul_hr_sw>, VEX_4V, VEX_L;
 }
 
 // None of these have i8 immediate fields.
@@ -5443,7 +5446,7 @@
 let Predicates = [HasAVX] in
   defm VPALIGN : ssse3_palign<"vpalignr", 0>, VEX_4V;
 let Predicates = [HasAVX2] in
-  defm VPALIGN : ssse3_palign_y<"vpalignr", 0>, VEX_4V;
+  defm VPALIGN : ssse3_palign_y<"vpalignr", 0>, VEX_4V, VEX_L;
 let Constraints = "$src1 = $dst", Predicates = [UseSSSE3] in
   defm PALIGN : ssse3_palign<"palignr">;
 
@@ -5550,17 +5553,17 @@
 
 let Predicates = [HasAVX2] in {
 defm VPMOVSXBW : SS41I_binop_rm_int16_y<0x20, "vpmovsxbw",
-                                        int_x86_avx2_pmovsxbw>, VEX;
+                                        int_x86_avx2_pmovsxbw>, VEX, VEX_L;
 defm VPMOVSXWD : SS41I_binop_rm_int16_y<0x23, "vpmovsxwd",
-                                        int_x86_avx2_pmovsxwd>, VEX;
+                                        int_x86_avx2_pmovsxwd>, VEX, VEX_L;
 defm VPMOVSXDQ : SS41I_binop_rm_int16_y<0x25, "vpmovsxdq",
-                                        int_x86_avx2_pmovsxdq>, VEX;
+                                        int_x86_avx2_pmovsxdq>, VEX, VEX_L;
 defm VPMOVZXBW : SS41I_binop_rm_int16_y<0x30, "vpmovzxbw",
-                                        int_x86_avx2_pmovzxbw>, VEX;
+                                        int_x86_avx2_pmovzxbw>, VEX, VEX_L;
 defm VPMOVZXWD : SS41I_binop_rm_int16_y<0x33, "vpmovzxwd",
-                                        int_x86_avx2_pmovzxwd>, VEX;
+                                        int_x86_avx2_pmovzxwd>, VEX, VEX_L;
 defm VPMOVZXDQ : SS41I_binop_rm_int16_y<0x35, "vpmovzxdq",
-                                        int_x86_avx2_pmovzxdq>, VEX;
+                                        int_x86_avx2_pmovzxdq>, VEX, VEX_L;
 }
 
 defm PMOVSXBW   : SS41I_binop_rm_int8<0x20, "pmovsxbw", int_x86_sse41_pmovsxbw>;
@@ -5721,13 +5724,13 @@
 
 let Predicates = [HasAVX2] in {
 defm VPMOVSXBD : SS41I_binop_rm_int8_y<0x21, "vpmovsxbd",
-                                       int_x86_avx2_pmovsxbd>, VEX;
+                                       int_x86_avx2_pmovsxbd>, VEX, VEX_L;
 defm VPMOVSXWQ : SS41I_binop_rm_int8_y<0x24, "vpmovsxwq",
-                                       int_x86_avx2_pmovsxwq>, VEX;
+                                       int_x86_avx2_pmovsxwq>, VEX, VEX_L;
 defm VPMOVZXBD : SS41I_binop_rm_int8_y<0x31, "vpmovzxbd",
-                                       int_x86_avx2_pmovzxbd>, VEX;
+                                       int_x86_avx2_pmovzxbd>, VEX, VEX_L;
 defm VPMOVZXWQ : SS41I_binop_rm_int8_y<0x34, "vpmovzxwq",
-                                       int_x86_avx2_pmovzxwq>, VEX;
+                                       int_x86_avx2_pmovzxwq>, VEX, VEX_L;
 }
 
 defm PMOVSXBD   : SS41I_binop_rm_int4<0x21, "pmovsxbd", int_x86_sse41_pmovsxbd>;
@@ -5796,9 +5799,9 @@
 }
 let Predicates = [HasAVX2] in {
 defm VPMOVSXBQ : SS41I_binop_rm_int4_y<0x22, "vpmovsxbq",
-                                       int_x86_avx2_pmovsxbq>, VEX;
+                                       int_x86_avx2_pmovsxbq>, VEX, VEX_L;
 defm VPMOVZXBQ : SS41I_binop_rm_int4_y<0x32, "vpmovzxbq",
-                                       int_x86_avx2_pmovzxbq>, VEX;
+                                       int_x86_avx2_pmovzxbq>, VEX, VEX_L;
 }
 defm PMOVSXBQ   : SS41I_binop_rm_int2<0x22, "pmovsxbq", int_x86_sse41_pmovsxbq>;
 defm PMOVZXBQ   : SS41I_binop_rm_int2<0x32, "pmovzxbq", int_x86_sse41_pmovzxbq>;
@@ -6209,7 +6212,7 @@
   defm VROUNDY : sse41_fp_unop_rm<0x08, 0x09, "vround", f256mem, VR256,
                                   memopv8f32, memopv4f64,
                                   int_x86_avx_round_ps_256,
-                                  int_x86_avx_round_pd_256>, VEX;
+                                  int_x86_avx_round_pd_256>, VEX, VEX_L;
   defm VROUND  : sse41_fp_binop_rm<0x0A, 0x0B, "vround",
                                   int_x86_sse41_round_ss,
                                   int_x86_sse41_round_sd, 0>, VEX_4V, VEX_LIG;
@@ -6299,11 +6302,11 @@
 def VPTESTYrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR256:$src1, VR256:$src2),
                 "vptest\t{$src2, $src1|$src1, $src2}",
                 [(set EFLAGS, (X86ptest VR256:$src1, (v4i64 VR256:$src2)))]>,
-                OpSize, VEX;
+                OpSize, VEX, VEX_L;
 def VPTESTYrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR256:$src1, i256mem:$src2),
                 "vptest\t{$src2, $src1|$src1, $src2}",
                 [(set EFLAGS,(X86ptest VR256:$src1, (memopv4i64 addr:$src2)))]>,
-                OpSize, VEX;
+                OpSize, VEX, VEX_L;
 }
 
 let Defs = [EFLAGS] in {
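
vptest ANDs its operands and only sets EFLAGS (ZF, CF), which is why these
defs produce no register result; the YMM forms pick up VEX_L here too. A
sketch via the AVX intrinsic:

    #include <immintrin.h>

    bool allZero(__m256i v) {
      // vptest %ymm, %ymm sets ZF when (v & v) == 0; testz returns ZF.
      return _mm256_testz_si256(v, v) != 0;
    }
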
@@ -6332,11 +6335,13 @@
 let Defs = [EFLAGS], Predicates = [HasAVX] in {
 let ExeDomain = SSEPackedSingle in {
 defm VTESTPS  : avx_bittest<0x0E, "vtestps", VR128, f128mem, memopv4f32, v4f32>;
-defm VTESTPSY : avx_bittest<0x0E, "vtestps", VR256, f256mem, memopv8f32, v8f32>;
+defm VTESTPSY : avx_bittest<0x0E, "vtestps", VR256, f256mem, memopv8f32, v8f32>,
+                            VEX_L;
 }
 let ExeDomain = SSEPackedDouble in {
 defm VTESTPD  : avx_bittest<0x0F, "vtestpd", VR128, f128mem, memopv2f64, v2f64>;
-defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem, memopv4f64, v4f64>;
+defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem, memopv4f64, v4f64>,
+                            VEX_L;
 }
 }
 
@@ -6459,25 +6464,25 @@
 let Predicates = [HasAVX2] in {
   let isCommutable = 0 in
   defm VPACKUSDW : SS41I_binop_rm_int_y<0x2B, "vpackusdw",
-                                        int_x86_avx2_packusdw>, VEX_4V;
+                                        int_x86_avx2_packusdw>, VEX_4V, VEX_L;
   defm VPMINSB   : SS41I_binop_rm_int_y<0x38, "vpminsb",
-                                        int_x86_avx2_pmins_b>, VEX_4V;
+                                        int_x86_avx2_pmins_b>, VEX_4V, VEX_L;
   defm VPMINSD   : SS41I_binop_rm_int_y<0x39, "vpminsd",
-                                        int_x86_avx2_pmins_d>, VEX_4V;
+                                        int_x86_avx2_pmins_d>, VEX_4V, VEX_L;
   defm VPMINUD   : SS41I_binop_rm_int_y<0x3B, "vpminud",
-                                        int_x86_avx2_pminu_d>, VEX_4V;
+                                        int_x86_avx2_pminu_d>, VEX_4V, VEX_L;
   defm VPMINUW   : SS41I_binop_rm_int_y<0x3A, "vpminuw",
-                                        int_x86_avx2_pminu_w>, VEX_4V;
+                                        int_x86_avx2_pminu_w>, VEX_4V, VEX_L;
   defm VPMAXSB   : SS41I_binop_rm_int_y<0x3C, "vpmaxsb",
-                                        int_x86_avx2_pmaxs_b>, VEX_4V;
+                                        int_x86_avx2_pmaxs_b>, VEX_4V, VEX_L;
   defm VPMAXSD   : SS41I_binop_rm_int_y<0x3D, "vpmaxsd",
-                                        int_x86_avx2_pmaxs_d>, VEX_4V;
+                                        int_x86_avx2_pmaxs_d>, VEX_4V, VEX_L;
   defm VPMAXUD   : SS41I_binop_rm_int_y<0x3F, "vpmaxud",
-                                        int_x86_avx2_pmaxu_d>, VEX_4V;
+                                        int_x86_avx2_pmaxu_d>, VEX_4V, VEX_L;
   defm VPMAXUW   : SS41I_binop_rm_int_y<0x3E, "vpmaxuw",
-                                        int_x86_avx2_pmaxu_w>, VEX_4V;
+                                        int_x86_avx2_pmaxu_w>, VEX_4V, VEX_L;
   defm VPMULDQ   : SS41I_binop_rm_int_y<0x28, "vpmuldq",
-                                        int_x86_avx2_pmul_dq>, VEX_4V;
+                                        int_x86_avx2_pmul_dq>, VEX_4V, VEX_L;
 }
 
 let Constraints = "$src1 = $dst" in {
@@ -6523,9 +6528,9 @@
 }
 let Predicates = [HasAVX2] in {
   defm VPMULLDY  : SS48I_binop_rm<0x40, "vpmulld", mul, v8i32, VR256,
-                                  memopv4i64, i256mem, 0>, VEX_4V;
+                                  memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
   defm VPCMPEQQY : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v4i64, VR256,
-                                  memopv4i64, i256mem, 0>, VEX_4V;
+                                  memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
 }
 
 let Constraints = "$src1 = $dst" in {
@@ -6568,13 +6573,15 @@
     defm VBLENDPS : SS41I_binop_rmi_int<0x0C, "vblendps", int_x86_sse41_blendps,
                                         VR128, memopv4f32, f128mem, 0>, VEX_4V;
     defm VBLENDPSY : SS41I_binop_rmi_int<0x0C, "vblendps",
-              int_x86_avx_blend_ps_256, VR256, memopv8f32, f256mem, 0>, VEX_4V;
+                                    int_x86_avx_blend_ps_256, VR256, memopv8f32,
+                                    f256mem, 0>, VEX_4V, VEX_L;
     }
     let ExeDomain = SSEPackedDouble in {
     defm VBLENDPD : SS41I_binop_rmi_int<0x0D, "vblendpd", int_x86_sse41_blendpd,
                                         VR128, memopv2f64, f128mem, 0>, VEX_4V;
     defm VBLENDPDY : SS41I_binop_rmi_int<0x0D, "vblendpd",
-              int_x86_avx_blend_pd_256, VR256, memopv4f64, f256mem, 0>, VEX_4V;
+                                     int_x86_avx_blend_pd_256,VR256, memopv4f64,
+                                     f256mem, 0>, VEX_4V, VEX_L;
     }
   defm VPBLENDW : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_sse41_pblendw,
                                       VR128, memopv2i64, i128mem, 0>, VEX_4V;
@@ -6589,15 +6596,15 @@
                                    VR128, memopv2f64, f128mem, 0>, VEX_4V;
   let ExeDomain = SSEPackedSingle in
   defm VDPPSY : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_avx_dp_ps_256,
-                                   VR256, memopv8f32, i256mem, 0>, VEX_4V;
+                                  VR256, memopv8f32, i256mem, 0>, VEX_4V, VEX_L;
 }
 
 let Predicates = [HasAVX2] in {
   let isCommutable = 0 in {
   defm VPBLENDWY : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_avx2_pblendw,
-                                       VR256, memopv4i64, i256mem, 0>, VEX_4V;
+                                  VR256, memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
   defm VMPSADBWY : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_avx2_mpsadbw,
-                                       VR256, memopv4i64, i256mem, 0>, VEX_4V;
+                                  VR256, memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
   }
 }
 
@@ -6648,13 +6655,13 @@
 defm VBLENDVPD  : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR128, f128mem,
                                            memopv2f64, int_x86_sse41_blendvpd>;
 defm VBLENDVPDY : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR256, f256mem,
-                                         memopv4f64, int_x86_avx_blendv_pd_256>;
+                                  memopv4f64, int_x86_avx_blendv_pd_256>, VEX_L;
 } // ExeDomain = SSEPackedDouble
 let ExeDomain = SSEPackedSingle in {
 defm VBLENDVPS  : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR128, f128mem,
                                            memopv4f32, int_x86_sse41_blendvps>;
 defm VBLENDVPSY : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR256, f256mem,
-                                         memopv8f32, int_x86_avx_blendv_ps_256>;
+                                  memopv8f32, int_x86_avx_blendv_ps_256>, VEX_L;
 } // ExeDomain = SSEPackedSingle
 defm VPBLENDVB  : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR128, i128mem,
                                            memopv2i64, int_x86_sse41_pblendvb>;
@@ -6662,7 +6669,7 @@
 
 let Predicates = [HasAVX2] in {
 defm VPBLENDVBY : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR256, i256mem,
-                                           memopv4i64, int_x86_avx2_pblendvb>;
+                                      memopv4i64, int_x86_avx2_pblendvb>, VEX_L;
 }
 
 let Predicates = [HasAVX] in {
@@ -6803,7 +6810,7 @@
 def VMOVNTDQAYrm : SS48I<0x2A, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
                          "vmovntdqa\t{$src, $dst|$dst, $src}",
                          [(set VR256:$dst, (int_x86_avx2_movntdqa addr:$src))]>,
-                         OpSize, VEX;
+                         OpSize, VEX, VEX_L;
 def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                        "movntdqa\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>,
@@ -6839,7 +6846,7 @@
 
 let Predicates = [HasAVX2] in
   defm VPCMPGTQY : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v4i64, VR256,
-                                  memopv4i64, i256mem, 0>, VEX_4V;
+                                  memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
 
 let Constraints = "$src1 = $dst" in
   defm PCMPGTQ : SS42I_binop_rm<0x37, "pcmpgtq", X86pcmpgt, v2i64, VR128,
@@ -7251,27 +7258,27 @@
   def VBROADCASTSSrm  : avx_broadcast<0x18, "vbroadcastss", VR128, f32mem,
                                       int_x86_avx_vbroadcast_ss>;
   def VBROADCASTSSYrm : avx_broadcast<0x18, "vbroadcastss", VR256, f32mem,
-                                      int_x86_avx_vbroadcast_ss_256>;
+                                      int_x86_avx_vbroadcast_ss_256>, VEX_L;
 }
 let ExeDomain = SSEPackedDouble in
 def VBROADCASTSDYrm  : avx_broadcast<0x19, "vbroadcastsd", VR256, f64mem,
-                                    int_x86_avx_vbroadcast_sd_256>;
+                                    int_x86_avx_vbroadcast_sd_256>, VEX_L;
 def VBROADCASTF128 : avx_broadcast<0x1A, "vbroadcastf128", VR256, f128mem,
-                                   int_x86_avx_vbroadcastf128_pd_256>;
+                                   int_x86_avx_vbroadcastf128_pd_256>, VEX_L;
 
 let ExeDomain = SSEPackedSingle in {
   def VBROADCASTSSrr  : avx2_broadcast_reg<0x18, "vbroadcastss", VR128,
                                            int_x86_avx2_vbroadcast_ss_ps>;
   def VBROADCASTSSYrr : avx2_broadcast_reg<0x18, "vbroadcastss", VR256,
-                                           int_x86_avx2_vbroadcast_ss_ps_256>;
+                                      int_x86_avx2_vbroadcast_ss_ps_256>, VEX_L;
 }
 let ExeDomain = SSEPackedDouble in
 def VBROADCASTSDYrr  : avx2_broadcast_reg<0x19, "vbroadcastsd", VR256,
-                                          int_x86_avx2_vbroadcast_sd_pd_256>;
+                                      int_x86_avx2_vbroadcast_sd_pd_256>, VEX_L;
 
 let Predicates = [HasAVX2] in
 def VBROADCASTI128 : avx_broadcast<0x5A, "vbroadcasti128", VR256, i128mem,
-                                   int_x86_avx2_vbroadcasti128>;
+                                   int_x86_avx2_vbroadcasti128>, VEX_L;
 
 let Predicates = [HasAVX] in
 def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src),
@@ -7285,12 +7292,12 @@
 def VINSERTF128rr : AVXAIi8<0x18, MRMSrcReg, (outs VR256:$dst),
           (ins VR256:$src1, VR128:$src2, i8imm:$src3),
           "vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
-          []>, VEX_4V;
+          []>, VEX_4V, VEX_L;
 let mayLoad = 1 in
 def VINSERTF128rm : AVXAIi8<0x18, MRMSrcMem, (outs VR256:$dst),
           (ins VR256:$src1, f128mem:$src2, i8imm:$src3),
           "vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
-          []>, VEX_4V;
+          []>, VEX_4V, VEX_L;
 }
 
 let Predicates = [HasAVX] in {
@@ -7359,12 +7366,12 @@
 def VEXTRACTF128rr : AVXAIi8<0x19, MRMDestReg, (outs VR128:$dst),
           (ins VR256:$src1, i8imm:$src2),
           "vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-          []>, VEX;
+          []>, VEX, VEX_L;
 let mayStore = 1 in
 def VEXTRACTF128mr : AVXAIi8<0x19, MRMDestMem, (outs),
           (ins f128mem:$dst, VR256:$src1, i8imm:$src2),
           "vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-          []>, VEX;
+          []>, VEX, VEX_L;
 }
 
 // AVX1 patterns
@@ -7439,7 +7446,7 @@
              (ins VR256:$src1, f256mem:$src2),
              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
              [(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>,
-             VEX_4V;
+             VEX_4V, VEX_L;
   def mr  : AVX8I<opc_mr, MRMDestMem, (outs),
              (ins f128mem:$dst, VR128:$src1, VR128:$src2),
              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
@@ -7447,7 +7454,7 @@
   def Ymr : AVX8I<opc_mr, MRMDestMem, (outs),
              (ins f256mem:$dst, VR256:$src1, VR256:$src2),
              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-             [(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>, VEX_4V;
+             [(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>, VEX_4V, VEX_L;
 }
 
 let ExeDomain = SSEPackedSingle in
@@ -7495,13 +7502,13 @@
   defm VPERMILPS  : avx_permil<0x0C, 0x04, "vpermilps", VR128, f128mem, i128mem,
                                memopv2i64, int_x86_avx_vpermilvar_ps, v4f32>;
   defm VPERMILPSY : avx_permil<0x0C, 0x04, "vpermilps", VR256, f256mem, i256mem,
-                              memopv4i64, int_x86_avx_vpermilvar_ps_256, v8f32>;
+                       memopv4i64, int_x86_avx_vpermilvar_ps_256, v8f32>, VEX_L;
 }
 let ExeDomain = SSEPackedDouble in {
   defm VPERMILPD  : avx_permil<0x0D, 0x05, "vpermilpd", VR128, f128mem, i128mem,
                                memopv2i64, int_x86_avx_vpermilvar_pd, v2f64>;
   defm VPERMILPDY : avx_permil<0x0D, 0x05, "vpermilpd", VR256, f256mem, i256mem,
-                              memopv4i64, int_x86_avx_vpermilvar_pd_256, v4f64>;
+                       memopv4i64, int_x86_avx_vpermilvar_pd_256, v4f64>, VEX_L;
 }
 
 let Predicates = [HasAVX] in {
@@ -7529,12 +7536,12 @@
           (ins VR256:$src1, VR256:$src2, i8imm:$src3),
           "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
           [(set VR256:$dst, (v8f32 (X86VPerm2x128 VR256:$src1, VR256:$src2,
-                              (i8 imm:$src3))))]>, VEX_4V;
+                              (i8 imm:$src3))))]>, VEX_4V, VEX_L;
 def VPERM2F128rm : AVXAIi8<0x06, MRMSrcMem, (outs VR256:$dst),
           (ins VR256:$src1, f256mem:$src2, i8imm:$src3),
           "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
           [(set VR256:$dst, (X86VPerm2x128 VR256:$src1, (memopv8f32 addr:$src2),
-                             (i8 imm:$src3)))]>, VEX_4V;
+                             (i8 imm:$src3)))]>, VEX_4V, VEX_L;
 }
 
 let Predicates = [HasAVX] in {
@@ -7611,9 +7618,9 @@
 
 let Predicates = [HasAVX, HasF16C] in {
   defm VCVTPH2PS  : f16c_ph2ps<VR128, f64mem, int_x86_vcvtph2ps_128>;
-  defm VCVTPH2PSY : f16c_ph2ps<VR256, f128mem, int_x86_vcvtph2ps_256>;
+  defm VCVTPH2PSY : f16c_ph2ps<VR256, f128mem, int_x86_vcvtph2ps_256>, VEX_L;
   defm VCVTPS2PH  : f16c_ps2ph<VR128, f64mem, int_x86_vcvtps2ph_128>;
-  defm VCVTPS2PHY : f16c_ps2ph<VR256, f128mem, int_x86_vcvtps2ph_256>;
+  defm VCVTPS2PHY : f16c_ps2ph<VR256, f128mem, int_x86_vcvtps2ph_256>, VEX_L;
 }
 
 //===----------------------------------------------------------------------===//
@@ -7645,7 +7652,7 @@
 defm VPBLENDD : AVX2_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_128,
                                    VR128, memopv2i64, i128mem>;
 defm VPBLENDDY : AVX2_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_256,
-                                    VR256, memopv4i64, i256mem>;
+                                    VR256, memopv4i64, i256mem>, VEX_L;
 }
 
 //===----------------------------------------------------------------------===//
@@ -7664,11 +7671,12 @@
                     (Int128 (scalar_to_vector (ld_frag addr:$src))))]>, VEX;
   def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
-                   [(set VR256:$dst, (Int256 VR128:$src))]>, VEX;
+                   [(set VR256:$dst, (Int256 VR128:$src))]>, VEX, VEX_L;
   def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst), (ins x86memop:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set VR256:$dst,
-                    (Int256 (scalar_to_vector (ld_frag addr:$src))))]>, VEX;
+                    (Int256 (scalar_to_vector (ld_frag addr:$src))))]>,
+                   VEX, VEX_L;
 }
 
 defm VPBROADCASTB  : avx2_broadcast<0x78, "vpbroadcastb", i8mem, loadi8,
@@ -7803,7 +7811,8 @@
                    !strconcat(OpcodeStr,
                        "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set VR256:$dst,
-                     (OpVT (X86VPermv VR256:$src1, VR256:$src2)))]>, VEX_4V;
+                     (OpVT (X86VPermv VR256:$src1, VR256:$src2)))]>,
+                   VEX_4V, VEX_L;
   def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst),
                    (ins VR256:$src1, i256mem:$src2),
                    !strconcat(OpcodeStr,
@@ -7811,7 +7820,7 @@
                    [(set VR256:$dst,
                      (OpVT (X86VPermv VR256:$src1,
                             (bitconvert (mem_frag addr:$src2)))))]>,
-                   VEX_4V;
+                   VEX_4V, VEX_L;
 }
 
 defm VPERMD : avx2_perm<0x36, "vpermd", memopv4i64, v8i32>;
@@ -7825,14 +7834,15 @@
                      !strconcat(OpcodeStr,
                          "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                      [(set VR256:$dst,
-                       (OpVT (X86VPermi VR256:$src1, (i8 imm:$src2))))]>, VEX;
+                       (OpVT (X86VPermi VR256:$src1, (i8 imm:$src2))))]>,
+                     VEX, VEX_L;
   def Ymi : AVX2AIi8<opc, MRMSrcMem, (outs VR256:$dst),
                      (ins i256mem:$src1, i8imm:$src2),
                      !strconcat(OpcodeStr,
                          "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                      [(set VR256:$dst,
                        (OpVT (X86VPermi (mem_frag addr:$src1),
-                              (i8 imm:$src2))))]>, VEX;
+                              (i8 imm:$src2))))]>, VEX, VEX_L;
 }
 
 defm VPERMQ : avx2_perm_imm<0x00, "vpermq", memopv4i64, v4i64>, VEX_W;
@@ -7846,12 +7856,12 @@
           (ins VR256:$src1, VR256:$src2, i8imm:$src3),
           "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
           [(set VR256:$dst, (v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2,
-                            (i8 imm:$src3))))]>, VEX_4V;
+                            (i8 imm:$src3))))]>, VEX_4V, VEX_L;
 def VPERM2I128rm : AVX2AIi8<0x46, MRMSrcMem, (outs VR256:$dst),
           (ins VR256:$src1, f256mem:$src2, i8imm:$src3),
           "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
           [(set VR256:$dst, (X86VPerm2x128 VR256:$src1, (memopv4i64 addr:$src2),
-                             (i8 imm:$src3)))]>, VEX_4V;
+                             (i8 imm:$src3)))]>, VEX_4V, VEX_L;
 
 let Predicates = [HasAVX2] in {
 def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
@@ -7880,12 +7890,12 @@
 def VINSERTI128rr : AVX2AIi8<0x38, MRMSrcReg, (outs VR256:$dst),
           (ins VR256:$src1, VR128:$src2, i8imm:$src3),
           "vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
-          []>, VEX_4V;
+          []>, VEX_4V, VEX_L;
 let mayLoad = 1 in
 def VINSERTI128rm : AVX2AIi8<0x38, MRMSrcMem, (outs VR256:$dst),
           (ins VR256:$src1, i128mem:$src2, i8imm:$src3),
           "vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
-          []>, VEX_4V;
+          []>, VEX_4V, VEX_L;
 }
 
 let Predicates = [HasAVX2] in {
@@ -7935,11 +7945,12 @@
           "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
           [(set VR128:$dst,
             (int_x86_avx2_vextracti128 VR256:$src1, imm:$src2))]>,
-          VEX;
+          VEX, VEX_L;
 let neverHasSideEffects = 1, mayStore = 1 in
 def VEXTRACTI128mr : AVX2AIi8<0x39, MRMDestMem, (outs),
           (ins i128mem:$dst, VR256:$src1, i8imm:$src2),
-          "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, VEX;
+          "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
+          VEX, VEX_L;
 
 let Predicates = [HasAVX2] in {
 def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)),
@@ -7990,7 +8001,8 @@
   def Yrm : AVX28I<0x8c, MRMSrcMem, (outs VR256:$dst),
              (ins VR256:$src1, i256mem:$src2),
              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-             [(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>, VEX_4V;
+             [(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>,
+             VEX_4V, VEX_L;
   def mr  : AVX28I<0x8e, MRMDestMem, (outs),
              (ins i128mem:$dst, VR128:$src1, VR128:$src2),
              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
@@ -7998,7 +8010,7 @@
   def Ymr : AVX28I<0x8e, MRMDestMem, (outs),
              (ins i256mem:$dst, VR256:$src1, VR256:$src2),
              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-             [(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>, VEX_4V;
+             [(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>, VEX_4V, VEX_L;
 }
 
 defm VPMASKMOVD : avx2_pmovmask<"vpmaskmovd",
@@ -8036,14 +8048,14 @@
              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
              [(set VR256:$dst,
                (vt256 (OpNode VR256:$src1, (vt256 VR256:$src2))))]>,
-             VEX_4V;
+             VEX_4V, VEX_L;
   def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst),
              (ins VR256:$src1, i256mem:$src2),
              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
              [(set VR256:$dst,
                (vt256 (OpNode VR256:$src1,
                        (vt256 (bitconvert (memopv4i64 addr:$src2))))))]>,
-             VEX_4V;
+             VEX_4V, VEX_L;
 }
 
 defm VPSLLVD : avx2_var_shift<0x47, "vpsllvd", shl, v4i32, v8i32>;
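
The X86InstrSSE.td hunks above all make the same change: 256-bit (YMM)
instruction definitions gain the VEX_L modifier so TableGen marks them as
requiring VEX.L = 1 in the encoded prefix. As a rough sketch of what that bit
means at the byte level (this is not the LLVM encoder; the helper name and
parameters are illustrative only), the second byte of a two-byte VEX prefix
packs the vector-length bit at position 2:

    #include <cstdint>

    // Assemble byte 1 of a two-byte (C5 xx) VEX prefix.  Field layout per
    // the Intel SDM: bit 7 = ~R, bits 6-3 = ~vvvv, bit 2 = L, bits 1-0 = pp.
    uint8_t vexByte1(bool NotR, uint8_t VVVV, bool L256, uint8_t PP) {
      uint8_t B = 0;
      B |= (NotR ? 1 : 0) << 7;            // inverted REX.R extension
      B |= (uint8_t)((~VVVV & 0xF) << 3);  // inverted extra source register
      B |= (L256 ? 1 : 0) << 2;            // VEX.L: 0 = 128-bit, 1 = 256-bit
      B |= (PP & 0x3);                     // implied 66/F3/F2 opcode prefix
      return B;
    }

Without VEX_L, the Y-suffixed forms such as VPTESTYrr would encode with L = 0
and be indistinguishable from their 128-bit XMM counterparts.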

Modified: llvm/branches/R600/lib/Target/X86/X86InstrXOP.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/X86/X86InstrXOP.td?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/X86/X86InstrXOP.td (original)
+++ llvm/branches/R600/lib/Target/X86/X86InstrXOP.td Fri Sep 21 12:29:50 2012
@@ -75,10 +75,10 @@
                      PatFrag memop> {
   def rrY : IXOP<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
            !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
-           [(set VR256:$dst, (Int VR256:$src))]>, VEX;
+           [(set VR256:$dst, (Int VR256:$src))]>, VEX, VEX_L;
   def rmY : IXOP<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
            !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
-           [(set VR256:$dst, (Int (bitconvert (memop addr:$src))))]>, VEX;
+           [(set VR256:$dst, (Int (bitconvert (memop addr:$src))))]>, VEX, VEX_L;
 }
 
 let isAsmParserOnly = 1 in {
@@ -238,7 +238,7 @@
            !strconcat(OpcodeStr,
            "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
            [(set VR256:$dst, (Int VR256:$src1, VR256:$src2, VR256:$src3))]>,
-           VEX_4V, VEX_I8IMM;
+           VEX_4V, VEX_I8IMM, VEX_L;
   def rmY : IXOPi8<opc, MRMSrcMem, (outs VR256:$dst),
            (ins VR256:$src1, VR256:$src2, i256mem:$src3),
            !strconcat(OpcodeStr,
@@ -246,7 +246,7 @@
            [(set VR256:$dst,
              (Int VR256:$src1, VR256:$src2,
               (bitconvert (memopv4i64 addr:$src3))))]>,
-           VEX_4V, VEX_I8IMM, VEX_W, MemOp4;
+           VEX_4V, VEX_I8IMM, VEX_W, MemOp4, VEX_L;
   def mrY : IXOPi8<opc, MRMSrcMem, (outs VR256:$dst),
            (ins VR256:$src1, f256mem:$src2, VR256:$src3),
            !strconcat(OpcodeStr,
@@ -254,7 +254,7 @@
            [(set VR256:$dst,
              (Int VR256:$src1, (bitconvert (memopv4i64 addr:$src2)),
               VR256:$src3))]>,
-           VEX_4V, VEX_I8IMM;
+           VEX_4V, VEX_I8IMM, VEX_L;
 }
 
 let isAsmParserOnly = 1 in {
@@ -287,20 +287,21 @@
         !strconcat(OpcodeStr,
         "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
         [(set VR256:$dst,
-          (Int256 VR256:$src1, VR256:$src2, VR256:$src3, imm:$src4))]>;
+          (Int256 VR256:$src1, VR256:$src2, VR256:$src3, imm:$src4))]>, VEX_L;
   def rmY : IXOP5<opc, MRMSrcMem, (outs VR256:$dst),
         (ins VR256:$src1, VR256:$src2, f256mem:$src3, i8imm:$src4),
         !strconcat(OpcodeStr,
         "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
         [(set VR256:$dst,
           (Int256 VR256:$src1, VR256:$src2, (ld_256 addr:$src3), imm:$src4))]>,
-        VEX_W, MemOp4;
+        VEX_W, MemOp4, VEX_L;
   def mrY : IXOP5<opc, MRMSrcMem, (outs VR256:$dst),
         (ins VR256:$src1, f256mem:$src2, VR256:$src3, i8imm:$src4),
         !strconcat(OpcodeStr,
         "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
         [(set VR256:$dst,
-           (Int256 VR256:$src1, (ld_256 addr:$src2), VR256:$src3, imm:$src4))]>;
+           (Int256 VR256:$src1, (ld_256 addr:$src2), VR256:$src3, imm:$src4))]>,
+        VEX_L;
 }
 
 defm VPERMIL2PD : xop5op<0x49, "vpermil2pd", int_x86_xop_vpermil2pd,

Modified: llvm/branches/R600/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp (original)
+++ llvm/branches/R600/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp Fri Sep 21 12:29:50 2012
@@ -477,7 +477,8 @@
     if (match(Op1, m_Shl(m_Power2(CI), m_Value(N))) ||
         match(Op1, m_ZExt(m_Shl(m_Power2(CI), m_Value(N))))) {
       if (*CI != 1)
-        N = Builder->CreateAdd(N, ConstantInt::get(I.getType(),CI->logBase2()));
+        N = Builder->CreateAdd(N,
+                               ConstantInt::get(N->getType(), CI->logBase2()));
       if (ZExtInst *Z = dyn_cast<ZExtInst>(Op1))
         N = Builder->CreateZExt(N, Z->getDestTy());
       if (I.isExact())
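
The one-line fix above matters in the zext form of the pattern: for
udiv i32 %x, (zext i16 (shl i16 %p2, %n)) the shift amount N is i16 while the
division I is i32, and IRBuilder::CreateAdd requires both operands to share a
type. A minimal sketch of the corrected step, assuming CI is the APInt
captured by m_Power2 as in the surrounding code (the helper itself is
illustrative):

    #include "llvm/ADT/APInt.h"
    #include "llvm/Constants.h"
    #include "llvm/IRBuilder.h"
    using namespace llvm;

    // Add log2(CI) to the shift amount N in N's own type; the caller
    // re-widens with CreateZExt when the shl sat under a zext.
    static Value *addLog2ToShiftAmount(Value *N, const APInt *CI,
                                       IRBuilder<> &Builder) {
      if (*CI != 1)
        N = Builder.CreateAdd(N, ConstantInt::get(N->getType(),
                                                  CI->logBase2()));
      return N;
    }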

Modified: llvm/branches/R600/lib/Transforms/Scalar/CodeGenPrepare.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Transforms/Scalar/CodeGenPrepare.cpp?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/lib/Transforms/Scalar/CodeGenPrepare.cpp (original)
+++ llvm/branches/R600/lib/Transforms/Scalar/CodeGenPrepare.cpp Fri Sep 21 12:29:50 2012
@@ -18,6 +18,7 @@
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
 #include "llvm/IRBuilder.h"
 #include "llvm/InlineAsm.h"
 #include "llvm/Instructions.h"
@@ -126,6 +127,7 @@
     bool OptimizeSelectInst(SelectInst *SI);
     bool DupRetToEnableTailCallOpts(ReturnInst *RI);
     bool PlaceDbgValues(Function &F);
+    bool ConvertLoadToSwitch(LoadInst *LI);
   };
 }
 
@@ -169,7 +171,7 @@
   bool MadeChange = true;
   while (MadeChange) {
     MadeChange = false;
-    for (Function::iterator I = F.begin(), E = F.end(); I != E; ) {
+    for (Function::iterator I = F.begin(); I != F.end(); ) {
       BasicBlock *BB = I++;
       MadeChange |= OptimizeBlock(*BB);
     }
@@ -1283,9 +1285,11 @@
     return OptimizeCmpExpression(CI);
 
   if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+    bool Changed = false;
     if (TLI)
-      return OptimizeMemoryInst(I, I->getOperand(0), LI->getType());
-    return false;
+      Changed |= OptimizeMemoryInst(I, I->getOperand(0), LI->getType());
+    Changed |= ConvertLoadToSwitch(LI);
+    return Changed;
   }
 
   if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
@@ -1329,7 +1333,7 @@
   bool MadeChange = false;
 
   CurInstIterator = BB.begin();
-  for (BasicBlock::iterator E = BB.end(); CurInstIterator != E; )
+  while (CurInstIterator != BB.end())
     MadeChange |= OptimizeInst(CurInstIterator++);
 
   return MadeChange;
@@ -1365,3 +1369,109 @@
   }
   return MadeChange;
 }
+
+static bool TargetSupportsJumpTables(const TargetLowering &TLI) {
+  return TLI.supportJumpTables() &&
+          (TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
+           TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other));
+}
+
+/// ConvertLoadToSwitch - Convert loads from constant lookup tables into
+/// switches. This undoes the switch-to-lookup table transformation in
+/// SimplifyCFG for targets where that is unprofitable.
+bool CodeGenPrepare::ConvertLoadToSwitch(LoadInst *LI) {
+  // This only applies to targets that don't support jump tables.
+  if (!TLI || TargetSupportsJumpTables(*TLI))
+    return false;
+
+  // FIXME: In the future, it would be desirable to have enough target
+  // information in SimplifyCFG, so we could decide at that stage whether to
+  // transform the switch to a lookup table or not, and this
+  // reverse-transformation could be removed.
+
+  GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LI->getPointerOperand());
+  if (!GEP || !GEP->isInBounds() || GEP->getPointerAddressSpace())
+    return false;
+  if (GEP->getNumIndices() != 2)
+    return false;
+  Value *FirstIndex = GEP->idx_begin()[0];
+  ConstantInt *FirstIndexInt = dyn_cast<ConstantInt>(FirstIndex);
+  if (!FirstIndexInt || !FirstIndexInt->isZero())
+    return false;
+
+  Value *TableIndex = GEP->idx_begin()[1];
+  IntegerType *TableIndexTy = cast<IntegerType>(TableIndex->getType());
+
+  GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getPointerOperand());
+  if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
+    return false;
+
+  Constant *Arr = GV->getInitializer();
+  uint64_t NumElements;
+  if (ConstantArray *CA = dyn_cast<ConstantArray>(Arr))
+    NumElements = CA->getType()->getNumElements();
+  else if (ConstantDataArray *CDA = dyn_cast<ConstantDataArray>(Arr))
+    NumElements = CDA->getNumElements();
+  else
+    return false;
+  if (NumElements < 2)
+    return false;
+
+  // Split the block.
+  BasicBlock *OriginalBB = LI->getParent();
+  BasicBlock *PostSwitchBB = OriginalBB->splitBasicBlock(LI);
+
+  // Replace OriginalBB's terminator with a switch.
+  IRBuilder<> Builder(OriginalBB->getTerminator());
+  SwitchInst *Switch = Builder.CreateSwitch(TableIndex, PostSwitchBB,
+                                            NumElements - 1);
+  OriginalBB->getTerminator()->eraseFromParent();
+
+  // Count the frequency of each value to decide which to use as default.
+  SmallDenseMap<Constant*, uint64_t> ValueFreq;
+  for (uint64_t I = 0; I < NumElements; ++I)
+    ++ValueFreq[Arr->getAggregateElement(I)];
+  uint64_t MaxCount = 0;
+  Constant *DefaultValue = NULL;
+  for (SmallDenseMap<Constant*, uint64_t>::iterator I = ValueFreq.begin(),
+       E = ValueFreq.end(); I != E; ++I) {
+    if (I->second > MaxCount) {
+      MaxCount = I->second;
+      DefaultValue = I->first;
+    }
+  }
+  assert(DefaultValue && "No values in the array?");
+
+  // Create the phi node in PostSwitchBB, which will replace the load.
+  Builder.SetInsertPoint(PostSwitchBB->begin());
+  PHINode *PHI = Builder.CreatePHI(LI->getType(), NumElements);
+  PHI->addIncoming(DefaultValue, OriginalBB);
+
+  // Build basic blocks to target with the switch.
+  for (uint64_t I = 0; I < NumElements; ++I) {
+    Constant *C = Arr->getAggregateElement(I);
+    if (C == DefaultValue) continue; // Already covered by the default case.
+
+    BasicBlock *BB = BasicBlock::Create(PostSwitchBB->getContext(),
+                                        "lookup.bb",
+                                        PostSwitchBB->getParent(),
+                                        PostSwitchBB);
+    Switch->addCase(ConstantInt::get(TableIndexTy, I), BB);
+    Builder.SetInsertPoint(BB);
+    Builder.CreateBr(PostSwitchBB);
+    PHI->addIncoming(C, BB);
+  }
+
+  // Remove the load.
+  LI->replaceAllUsesWith(PHI);
+  LI->eraseFromParent();
+
+  // Clean up.
+  if (GEP->use_empty())
+    GEP->eraseFromParent();
+  if (GV->hasUnnamedAddr() && GV->hasPrivateLinkage() && GV->use_empty())
+    GV->eraseFromParent();
+
+  CurInstIterator = Switch;
+  return true;
+}
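
ConvertLoadToSwitch only fires when the load's address is an inbounds GEP of a
constant global array indexed by (0, i); it then splits the block at the load,
emits a switch on i whose cases feed a phi, and picks the most frequent array
element as the default case so that value needs no block of its own. At the
source level the round trip looks roughly like this (illustrative function,
not taken from the test suite):

    // SimplifyCFG may turn this switch into a load from a private constant
    // array; on a target where TargetSupportsJumpTables() returns false,
    // ConvertLoadToSwitch rebuilds an equivalent switch from that array.
    int classify(unsigned K) {
      switch (K) {
      case 0:  return 7;
      case 1:  return 7;    // repeated value: becomes the default case
      case 2:  return 13;
      case 3:  return 42;
      default: return 7;
      }
    }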

Modified: llvm/branches/R600/lib/Transforms/Scalar/SROA.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Transforms/Scalar/SROA.cpp?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/lib/Transforms/Scalar/SROA.cpp (original)
+++ llvm/branches/R600/lib/Transforms/Scalar/SROA.cpp Fri Sep 21 12:29:50 2012
@@ -1487,6 +1487,8 @@
     return 0;
 
   Type *ElementTy = Ty->getElementType();
+  if (!ElementTy->isSized())
+    return 0; // We can't GEP through an unsized element.
   APInt ElementSize(Offset.getBitWidth(), TD.getTypeAllocSize(ElementTy));
   if (ElementSize == 0)
     return 0; // Zero-length arrays can't help us build a natural GEP.
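
The new guard protects the natural-GEP builder from element types with no
layout, for which getTypeAllocSize has no answer. A minimal illustration (the
type name is made up):

    #include "llvm/DerivedTypes.h"
    #include "llvm/LLVMContext.h"
    using namespace llvm;

    void demo(LLVMContext &Ctx) {
      // An opaque struct has no body and therefore no size, so
      // !Opaque->isSized() and SROA must bail out before asking
      // TargetData for its allocation size.
      StructType *Opaque = StructType::create(Ctx, "opaque.ty");
      (void)Opaque;
    }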

Modified: llvm/branches/R600/lib/Transforms/Utils/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Transforms/Utils/CMakeLists.txt?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/lib/Transforms/Utils/CMakeLists.txt (original)
+++ llvm/branches/R600/lib/Transforms/Utils/CMakeLists.txt Fri Sep 21 12:29:50 2012
@@ -11,6 +11,7 @@
   DemoteRegToStack.cpp
   InlineFunction.cpp
   InstructionNamer.cpp
+  IntegerDivision.cpp
   LCSSA.cpp
   Local.cpp
   LoopSimplify.cpp

Added: llvm/branches/R600/lib/Transforms/Utils/IntegerDivision.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Transforms/Utils/IntegerDivision.cpp?rev=164384&view=auto
==============================================================================
--- llvm/branches/R600/lib/Transforms/Utils/IntegerDivision.cpp (added)
+++ llvm/branches/R600/lib/Transforms/Utils/IntegerDivision.cpp Fri Sep 21 12:29:50 2012
@@ -0,0 +1,312 @@
+//===-- IntegerDivision.cpp - Expand integer division ---------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains an implementation of 32-bit scalar integer division for
+// targets that don't have native support. It's largely derived from
+// compiler-rt's implementation of __udivsi3, but hand-tuned to reduce the
+// amount of control flow.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "integer-division"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/IRBuilder.h"
+#include "llvm/Transforms/Utils/IntegerDivision.h"
+
+using namespace llvm;
+
+/// Generate code to divide two signed integers. Returns the quotient, rounded
+/// towards 0. Builder's insert point should be pointing at the sdiv
+/// instruction. This will generate a udiv in the process, and Builder's insert
+/// point will be pointing at the udiv (if present, i.e. not folded), ready to
+/// be expanded if the user wishes.
+static Value *generateSignedDivisionCode(Value *Dividend, Value *Divisor,
+                                         IRBuilder<> &Builder) {
+  // Implementation taken from compiler-rt's __divsi3
+
+  ConstantInt *ThirtyOne = Builder.getInt32(31);
+
+  // ;   %tmp    = ashr i32 %dividend, 31
+  // ;   %tmp1   = ashr i32 %divisor, 31
+  // ;   %tmp2   = xor i32 %tmp, %dividend
+  // ;   %u_dvnd = sub nsw i32 %tmp2, %tmp
+  // ;   %tmp3   = xor i32 %tmp1, %divisor
+  // ;   %u_dvsr = sub nsw i32 %tmp3, %tmp1
+  // ;   %q_sgn  = xor i32 %tmp1, %tmp
+  // ;   %q_mag  = udiv i32 %u_dvnd, %u_dvsr
+  // ;   %tmp4   = xor i32 %q_mag, %q_sgn
+  // ;   %q      = sub i32 %tmp4, %q_sgn
+  Value *Tmp    = Builder.CreateAShr(Dividend, ThirtyOne);
+  Value *Tmp1   = Builder.CreateAShr(Divisor, ThirtyOne);
+  Value *Tmp2   = Builder.CreateXor(Tmp, Dividend);
+  Value *U_Dvnd = Builder.CreateSub(Tmp2, Tmp);
+  Value *Tmp3   = Builder.CreateXor(Tmp1, Divisor);
+  Value *U_Dvsr = Builder.CreateSub(Tmp3, Tmp1);
+  Value *Q_Sgn  = Builder.CreateXor(Tmp1, Tmp);
+  Value *Q_Mag  = Builder.CreateUDiv(U_Dvnd, U_Dvsr);
+  Value *Tmp4   = Builder.CreateXor(Q_Mag, Q_Sgn);
+  Value *Q      = Builder.CreateSub(Tmp4, Q_Sgn);
+
+  if (Instruction *UDiv = dyn_cast<Instruction>(Q_Mag))
+    Builder.SetInsertPoint(UDiv);
+
+  return Q;
+}
+
+/// Generates code to divide two unsigned scalar 32-bit integers. Returns the
+/// quotient, rounded towards 0. Builder's insert point should be pointing at
+/// the udiv instruction.
+static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor,
+                                           IRBuilder<> &Builder) {
+  // The basic algorithm can be found in the compiler-rt project's
+  // implementation of __udivsi3.c. Here, we do a lower-level IR-based approach
+  // that's been hand-tuned to lessen the amount of control flow involved.
+
+  // Some helper values
+  IntegerType *I32Ty = Builder.getInt32Ty();
+
+  ConstantInt *Zero      = Builder.getInt32(0);
+  ConstantInt *One       = Builder.getInt32(1);
+  ConstantInt *ThirtyOne = Builder.getInt32(31);
+  ConstantInt *NegOne    = ConstantInt::getSigned(I32Ty, -1);
+  ConstantInt *True      = Builder.getTrue();
+
+  BasicBlock *IBB = Builder.GetInsertBlock();
+  Function *F = IBB->getParent();
+  Function *CTLZi32 = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
+                                                I32Ty);
+
+  // Our CFG is going to look like:
+  // +---------------------+
+  // | special-cases       |
+  // |   ...               |
+  // +---------------------+
+  //  |       |
+  //  |   +----------+
+  //  |   |  bb1     |
+  //  |   |  ...     |
+  //  |   +----------+
+  //  |    |      |
+  //  |    |  +------------+
+  //  |    |  |  preheader |
+  //  |    |  |  ...       |
+  //  |    |  +------------+
+  //  |    |      |
+  //  |    |      |      +---+
+  //  |    |      |      |   |
+  //  |    |  +------------+ |
+  //  |    |  |  do-while  | |
+  //  |    |  |  ...       | |
+  //  |    |  +------------+ |
+  //  |    |      |      |   |
+  //  |   +-----------+  +---+
+  //  |   | loop-exit |
+  //  |   |  ...      |
+  //  |   +-----------+
+  //  |     |
+  // +-------+
+  // | ...   |
+  // | end   |
+  // +-------+
+  BasicBlock *SpecialCases = Builder.GetInsertBlock();
+  SpecialCases->setName(Twine(SpecialCases->getName(), "_udiv-special-cases"));
+  BasicBlock *End = SpecialCases->splitBasicBlock(Builder.GetInsertPoint(),
+                                                  "udiv-end");
+  BasicBlock *LoopExit  = BasicBlock::Create(Builder.getContext(),
+                                             "udiv-loop-exit", F, End);
+  BasicBlock *DoWhile   = BasicBlock::Create(Builder.getContext(),
+                                             "udiv-do-while", F, End);
+  BasicBlock *Preheader = BasicBlock::Create(Builder.getContext(),
+                                             "udiv-preheader", F, End);
+  BasicBlock *BB1       = BasicBlock::Create(Builder.getContext(),
+                                             "udiv-bb1", F, End);
+
+  // We'll be overwriting the terminator to insert our extra blocks
+  SpecialCases->getTerminator()->eraseFromParent();
+
+  // First off, check for special cases: dividend or divisor is zero, divisor
+  // is greater than dividend, and divisor is 1.
+  // ; special-cases:
+  // ;   %ret0_1      = icmp eq i32 %divisor, 0
+  // ;   %ret0_2      = icmp eq i32 %dividend, 0
+  // ;   %ret0_3      = or i1 %ret0_1, %ret0_2
+  // ;   %tmp0        = tail call i32 @llvm.ctlz.i32(i32 %divisor, i1 true)
+  // ;   %tmp1        = tail call i32 @llvm.ctlz.i32(i32 %dividend, i1 true)
+  // ;   %sr          = sub nsw i32 %tmp0, %tmp1
+  // ;   %ret0_4      = icmp ugt i32 %sr, 31
+  // ;   %ret0        = or i1 %ret0_3, %ret0_4
+  // ;   %retDividend = icmp eq i32 %sr, 31
+  // ;   %retVal      = select i1 %ret0, i32 0, i32 %dividend
+  // ;   %earlyRet    = or i1 %ret0, %retDividend
+  // ;   br i1 %earlyRet, label %end, label %bb1
+  Builder.SetInsertPoint(SpecialCases);
+  Value *Ret0_1      = Builder.CreateICmpEQ(Divisor, Zero);
+  Value *Ret0_2      = Builder.CreateICmpEQ(Dividend, Zero);
+  Value *Ret0_3      = Builder.CreateOr(Ret0_1, Ret0_2);
+  Value *Tmp0        = Builder.CreateCall2(CTLZi32, Divisor, True);
+  Value *Tmp1        = Builder.CreateCall2(CTLZi32, Dividend, True);
+  Value *SR          = Builder.CreateSub(Tmp0, Tmp1);
+  Value *Ret0_4      = Builder.CreateICmpUGT(SR, ThirtyOne);
+  Value *Ret0        = Builder.CreateOr(Ret0_3, Ret0_4);
+  Value *RetDividend = Builder.CreateICmpEQ(SR, ThirtyOne);
+  Value *RetVal      = Builder.CreateSelect(Ret0, Zero, Dividend);
+  Value *EarlyRet    = Builder.CreateOr(Ret0, RetDividend);
+  Builder.CreateCondBr(EarlyRet, End, BB1);
+
+  // ; bb1:                                             ; preds = %special-cases
+  // ;   %sr_1     = add i32 %sr, 1
+  // ;   %tmp2     = sub i32 31, %sr
+  // ;   %q        = shl i32 %dividend, %tmp2
+  // ;   %skipLoop = icmp eq i32 %sr_1, 0
+  // ;   br i1 %skipLoop, label %loop-exit, label %preheader
+  Builder.SetInsertPoint(BB1);
+  Value *SR_1     = Builder.CreateAdd(SR, One);
+  Value *Tmp2     = Builder.CreateSub(ThirtyOne, SR);
+  Value *Q        = Builder.CreateShl(Dividend, Tmp2);
+  Value *SkipLoop = Builder.CreateICmpEQ(SR_1, Zero);
+  Builder.CreateCondBr(SkipLoop, LoopExit, Preheader);
+
+  // ; preheader:                                           ; preds = %bb1
+  // ;   %tmp3 = lshr i32 %dividend, %sr_1
+  // ;   %tmp4 = add i32 %divisor, -1
+  // ;   br label %do-while
+  Builder.SetInsertPoint(Preheader);
+  Value *Tmp3 = Builder.CreateLShr(Dividend, SR_1);
+  Value *Tmp4 = Builder.CreateAdd(Divisor, NegOne);
+  Builder.CreateBr(DoWhile);
+
+  // ; do-while:                                 ; preds = %do-while, %preheader
+  // ;   %carry_1 = phi i32 [ 0, %preheader ], [ %carry, %do-while ]
+  // ;   %sr_3    = phi i32 [ %sr_1, %preheader ], [ %sr_2, %do-while ]
+  // ;   %r_1     = phi i32 [ %tmp3, %preheader ], [ %r, %do-while ]
+  // ;   %q_2     = phi i32 [ %q, %preheader ], [ %q_1, %do-while ]
+  // ;   %tmp5  = shl i32 %r_1, 1
+  // ;   %tmp6  = lshr i32 %q_2, 31
+  // ;   %tmp7  = or i32 %tmp5, %tmp6
+  // ;   %tmp8  = shl i32 %q_2, 1
+  // ;   %q_1   = or i32 %carry_1, %tmp8
+  // ;   %tmp9  = sub i32 %tmp4, %tmp7
+  // ;   %tmp10 = ashr i32 %tmp9, 31
+  // ;   %carry = and i32 %tmp10, 1
+  // ;   %tmp11 = and i32 %tmp10, %divisor
+  // ;   %r     = sub i32 %tmp7, %tmp11
+  // ;   %sr_2  = add i32 %sr_3, -1
+  // ;   %tmp12 = icmp eq i32 %sr_2, 0
+  // ;   br i1 %tmp12, label %loop-exit, label %do-while
+  Builder.SetInsertPoint(DoWhile);
+  PHINode *Carry_1 = Builder.CreatePHI(I32Ty, 2);
+  PHINode *SR_3    = Builder.CreatePHI(I32Ty, 2);
+  PHINode *R_1     = Builder.CreatePHI(I32Ty, 2);
+  PHINode *Q_2     = Builder.CreatePHI(I32Ty, 2);
+  Value *Tmp5  = Builder.CreateShl(R_1, One);
+  Value *Tmp6  = Builder.CreateLShr(Q_2, ThirtyOne);
+  Value *Tmp7  = Builder.CreateOr(Tmp5, Tmp6);
+  Value *Tmp8  = Builder.CreateShl(Q_2, One);
+  Value *Q_1   = Builder.CreateOr(Carry_1, Tmp8);
+  Value *Tmp9  = Builder.CreateSub(Tmp4, Tmp7);
+  Value *Tmp10 = Builder.CreateAShr(Tmp9, 31);
+  Value *Carry = Builder.CreateAnd(Tmp10, One);
+  Value *Tmp11 = Builder.CreateAnd(Tmp10, Divisor);
+  Value *R     = Builder.CreateSub(Tmp7, Tmp11);
+  Value *SR_2  = Builder.CreateAdd(SR_3, NegOne);
+  Value *Tmp12 = Builder.CreateICmpEQ(SR_2, Zero);
+  Builder.CreateCondBr(Tmp12, LoopExit, DoWhile);
+
+  // ; loop-exit:                                      ; preds = %do-while, %bb1
+  // ;   %carry_2 = phi i32 [ 0, %bb1 ], [ %carry, %do-while ]
+  // ;   %q_3     = phi i32 [ %q, %bb1 ], [ %q_1, %do-while ]
+  // ;   %tmp13 = shl i32 %q_3, 1
+  // ;   %q_4   = or i32 %carry_2, %tmp13
+  // ;   br label %end
+  Builder.SetInsertPoint(LoopExit);
+  PHINode *Carry_2 = Builder.CreatePHI(I32Ty, 2);
+  PHINode *Q_3     = Builder.CreatePHI(I32Ty, 2);
+  Value *Tmp13 = Builder.CreateShl(Q_3, One);
+  Value *Q_4   = Builder.CreateOr(Carry_2, Tmp13);
+  Builder.CreateBr(End);
+
+  // ; end:                                 ; preds = %loop-exit, %special-cases
+  // ;   %q_5 = phi i32 [ %q_4, %loop-exit ], [ %retVal, %special-cases ]
+  // ;   ret i32 %q_5
+  Builder.SetInsertPoint(End, End->begin());
+  PHINode *Q_5 = Builder.CreatePHI(I32Ty, 2);
+
+  // Populate the Phis, since all values have now been created. Our Phis were:
+  // ;   %carry_1 = phi i32 [ 0, %preheader ], [ %carry, %do-while ]
+  Carry_1->addIncoming(Zero, Preheader);
+  Carry_1->addIncoming(Carry, DoWhile);
+  // ;   %sr_3 = phi i32 [ %sr_1, %preheader ], [ %sr_2, %do-while ]
+  SR_3->addIncoming(SR_1, Preheader);
+  SR_3->addIncoming(SR_2, DoWhile);
+  // ;   %r_1 = phi i32 [ %tmp3, %preheader ], [ %r, %do-while ]
+  R_1->addIncoming(Tmp3, Preheader);
+  R_1->addIncoming(R, DoWhile);
+  // ;   %q_2 = phi i32 [ %q, %preheader ], [ %q_1, %do-while ]
+  Q_2->addIncoming(Q, Preheader);
+  Q_2->addIncoming(Q_1, DoWhile);
+  // ;   %carry_2 = phi i32 [ 0, %bb1 ], [ %carry, %do-while ]
+  Carry_2->addIncoming(Zero, BB1);
+  Carry_2->addIncoming(Carry, DoWhile);
+  // ;   %q_3 = phi i32 [ %q, %bb1 ], [ %q_1, %do-while ]
+  Q_3->addIncoming(Q, BB1);
+  Q_3->addIncoming(Q_1, DoWhile);
+  // ;   %q_5 = phi i32 [ %q_4, %loop-exit ], [ %retVal, %special-cases ]
+  Q_5->addIncoming(Q_4, LoopExit);
+  Q_5->addIncoming(RetVal, SpecialCases);
+
+  return Q_5;
+}
+
+/// Generate code to divide two integers, replacing Div with the generated
+/// code. This currently generates code similar to compiler-rt's
+/// implementations, but future work includes generating more specialized code
+/// when more information about the operands is known. Currently only
+/// implements 32-bit scalar division, but future work will remove this
+/// limitation.
+///
+/// @brief Replace Div with generated code.
+bool llvm::expandDivision(BinaryOperator *Div) {
+  assert((Div->getOpcode() == Instruction::SDiv ||
+          Div->getOpcode() == Instruction::UDiv) &&
+         "Trying to expand division from a non-division function");
+
+  IRBuilder<> Builder(Div);
+
+  if (Div->getType()->isVectorTy())
+    llvm_unreachable("Div over vectors not supported");
+
+  // First prepare the sign if it's a signed division
+  if (Div->getOpcode() == Instruction::SDiv) {
+    // Lower the code to unsigned division, and reset Div to point to the udiv.
+    Value *Quotient = generateSignedDivisionCode(Div->getOperand(0),
+                                                Div->getOperand(1), Builder);
+    Div->replaceAllUsesWith(Quotient);
+    Div->dropAllReferences();
+    Div->eraseFromParent();
+
+    // If we didn't actually generate a udiv instruction, we're done
+    BinaryOperator *BO = dyn_cast<BinaryOperator>(Builder.GetInsertPoint());
+    if (!BO || BO->getOpcode() != Instruction::UDiv)
+      return true;
+
+    Div = BO;
+  }
+
+  // Insert the unsigned division code
+  Value *Quotient = generateUnsignedDivisionCode(Div->getOperand(0),
+                                                 Div->getOperand(1),
+                                                 Builder);
+  Div->replaceAllUsesWith(Quotient);
+  Div->dropAllReferences();
+  Div->eraseFromParent();
+
+  return true;
+}
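
A hedged usage sketch for the new utility (the driver function is
illustrative): because expandDivision splits the containing block and erases
the original instruction, it is simplest to collect the candidate divisions
first and expand them afterwards.

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/Function.h"
    #include "llvm/Instructions.h"
    #include "llvm/Transforms/Utils/IntegerDivision.h"
    using namespace llvm;

    static bool expandAllDivisions(Function &F) {
      SmallVector<BinaryOperator*, 8> Divs;
      for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
        for (BasicBlock::iterator I = BB->begin(), IE = BB->end();
             I != IE; ++I)
          if (BinaryOperator *BO = dyn_cast<BinaryOperator>(&*I))
            if ((BO->getOpcode() == Instruction::SDiv ||
                 BO->getOpcode() == Instruction::UDiv) &&
                BO->getType()->isIntegerTy(32)) // 32-bit scalars only
              Divs.push_back(BO);
      for (unsigned i = 0, e = Divs.size(); i != e; ++i)
        expandDivision(Divs[i]); // replaces each div with expanded IR
      return !Divs.empty();
    }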

Modified: llvm/branches/R600/lib/Transforms/Utils/SimplifyCFG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Transforms/Utils/SimplifyCFG.cpp?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/lib/Transforms/Utils/SimplifyCFG.cpp (original)
+++ llvm/branches/R600/lib/Transforms/Utils/SimplifyCFG.cpp Fri Sep 21 12:29:50 2012
@@ -54,8 +54,13 @@
 DupRet("simplifycfg-dup-ret", cl::Hidden, cl::init(false),
        cl::desc("Duplicate return instructions into unconditional branches"));
 
+static cl::opt<bool>
+SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true),
+       cl::desc("Sink common instructions down to the end block"));
+
 STATISTIC(NumSpeculations, "Number of speculative executed instructions");
 STATISTIC(NumLookupTables, "Number of switch instructions turned into lookup tables");
+STATISTIC(NumSinkCommons, "Number of common instructions sunk down to the end block");
 
 namespace {
   /// ValueEqualityComparisonCase - Represents a case of a switch.
@@ -1156,6 +1161,171 @@
   return true;
 }
 
+/// SinkThenElseCodeToEnd - Given an unconditional branch that goes to BBEnd,
+/// check whether BBEnd has only two predecessors and the other predecessor
+/// ends with an unconditional branch. If so, sink any common code
+/// in the two predecessors to BBEnd.
+static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
+  assert(BI1->isUnconditional());
+  BasicBlock *BB1 = BI1->getParent();
+  BasicBlock *BBEnd = BI1->getSuccessor(0);
+
+  // Check that BBEnd has two predecessors and the other predecessor ends with
+  // an unconditional branch.
+  SmallVector<BasicBlock*, 16> Preds(pred_begin(BBEnd), pred_end(BBEnd));
+  if (Preds.size() != 2)
+    return false;
+  BasicBlock *BB2 = (Preds[0] == BB1) ? Preds[1] : Preds[0];
+  BranchInst *BI2 = dyn_cast<BranchInst>(BB2->getTerminator());
+  if (!BI2 || !BI2->isUnconditional())
+    return false;
+
+  // Gather the PHI nodes in BBEnd.
+  std::map<Value*, std::pair<Value*, PHINode*> > MapValueFromBB1ToBB2;
+  Instruction *FirstNonPhiInBBEnd = 0;
+  for (BasicBlock::iterator I = BBEnd->begin(), E = BBEnd->end();
+       I != E; ++I) {
+    if (PHINode *PN = dyn_cast<PHINode>(I)) {
+      Value *BB1V = PN->getIncomingValueForBlock(BB1);
+      Value *BB2V = PN->getIncomingValueForBlock(BB2); 
+      MapValueFromBB1ToBB2[BB1V] = std::make_pair(BB2V, PN);
+    } else {
+      FirstNonPhiInBBEnd = &*I;
+      break;
+    }
+  }
+  if (!FirstNonPhiInBBEnd)
+    return false;
+  
+
+  // This does very trivial matching, with limited scanning, to find identical
+  // instructions in the two blocks.  We scan backward for obviously identical
+  // instructions in an identical order.
+  BasicBlock::InstListType::reverse_iterator RI1 = BB1->getInstList().rbegin(),
+      RE1 = BB1->getInstList().rend(), RI2 = BB2->getInstList().rbegin(),
+      RE2 = BB2->getInstList().rend();
+  // Skip debug info.
+  while (RI1 != RE1 && isa<DbgInfoIntrinsic>(&*RI1)) ++RI1;
+  if (RI1 == RE1)
+    return false;
+  while (RI2 != RE2 && isa<DbgInfoIntrinsic>(&*RI2)) ++RI2;
+  if (RI2 == RE2)
+    return false;
+  // Skip the unconditional branches.
+  ++RI1;
+  ++RI2;
+
+  bool Changed = false;
+  while (RI1 != RE1 && RI2 != RE2) {
+    // Skip debug info.
+    while (RI1 != RE1 && isa<DbgInfoIntrinsic>(&*RI1)) ++RI1;
+    if (RI1 == RE1)
+      return Changed;
+    while (RI2 != RE2 && isa<DbgInfoIntrinsic>(&*RI2)) ++RI2;
+    if (RI2 == RE2)
+      return Changed;
+
+    Instruction *I1 = &*RI1, *I2 = &*RI2;
+    // I1 and I2 should have a single use in the same PHI node, and they
+    // perform the same operation.
+    // Cannot move control-flow instructions, volatile loads, vaarg, etc.
+    if (isa<PHINode>(I1) || isa<PHINode>(I2) ||
+        isa<TerminatorInst>(I1) || isa<TerminatorInst>(I2) ||
+        isa<LandingPadInst>(I1) || isa<LandingPadInst>(I2) ||
+        isa<AllocaInst>(I1) || isa<AllocaInst>(I2) ||
+        I1->mayHaveSideEffects() || I2->mayHaveSideEffects() ||
+        I1->mayReadOrWriteMemory() || I2->mayReadOrWriteMemory() ||
+        !I1->hasOneUse() || !I2->hasOneUse() ||
+        MapValueFromBB1ToBB2.find(I1) == MapValueFromBB1ToBB2.end() ||
+        MapValueFromBB1ToBB2[I1].first != I2)
+      return Changed;
+
+    // Check whether we should swap the operands of ICmpInst.
+    ICmpInst *ICmp1 = dyn_cast<ICmpInst>(I1), *ICmp2 = dyn_cast<ICmpInst>(I2);
+    bool SwapOpnds = false;
+    if (ICmp1 && ICmp2 &&
+        ICmp1->getOperand(0) != ICmp2->getOperand(0) &&
+        ICmp1->getOperand(1) != ICmp2->getOperand(1) &&
+        (ICmp1->getOperand(0) == ICmp2->getOperand(1) ||
+         ICmp1->getOperand(1) == ICmp2->getOperand(0))) {
+      ICmp2->swapOperands();
+      SwapOpnds = true;
+    }
+    if (!I1->isSameOperationAs(I2)) {
+      if (SwapOpnds)
+        ICmp2->swapOperands();
+      return Changed;
+    }
+
+    // The operands should be either the same or they need to be generated
+    // with a PHI node after sinking. We only handle the case where there is
+    // a single pair of different operands.
+    Value *DifferentOp1 = 0, *DifferentOp2 = 0;
+    unsigned Op1Idx = 0;
+    for (unsigned I = 0, E = I1->getNumOperands(); I != E; ++I) {
+      if (I1->getOperand(I) == I2->getOperand(I))
+        continue;
+      // Early exit if we have more than one pair of different operands or
+      // the different operand is already in MapValueFromBB1ToBB2.
+      // Early exit if we need a PHI node to replace a constant.
+      if (DifferentOp1 ||
+          MapValueFromBB1ToBB2.find(I1->getOperand(I)) !=
+          MapValueFromBB1ToBB2.end() ||
+          isa<Constant>(I1->getOperand(I)) ||
+          isa<Constant>(I2->getOperand(I))) {
+        // If we can't sink the instructions, undo the swapping.
+        if (SwapOpnds)
+          ICmp2->swapOperands();
+        return Changed;
+      }
+      DifferentOp1 = I1->getOperand(I);
+      Op1Idx = I;
+      DifferentOp2 = I2->getOperand(I);
+    }
+
+    // We insert the pair of different operands to MapValueFromBB1ToBB2 and
+    // remove (I1, I2) from MapValueFromBB1ToBB2.
+    if (DifferentOp1) {
+      PHINode *NewPN = PHINode::Create(DifferentOp1->getType(), 2,
+                                       DifferentOp1->getName() + ".sink",
+                                       BBEnd->begin());
+      MapValueFromBB1ToBB2[DifferentOp1] = std::make_pair(DifferentOp2, NewPN);
+      // I1 should use NewPN instead of DifferentOp1.
+      I1->setOperand(Op1Idx, NewPN);
+      NewPN->addIncoming(DifferentOp1, BB1);
+      NewPN->addIncoming(DifferentOp2, BB2);
+      DEBUG(dbgs() << "Create PHI node " << *NewPN << "\n";);
+    }
+    PHINode *OldPN = MapValueFromBB1ToBB2[I1].second;
+    MapValueFromBB1ToBB2.erase(I1);
+
+    DEBUG(dbgs() << "SINK common instructions " << *I1 << "\n";);
+    DEBUG(dbgs() << "                         " << *I2 << "\n";);
+    // We need to update RE1 and RE2 if we are going to sink the first
+    // instruction in the basic block down.
+    bool UpdateRE1 = (I1 == BB1->begin()), UpdateRE2 = (I2 == BB2->begin());
+    // Sink the instruction.
+    BBEnd->getInstList().splice(FirstNonPhiInBBEnd, BB1->getInstList(), I1);
+    if (!OldPN->use_empty())
+      OldPN->replaceAllUsesWith(I1);
+    OldPN->eraseFromParent();
+
+    if (!I2->use_empty())
+      I2->replaceAllUsesWith(I1);
+    I1->intersectOptionalDataWith(I2);
+    I2->eraseFromParent();
+
+    if (UpdateRE1)
+      RE1 = BB1->getInstList().rend();
+    if (UpdateRE2)
+      RE2 = BB2->getInstList().rend();
+    FirstNonPhiInBBEnd = I1;
+    NumSinkCommons++;
+    Changed = true;
+  }
+  return Changed;
+}
+
 /// SpeculativelyExecuteBB - Given a conditional branch that goes to BB1
 /// and an BB2 and the only successor of BB1 is BB2, hoist simple code
 /// (for now, restricted to a single instruction that's side effect free) from
@@ -3258,17 +3428,16 @@
                                            "switch.gep");
     Value *Result = Builder.CreateLoad(GEP, "switch.load");
 
-    // If the result is only going to be used to return from the function,
-    // we want to do that right here.
-    if (PHI->hasOneUse() && isa<ReturnInst>(*PHI->use_begin())) {
-      if (CommonDest->getFirstNonPHIOrDbg() == CommonDest->getTerminator()) {
-        Builder.CreateRet(Result);
-        ReturnedEarly = true;
-      }
+    // If the result is used to return immediately from the function, we want to
+    // do that right here.
+    if (PHI->hasOneUse() && isa<ReturnInst>(*PHI->use_begin()) &&
+        *PHI->use_begin() == CommonDest->getFirstNonPHIOrDbg()) {
+      Builder.CreateRet(Result);
+      ReturnedEarly = true;
+      break;
     }
 
-    if (!ReturnedEarly)
-      PHI->addIncoming(Result, LookupBB);
+    PHI->addIncoming(Result, LookupBB);
   }
 
   if (!ReturnedEarly)
@@ -3371,6 +3540,9 @@
 bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){
   BasicBlock *BB = BI->getParent();
 
+  if (SinkCommon && SinkThenElseCodeToEnd(BI))
+    return true;
+
   // If the Terminator is the only non-phi instruction, simplify the block.
   BasicBlock::iterator I = BB->getFirstNonPHIOrDbgOrLifetime();
   if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
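
At the source level, the new sinking transformation fires on patterns like the
following (illustrative only): both arms perform the same operation and differ
in a single operand, so the operation sinks into the join block, fed by a new
phi of the differing inputs.

    int sinkable(bool C, int X, int Y) {
      int R;
      if (C)
        R = X + 1;   // same opcode as the other arm, one differing operand
      else
        R = Y + 1;   // the add sinks below the branch, fed by phi(X, Y)
      return R;      // R is the phi in BBEnd that both arms feed
    }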

Modified: llvm/branches/R600/lib/VMCore/AsmWriter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/VMCore/AsmWriter.cpp?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/lib/VMCore/AsmWriter.cpp (original)
+++ llvm/branches/R600/lib/VMCore/AsmWriter.cpp Fri Sep 21 12:29:50 2012
@@ -1244,7 +1244,7 @@
   TypePrinter.print(Operand->getType(), Out);
   // Print parameter attributes list
   if (Attrs != Attribute::None)
-    Out << ' ' << Attribute::getAsString(Attrs);
+    Out << ' ' << Attrs.getAsString();
   Out << ' ';
   // Print the operand
   WriteAsOperandInternal(Out, Operand, &TypePrinter, &Machine, TheModule);
@@ -1557,7 +1557,7 @@
   const AttrListPtr &Attrs = F->getAttributes();
   Attributes RetAttrs = Attrs.getRetAttributes();
   if (RetAttrs != Attribute::None)
-    Out <<  Attribute::getAsString(Attrs.getRetAttributes()) << ' ';
+    Out <<  Attrs.getRetAttributes().getAsString() << ' ';
   TypePrinter.print(F->getReturnType(), Out);
   Out << ' ';
   WriteAsOperandInternal(Out, F, &TypePrinter, &Machine, F->getParent());
@@ -1587,7 +1587,7 @@
 
       Attributes ArgAttrs = Attrs.getParamAttributes(i+1);
       if (ArgAttrs != Attribute::None)
-        Out << ' ' << Attribute::getAsString(ArgAttrs);
+        Out << ' ' << ArgAttrs.getAsString();
     }
   }
 
@@ -1601,7 +1601,7 @@
     Out << " unnamed_addr";
   Attributes FnAttrs = Attrs.getFnAttributes();
   if (FnAttrs != Attribute::None)
-    Out << ' ' << Attribute::getAsString(Attrs.getFnAttributes());
+    Out << ' ' << Attrs.getFnAttributes().getAsString();
   if (F->hasSection()) {
     Out << " section \"";
     PrintEscapedString(F->getSection(), Out);
@@ -1635,7 +1635,7 @@
 
   // Output parameter attributes list
   if (Attrs != Attribute::None)
-    Out << ' ' << Attribute::getAsString(Attrs);
+    Out << ' ' << Attrs.getAsString();
 
   // Output name, if available...
   if (Arg->hasName()) {
@@ -1850,7 +1850,7 @@
     const AttrListPtr &PAL = CI->getAttributes();
 
     if (PAL.getRetAttributes() != Attribute::None)
-      Out << ' ' << Attribute::getAsString(PAL.getRetAttributes());
+      Out << ' ' << PAL.getRetAttributes().getAsString();
 
     // If possible, print out the short form of the call instruction.  We can
     // only do this if the first argument is a pointer to a nonvararg function,
@@ -1874,7 +1874,7 @@
     }
     Out << ')';
     if (PAL.getFnAttributes() != Attribute::None)
-      Out << ' ' << Attribute::getAsString(PAL.getFnAttributes());
+      Out << ' ' << PAL.getFnAttributes().getAsString();
   } else if (const InvokeInst *II = dyn_cast<InvokeInst>(&I)) {
     Operand = II->getCalledValue();
     PointerType *PTy = cast<PointerType>(Operand->getType());
@@ -1889,7 +1889,7 @@
     }
 
     if (PAL.getRetAttributes() != Attribute::None)
-      Out << ' ' << Attribute::getAsString(PAL.getRetAttributes());
+      Out << ' ' << PAL.getRetAttributes().getAsString();
 
     // If possible, print out the short form of the invoke instruction. We can
     // only do this if the first argument is a pointer to a nonvararg function,
@@ -1914,7 +1914,7 @@
 
     Out << ')';
     if (PAL.getFnAttributes() != Attribute::None)
-      Out << ' ' << Attribute::getAsString(PAL.getFnAttributes());
+      Out << ' ' << PAL.getFnAttributes().getAsString();
 
     Out << "\n          to ";
     writeOperand(II->getNormalDest(), true);
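
The AsmWriter churn above is mechanical: attribute printing moves from the
static Attribute::getAsString(Attributes) helper to a member function on the
Attributes class, matching the new has*Attr() predicates introduced in
Attributes.cpp below. A sketch of the call-site change (the wrapper function
is illustrative):

    #include "llvm/Attributes.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    void printRetAttrs(raw_ostream &OS, const AttrListPtr &PAL) {
      Attributes RetAttrs = PAL.getRetAttributes();
      if (RetAttrs != Attribute::None)
        OS << ' ' << RetAttrs.getAsString(); // was Attribute::getAsString(...)
    }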

Modified: llvm/branches/R600/lib/VMCore/Attributes.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/VMCore/Attributes.cpp?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/lib/VMCore/Attributes.cpp (original)
+++ llvm/branches/R600/lib/VMCore/Attributes.cpp Fri Sep 21 12:29:50 2012
@@ -26,66 +26,66 @@
 // Attribute Function Definitions
 //===----------------------------------------------------------------------===//
 
-std::string Attribute::getAsString(Attributes Attrs) {
+std::string Attributes::getAsString() const {
   std::string Result;
-  if (Attrs & Attribute::ZExt)
+  if (hasZExtAttr())
     Result += "zeroext ";
-  if (Attrs & Attribute::SExt)
+  if (hasSExtAttr())
     Result += "signext ";
-  if (Attrs & Attribute::NoReturn)
+  if (hasNoReturnAttr())
     Result += "noreturn ";
-  if (Attrs & Attribute::NoUnwind)
+  if (hasNoUnwindAttr())
     Result += "nounwind ";
-  if (Attrs & Attribute::UWTable)
+  if (hasUWTableAttr())
     Result += "uwtable ";
-  if (Attrs & Attribute::ReturnsTwice)
+  if (hasReturnsTwiceAttr())
     Result += "returns_twice ";
-  if (Attrs & Attribute::InReg)
+  if (hasInRegAttr())
     Result += "inreg ";
-  if (Attrs & Attribute::NoAlias)
+  if (hasNoAliasAttr())
     Result += "noalias ";
-  if (Attrs & Attribute::NoCapture)
+  if (hasNoCaptureAttr())
     Result += "nocapture ";
-  if (Attrs & Attribute::StructRet)
+  if (hasStructRetAttr())
     Result += "sret ";
-  if (Attrs & Attribute::ByVal)
+  if (hasByValAttr())
     Result += "byval ";
-  if (Attrs & Attribute::Nest)
+  if (hasNestAttr())
     Result += "nest ";
-  if (Attrs & Attribute::ReadNone)
+  if (hasReadNoneAttr())
     Result += "readnone ";
-  if (Attrs & Attribute::ReadOnly)
+  if (hasReadOnlyAttr())
     Result += "readonly ";
-  if (Attrs & Attribute::OptimizeForSize)
+  if (hasOptimizeForSizeAttr())
     Result += "optsize ";
-  if (Attrs & Attribute::NoInline)
+  if (hasNoInlineAttr())
     Result += "noinline ";
-  if (Attrs & Attribute::InlineHint)
+  if (hasInlineHintAttr())
     Result += "inlinehint ";
-  if (Attrs & Attribute::AlwaysInline)
+  if (hasAlwaysInlineAttr())
     Result += "alwaysinline ";
-  if (Attrs & Attribute::StackProtect)
+  if (hasStackProtectAttr())
     Result += "ssp ";
-  if (Attrs & Attribute::StackProtectReq)
+  if (hasStackProtectReqAttr())
     Result += "sspreq ";
-  if (Attrs & Attribute::NoRedZone)
+  if (hasNoRedZoneAttr())
     Result += "noredzone ";
-  if (Attrs & Attribute::NoImplicitFloat)
+  if (hasNoImplicitFloatAttr())
     Result += "noimplicitfloat ";
-  if (Attrs & Attribute::Naked)
+  if (hasNakedAttr())
     Result += "naked ";
-  if (Attrs & Attribute::NonLazyBind)
+  if (hasNonLazyBindAttr())
     Result += "nonlazybind ";
-  if (Attrs & Attribute::AddressSafety)
+  if (hasAddressSafetyAttr())
     Result += "address_safety ";
-  if (Attrs & Attribute::StackAlignment) {
+  if (hasStackAlignmentAttr()) {
     Result += "alignstack(";
-    Result += utostr(Attribute::getStackAlignmentFromAttrs(Attrs));
+    Result += utostr(getStackAlignment());
     Result += ") ";
   }
-  if (Attrs & Attribute::Alignment) {
+  if (hasAlignmentAttr()) {
     Result += "align ";
-    Result += utostr(Attribute::getAlignmentFromAttrs(Attrs));
+    Result += utostr(getAlignment());
     Result += " ";
   }
   // Trim the trailing space.
@@ -174,7 +174,7 @@
   
 #ifndef NDEBUG
   for (unsigned i = 0, e = Attrs.size(); i != e; ++i) {
-    assert(Attrs[i].Attrs != Attribute::None && 
+    assert(Attrs[i].Attrs.hasAttributes() && 
            "Pointless attribute!");
     assert((!i || Attrs[i-1].Index < Attrs[i].Index) &&
            "Misordered AttributesList!");
@@ -247,13 +247,14 @@
 /// returned.  Attributes for the result are denoted with Idx = 0.
 /// Function notes are denoted with idx = ~0.
 Attributes AttrListPtr::getAttributes(unsigned Idx) const {
-  if (AttrList == 0) return Attribute::None;
+  if (AttrList == 0) return Attributes();
   
   const SmallVector<AttributeWithIndex, 4> &Attrs = AttrList->Attrs;
   for (unsigned i = 0, e = Attrs.size(); i != e && Attrs[i].Index <= Idx; ++i)
     if (Attrs[i].Index == Idx)
       return Attrs[i].Attrs;
-  return Attribute::None;
+
+  return Attributes();
 }
 
 /// hasAttrSomewhere - Return true if the specified attribute is set for at
@@ -263,7 +264,7 @@
   
   const SmallVector<AttributeWithIndex, 4> &Attrs = AttrList->Attrs;
   for (unsigned i = 0, e = Attrs.size(); i != e; ++i)
-    if (Attrs[i].Attrs & Attr)
+    if (Attrs[i].Attrs.hasAttributes(Attr))
       return true;
   return false;
 }
@@ -274,8 +275,8 @@
 #ifndef NDEBUG
   // FIXME it is not obvious how this should work for alignment.
   // For now, say we can't change a known alignment.
-  Attributes OldAlign = OldAttrs & Attribute::Alignment;
-  Attributes NewAlign = Attrs & Attribute::Alignment;
+  unsigned OldAlign = OldAttrs.getAlignment();
+  unsigned NewAlign = Attrs.getAlignment();
   assert((!OldAlign || !NewAlign || OldAlign == NewAlign) &&
          "Attempt to change alignment!");
 #endif
@@ -314,7 +315,7 @@
 #ifndef NDEBUG
   // FIXME it is not obvious how this should work for alignment.
   // For now, say we can't pass in alignment, which no current use does.
-  assert(!(Attrs & Attribute::Alignment) && "Attempt to exclude alignment!");
+  assert(!Attrs.hasAlignmentAttr() && "Attempt to exclude alignment!");
 #endif
   if (AttrList == 0) return AttrListPtr();
   
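Taken together, these Attributes.cpp hunks replace raw bit tests against Attribute::* masks with named predicates, and replace Attribute::None with a default-constructed Attributes. A minimal sketch of the new query style, using only methods visible in the diff above (function and variable names are hypothetical):

    #include "llvm/Attributes.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    void printAttrs(const AttrListPtr &PAL, unsigned Idx) {
      Attributes A = PAL.getAttributes(Idx);  // Attributes() when none recorded
      if (A.hasAttributes())                  // replaces: A != Attribute::None
        errs() << A.getAsString() << '\n';    // e.g. "nounwind readonly"
      if (A.hasAlignmentAttr())
        errs() << "align " << A.getAlignment() << '\n';
    }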

Modified: llvm/branches/R600/lib/VMCore/Core.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/VMCore/Core.cpp?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/lib/VMCore/Core.cpp (original)
+++ llvm/branches/R600/lib/VMCore/Core.cpp Fri Sep 21 12:29:50 2012
@@ -568,6 +568,19 @@
   return 0;
 }
 
+unsigned LLVMGetMDNodeNumOperands(LLVMValueRef V)
+{
+  return cast<MDNode>(unwrap(V))->getNumOperands();
+}
+
+void LLVMGetMDNodeOperands(LLVMValueRef V, LLVMValueRef *Dest)
+{
+  const MDNode *N = cast<MDNode>(unwrap(V));
+  const unsigned numOperands = N->getNumOperands();
+  for (unsigned i = 0; i < numOperands; i++)
+    Dest[i] = wrap(N->getOperand(i));
+}
+
 unsigned LLVMGetNamedMetadataNumOperands(LLVMModuleRef M, const char* name)
 {
   if (NamedMDNode *N = unwrap(M)->getNamedMetadata(name)) {
@@ -1462,7 +1475,7 @@
 
 void LLVMSetParamAlignment(LLVMValueRef Arg, unsigned align) {
   unwrap<Argument>(Arg)->addAttr(
-          Attribute::constructAlignmentFromInt(align));
+          Attributes::constructAlignmentFromInt(align));
 }
 
 /*--.. Operations on basic blocks ..........................................--*/
@@ -1667,7 +1680,7 @@
   CallSite Call = CallSite(unwrap<Instruction>(Instr));
   Call.setAttributes(
     Call.getAttributes().addAttr(index, 
-        Attribute::constructAlignmentFromInt(align)));
+        Attributes::constructAlignmentFromInt(align)));
 }
 
 /*--.. Operations on call instructions (only) ..............................--*/
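The two added C API entry points follow the usual count-then-fill convention: size a buffer with LLVMGetMDNodeNumOperands, then let LLVMGetMDNodeOperands fill it. A usage sketch (MD is a hypothetical LLVMValueRef wrapping an MDNode):

    #include "llvm-c/Core.h"
    #include <vector>

    void collectOperands(LLVMValueRef MD) {
      std::vector<LLVMValueRef> Ops(LLVMGetMDNodeNumOperands(MD));
      LLVMGetMDNodeOperands(MD, Ops.data());  // writes every operand, in order
    }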

Modified: llvm/branches/R600/lib/VMCore/Type.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/VMCore/Type.cpp?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/lib/VMCore/Type.cpp (original)
+++ llvm/branches/R600/lib/VMCore/Type.cpp Fri Sep 21 12:29:50 2012
@@ -220,8 +220,6 @@
   return cast<StructType>(this)->getElementType(N);
 }
 
-
-
 Type *Type::getSequentialElementType() const {
   return cast<SequentialType>(this)->getElementType();
 }
@@ -239,8 +237,6 @@
 }
 
 
-
-
 //===----------------------------------------------------------------------===//
 //                          Primitive 'Type' data
 //===----------------------------------------------------------------------===//
@@ -400,12 +396,10 @@
   return FT;
 }
 
-
 FunctionType *FunctionType::get(Type *Result, bool isVarArg) {
   return get(Result, ArrayRef<Type *>(), isVarArg);
 }
 
-
 /// isValidReturnType - Return true if the specified type is valid as a return
 /// type.
 bool FunctionType::isValidReturnType(Type *RetTy) {
@@ -553,7 +547,6 @@
   return create(Context, StringRef());
 }
 
-
 StructType *StructType::create(ArrayRef<Type*> Elements, StringRef Name,
                                bool isPacked) {
   assert(!Elements.empty() &&
@@ -637,7 +630,6 @@
   return std::equal(element_begin(), element_end(), Other->element_begin());
 }
 
-
 /// getTypeByName - Return the type with the specified name, or null if there
 /// is none by that name.
 StructType *Module::getTypeByName(StringRef Name) const {
@@ -700,7 +692,6 @@
   NumElements = NumEl;
 }
 
-
 ArrayType *ArrayType::get(Type *elementType, uint64_t NumElements) {
   Type *ElementType = const_cast<Type*>(elementType);
   assert(isValidElementType(ElementType) && "Invalid type for array element!");

Modified: llvm/branches/R600/lib/VMCore/ValueTypes.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/VMCore/ValueTypes.cpp?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/lib/VMCore/ValueTypes.cpp (original)
+++ llvm/branches/R600/lib/VMCore/ValueTypes.cpp Fri Sep 21 12:29:50 2012
@@ -55,6 +55,14 @@
   return LLVMTy->isVectorTy();
 }
 
+bool EVT::isExtended16BitVector() const {
+  return isExtendedVector() && getSizeInBits() == 16;
+}
+
+bool EVT::isExtended32BitVector() const {
+  return isExtendedVector() && getSizeInBits() == 32;
+}
+
 bool EVT::isExtended64BitVector() const {
   return isExtendedVector() && getSizeInBits() == 64;
 }
@@ -120,15 +128,21 @@
   case MVT::Other:   return "ch";
   case MVT::Glue:    return "glue";
   case MVT::x86mmx:  return "x86mmx";
+  case MVT::v2i1:    return "v2i1";
+  case MVT::v4i1:    return "v4i1";
+  case MVT::v8i1:    return "v8i1";
+  case MVT::v16i1:   return "v16i1";
   case MVT::v2i8:    return "v2i8";
   case MVT::v4i8:    return "v4i8";
   case MVT::v8i8:    return "v8i8";
   case MVT::v16i8:   return "v16i8";
   case MVT::v32i8:   return "v32i8";
+  case MVT::v1i16:   return "v1i16";
   case MVT::v2i16:   return "v2i16";
   case MVT::v4i16:   return "v4i16";
   case MVT::v8i16:   return "v8i16";
   case MVT::v16i16:  return "v16i16";
+  case MVT::v1i32:   return "v1i32";
   case MVT::v2i32:   return "v2i32";
   case MVT::v4i32:   return "v4i32";
   case MVT::v8i32:   return "v8i32";
@@ -171,15 +185,21 @@
   case MVT::f128:    return Type::getFP128Ty(Context);
   case MVT::ppcf128: return Type::getPPC_FP128Ty(Context);
   case MVT::x86mmx:  return Type::getX86_MMXTy(Context);
+  case MVT::v2i1:    return VectorType::get(Type::getInt1Ty(Context), 2);
+  case MVT::v4i1:    return VectorType::get(Type::getInt1Ty(Context), 4);
+  case MVT::v8i1:    return VectorType::get(Type::getInt1Ty(Context), 8);
+  case MVT::v16i1:   return VectorType::get(Type::getInt1Ty(Context), 16);
   case MVT::v2i8:    return VectorType::get(Type::getInt8Ty(Context), 2);
   case MVT::v4i8:    return VectorType::get(Type::getInt8Ty(Context), 4);
   case MVT::v8i8:    return VectorType::get(Type::getInt8Ty(Context), 8);
   case MVT::v16i8:   return VectorType::get(Type::getInt8Ty(Context), 16);
   case MVT::v32i8:   return VectorType::get(Type::getInt8Ty(Context), 32);
+  case MVT::v1i16:   return VectorType::get(Type::getInt16Ty(Context), 1);
   case MVT::v2i16:   return VectorType::get(Type::getInt16Ty(Context), 2);
   case MVT::v4i16:   return VectorType::get(Type::getInt16Ty(Context), 4);
   case MVT::v8i16:   return VectorType::get(Type::getInt16Ty(Context), 8);
   case MVT::v16i16:  return VectorType::get(Type::getInt16Ty(Context), 16);
+  case MVT::v1i32:   return VectorType::get(Type::getInt32Ty(Context), 1);
   case MVT::v2i32:   return VectorType::get(Type::getInt32Ty(Context), 2);
   case MVT::v4i32:   return VectorType::get(Type::getInt32Ty(Context), 4);
   case MVT::v8i32:   return VectorType::get(Type::getInt32Ty(Context), 8);
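The newly added simple types (v2i1 through v16i1, v1i16, v1i32) slot into both EVT switches; assuming EVT::getVectorVT keeps its usual signature, the round trip looks like this sketch (Context is a hypothetical LLVMContext &):

    EVT VT = EVT::getVectorVT(Context, MVT::i1, 4);  // MVT::v4i1
    std::string Name = VT.getEVTString();            // "v4i1", name switch above
    Type *Ty = VT.getTypeForEVT(Context);            // <4 x i1>, type switch above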

Modified: llvm/branches/R600/lib/VMCore/Verifier.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/VMCore/Verifier.cpp?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/lib/VMCore/Verifier.cpp (original)
+++ llvm/branches/R600/lib/VMCore/Verifier.cpp Fri Sep 21 12:29:50 2012
@@ -530,12 +530,12 @@
     return;
 
   Attributes FnCheckAttr = Attrs & Attribute::FunctionOnly;
-  Assert1(!FnCheckAttr, "Attribute " + Attribute::getAsString(FnCheckAttr) +
+  Assert1(!FnCheckAttr, "Attribute " + FnCheckAttr.getAsString() +
           " only applies to the function!", V);
 
   if (isReturnValue) {
     Attributes RetI = Attrs & Attribute::ParameterOnly;
-    Assert1(!RetI, "Attribute " + Attribute::getAsString(RetI) +
+    Assert1(!RetI, "Attribute " + RetI.getAsString() +
             " does not apply to return values!", V);
   }
 
@@ -543,21 +543,21 @@
        i < array_lengthof(Attribute::MutuallyIncompatible); ++i) {
     Attributes MutI = Attrs & Attribute::MutuallyIncompatible[i];
     Assert1(MutI.isEmptyOrSingleton(), "Attributes " +
-            Attribute::getAsString(MutI) + " are incompatible!", V);
+            MutI.getAsString() + " are incompatible!", V);
   }
 
   Attributes TypeI = Attrs & Attribute::typeIncompatible(Ty);
   Assert1(!TypeI, "Wrong type for attribute " +
-          Attribute::getAsString(TypeI), V);
+          TypeI.getAsString(), V);
 
   Attributes ByValI = Attrs & Attribute::ByVal;
   if (PointerType *PTy = dyn_cast<PointerType>(Ty)) {
     Assert1(!ByValI || PTy->getElementType()->isSized(),
-            "Attribute " + Attribute::getAsString(ByValI) +
+            "Attribute " + ByValI.getAsString() +
             " does not support unsized types!", V);
   } else {
     Assert1(!ByValI,
-            "Attribute " + Attribute::getAsString(ByValI) +
+            "Attribute " + ByValI.getAsString() +
             " only applies to parameters with pointer type!", V);
   }
 }
@@ -585,25 +585,25 @@
 
     VerifyParameterAttrs(Attr.Attrs, Ty, Attr.Index == 0, V);
 
-    if (Attr.Attrs & Attribute::Nest) {
+    if (Attr.Attrs.hasNestAttr()) {
       Assert1(!SawNest, "More than one parameter has attribute nest!", V);
       SawNest = true;
     }
 
-    if (Attr.Attrs & Attribute::StructRet)
+    if (Attr.Attrs.hasStructRetAttr())
       Assert1(Attr.Index == 1, "Attribute sret not on first parameter!", V);
   }
 
   Attributes FAttrs = Attrs.getFnAttributes();
   Attributes NotFn = FAttrs & (~Attribute::FunctionOnly);
-  Assert1(!NotFn, "Attribute " + Attribute::getAsString(NotFn) +
+  Assert1(!NotFn, "Attribute " + NotFn.getAsString() +
           " does not apply to the function!", V);
 
   for (unsigned i = 0;
        i < array_lengthof(Attribute::MutuallyIncompatible); ++i) {
     Attributes MutI = FAttrs & Attribute::MutuallyIncompatible[i];
     Assert1(MutI.isEmptyOrSingleton(), "Attributes " +
-            Attribute::getAsString(MutI) + " are incompatible!", V);
+            MutI.getAsString() + " are incompatible!", V);
   }
 }
 
@@ -1171,7 +1171,7 @@
       VerifyParameterAttrs(Attr, CS.getArgument(Idx-1)->getType(), false, I);
 
       Attributes VArgI = Attr & Attribute::VarArgsIncompatible;
-      Assert1(!VArgI, "Attribute " + Attribute::getAsString(VArgI) +
+      Assert1(!VArgI, "Attribute " + VArgI.getAsString() +
               " cannot be used for vararg call arguments!", I);
     }
 

Modified: llvm/branches/R600/test/Bitcode/blockaddress.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Bitcode/blockaddress.ll?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/test/Bitcode/blockaddress.ll (original)
+++ llvm/branches/R600/test/Bitcode/blockaddress.ll Fri Sep 21 12:29:50 2012
@@ -28,3 +28,18 @@
 end:
   ret void
 }
+
+; PR13895
+define void @doitagain(i8** nocapture %pptr) {
+; CHECK: define void @doitagain
+entry:
+  br label %here
+
+here:
+  store i8* blockaddress(@doit, %here), i8** %pptr, align 8
+; CHECK: blockaddress(@doit, %here)
+  br label %end
+
+end:
+  ret void
+}

Added: llvm/branches/R600/test/CodeGen/ARM/2012-09-18-ARMv4ISelBug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/ARM/2012-09-18-ARMv4ISelBug.ll?rev=164384&view=auto
==============================================================================
--- llvm/branches/R600/test/CodeGen/ARM/2012-09-18-ARMv4ISelBug.ll (added)
+++ llvm/branches/R600/test/CodeGen/ARM/2012-09-18-ARMv4ISelBug.ll Fri Sep 21 12:29:50 2012
@@ -0,0 +1,11 @@
+; RUN: llc < %s -march=arm -mcpu=arm7tdmi | FileCheck %s
+
+; movw is only legal for V6T2 and later.
+; rdar://12300648
+
+define i32 @t(i32 %x) {
+; CHECK: t:
+; CHECK-NOT: movw
+  %tmp = add i32 %x, -65535
+  ret i32 %tmp
+}

Modified: llvm/branches/R600/test/CodeGen/ARM/coalesce-subregs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/ARM/coalesce-subregs.ll?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/ARM/coalesce-subregs.ll (original)
+++ llvm/branches/R600/test/CodeGen/ARM/coalesce-subregs.ll Fri Sep 21 12:29:50 2012
@@ -114,3 +114,78 @@
 }
 
 declare void @llvm.arm.neon.vst1.v2f32(i8*, <2 x float>, i32) nounwind
+declare <2 x float> @llvm.arm.neon.vld1.v2f32(i8*, i32) nounwind readonly
+
+; CHECK: f4
+; This function inserts a lane into a fully defined vector.
+; The destination lane isn't read, so the subregs can coalesce.
+; CHECK-NOT: vmov
+; CHECK-NOT: vorr
+define void @f4(float* %p, float* %q) nounwind ssp {
+entry:
+  %0 = bitcast float* %p to i8*
+  %vld1 = tail call <2 x float> @llvm.arm.neon.vld1.v2f32(i8* %0, i32 4)
+  %tobool = icmp eq float* %q, null
+  br i1 %tobool, label %if.end, label %if.then
+
+if.then:                                          ; preds = %entry
+  %1 = load float* %q, align 4
+  %arrayidx1 = getelementptr inbounds float* %q, i32 1
+  %2 = load float* %arrayidx1, align 4
+  %add = fadd float %1, %2
+  %vecins = insertelement <2 x float> %vld1, float %add, i32 1
+  br label %if.end
+
+if.end:                                           ; preds = %entry, %if.then
+  %x.0 = phi <2 x float> [ %vecins, %if.then ], [ %vld1, %entry ]
+  tail call void @llvm.arm.neon.vst1.v2f32(i8* %0, <2 x float> %x.0, i32 4)
+  ret void
+}
+
+; CHECK: f5
+; Coalesce vector lanes through phis.
+; CHECK: vmov.f32 {{.*}}, #1.0
+; CHECK-NOT: vmov
+; CHECK-NOT: vorr
+; CHECK: %if.end
+; We may leave the last insertelement in the if.end block.
+; It is inserting the %add value into a dead lane, but %add causes interference
+; in the entry block, and we don't do dead lane checks across basic blocks.
+define void @f5(float* %p, float* %q) nounwind ssp {
+entry:
+  %0 = bitcast float* %p to i8*
+  %vld1 = tail call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %0, i32 4)
+  %vecext = extractelement <4 x float> %vld1, i32 0
+  %vecext1 = extractelement <4 x float> %vld1, i32 1
+  %vecext2 = extractelement <4 x float> %vld1, i32 2
+  %vecext3 = extractelement <4 x float> %vld1, i32 3
+  %add = fadd float %vecext3, 1.000000e+00
+  %tobool = icmp eq float* %q, null
+  br i1 %tobool, label %if.end, label %if.then
+
+if.then:                                          ; preds = %entry
+  %arrayidx = getelementptr inbounds float* %q, i32 1
+  %1 = load float* %arrayidx, align 4
+  %add4 = fadd float %vecext, %1
+  %2 = load float* %q, align 4
+  %add6 = fadd float %vecext1, %2
+  %arrayidx7 = getelementptr inbounds float* %q, i32 2
+  %3 = load float* %arrayidx7, align 4
+  %add8 = fadd float %vecext2, %3
+  br label %if.end
+
+if.end:                                           ; preds = %entry, %if.then
+  %a.0 = phi float [ %add4, %if.then ], [ %vecext, %entry ]
+  %b.0 = phi float [ %add6, %if.then ], [ %vecext1, %entry ]
+  %c.0 = phi float [ %add8, %if.then ], [ %vecext2, %entry ]
+  %vecinit = insertelement <4 x float> undef, float %a.0, i32 0
+  %vecinit9 = insertelement <4 x float> %vecinit, float %b.0, i32 1
+  %vecinit10 = insertelement <4 x float> %vecinit9, float %c.0, i32 2
+  %vecinit11 = insertelement <4 x float> %vecinit10, float %add, i32 3
+  tail call void @llvm.arm.neon.vst1.v4f32(i8* %0, <4 x float> %vecinit11, i32 4)
+  ret void
+}
+
+declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly
+
+declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32) nounwind
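The new f4/f5 cases read like reductions of NEON C code. As a rough, untested reconstruction of f4 (intrinsic names assumed from <arm_neon.h>):

    #include <arm_neon.h>

    void f4(float *p, float *q) {
      float32x2_t v = vld1_f32(p);             // fully defined vector
      if (q)                                   // if.then in the IR
        v = vset_lane_f32(q[0] + q[1], v, 1);  // insert the sum into lane 1
      vst1_f32(p, v);                          // lanes coalesce: no vmov/vorr
    }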

Modified: llvm/branches/R600/test/CodeGen/ARM/fast-isel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/ARM/fast-isel.ll?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/ARM/fast-isel.ll (original)
+++ llvm/branches/R600/test/CodeGen/ARM/fast-isel.ll Fri Sep 21 12:29:50 2012
@@ -1,5 +1,7 @@
 ; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
 ; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -arm-strict-align -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM-STRICT-ALIGN
+; RUN: llc < %s -O0 -arm-strict-align -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB-STRICT-ALIGN
 
 ; Very basic fast-isel functionality.
 define i32 @add(i32 %a, i32 %b) nounwind {
@@ -238,3 +240,31 @@
 }
 
 declare void @llvm.trap() nounwind
+
+define void @unaligned_i16_store(i16 %x, i16* %y) nounwind {
+entry:
+; ARM-STRICT-ALIGN: @unaligned_i16_store
+; ARM-STRICT-ALIGN: strb
+; ARM-STRICT-ALIGN: strb
+
+; THUMB-STRICT-ALIGN: @unaligned_i16_store
+; THUMB-STRICT-ALIGN: strb
+; THUMB-STRICT-ALIGN: strb
+
+  store i16 %x, i16* %y, align 1
+  ret void
+}
+
+define i16 @unaligned_i16_load(i16* %x) nounwind {
+entry:
+; ARM-STRICT-ALIGN: @unaligned_i16_load
+; ARM-STRICT-ALIGN: ldrb
+; ARM-STRICT-ALIGN: ldrb
+
+; THUMB-STRICT-ALIGN: @unaligned_i16_load
+; THUMB-STRICT-ALIGN: ldrb
+; THUMB-STRICT-ALIGN: ldrb
+
+  %0 = load i16* %x, align 1
+  ret i16 %0
+}
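The new strict-align RUN lines check that align-1 i16 accesses get split into byte operations. Roughly the C++ shape of the two functions, with a packed wrapper standing in for the underaligned pointer (hypothetical reconstruction):

    #include <cstdint>

    struct __attribute__((packed)) U16 { uint16_t v; };  // alignment 1

    void unaligned_i16_store(uint16_t x, U16 *y) { y->v = x; }  // strb, strb
    uint16_t unaligned_i16_load(U16 *x) { return x->v; }        // ldrb, ldrb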

Modified: llvm/branches/R600/test/CodeGen/ARM/reg_sequence.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/ARM/reg_sequence.ll?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/ARM/reg_sequence.ll (original)
+++ llvm/branches/R600/test/CodeGen/ARM/reg_sequence.ll Fri Sep 21 12:29:50 2012
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 -regalloc=basic | FileCheck %s
+; RUN: llc < %s -mtriple=arm-apple-ios -mcpu=cortex-a8 | FileCheck %s
+; RUN: llc < %s -mtriple=arm-apple-ios -mcpu=cortex-a8 -regalloc=basic | FileCheck %s
 ; Implementing vld / vst as REG_SEQUENCE eliminates the extra vmov's.
 
 %struct.int16x8_t = type { <8 x i16> }

Modified: llvm/branches/R600/test/CodeGen/ARM/sub.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/ARM/sub.ll?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/ARM/sub.ll (original)
+++ llvm/branches/R600/test/CodeGen/ARM/sub.ll Fri Sep 21 12:29:50 2012
@@ -1,4 +1,4 @@
-; RUN: llc -march=arm < %s | FileCheck %s
+; RUN: llc -march=arm -mcpu=cortex-a8 < %s | FileCheck %s
 
 ; 171 = 0x000000ab
 define i64 @f1(i64 %a) {

Modified: llvm/branches/R600/test/CodeGen/ARM/twoaddrinstr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/ARM/twoaddrinstr.ll?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/ARM/twoaddrinstr.ll (original)
+++ llvm/branches/R600/test/CodeGen/ARM/twoaddrinstr.ll Fri Sep 21 12:29:50 2012
@@ -12,10 +12,10 @@
 ; CHECK-NEXT:   vst1.32
 
 entry:
-  %0 = load <4 x float>* undef
-  store <4 x float> zeroinitializer, <4 x float>* undef
-  store <4 x float> %0, <4 x float>* undef
+  %0 = load <4 x float>* undef, align 4
+  store <4 x float> zeroinitializer, <4 x float>* undef, align 4
+  store <4 x float> %0, <4 x float>* undef, align 4
   %1 = insertelement <4 x float> %0, float 1.000000e+00, i32 3
-  store <4 x float> %1, <4 x float>* undef
+  store <4 x float> %1, <4 x float>* undef, align 4
   unreachable
 }

Modified: llvm/branches/R600/test/CodeGen/ARM/vbsl-constant.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/ARM/vbsl-constant.ll?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/ARM/vbsl-constant.ll (original)
+++ llvm/branches/R600/test/CodeGen/ARM/vbsl-constant.ll Fri Sep 21 12:29:50 2012
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc < %s -mtriple=arm-apple-ios -mattr=+neon | FileCheck %s
 
 define <8 x i8> @v_bsli8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
 ;CHECK: v_bsli8:

Modified: llvm/branches/R600/test/CodeGen/ARM/vbsl.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/ARM/vbsl.ll?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/ARM/vbsl.ll (original)
+++ llvm/branches/R600/test/CodeGen/ARM/vbsl.ll Fri Sep 21 12:29:50 2012
@@ -103,3 +103,52 @@
 	%tmp7 = or <2 x i64> %tmp4, %tmp6
 	ret <2 x i64> %tmp7
 }
+
+define <8 x i8> @f1(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) nounwind readnone optsize ssp {
+; CHECK: f1:
+; CHECK: vbsl
+  %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) nounwind
+  ret <8 x i8> %vbsl.i
+}
+
+define <4 x i16> @f2(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) nounwind readnone optsize ssp {
+; CHECK: f2:
+; CHECK: vbsl
+  %vbsl3.i = tail call <4 x i16> @llvm.arm.neon.vbsl.v4i16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) nounwind
+  ret <4 x i16> %vbsl3.i
+}
+
+define <2 x i32> @f3(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) nounwind readnone optsize ssp {
+; CHECK: f3:
+; CHECK: vbsl
+  %vbsl3.i = tail call <2 x i32> @llvm.arm.neon.vbsl.v2i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) nounwind
+  ret <2 x i32> %vbsl3.i
+}
+
+define <16 x i8> @g1(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) nounwind readnone optsize ssp {
+; CHECK: g1:
+; CHECK: vbsl
+  %vbsl.i = tail call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) nounwind
+  ret <16 x i8> %vbsl.i
+}
+
+define <8 x i16> @g2(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) nounwind readnone optsize ssp {
+; CHECK: g2:
+; CHECK: vbsl
+  %vbsl3.i = tail call <8 x i16> @llvm.arm.neon.vbsl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) nounwind
+  ret <8 x i16> %vbsl3.i
+}
+
+define <4 x i32> @g3(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) nounwind readnone optsize ssp {
+; CHECK: g3:
+; CHECK: vbsl
+  %vbsl3.i = tail call <4 x i32> @llvm.arm.neon.vbsl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) nounwind
+  ret <4 x i32> %vbsl3.i
+}
+
+declare <4 x i32> @llvm.arm.neon.vbsl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vbsl.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone
+declare <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vbsl.v2i32(<2 x i32>, <2 x i32>, <2 x i32>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vbsl.v4i16(<4 x i16>, <4 x i16>, <4 x i16>) nounwind readnone
+declare <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8>, <8 x i8>, <8 x i8>) nounwind readnone

Modified: llvm/branches/R600/test/CodeGen/PowerPC/ppc64-calls.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/PowerPC/ppc64-calls.ll?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/PowerPC/ppc64-calls.ll (original)
+++ llvm/branches/R600/test/CodeGen/PowerPC/ppc64-calls.ll Fri Sep 21 12:29:50 2012
@@ -6,6 +6,10 @@
   ret void
 }
 
+define weak void @foo_weak() nounwind {
+  ret void
+}
+
 ; Calls to local function does not require the TOC restore 'nop'
 define void @test_direct() nounwind readnone {
 ; CHECK: test_direct:
@@ -15,6 +19,16 @@
   ret void
 }
 
+; Calls to weak functions require a TOC restore 'nop' because they
+; may be overridden in a different module.
+define void @test_weak() nounwind readnone {
+; CHECK: test_weak:
+  tail call void @foo_weak() nounwind
+; CHECK: bl foo
+; CHECK-NEXT: nop
+  ret void
+}
+
 ; Indirect calls requires a full stub creation
 define void @test_indirect(void ()* nocapture %fp) nounwind {
 ; CHECK: test_indirect:
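The weak-call test pins down the ABI rule from its comment: a weak definition may be replaced at link time by one from another module with a different TOC, so the call site keeps the TOC-restore nop. In C terms, roughly:

    __attribute__((weak)) void foo_weak(void) {}

    void test_weak(void) {
      foo_weak();  // emitted as: bl foo_weak / nop (TOC restore slot)
    }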

Added: llvm/branches/R600/test/CodeGen/PowerPC/structsinregs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/PowerPC/structsinregs.ll?rev=164384&view=auto
==============================================================================
--- llvm/branches/R600/test/CodeGen/PowerPC/structsinregs.ll (added)
+++ llvm/branches/R600/test/CodeGen/PowerPC/structsinregs.ll Fri Sep 21 12:29:50 2012
@@ -0,0 +1,205 @@
+; RUN: llc -mcpu=pwr7 -O0 -disable-fp-elim < %s | FileCheck %s
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+%struct.s1 = type { i8 }
+%struct.s2 = type { i16 }
+%struct.s4 = type { i32 }
+%struct.t1 = type { i8 }
+%struct.t3 = type <{ i16, i8 }>
+%struct.t5 = type <{ i32, i8 }>
+%struct.t6 = type <{ i32, i16 }>
+%struct.t7 = type <{ i32, i16, i8 }>
+%struct.s3 = type { i16, i8 }
+%struct.s5 = type { i32, i8 }
+%struct.s6 = type { i32, i16 }
+%struct.s7 = type { i32, i16, i8 }
+%struct.t2 = type <{ i16 }>
+%struct.t4 = type <{ i32 }>
+
+ at caller1.p1 = private unnamed_addr constant %struct.s1 { i8 1 }, align 1
+ at caller1.p2 = private unnamed_addr constant %struct.s2 { i16 2 }, align 2
+ at caller1.p3 = private unnamed_addr constant { i16, i8, i8 } { i16 4, i8 8, i8 undef }, align 2
+ at caller1.p4 = private unnamed_addr constant %struct.s4 { i32 16 }, align 4
+ at caller1.p5 = private unnamed_addr constant { i32, i8, [3 x i8] } { i32 32, i8 64, [3 x i8] undef }, align 4
+ at caller1.p6 = private unnamed_addr constant { i32, i16, [2 x i8] } { i32 128, i16 256, [2 x i8] undef }, align 4
+ at caller1.p7 = private unnamed_addr constant { i32, i16, i8, i8 } { i32 512, i16 1024, i8 -3, i8 undef }, align 4
+ at caller2.p1 = private unnamed_addr constant %struct.t1 { i8 1 }, align 1
+ at caller2.p2 = private unnamed_addr constant { i16 } { i16 2 }, align 1
+ at caller2.p3 = private unnamed_addr constant %struct.t3 <{ i16 4, i8 8 }>, align 1
+ at caller2.p4 = private unnamed_addr constant { i32 } { i32 16 }, align 1
+ at caller2.p5 = private unnamed_addr constant %struct.t5 <{ i32 32, i8 64 }>, align 1
+ at caller2.p6 = private unnamed_addr constant %struct.t6 <{ i32 128, i16 256 }>, align 1
+ at caller2.p7 = private unnamed_addr constant %struct.t7 <{ i32 512, i16 1024, i8 -3 }>, align 1
+
+define i32 @caller1() nounwind {
+entry:
+  %p1 = alloca %struct.s1, align 1
+  %p2 = alloca %struct.s2, align 2
+  %p3 = alloca %struct.s3, align 2
+  %p4 = alloca %struct.s4, align 4
+  %p5 = alloca %struct.s5, align 4
+  %p6 = alloca %struct.s6, align 4
+  %p7 = alloca %struct.s7, align 4
+  %0 = bitcast %struct.s1* %p1 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* getelementptr inbounds (%struct.s1* @caller1.p1, i32 0, i32 0), i64 1, i32 1, i1 false)
+  %1 = bitcast %struct.s2* %p2 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast (%struct.s2* @caller1.p2 to i8*), i64 2, i32 2, i1 false)
+  %2 = bitcast %struct.s3* %p3 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* bitcast ({ i16, i8, i8 }* @caller1.p3 to i8*), i64 4, i32 2, i1 false)
+  %3 = bitcast %struct.s4* %p4 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %3, i8* bitcast (%struct.s4* @caller1.p4 to i8*), i64 4, i32 4, i1 false)
+  %4 = bitcast %struct.s5* %p5 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %4, i8* bitcast ({ i32, i8, [3 x i8] }* @caller1.p5 to i8*), i64 8, i32 4, i1 false)
+  %5 = bitcast %struct.s6* %p6 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %5, i8* bitcast ({ i32, i16, [2 x i8] }* @caller1.p6 to i8*), i64 8, i32 4, i1 false)
+  %6 = bitcast %struct.s7* %p7 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %6, i8* bitcast ({ i32, i16, i8, i8 }* @caller1.p7 to i8*), i64 8, i32 4, i1 false)
+  %call = call i32 @callee1(%struct.s1* byval %p1, %struct.s2* byval %p2, %struct.s3* byval %p3, %struct.s4* byval %p4, %struct.s5* byval %p5, %struct.s6* byval %p6, %struct.s7* byval %p7)
+  ret i32 %call
+
+; CHECK: ld 9, 128(31)
+; CHECK: ld 8, 136(31)
+; CHECK: ld 7, 144(31)
+; CHECK: lwz 6, 152(31)
+; CHECK: lwz 5, 160(31)
+; CHECK: lhz 4, 168(31)
+; CHECK: lbz 3, 176(31)
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+
+define internal i32 @callee1(%struct.s1* byval %v1, %struct.s2* byval %v2, %struct.s3* byval %v3, %struct.s4* byval %v4, %struct.s5* byval %v5, %struct.s6* byval %v6, %struct.s7* byval %v7) nounwind {
+entry:
+  %a = getelementptr inbounds %struct.s1* %v1, i32 0, i32 0
+  %0 = load i8* %a, align 1
+  %conv = zext i8 %0 to i32
+  %a1 = getelementptr inbounds %struct.s2* %v2, i32 0, i32 0
+  %1 = load i16* %a1, align 2
+  %conv2 = sext i16 %1 to i32
+  %add = add nsw i32 %conv, %conv2
+  %a3 = getelementptr inbounds %struct.s3* %v3, i32 0, i32 0
+  %2 = load i16* %a3, align 2
+  %conv4 = sext i16 %2 to i32
+  %add5 = add nsw i32 %add, %conv4
+  %a6 = getelementptr inbounds %struct.s4* %v4, i32 0, i32 0
+  %3 = load i32* %a6, align 4
+  %add7 = add nsw i32 %add5, %3
+  %a8 = getelementptr inbounds %struct.s5* %v5, i32 0, i32 0
+  %4 = load i32* %a8, align 4
+  %add9 = add nsw i32 %add7, %4
+  %a10 = getelementptr inbounds %struct.s6* %v6, i32 0, i32 0
+  %5 = load i32* %a10, align 4
+  %add11 = add nsw i32 %add9, %5
+  %a12 = getelementptr inbounds %struct.s7* %v7, i32 0, i32 0
+  %6 = load i32* %a12, align 4
+  %add13 = add nsw i32 %add11, %6
+  ret i32 %add13
+
+; CHECK: std 9, 96(1)
+; CHECK: std 8, 88(1)
+; CHECK: std 7, 80(1)
+; CHECK: stw 6, 72(1)
+; CHECK: stw 5, 64(1)
+; CHECK: sth 4, 58(1)
+; CHECK: stb 3, 51(1)
+; CHECK: lha {{[0-9]+}}, 58(1)
+; CHECK: lbz {{[0-9]+}}, 51(1)
+; CHECK: lha {{[0-9]+}}, 64(1)
+; CHECK: lwz {{[0-9]+}}, 72(1)
+; CHECK: lwz {{[0-9]+}}, 80(1)
+; CHECK: lwz {{[0-9]+}}, 88(1)
+; CHECK: lwz {{[0-9]+}}, 96(1)
+}
+
+define i32 @caller2() nounwind {
+entry:
+  %p1 = alloca %struct.t1, align 1
+  %p2 = alloca %struct.t2, align 1
+  %p3 = alloca %struct.t3, align 1
+  %p4 = alloca %struct.t4, align 1
+  %p5 = alloca %struct.t5, align 1
+  %p6 = alloca %struct.t6, align 1
+  %p7 = alloca %struct.t7, align 1
+  %0 = bitcast %struct.t1* %p1 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* getelementptr inbounds (%struct.t1* @caller2.p1, i32 0, i32 0), i64 1, i32 1, i1 false)
+  %1 = bitcast %struct.t2* %p2 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast ({ i16 }* @caller2.p2 to i8*), i64 2, i32 1, i1 false)
+  %2 = bitcast %struct.t3* %p3 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* bitcast (%struct.t3* @caller2.p3 to i8*), i64 3, i32 1, i1 false)
+  %3 = bitcast %struct.t4* %p4 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %3, i8* bitcast ({ i32 }* @caller2.p4 to i8*), i64 4, i32 1, i1 false)
+  %4 = bitcast %struct.t5* %p5 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %4, i8* bitcast (%struct.t5* @caller2.p5 to i8*), i64 5, i32 1, i1 false)
+  %5 = bitcast %struct.t6* %p6 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %5, i8* bitcast (%struct.t6* @caller2.p6 to i8*), i64 6, i32 1, i1 false)
+  %6 = bitcast %struct.t7* %p7 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %6, i8* bitcast (%struct.t7* @caller2.p7 to i8*), i64 7, i32 1, i1 false)
+  %call = call i32 @callee2(%struct.t1* byval %p1, %struct.t2* byval %p2, %struct.t3* byval %p3, %struct.t4* byval %p4, %struct.t5* byval %p5, %struct.t6* byval %p6, %struct.t7* byval %p7)
+  ret i32 %call
+
+; CHECK: stb {{[0-9]+}}, 71(1)
+; CHECK: sth {{[0-9]+}}, 69(1)
+; CHECK: stb {{[0-9]+}}, 87(1)
+; CHECK: stw {{[0-9]+}}, 83(1)
+; CHECK: sth {{[0-9]+}}, 94(1)
+; CHECK: stw {{[0-9]+}}, 90(1)
+; CHECK: stb {{[0-9]+}}, 103(1)
+; CHECK: sth {{[0-9]+}}, 101(1)
+; CHECK: stw {{[0-9]+}}, 97(1)
+; CHECK: ld 9, 96(1)
+; CHECK: ld 8, 88(1)
+; CHECK: ld 7, 80(1)
+; CHECK: lwz 6, 152(31)
+; CHECK: ld 5, 64(1)
+; CHECK: lhz 4, 168(31)
+; CHECK: lbz 3, 176(31)
+}
+
+define internal i32 @callee2(%struct.t1* byval %v1, %struct.t2* byval %v2, %struct.t3* byval %v3, %struct.t4* byval %v4, %struct.t5* byval %v5, %struct.t6* byval %v6, %struct.t7* byval %v7) nounwind {
+entry:
+  %a = getelementptr inbounds %struct.t1* %v1, i32 0, i32 0
+  %0 = load i8* %a, align 1
+  %conv = zext i8 %0 to i32
+  %a1 = getelementptr inbounds %struct.t2* %v2, i32 0, i32 0
+  %1 = load i16* %a1, align 1
+  %conv2 = sext i16 %1 to i32
+  %add = add nsw i32 %conv, %conv2
+  %a3 = getelementptr inbounds %struct.t3* %v3, i32 0, i32 0
+  %2 = load i16* %a3, align 1
+  %conv4 = sext i16 %2 to i32
+  %add5 = add nsw i32 %add, %conv4
+  %a6 = getelementptr inbounds %struct.t4* %v4, i32 0, i32 0
+  %3 = load i32* %a6, align 1
+  %add7 = add nsw i32 %add5, %3
+  %a8 = getelementptr inbounds %struct.t5* %v5, i32 0, i32 0
+  %4 = load i32* %a8, align 1
+  %add9 = add nsw i32 %add7, %4
+  %a10 = getelementptr inbounds %struct.t6* %v6, i32 0, i32 0
+  %5 = load i32* %a10, align 1
+  %add11 = add nsw i32 %add9, %5
+  %a12 = getelementptr inbounds %struct.t7* %v7, i32 0, i32 0
+  %6 = load i32* %a12, align 1
+  %add13 = add nsw i32 %add11, %6
+  ret i32 %add13
+
+; CHECK: sldi 9, 9, 8
+; CHECK: sldi 8, 8, 16
+; CHECK: sldi 7, 7, 24
+; CHECK: sldi 5, 5, 40
+; CHECK: stw 6, 72(1)
+; CHECK: sth 4, 58(1)
+; CHECK: stb 3, 51(1)
+; CHECK: std 9, 96(1)
+; CHECK: std 8, 88(1)
+; CHECK: std 7, 80(1)
+; CHECK: std 5, 64(1)
+; CHECK: lha {{[0-9]+}}, 58(1)
+; CHECK: lbz {{[0-9]+}}, 51(1)
+; CHECK: lha {{[0-9]+}}, 64(1)
+; CHECK: lwz {{[0-9]+}}, 72(1)
+; CHECK: lwz {{[0-9]+}}, 80(1)
+; CHECK: lwz {{[0-9]+}}, 88(1)
+; CHECK: lwz {{[0-9]+}}, 96(1)
+}
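caller1/callee1 read like a reduction of C code passing small aggregates byval; the CHECK lines verify that all seven structs travel in GPRs (apparently r3 through r9 under the ppc64 ELF ABI) rather than purely on the stack. A hedged reconstruction of the non-packed half:

    struct s1 { char a; };
    struct s2 { short a; };
    struct s3 { short a; char b; };
    struct s4 { int a; };
    struct s5 { int a; char b; };
    struct s6 { int a; short b; };
    struct s7 { int a; short b; char c; };

    int callee1(struct s1 v1, struct s2 v2, struct s3 v3, struct s4 v4,
                struct s5 v5, struct s6 v6, struct s7 v7) {
      return v1.a + v2.a + v3.a + v4.a + v5.a + v6.a + v7.a;
    }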

Added: llvm/branches/R600/test/CodeGen/SPARC/load_to_switch.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/SPARC/load_to_switch.ll?rev=164384&view=auto
==============================================================================
--- llvm/branches/R600/test/CodeGen/SPARC/load_to_switch.ll (added)
+++ llvm/branches/R600/test/CodeGen/SPARC/load_to_switch.ll Fri Sep 21 12:29:50 2012
@@ -0,0 +1,84 @@
+; RUN: llc -march=sparc < %s | FileCheck %s
+
+; Check that all the switches turned into lookup tables by SimplifyCFG are
+; turned back into switches for targets that don't like lookup tables.
+
+ at .str = private unnamed_addr constant [4 x i8] c"foo\00", align 1
+ at .str1 = private unnamed_addr constant [4 x i8] c"bar\00", align 1
+ at .str2 = private unnamed_addr constant [4 x i8] c"baz\00", align 1
+ at .str3 = private unnamed_addr constant [4 x i8] c"qux\00", align 1
+ at .str4 = private unnamed_addr constant [6 x i8] c"error\00", align 1
+ at switch.table = private unnamed_addr constant [7 x i32] [i32 55, i32 123, i32 0, i32 -1, i32 27, i32 62, i32 1]
+ at switch.table1 = private unnamed_addr constant [4 x i8] c"*\09X\05"
+ at switch.table2 = private unnamed_addr constant [4 x float] [float 0x40091EB860000000, float 0x3FF3BE76C0000000, float 0x4012449BA0000000, float 0x4001AE1480000000]
+ at switch.table3 = private unnamed_addr constant [4 x i8*] [i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str1, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str2, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str3, i64 0, i64 0)]
+
+define i32 @f(i32 %c)  {
+entry:
+  %switch.tableidx = sub i32 %c, 42
+  %0 = icmp ult i32 %switch.tableidx, 7
+  br i1 %0, label %switch.lookup, label %return
+
+switch.lookup:
+  %switch.gep = getelementptr inbounds [7 x i32]* @switch.table, i32 0, i32 %switch.tableidx
+  %switch.load = load i32* %switch.gep
+  ret i32 %switch.load
+
+return:
+  ret i32 15
+
+; CHECK: f:
+; CHECK: %switch.lookup
+; CHECK-NOT: sethi %hi(.Lswitch.table)
+}
+
+declare void @dummy(i8 signext, float)
+
+define void @h(i32 %x) {
+entry:
+  %switch.tableidx = sub i32 %x, 0
+  %0 = icmp ult i32 %switch.tableidx, 4
+  br i1 %0, label %switch.lookup, label %sw.epilog
+
+switch.lookup:
+  %switch.gep = getelementptr inbounds [4 x i8]* @switch.table1, i32 0, i32 %switch.tableidx
+  %switch.load = load i8* %switch.gep
+  %switch.gep1 = getelementptr inbounds [4 x float]* @switch.table2, i32 0, i32 %switch.tableidx
+  %switch.load2 = load float* %switch.gep1
+  br label %sw.epilog
+
+sw.epilog:
+  %a.0 = phi i8 [ %switch.load, %switch.lookup ], [ 7, %entry ]
+  %b.0 = phi float [ %switch.load2, %switch.lookup ], [ 0x4023FAE140000000, %entry ]
+  call void @dummy(i8 signext %a.0, float %b.0)
+  ret void
+
+; CHECK: h:
+; CHECK: %switch.lookup
+; CHECK-NOT: sethi %hi(.Lswitch.table{{[0-9]}})
+; CHECK-NOT: sethi %hi(.Lswitch.table{{[0-9]}})
+}
+
+define i8* @foostring(i32 %x) {
+entry:
+  %switch.tableidx = sub i32 %x, 0
+  %0 = icmp ult i32 %switch.tableidx, 4
+  br i1 %0, label %switch.lookup, label %return
+
+switch.lookup:
+  %switch.gep = getelementptr inbounds [4 x i8*]* @switch.table3, i32 0, i32 %switch.tableidx
+  %switch.load = load i8** %switch.gep
+  ret i8* %switch.load
+
+return:
+  ret i8* getelementptr inbounds ([6 x i8]* @.str4, i64 0, i64 0)
+
+; CHECK: foostring:
+; CHECK: %switch.lookup
+; CHECK-NOT: sethi %hi(.Lswitch.table3)
+}
+
+; CHECK-NOT: .Lswitch.table
+; CHECK-NOT: .Lswitch.table1
+; CHECK-NOT: .Lswitch.table2
+; CHECK-NOT: .Lswitch.table3
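For reference, @f matches a dense switch of the following shape; SimplifyCFG had lowered it to the @switch.table load, and this test checks that the SPARC backend rewrites it back instead of emitting the table (hypothetical reconstruction, values taken from @switch.table):

    int f(int c) {
      switch (c) {
      case 42: return 55;   case 43: return 123;  case 44: return 0;
      case 45: return -1;   case 46: return 27;   case 47: return 62;
      case 48: return 1;
      default: return 15;
      }
    }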

Modified: llvm/branches/R600/test/CodeGen/X86/2010-01-08-Atomic64Bug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/X86/2010-01-08-Atomic64Bug.ll?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/X86/2010-01-08-Atomic64Bug.ll (original)
+++ llvm/branches/R600/test/CodeGen/X86/2010-01-08-Atomic64Bug.ll Fri Sep 21 12:29:50 2012
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=corei7 | FileCheck %s
 ; rdar://r7512579
 
 ; PHI defs in the atomic loop should be used by the add / adc
@@ -7,17 +7,16 @@
 define void @t(i64* nocapture %p) nounwind ssp {
 entry:
 ; CHECK: t:
-; CHECK: movl $1
-; CHECK: movl (%ebp), %eax
-; CHECK: movl 4(%ebp), %edx
+; CHECK: movl ([[REG:%[a-z]+]]), %eax
+; CHECK: movl 4([[REG]]), %edx
 ; CHECK: LBB0_1:
-; CHECK-NOT: movl $1
-; CHECK-NOT: movl $0
+; CHECK: movl $1
 ; CHECK: addl
+; CHECK: movl $0
 ; CHECK: adcl
 ; CHECK: lock
-; CHECK: cmpxchg8b
-; CHECK: jne
+; CHECK-NEXT: cmpxchg8b ([[REG]])
+; CHECK-NEXT: jne
   %0 = atomicrmw add i64* %p, i64 1 seq_cst
   ret void
 }

Added: llvm/branches/R600/test/CodeGen/X86/atomic16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/X86/atomic16.ll?rev=164384&view=auto
==============================================================================
--- llvm/branches/R600/test/CodeGen/X86/atomic16.ll (added)
+++ llvm/branches/R600/test/CodeGen/X86/atomic16.ll Fri Sep 21 12:29:50 2012
@@ -0,0 +1,250 @@
+; RUN: llc < %s -O0 -march=x86-64 -mcpu=corei7 | FileCheck %s --check-prefix X64
+; RUN: llc < %s -O0 -march=x86 -mcpu=corei7 | FileCheck %s --check-prefix X32
+
+ at sc16 = external global i16
+
+define void @atomic_fetch_add16() nounwind {
+; X64:   atomic_fetch_add16
+; X32:   atomic_fetch_add16
+entry:
+; 32-bit
+  %t1 = atomicrmw add  i16* @sc16, i16 1 acquire
+; X64:       lock
+; X64:       incw
+; X32:       lock
+; X32:       incw
+  %t2 = atomicrmw add  i16* @sc16, i16 3 acquire
+; X64:       lock
+; X64:       addw $3
+; X32:       lock
+; X32:       addw $3
+  %t3 = atomicrmw add  i16* @sc16, i16 5 acquire
+; X64:       lock
+; X64:       xaddw
+; X32:       lock
+; X32:       xaddw
+  %t4 = atomicrmw add  i16* @sc16, i16 %t3 acquire
+; X64:       lock
+; X64:       addw
+; X32:       lock
+; X32:       addw
+  ret void
+; X64:       ret
+; X32:       ret
+}
+
+define void @atomic_fetch_sub16() nounwind {
+; X64:   atomic_fetch_sub16
+; X32:   atomic_fetch_sub16
+  %t1 = atomicrmw sub  i16* @sc16, i16 1 acquire
+; X64:       lock
+; X64:       decw
+; X32:       lock
+; X32:       decw
+  %t2 = atomicrmw sub  i16* @sc16, i16 3 acquire
+; X64:       lock
+; X64:       subw $3
+; X32:       lock
+; X32:       subw $3
+  %t3 = atomicrmw sub  i16* @sc16, i16 5 acquire
+; X64:       lock
+; X64:       xaddw
+; X32:       lock
+; X32:       xaddw
+  %t4 = atomicrmw sub  i16* @sc16, i16 %t3 acquire
+; X64:       lock
+; X64:       subw
+; X32:       lock
+; X32:       subw
+  ret void
+; X64:       ret
+; X32:       ret
+}
+
+define void @atomic_fetch_and16() nounwind {
+; X64:   atomic_fetch_and16
+; X32:   atomic_fetch_and16
+  %t1 = atomicrmw and  i16* @sc16, i16 3 acquire
+; X64:       lock
+; X64:       andw $3
+; X32:       lock
+; X32:       andw $3
+  %t2 = atomicrmw and  i16* @sc16, i16 5 acquire
+; X64:       andw
+; X64:       lock
+; X64:       cmpxchgw
+; X32:       andw
+; X32:       lock
+; X32:       cmpxchgw
+  %t3 = atomicrmw and  i16* @sc16, i16 %t2 acquire
+; X64:       lock
+; X64:       andw
+; X32:       lock
+; X32:       andw
+  ret void
+; X64:       ret
+; X32:       ret
+}
+
+define void @atomic_fetch_or16() nounwind {
+; X64:   atomic_fetch_or16
+; X32:   atomic_fetch_or16
+  %t1 = atomicrmw or   i16* @sc16, i16 3 acquire
+; X64:       lock
+; X64:       orw $3
+; X32:       lock
+; X32:       orw $3
+  %t2 = atomicrmw or   i16* @sc16, i16 5 acquire
+; X64:       orw
+; X64:       lock
+; X64:       cmpxchgw
+; X32:       orw
+; X32:       lock
+; X32:       cmpxchgw
+  %t3 = atomicrmw or   i16* @sc16, i16 %t2 acquire
+; X64:       lock
+; X64:       orw
+; X32:       lock
+; X32:       orw
+  ret void
+; X64:       ret
+; X32:       ret
+}
+
+define void @atomic_fetch_xor16() nounwind {
+; X64:   atomic_fetch_xor16
+; X32:   atomic_fetch_xor16
+  %t1 = atomicrmw xor  i16* @sc16, i16 3 acquire
+; X64:       lock
+; X64:       xorw $3
+; X32:       lock
+; X32:       xorw $3
+  %t2 = atomicrmw xor  i16* @sc16, i16 5 acquire
+; X64:       xorw
+; X64:       lock
+; X64:       cmpxchgw
+; X32:       xorw
+; X32:       lock
+; X32:       cmpxchgw
+  %t3 = atomicrmw xor  i16* @sc16, i16 %t2 acquire
+; X64:       lock
+; X64:       xorw
+; X32:       lock
+; X32:       xorw
+  ret void
+; X64:       ret
+; X32:       ret
+}
+
+define void @atomic_fetch_nand16(i16 %x) nounwind {
+; X64:   atomic_fetch_nand16
+; X32:   atomic_fetch_nand16
+  %t1 = atomicrmw nand i16* @sc16, i16 %x acquire
+; X64:       andw
+; X64:       notw
+; X64:       lock
+; X64:       cmpxchgw
+; X32:       andw
+; X32:       notw
+; X32:       lock
+; X32:       cmpxchgw
+  ret void
+; X64:       ret
+; X32:       ret
+}
+
+define void @atomic_fetch_max16(i16 %x) nounwind {
+  %t1 = atomicrmw max  i16* @sc16, i16 %x acquire
+; X64:       cmpw
+; X64:       cmov
+; X64:       lock
+; X64:       cmpxchgw
+
+; X32:       cmpw
+; X32:       cmov
+; X32:       lock
+; X32:       cmpxchgw
+  ret void
+; X64:       ret
+; X32:       ret
+}
+
+define void @atomic_fetch_min16(i16 %x) nounwind {
+  %t1 = atomicrmw min  i16* @sc16, i16 %x acquire
+; X64:       cmpw
+; X64:       cmov
+; X64:       lock
+; X64:       cmpxchgw
+
+; X32:       cmpw
+; X32:       cmov
+; X32:       lock
+; X32:       cmpxchgw
+  ret void
+; X64:       ret
+; X32:       ret
+}
+
+define void @atomic_fetch_umax16(i16 %x) nounwind {
+  %t1 = atomicrmw umax i16* @sc16, i16 %x acquire
+; X64:       cmpw
+; X64:       cmov
+; X64:       lock
+; X64:       cmpxchgw
+
+; X32:       cmpw
+; X32:       cmov
+; X32:       lock
+; X32:       cmpxchgw
+  ret void
+; X64:       ret
+; X32:       ret
+}
+
+define void @atomic_fetch_umin16(i16 %x) nounwind {
+  %t1 = atomicrmw umin i16* @sc16, i16 %x acquire
+; X64:       cmpw
+; X64:       cmov
+; X64:       lock
+; X64:       cmpxchgw
+; X32:       cmpw
+; X32:       cmov
+; X32:       lock
+; X32:       cmpxchgw
+  ret void
+; X64:       ret
+; X32:       ret
+}
+
+define void @atomic_fetch_cmpxchg16() nounwind {
+  %t1 = cmpxchg i16* @sc16, i16 0, i16 1 acquire
+; X64:       lock
+; X64:       cmpxchgw
+; X32:       lock
+; X32:       cmpxchgw
+  ret void
+; X64:       ret
+; X32:       ret
+}
+
+define void @atomic_fetch_store16(i16 %x) nounwind {
+  store atomic i16 %x, i16* @sc16 release, align 4
+; X64-NOT:   lock
+; X64:       movw
+; X32-NOT:   lock
+; X32:       movw
+  ret void
+; X64:       ret
+; X32:       ret
+}
+
+define void @atomic_fetch_swap16(i16 %x) nounwind {
+  %t1 = atomicrmw xchg i16* @sc16, i16 %x acquire
+; X64-NOT:   lock
+; X64:       xchgw
+; X32-NOT:   lock
+; X32:       xchgw
+  ret void
+; X64:       ret
+; X32:       ret
+}
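These atomic tests map closely onto std::atomic fetch operations with acquire ordering; when the fetched value is unused the backend can use lock inc/add directly, and when it is used, xadd (or a cmpxchg loop for and/or/xor/nand and min/max). A rough C++ analogue of the first function (sketch only):

    #include <atomic>

    std::atomic<short> sc16;

    void atomic_fetch_add16() {
      sc16.fetch_add(1, std::memory_order_acquire);  // result unused: lock incw
      sc16.fetch_add(3, std::memory_order_acquire);  // result unused: lock addw $3
      short t = sc16.fetch_add(5, std::memory_order_acquire);  // used: lock xaddw
      sc16.fetch_add(t, std::memory_order_acquire);  // variable operand: lock addw
    }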

Added: llvm/branches/R600/test/CodeGen/X86/atomic32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/X86/atomic32.ll?rev=164384&view=auto
==============================================================================
--- llvm/branches/R600/test/CodeGen/X86/atomic32.ll (added)
+++ llvm/branches/R600/test/CodeGen/X86/atomic32.ll Fri Sep 21 12:29:50 2012
@@ -0,0 +1,250 @@
+; RUN: llc < %s -O0 -march=x86-64 -mcpu=corei7 | FileCheck %s --check-prefix X64
+; RUN: llc < %s -O0 -march=x86 -mcpu=corei7 | FileCheck %s --check-prefix X32
+
+ at sc32 = external global i32
+
+define void @atomic_fetch_add32() nounwind {
+; X64:   atomic_fetch_add32
+; X32:   atomic_fetch_add32
+entry:
+; 32-bit
+  %t1 = atomicrmw add  i32* @sc32, i32 1 acquire
+; X64:       lock
+; X64:       incl
+; X32:       lock
+; X32:       incl
+  %t2 = atomicrmw add  i32* @sc32, i32 3 acquire
+; X64:       lock
+; X64:       addl $3
+; X32:       lock
+; X32:       addl $3
+  %t3 = atomicrmw add  i32* @sc32, i32 5 acquire
+; X64:       lock
+; X64:       xaddl
+; X32:       lock
+; X32:       xaddl
+  %t4 = atomicrmw add  i32* @sc32, i32 %t3 acquire
+; X64:       lock
+; X64:       addl
+; X32:       lock
+; X32:       addl
+  ret void
+; X64:       ret
+; X32:       ret
+}
+
+define void @atomic_fetch_sub32() nounwind {
+; X64:   atomic_fetch_sub32
+; X32:   atomic_fetch_sub32
+  %t1 = atomicrmw sub  i32* @sc32, i32 1 acquire
+; X64:       lock
+; X64:       decl
+; X32:       lock
+; X32:       decl
+  %t2 = atomicrmw sub  i32* @sc32, i32 3 acquire
+; X64:       lock
+; X64:       subl $3
+; X32:       lock
+; X32:       subl $3
+  %t3 = atomicrmw sub  i32* @sc32, i32 5 acquire
+; X64:       lock
+; X64:       xaddl
+; X32:       lock
+; X32:       xaddl
+  %t4 = atomicrmw sub  i32* @sc32, i32 %t3 acquire
+; X64:       lock
+; X64:       subl
+; X32:       lock
+; X32:       subl
+  ret void
+; X64:       ret
+; X32:       ret
+}
+
+define void @atomic_fetch_and32() nounwind {
+; X64:   atomic_fetch_and32
+; X32:   atomic_fetch_and32
+  %t1 = atomicrmw and  i32* @sc32, i32 3 acquire
+; X64:       lock
+; X64:       andl $3
+; X32:       lock
+; X32:       andl $3
+  %t2 = atomicrmw and  i32* @sc32, i32 5 acquire
+; X64:       andl
+; X64:       lock
+; X64:       cmpxchgl
+; X32:       andl
+; X32:       lock
+; X32:       cmpxchgl
+  %t3 = atomicrmw and  i32* @sc32, i32 %t2 acquire
+; X64:       lock
+; X64:       andl
+; X32:       lock
+; X32:       andl
+  ret void
+; X64:       ret
+; X32:       ret
+}
+
+define void @atomic_fetch_or32() nounwind {
+; X64:   atomic_fetch_or32
+; X32:   atomic_fetch_or32
+  %t1 = atomicrmw or   i32* @sc32, i32 3 acquire
+; X64:       lock
+; X64:       orl $3
+; X32:       lock
+; X32:       orl $3
+  %t2 = atomicrmw or   i32* @sc32, i32 5 acquire
+; X64:       orl
+; X64:       lock
+; X64:       cmpxchgl
+; X32:       orl
+; X32:       lock
+; X32:       cmpxchgl
+  %t3 = atomicrmw or   i32* @sc32, i32 %t2 acquire
+; X64:       lock
+; X64:       orl
+; X32:       lock
+; X32:       orl
+  ret void
+; X64:       ret
+; X32:       ret
+}
+
+define void @atomic_fetch_xor32() nounwind {
+; X64:   atomic_fetch_xor32
+; X32:   atomic_fetch_xor32
+  %t1 = atomicrmw xor  i32* @sc32, i32 3 acquire
+; X64:       lock
+; X64:       xorl $3
+; X32:       lock
+; X32:       xorl $3
+  %t2 = atomicrmw xor  i32* @sc32, i32 5 acquire
+; X64:       xorl
+; X64:       lock
+; X64:       cmpxchgl
+; X32:       xorl
+; X32:       lock
+; X32:       cmpxchgl
+  %t3 = atomicrmw xor  i32* @sc32, i32 %t2 acquire
+; X64:       lock
+; X64:       xorl
+; X32:       lock
+; X32:       xorl
+  ret void
+; X64:       ret
+; X32:       ret
+}
+
+define void @atomic_fetch_nand32(i32 %x) nounwind {
+; X64:   atomic_fetch_nand32
+; X32:   atomic_fetch_nand32
+  %t1 = atomicrmw nand i32* @sc32, i32 %x acquire
+; X64:       andl
+; X64:       notl
+; X64:       lock
+; X64:       cmpxchgl
+; X32:       andl
+; X32:       notl
+; X32:       lock
+; X32:       cmpxchgl
+  ret void
+; X64:       ret
+; X32:       ret
+}
+
+define void @atomic_fetch_max32(i32 %x) nounwind {
+  %t1 = atomicrmw max  i32* @sc32, i32 %x acquire
+; X64:       cmpl
+; X64:       cmov
+; X64:       lock
+; X64:       cmpxchgl
+
+; X32:       cmpl
+; X32:       cmov
+; X32:       lock
+; X32:       cmpxchgl
+  ret void
+; X64:       ret
+; X32:       ret
+}
+
+define void @atomic_fetch_min32(i32 %x) nounwind {
+  %t1 = atomicrmw min  i32* @sc32, i32 %x acquire
+; X64:       cmpl
+; X64:       cmov
+; X64:       lock
+; X64:       cmpxchgl
+
+; X32:       cmpl
+; X32:       cmov
+; X32:       lock
+; X32:       cmpxchgl
+  ret void
+; X64:       ret
+; X32:       ret
+}
+
+define void @atomic_fetch_umax32(i32 %x) nounwind {
+  %t1 = atomicrmw umax i32* @sc32, i32 %x acquire
+; X64:       cmpl
+; X64:       cmov
+; X64:       lock
+; X64:       cmpxchgl
+
+; X32:       cmpl
+; X32:       cmov
+; X32:       lock
+; X32:       cmpxchgl
+  ret void
+; X64:       ret
+; X32:       ret
+}
+
+define void @atomic_fetch_umin32(i32 %x) nounwind {
+  %t1 = atomicrmw umin i32* @sc32, i32 %x acquire
+; X64:       cmpl
+; X64:       cmov
+; X64:       lock
+; X64:       cmpxchgl
+; X32:       cmpl
+; X32:       cmov
+; X32:       lock
+; X32:       cmpxchgl
+  ret void
+; X64:       ret
+; X32:       ret
+}
+
+define void @atomic_fetch_cmpxchg32() nounwind {
+  %t1 = cmpxchg i32* @sc32, i32 0, i32 1 acquire
+; X64:       lock
+; X64:       cmpxchgl
+; X32:       lock
+; X32:       cmpxchgl
+  ret void
+; X64:       ret
+; X32:       ret
+}
+
+define void @atomic_fetch_store32(i32 %x) nounwind {
+  store atomic i32 %x, i32* @sc32 release, align 4
+; X64-NOT:   lock
+; X64:       movl
+; X32-NOT:   lock
+; X32:       movl
+  ret void
+; X64:       ret
+; X32:       ret
+}
+
+define void @atomic_fetch_swap32(i32 %x) nounwind {
+  %t1 = atomicrmw xchg i32* @sc32, i32 %x acquire
+; X64-NOT:   lock
+; X64:       xchgl
+; X32-NOT:   lock
+; X32:       xchgl
+  ret void
+; X64:       ret
+; X32:       ret
+}

Added: llvm/branches/R600/test/CodeGen/X86/atomic64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/X86/atomic64.ll?rev=164384&view=auto
==============================================================================
--- llvm/branches/R600/test/CodeGen/X86/atomic64.ll (added)
+++ llvm/branches/R600/test/CodeGen/X86/atomic64.ll Fri Sep 21 12:29:50 2012
@@ -0,0 +1,216 @@
+; RUN: llc < %s -O0 -march=x86-64 -mcpu=corei7 | FileCheck %s --check-prefix X64
+
+ at sc64 = external global i64
+
+define void @atomic_fetch_add64() nounwind {
+; X64:   atomic_fetch_add64
+entry:
+  %t1 = atomicrmw add  i64* @sc64, i64 1 acquire
+; X64:       lock
+; X64:       incq
+  %t2 = atomicrmw add  i64* @sc64, i64 3 acquire
+; X64:       lock
+; X64:       addq $3
+  %t3 = atomicrmw add  i64* @sc64, i64 5 acquire
+; X64:       lock
+; X64:       xaddq
+  %t4 = atomicrmw add  i64* @sc64, i64 %t3 acquire
+; X64:       lock
+; X64:       addq
+  ret void
+; X64:       ret
+}
+
+define void @atomic_fetch_sub64() nounwind {
+; X64:   atomic_fetch_sub64
+  %t1 = atomicrmw sub  i64* @sc64, i64 1 acquire
+; X64:       lock
+; X64:       decq
+  %t2 = atomicrmw sub  i64* @sc64, i64 3 acquire
+; X64:       lock
+; X64:       subq $3
+  %t3 = atomicrmw sub  i64* @sc64, i64 5 acquire
+; X64:       lock
+; X64:       xaddq
+  %t4 = atomicrmw sub  i64* @sc64, i64 %t3 acquire
+; X64:       lock
+; X64:       subq
+  ret void
+; X64:       ret
+}
+
+define void @atomic_fetch_and64() nounwind {
+; X64:   atomic_fetch_and64
+  %t1 = atomicrmw and  i64* @sc64, i64 3 acquire
+; X64:       lock
+; X64:       andq $3
+  %t2 = atomicrmw and  i64* @sc64, i64 5 acquire
+; X64:       andq
+; X64:       lock
+; X64:       cmpxchgq
+  %t3 = atomicrmw and  i64* @sc64, i64 %t2 acquire
+; X64:       lock
+; X64:       andq
+  ret void
+; X64:       ret
+}
+
+define void @atomic_fetch_or64() nounwind {
+; X64:   atomic_fetch_or64
+  %t1 = atomicrmw or   i64* @sc64, i64 3 acquire
+; X64:       lock
+; X64:       orq $3
+  %t2 = atomicrmw or   i64* @sc64, i64 5 acquire
+; X64:       orq
+; X64:       lock
+; X64:       cmpxchgq
+  %t3 = atomicrmw or   i64* @sc64, i64 %t2 acquire
+; X64:       lock
+; X64:       orq
+  ret void
+; X64:       ret
+}
+
+define void @atomic_fetch_xor64() nounwind {
+; X64:   atomic_fetch_xor64
+  %t1 = atomicrmw xor  i64* @sc64, i64 3 acquire
+; X64:       lock
+; X64:       xorq $3
+  %t2 = atomicrmw xor  i64* @sc64, i64 5 acquire
+; X64:       xorq
+; X64:       lock
+; X64:       cmpxchgq
+  %t3 = atomicrmw xor  i64* @sc64, i64 %t2 acquire
+; X64:       lock
+; X64:       xorq
+  ret void
+; X64:       ret
+}
+
+define void @atomic_fetch_nand64(i64 %x) nounwind {
+; X64:   atomic_fetch_nand64
+; X32:   atomic_fetch_nand64
+  %t1 = atomicrmw nand i64* @sc64, i64 %x acquire
+; X64:       andq
+; X64:       notq
+; X64:       lock
+; X64:       cmpxchgq
+; X32:       andl
+; X32:       andl
+; X32:       notl
+; X32:       notl
+; X32:       lock
+; X32:       cmpxchg8b
+  ret void
+; X64:       ret
+; X32:       ret
+}
+
+define void @atomic_fetch_max64(i64 %x) nounwind {
+  %t1 = atomicrmw max  i64* @sc64, i64 %x acquire
+; X64:       cmpq
+; X64:       cmov
+; X64:       lock
+; X64:       cmpxchgq
+
+; X32:       cmpl
+; X32:       cmpl
+; X32:       cmov
+; X32:       cmov
+; X32:       cmov
+; X32:       lock
+; X32:       cmpxchg8b
+  ret void
+; X64:       ret
+; X32:       ret
+}
+
+define void @atomic_fetch_min64(i64 %x) nounwind {
+  %t1 = atomicrmw min  i64* @sc64, i64 %x acquire
+; X64:       cmpq
+; X64:       cmov
+; X64:       lock
+; X64:       cmpxchgq
+
+; X32:       cmpl
+; X32:       cmpl
+; X32:       cmov
+; X32:       cmov
+; X32:       cmov
+; X32:       lock
+; X32:       cmpxchg8b
+  ret void
+; X64:       ret
+; X32:       ret
+}
+
+define void @atomic_fetch_umax64(i64 %x) nounwind {
+  %t1 = atomicrmw umax i64* @sc64, i64 %x acquire
+; X64:       cmpq
+; X64:       cmov
+; X64:       lock
+; X64:       cmpxchgq
+
+; X32:       cmpl
+; X32:       cmpl
+; X32:       cmov
+; X32:       cmov
+; X32:       cmov
+; X32:       lock
+; X32:       cmpxchg8b
+  ret void
+; X64:       ret
+; X32:       ret
+}
+
+define void @atomic_fetch_umin64(i64 %x) nounwind {
+  %t1 = atomicrmw umin i64* @sc64, i64 %x acquire
+; X64:       cmpq
+; X64:       cmov
+; X64:       lock
+; X64:       cmpxchgq
+
+; X32:       cmpl
+; X32:       cmpl
+; X32:       cmov
+; X32:       cmov
+; X32:       cmov
+; X32:       lock
+; X32:       cmpxchg8b
+  ret void
+; X64:       ret
+; X32:       ret
+}
+
+define void @atomic_fetch_cmpxchg64() nounwind {
+  %t1 = cmpxchg i64* @sc64, i64 0, i64 1 acquire
+; X64:       lock
+; X64:       cmpxchgq
+; X32:       lock
+; X32:       cmpxchg8b
+  ret void
+; X64:       ret
+; X32:       ret
+}
+
+define void @atomic_fetch_store64(i64 %x) nounwind {
+  store atomic i64 %x, i64* @sc64 release, align 8
+; X64-NOT:   lock
+; X64:       movq
+; X32:       lock
+; X32:       cmpxchg8b
+  ret void
+; X64:       ret
+; X32:       ret
+}
+
+define void @atomic_fetch_swap64(i64 %x) nounwind {
+  %t1 = atomicrmw xchg i64* @sc64, i64 %x acquire
+; X64-NOT:   lock
+; X64:       xchgq
+; X32:       lock
+; X32:       xchg8b
+  ret void
+; X64:       ret
+; X32:       ret
+}
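
The 64-bit variants follow the same pattern with q-suffixed instructions on
x86-64. For the min/max family, the loop computes the winner with a compare
and a conditional move before the locked compare-exchange. A C++ sketch of
the max case (illustrative; not the generated code itself):

    #include <atomic>
    #include <cstdint>

    int64_t fetch_max(std::atomic<int64_t> &a, int64_t v) {
      int64_t old = a.load();
      // cmpq + cmov select max(old, v); lock cmpxchgq retries until it sticks.
      while (!a.compare_exchange_weak(old, old > v ? old : v)) {
      }
      return old;
    }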

Added: llvm/branches/R600/test/CodeGen/X86/atomic6432.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/X86/atomic6432.ll?rev=164384&view=auto
==============================================================================
--- llvm/branches/R600/test/CodeGen/X86/atomic6432.ll (added)
+++ llvm/branches/R600/test/CodeGen/X86/atomic6432.ll Fri Sep 21 12:29:50 2012
@@ -0,0 +1,209 @@
+; RUN: llc < %s -O0 -march=x86 -mcpu=corei7 | FileCheck %s --check-prefix X32
+; XFAIL: *
+
+ at sc64 = external global i64
+
+define void @atomic_fetch_add64() nounwind {
+; X32:   atomic_fetch_add64
+entry:
+  %t1 = atomicrmw add  i64* @sc64, i64 1 acquire
+; X32:       addl
+; X32:       adcl
+; X32:       lock
+; X32:       cmpxchg8b
+  %t2 = atomicrmw add  i64* @sc64, i64 3 acquire
+; X32:       addl
+; X32:       adcl
+; X32:       lock
+; X32:       cmpxchg8b
+  %t3 = atomicrmw add  i64* @sc64, i64 5 acquire
+; X32:       addl
+; X32:       adcl
+; X32:       lock
+; X32:       cmpxchg8b
+  %t4 = atomicrmw add  i64* @sc64, i64 %t3 acquire
+; X32:       addl
+; X32:       adcl
+; X32:       lock
+; X32:       cmpxchg8b
+  ret void
+; X32:       ret
+}
+
+define void @atomic_fetch_sub64() nounwind {
+; X32:   atomic_fetch_sub64
+  %t1 = atomicrmw sub  i64* @sc64, i64 1 acquire
+; X32:       subl
+; X32:       sbbl
+; X32:       lock
+; X32:       cmpxchg8b
+  %t2 = atomicrmw sub  i64* @sc64, i64 3 acquire
+; X32:       subl
+; X32:       sbbl
+; X32:       lock
+; X32:       cmpxchg8b
+  %t3 = atomicrmw sub  i64* @sc64, i64 5 acquire
+; X32:       subl
+; X32:       sbbl
+; X32:       lock
+; X32:       cmpxchg8b
+  %t4 = atomicrmw sub  i64* @sc64, i64 %t3 acquire
+; X32:       subl
+; X32:       sbbl
+; X32:       lock
+; X32:       cmpxchg8b
+  ret void
+; X32:       ret
+}
+
+define void @atomic_fetch_and64() nounwind {
+; X32:   atomic_fetch_and64
+  %t1 = atomicrmw and  i64* @sc64, i64 3 acquire
+; X32:       andl
+; X32:       andl
+; X32:       lock
+; X32:       cmpxchg8b
+  %t2 = atomicrmw and  i64* @sc64, i64 5 acquire
+; X32:       andl
+; X32:       andl
+; X32:       lock
+; X32:       cmpxchg8b
+  %t3 = atomicrmw and  i64* @sc64, i64 %t2 acquire
+; X32:       andl
+; X32:       andl
+; X32:       lock
+; X32:       cmpxchg8b
+  ret void
+; X32:       ret
+}
+
+define void @atomic_fetch_or64() nounwind {
+; X32:   atomic_fetch_or64
+  %t1 = atomicrmw or   i64* @sc64, i64 3 acquire
+; X32:       orl
+; X32:       orl
+; X32:       lock
+; X32:       cmpxchg8b
+  %t2 = atomicrmw or   i64* @sc64, i64 5 acquire
+; X32:       orl
+; X32:       orl
+; X32:       lock
+; X32:       cmpxchg8b
+  %t3 = atomicrmw or   i64* @sc64, i64 %t2 acquire
+; X32:       orl
+; X32:       orl
+; X32:       lock
+; X32:       cmpxchg8b
+  ret void
+; X32:       ret
+}
+
+define void @atomic_fetch_xor64() nounwind {
+; X32:   atomic_fetch_xor64
+  %t1 = atomicrmw xor  i64* @sc64, i64 3 acquire
+; X32:       xorl
+; X32:       xorl
+; X32:       lock
+; X32:       cmpxchg8b
+  %t2 = atomicrmw xor  i64* @sc64, i64 5 acquire
+; X32:       xorl
+; X32:       xorl
+; X32:       lock
+; X32:       cmpxchg8b
+  %t3 = atomicrmw xor  i64* @sc64, i64 %t2 acquire
+; X32:       xorl
+; X32:       xorl
+; X32:       lock
+; X32:       cmpxchg8b
+  ret void
+; X32:       ret
+}
+
+define void @atomic_fetch_nand64(i64 %x) nounwind {
+; X32:   atomic_fetch_nand64
+  %t1 = atomicrmw nand i64* @sc64, i64 %x acquire
+; X32:       andl
+; X32:       andl
+; X32:       notl
+; X32:       notl
+; X32:       lock
+; X32:       cmpxchg8b
+  ret void
+; X32:       ret
+}
+
+define void @atomic_fetch_max64(i64 %x) nounwind {
+  %t1 = atomicrmw max  i64* @sc64, i64 %x acquire
+; X32:       cmpl
+; X32:       cmpl
+; X32:       cmov
+; X32:       cmov
+; X32:       cmov
+; X32:       lock
+; X32:       cmpxchg8b
+  ret void
+; X32:       ret
+}
+
+define void @atomic_fetch_min64(i64 %x) nounwind {
+  %t1 = atomicrmw min  i64* @sc64, i64 %x acquire
+; X32:       cmpl
+; X32:       cmpl
+; X32:       cmov
+; X32:       cmov
+; X32:       cmov
+; X32:       lock
+; X32:       cmpxchg8b
+  ret void
+; X32:       ret
+}
+
+define void @atomic_fetch_umax64(i64 %x) nounwind {
+  %t1 = atomicrmw umax i64* @sc64, i64 %x acquire
+; X32:       cmpl
+; X32:       cmpl
+; X32:       cmov
+; X32:       cmov
+; X32:       cmov
+; X32:       lock
+; X32:       cmpxchg8b
+  ret void
+; X32:       ret
+}
+
+define void @atomic_fetch_umin64(i64 %x) nounwind {
+  %t1 = atomicrmw umin i64* @sc64, i64 %x acquire
+; X32:       cmpl
+; X32:       cmpl
+; X32:       cmov
+; X32:       cmov
+; X32:       cmov
+; X32:       lock
+; X32:       cmpxchg8b
+  ret void
+; X32:       ret
+}
+
+define void @atomic_fetch_cmpxchg64() nounwind {
+  %t1 = cmpxchg i64* @sc64, i64 0, i64 1 acquire
+; X32:       lock
+; X32:       cmpxchg8b
+  ret void
+; X32:       ret
+}
+
+define void @atomic_fetch_store64(i64 %x) nounwind {
+  store atomic i64 %x, i64* @sc64 release, align 8
+; X32:       lock
+; X32:       cmpxchg8b
+  ret void
+; X32:       ret
+}
+
+define void @atomic_fetch_swap64(i64 %x) nounwind {
+  %t1 = atomicrmw xchg i64* @sc64, i64 %x acquire
+; X32:       lock
+; X32:       xchg8b
+  ret void
+; X32:       ret
+}
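
On 32-bit x86 every 64-bit atomic goes through cmpxchg8b, and the new value
is computed in two 32-bit halves, which is why each operation is checked as a
pair (addl/adcl, subl/sbbl, two andl's, and so on). The carry propagation the
addl/adcl pair performs, sketched in C++ (names are ours):

    #include <cstdint>

    void add64_split(uint32_t &lo, uint32_t &hi, uint64_t v) {
      uint32_t old_lo = lo;
      lo += static_cast<uint32_t>(v);                        // addl
      hi += static_cast<uint32_t>(v >> 32) + (lo < old_lo);  // adcl (carry in)
    }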

Added: llvm/branches/R600/test/CodeGen/X86/atomic8.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/X86/atomic8.ll?rev=164384&view=auto
==============================================================================
--- llvm/branches/R600/test/CodeGen/X86/atomic8.ll (added)
+++ llvm/branches/R600/test/CodeGen/X86/atomic8.ll Fri Sep 21 12:29:50 2012
@@ -0,0 +1,250 @@
+; RUN: llc < %s -O0 -march=x86-64 -mcpu=corei7 | FileCheck %s --check-prefix X64
+; RUN: llc < %s -O0 -march=x86 -mcpu=corei7 | FileCheck %s --check-prefix X32
+
+ at sc8 = external global i8
+
+define void @atomic_fetch_add8() nounwind {
+; X64:   atomic_fetch_add8
+; X32:   atomic_fetch_add8
+entry:
+  %t1 = atomicrmw add  i8* @sc8, i8 1 acquire
+; X64:       lock
+; X64:       incb
+; X32:       lock
+; X32:       incb
+  %t2 = atomicrmw add  i8* @sc8, i8 3 acquire
+; X64:       lock
+; X64:       addb $3
+; X32:       lock
+; X32:       addb $3
+  %t3 = atomicrmw add  i8* @sc8, i8 5 acquire
+; X64:       lock
+; X64:       xaddb
+; X32:       lock
+; X32:       xaddb
+  %t4 = atomicrmw add  i8* @sc8, i8 %t3 acquire
+; X64:       lock
+; X64:       addb
+; X32:       lock
+; X32:       addb
+  ret void
+; X64:       ret
+; X32:       ret
+}
+
+define void @atomic_fetch_sub8() nounwind {
+; X64:   atomic_fetch_sub8
+; X32:   atomic_fetch_sub8
+  %t1 = atomicrmw sub  i8* @sc8, i8 1 acquire
+; X64:       lock
+; X64:       decb
+; X32:       lock
+; X32:       decb
+  %t2 = atomicrmw sub  i8* @sc8, i8 3 acquire
+; X64:       lock
+; X64:       subb $3
+; X32:       lock
+; X32:       subb $3
+  %t3 = atomicrmw sub  i8* @sc8, i8 5 acquire
+; X64:       lock
+; X64:       xaddb
+; X32:       lock
+; X32:       xaddb
+  %t4 = atomicrmw sub  i8* @sc8, i8 %t3 acquire
+; X64:       lock
+; X64:       subb
+; X32:       lock
+; X32:       subb
+  ret void
+; X64:       ret
+; X32:       ret
+}
+
+define void @atomic_fetch_and8() nounwind {
+; X64:   atomic_fetch_and8
+; X32:   atomic_fetch_and8
+  %t1 = atomicrmw and  i8* @sc8, i8 3 acquire
+; X64:       lock
+; X64:       andb $3
+; X32:       lock
+; X32:       andb $3
+  %t2 = atomicrmw and  i8* @sc8, i8 5 acquire
+; X64:       andb
+; X64:       lock
+; X64:       cmpxchgb
+; X32:       andb
+; X32:       lock
+; X32:       cmpxchgb
+  %t3 = atomicrmw and  i8* @sc8, i8 %t2 acquire
+; X64:       lock
+; X64:       andb
+; X32:       lock
+; X32:       andb
+  ret void
+; X64:       ret
+; X32:       ret
+}
+
+define void @atomic_fetch_or8() nounwind {
+; X64:   atomic_fetch_or8
+; X32:   atomic_fetch_or8
+  %t1 = atomicrmw or   i8* @sc8, i8 3 acquire
+; X64:       lock
+; X64:       orb $3
+; X32:       lock
+; X32:       orb $3
+  %t2 = atomicrmw or   i8* @sc8, i8 5 acquire
+; X64:       orb
+; X64:       lock
+; X64:       cmpxchgb
+; X32:       orb
+; X32:       lock
+; X32:       cmpxchgb
+  %t3 = atomicrmw or   i8* @sc8, i8 %t2 acquire
+; X64:       lock
+; X64:       orb
+; X32:       lock
+; X32:       orb
+  ret void
+; X64:       ret
+; X32:       ret
+}
+
+define void @atomic_fetch_xor8() nounwind {
+; X64:   atomic_fetch_xor8
+; X32:   atomic_fetch_xor8
+  %t1 = atomicrmw xor  i8* @sc8, i8 3 acquire
+; X64:       lock
+; X64:       xorb $3
+; X32:       lock
+; X32:       xorb $3
+  %t2 = atomicrmw xor  i8* @sc8, i8 5 acquire
+; X64:       xorb
+; X64:       lock
+; X64:       cmpxchgb
+; X32:       xorb
+; X32:       lock
+; X32:       cmpxchgb
+  %t3 = atomicrmw xor  i8* @sc8, i8 %t2 acquire
+; X64:       lock
+; X64:       xorb
+; X32:       lock
+; X32:       xorb
+  ret void
+; X64:       ret
+; X32:       ret
+}
+
+define void @atomic_fetch_nand8(i8 %x) nounwind {
+; X64:   atomic_fetch_nand8
+; X32:   atomic_fetch_nand8
+  %t1 = atomicrmw nand i8* @sc8, i8 %x acquire
+; X64:       andb
+; X64:       notb
+; X64:       lock
+; X64:       cmpxchgb
+; X32:       andb
+; X32:       notb
+; X32:       lock
+; X32:       cmpxchgb
+  ret void
+; X64:       ret
+; X32:       ret
+}
+
+define void @atomic_fetch_max8(i8 %x) nounwind {
+  %t1 = atomicrmw max  i8* @sc8, i8 %x acquire
+; X64:       cmpb
+; X64:       cmov
+; X64:       lock
+; X64:       cmpxchgb
+
+; X32:       cmpb
+; X32:       cmov
+; X32:       lock
+; X32:       cmpxchgb
+  ret void
+; X64:       ret
+; X32:       ret
+}
+
+define void @atomic_fetch_min8(i8 %x) nounwind {
+  %t1 = atomicrmw min  i8* @sc8, i8 %x acquire
+; X64:       cmpb
+; X64:       cmov
+; X64:       lock
+; X64:       cmpxchgb
+
+; X32:       cmpb
+; X32:       cmov
+; X32:       lock
+; X32:       cmpxchgb
+  ret void
+; X64:       ret
+; X32:       ret
+}
+
+define void @atomic_fetch_umax8(i8 %x) nounwind {
+  %t1 = atomicrmw umax i8* @sc8, i8 %x acquire
+; X64:       cmpb
+; X64:       cmov
+; X64:       lock
+; X64:       cmpxchgb
+
+; X32:       cmpb
+; X32:       cmov
+; X32:       lock
+; X32:       cmpxchgb
+  ret void
+; X64:       ret
+; X32:       ret
+}
+
+define void @atomic_fetch_umin8(i8 %x) nounwind {
+  %t1 = atomicrmw umin i8* @sc8, i8 %x acquire
+; X64:       cmpb
+; X64:       cmov
+; X64:       lock
+; X64:       cmpxchgb
+; X32:       cmpb
+; X32:       cmov
+; X32:       lock
+; X32:       cmpxchgb
+  ret void
+; X64:       ret
+; X32:       ret
+}
+
+define void @atomic_fetch_cmpxchg8() nounwind {
+  %t1 = cmpxchg i8* @sc8, i8 0, i8 1 acquire
+; X64:       lock
+; X64:       cmpxchgb
+; X32:       lock
+; X32:       cmpxchgb
+  ret void
+; X64:       ret
+; X32:       ret
+}
+
+define void @atomic_fetch_store8(i8 %x) nounwind {
+  store atomic i8 %x, i8* @sc8 release, align 4
+; X64-NOT:   lock
+; X64:       movb
+; X32-NOT:   lock
+; X32:       movb
+  ret void
+; X64:       ret
+; X32:       ret
+}
+
+define void @atomic_fetch_swap8(i8 %x) nounwind {
+  %t1 = atomicrmw xchg i8* @sc8, i8 %x acquire
+; X64-NOT:   lock
+; X64:       xchgb
+; X32-NOT:   lock
+; X32:       xchgb
+  ret void
+; X64:       ret
+; X32:       ret
+}
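
One detail worth noting in the add/sub checks above: the form chosen depends
on whether the old value is needed. When the result is dead, a plain lock
inc/add/sub suffices; when it feeds a later operation (as %t3 feeds %t4), the
lowering uses lock xadd to get the previous value back. In C++ terms
(illustrative function, not from the test):

    #include <atomic>

    void bump(std::atomic<signed char> &a) {
      a.fetch_add(1);                    // result unused: lock incb
      signed char old = a.fetch_add(5);  // result needed: lock xaddb
      a.fetch_add(old);                  // result unused: lock addb
    }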

Modified: llvm/branches/R600/test/CodeGen/X86/atomic_add.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/X86/atomic_add.ll?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/X86/atomic_add.ll (original)
+++ llvm/branches/R600/test/CodeGen/X86/atomic_add.ll Fri Sep 21 12:29:50 2012
@@ -178,7 +178,8 @@
 define void @sub2(i16* nocapture %p, i32 %v) nounwind ssp {
 entry:
 ; CHECK: sub2:
-; CHECK: negl
+; CHECK-NOT: negl
+; CHECK: subw
 	%0 = trunc i32 %v to i16		; <i16> [#uses=1]
   %1 = atomicrmw sub i16* %p, i16 %0 monotonic
   ret void
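
The new checks assert that the negate is gone: an atomicrmw sub with a
runtime operand can use lock subw directly instead of negating the operand
and adding. At the C level the test corresponds to roughly this (GCC/Clang
builtin; the IR's monotonic ordering maps to relaxed):

    #include <cstdint>

    void sub2(uint16_t *p, int v) {
      __atomic_fetch_sub(p, static_cast<uint16_t>(v), __ATOMIC_RELAXED);
    }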

Modified: llvm/branches/R600/test/CodeGen/X86/atomic_op.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/X86/atomic_op.ll?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/X86/atomic_op.ll (original)
+++ llvm/branches/R600/test/CodeGen/X86/atomic_op.ll Fri Sep 21 12:29:50 2012
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mcpu=generic -march=x86 | FileCheck %s
+; RUN: llc < %s -mcpu=generic -march=x86 -mattr=+cmov | FileCheck %s
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 
@@ -107,13 +107,12 @@
         ; CHECK: cmpxchgl
   %17 = cmpxchg i32* %val2, i32 1976, i32 1 monotonic
 	store i32 %17, i32* %old
+        ; CHECK: movl  [[R17atomic:.*]], %eax
         ; CHECK: movl	$1401, %[[R17mask:[a-z]*]]
-        ; CHECK: movl	[[R17atomic:.*]], %eax
-        ; CHECK: movl	%eax, %[[R17newval:[a-z]*]]
-        ; CHECK: andl	%[[R17mask]], %[[R17newval]]
-        ; CHECK: notl	%[[R17newval]]
+        ; CHECK: andl	%eax, %[[R17mask]]
+        ; CHECK: notl	%[[R17mask]]
         ; CHECK: lock
-        ; CHECK: cmpxchgl	%[[R17newval]], [[R17atomic]]
+        ; CHECK: cmpxchgl	%[[R17mask]], [[R17atomic]]
         ; CHECK: jne
         ; CHECK: movl	%eax,
   %18 = atomicrmw nand i32* %val2, i32 1401 monotonic

Modified: llvm/branches/R600/test/CodeGen/X86/avx2-shuffle.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/X86/avx2-shuffle.ll?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/X86/avx2-shuffle.ll (original)
+++ llvm/branches/R600/test/CodeGen/X86/avx2-shuffle.ll Fri Sep 21 12:29:50 2012
@@ -28,7 +28,7 @@
 }
 
 ; CHECK: vpshufb_test
-; CHECK; vpshufb {{.*\(%r.*}}, %ymm
+; CHECK: vpshufb {{.*\(%r.*}}, %ymm
 ; CHECK: ret
 define <32 x i8> @vpshufb_test(<32 x i8> %a) nounwind {
   %S = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15, 
@@ -39,7 +39,7 @@
 }
 
 ; CHECK: vpshufb1_test
-; CHECK; vpshufb {{.*\(%r.*}}, %ymm
+; CHECK: vpshufb {{.*\(%r.*}}, %ymm
 ; CHECK: ret
 define <32 x i8> @vpshufb1_test(<32 x i8> %a) nounwind {
   %S = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15, 
@@ -51,7 +51,7 @@
 
 
 ; CHECK: vpshufb2_test
-; CHECK; vpshufb {{.*\(%r.*}}, %ymm
+; CHECK: vpshufb {{.*\(%r.*}}, %ymm
 ; CHECK: ret
 define <32 x i8> @vpshufb2_test(<32 x i8> %a) nounwind {
   %S = shufflevector <32 x i8> zeroinitializer, <32 x i8> %a, <32 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15, 

Added: llvm/branches/R600/test/CodeGen/X86/pr13458.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/X86/pr13458.ll?rev=164384&view=auto
==============================================================================
--- llvm/branches/R600/test/CodeGen/X86/pr13458.ll (added)
+++ llvm/branches/R600/test/CodeGen/X86/pr13458.ll Fri Sep 21 12:29:50 2012
@@ -0,0 +1,14 @@
+; RUN: llc < %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-darwin11.4.2"
+
+%v8_uniform_Stats.0.2.4.10 = type { i64, i64, i32, i32, i32, i32, i32, i32, i32, i32, i64, i64, i64, i32, i32, i32, i32, i32, i32, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i32, i64, [7 x i32], [7 x i64] }
+
+ at globalStats = external global %v8_uniform_Stats.0.2.4.10
+
+define void @MergeStats() nounwind {
+allocas:
+  %r.i.i720 = atomicrmw max i64* getelementptr inbounds (%v8_uniform_Stats.0.2.4.10* @globalStats, i64 0, i32 30), i64 0 seq_cst
+  ret void
+}
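
This test has no FileCheck lines on purpose: PR13458 was a crash, so simply
getting llc to produce output is the regression check. What the reproducer
computes, in plain C++ terms (a sketch; compare_exchange defaults to
sequentially consistent ordering, matching the IR's seq_cst):

    #include <atomic>

    long long clamp_min_zero(std::atomic<long long> &a) {
      long long old = a.load();
      // atomicrmw max with operand 0: store 0 only while the value is negative.
      while (old < 0 && !a.compare_exchange_weak(old, 0)) {
      }
      return old;  // previous value; memory now holds max(old, 0)
    }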

Added: llvm/branches/R600/test/CodeGen/X86/pr5145.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/X86/pr5145.ll?rev=164384&view=auto
==============================================================================
--- llvm/branches/R600/test/CodeGen/X86/pr5145.ll (added)
+++ llvm/branches/R600/test/CodeGen/X86/pr5145.ll Fri Sep 21 12:29:50 2012
@@ -0,0 +1,35 @@
+; RUN: llc -march=x86-64 < %s | FileCheck %s
+ at sc8 = external global i8
+
+define void @atomic_maxmin_i8() {
+; CHECK: atomic_maxmin_i8
+  %1 = atomicrmw max  i8* @sc8, i8 5 acquire
+; CHECK: [[LABEL1:\.?LBB[0-9]+_[0-9]+]]:
+; CHECK: cmpb
+; CHECK: cmovl
+; CHECK: lock
+; CHECK-NEXT: cmpxchgb
+; CHECK: jne [[LABEL1]]
+  %2 = atomicrmw min  i8* @sc8, i8 6 acquire
+; CHECK: [[LABEL3:\.?LBB[0-9]+_[0-9]+]]:
+; CHECK: cmpb
+; CHECK: cmovg
+; CHECK: lock
+; CHECK-NEXT: cmpxchgb
+; CHECK: jne [[LABEL3]]
+  %3 = atomicrmw umax i8* @sc8, i8 7 acquire
+; CHECK: [[LABEL5:\.?LBB[0-9]+_[0-9]+]]:
+; CHECK: cmpb
+; CHECK: cmovb
+; CHECK: lock
+; CHECK-NEXT: cmpxchgb
+; CHECK: jne [[LABEL5]]
+  %4 = atomicrmw umin i8* @sc8, i8 8 acquire
+; CHECK: [[LABEL7:\.?LBB[0-9]+_[0-9]+]]:
+; CHECK: cmpb
+; CHECK: cmova
+; CHECK: lock
+; CHECK-NEXT: cmpxchgb
+; CHECK: jne [[LABEL7]]
+  ret void
+}
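
The [[LABEL1]]-style FileCheck variables above capture the loop header label
so the trailing jne can be checked as a real backedge. The four cmov variants
distinguish signedness: max/min use cmovl/cmovg, umax/umin use cmovb/cmova.
Rough source-level analogues (ours, for orientation only):

    #include <cstdint>

    int8_t  smax(int8_t a, int8_t b)   { return a < b ? b : a; }  // cmovl
    int8_t  smin(int8_t a, int8_t b)   { return a > b ? b : a; }  // cmovg
    uint8_t umax(uint8_t a, uint8_t b) { return a < b ? b : a; }  // cmovb
    uint8_t umin(uint8_t a, uint8_t b) { return a > b ? b : a; }  // cmova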

Modified: llvm/branches/R600/test/DebugInfo/X86/DW_AT_object_pointer.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/DebugInfo/X86/DW_AT_object_pointer.ll?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/test/DebugInfo/X86/DW_AT_object_pointer.ll (original)
+++ llvm/branches/R600/test/DebugInfo/X86/DW_AT_object_pointer.ll Fri Sep 21 12:29:50 2012
@@ -1,8 +1,8 @@
 ; RUN: llc -mtriple=x86_64-apple-darwin %s -o %t -filetype=obj
 ; RUN: llvm-dwarfdump %t | FileCheck %s
 
-; CHECK: DW_AT_object_pointer [DW_FORM_ref4]     (cu + 0x00c3 => {0x000000c3})
-; CHECK: 0x000000c3:     DW_TAG_formal_parameter [12]
+; CHECK: DW_AT_object_pointer [DW_FORM_ref4]     (cu + 0x00bf => {0x000000bf})
+; CHECK: 0x000000bf:     DW_TAG_formal_parameter [12]
 ; CHECK-NEXT: DW_AT_name [DW_FORM_strp]     ( .debug_str[0x00000085] = "this")
 
 %class.A = type { i32 }

Modified: llvm/branches/R600/test/MC/AsmParser/macro-args.s
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/MC/AsmParser/macro-args.s?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/test/MC/AsmParser/macro-args.s (original)
+++ llvm/branches/R600/test/MC/AsmParser/macro-args.s Fri Sep 21 12:29:50 2012
@@ -4,10 +4,18 @@
     movl   \var at GOTOFF(%ebx),\re2g
 .endm
 
+.macro GET_DEFAULT var, re2g=%ebx, re3g=%ecx
+movl 2(\re2g, \re3g, 2), \var
+.endm
+
+GET         is_sse, %eax
+// CHECK: movl  is_sse at GOTOFF(%ebx), %eax
 
-GET    is_sse, %eax
+GET_DEFAULT %ebx, , %edx
+// CHECK: movl  2(%ebx,%edx,2), %ebx
 
-// CHECK: movl	is_sse at GOTOFF(%ebx), %eax
+GET_DEFAULT %ebx, %edx
+// CHECK: movl  2(%edx,%ecx,2), %ebx
 
 .macro bar
     .long $n

Modified: llvm/branches/R600/test/MC/AsmParser/macro-rept-err1.s
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/MC/AsmParser/macro-rept-err1.s?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/test/MC/AsmParser/macro-rept-err1.s (original)
+++ llvm/branches/R600/test/MC/AsmParser/macro-rept-err1.s Fri Sep 21 12:29:50 2012
@@ -3,4 +3,4 @@
 
 .endr
 
-// CHECK: unexpected '.endr' directive, no current .rept
+// CHECK: unmatched '.endr' directive

Added: llvm/branches/R600/test/MC/AsmParser/macros-darwin.s
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/MC/AsmParser/macros-darwin.s?rev=164384&view=auto
==============================================================================
--- llvm/branches/R600/test/MC/AsmParser/macros-darwin.s (added)
+++ llvm/branches/R600/test/MC/AsmParser/macros-darwin.s Fri Sep 21 12:29:50 2012
@@ -0,0 +1,9 @@
+// RUN: not llvm-mc -triple i386-apple-darwin10 %s 2> %t.err | FileCheck %s
+
+.macro test1
+.globl "$0 $1 $2 $$3 $n"
+.endmacro
+
+// CHECK: .globl "1 23  $3 2"
+test1 1, 2 3
+

Modified: llvm/branches/R600/test/MC/AsmParser/macros.s
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/MC/AsmParser/macros.s?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/test/MC/AsmParser/macros.s (original)
+++ llvm/branches/R600/test/MC/AsmParser/macros.s Fri Sep 21 12:29:50 2012
@@ -1,4 +1,4 @@
-// RUN: not llvm-mc -triple x86_64-apple-darwin10 %s 2> %t.err | FileCheck %s
+// RUN: not llvm-mc -triple i386-unknown-unknown %s 2> %t.err | FileCheck %s
 // RUN: FileCheck --check-prefix=CHECK-ERRORS %s < %t.err
 
 .macro .test0
@@ -28,33 +28,66 @@
 .globl "$0 $1 $2 $$3 $n"
 .endmacro
 
-// CHECK: .globl	"1 23  $3 2"
-test3 1,2 3
+// CHECK: .globl	"1 (23)  $3 2"
+test3 1, (2 3)
+
+// CHECK: .globl "1 2  $3 2"
+test3 1 2
 
 .macro test4
 .globl "$0 -- $1"
 .endmacro
 
-// CHECK: .globl	"ab)(,) -- (cd)"
-test4 a b)(,),(cd)
+// CHECK: .globl  "(ab)(,)) -- (cd)"
+test4 (a b)(,)),(cd)
+
+// CHECK: .globl  "(ab)(,)) -- (cd)"
+test4 (a b)(,)),(cd)
 
 .macro test5 _a
 .globl "\_a"
 .endm
 
-test5 zed1
 // CHECK: .globl zed1
+test5 zed1
 
 .macro test6 $a
 .globl "\$a"
 .endm
 
-test6 zed2
 // CHECK: .globl zed2
+test6 zed2
 
 .macro test7 .a
 .globl "\.a"
 .endm
 
-test7 zed3
 // CHECK: .globl zed3
+test7 zed3
+
+.macro test8 _a, _b, _c
+.globl "\_a,\_b,\_c"
+.endmacro
+
+.macro test9 _a _b _c
+.globl "\_a \_b \_c"
+.endmacro
+
+// CHECK: .globl  "a,b,c"
+test8 a, b, c
+// CHECK: .globl  "%1,%2,%3"
+test8 %1 %2 %3 #a comment
+// CHECK: .globl "x-y,z,1"
+test8 x - y z 1
+// CHECK: .globl  "1 2 3"
+test9 1, 2,3
+
+test8 1,2 3
+// CHECK-ERRORS: error: macro argument '_c' is missing
+// CHECK-ERRORS-NEXT: test8 1,2 3
+// CHECK-ERRORS-NEXT:           ^
+
+test8 1 2, 3
+// CHECK-ERRORS: error: expected ' ' for macro argument separator
+// CHECK-ERRORS-NEXT:test8 1 2, 3
+// CHECK-ERRORS-NEXT:         ^

Modified: llvm/branches/R600/test/MC/X86/x86_nop.s
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/MC/X86/x86_nop.s?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/test/MC/X86/x86_nop.s (original)
+++ llvm/branches/R600/test/MC/X86/x86_nop.s Fri Sep 21 12:29:50 2012
@@ -1,4 +1,4 @@
-# RUN: llvm-mc -filetype=obj -mcpu=geode %s -o %t
+# RUN: llvm-mc -filetype=obj -arch=x86 -mcpu=geode %s -o %t
 # RUN: llvm-objdump -disassemble %t | FileCheck %s
 
 # CHECK-NOT: nopw

Modified: llvm/branches/R600/test/Object/nm-shared-object.test
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Object/nm-shared-object.test?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/test/Object/nm-shared-object.test (original)
+++ llvm/branches/R600/test/Object/nm-shared-object.test Fri Sep 21 12:29:50 2012
@@ -1,15 +1,23 @@
 RUN: llvm-nm -D %p/Inputs/shared-object-test.elf-i386 \
-RUN:         | FileCheck %s -check-prefix ELF
+RUN:         | FileCheck %s -check-prefix ELF-32
 RUN: llvm-nm -D %p/Inputs/shared-object-test.elf-x86-64 \
-RUN:         | FileCheck %s -check-prefix ELF
+RUN:         | FileCheck %s -check-prefix ELF-64
 
 ; Note: tls_sym should be 'D' (not '?'), but TLS is not
 ; yet recognized by ObjectFile.
 
-ELF: {{[0-9a-f]+}} A __bss_start
-ELF: {{[0-9a-f]+}} A _edata
-ELF: {{[0-9a-f]+}} A _end
-ELF: {{[0-9a-f]+}} B common_sym
-ELF: {{[0-9a-f]+}} D defined_sym
-ELF: {{[0-9a-f]+}} T global_func
-ELF:               ? tls_sym
+ELF-32: 0012c8 A __bss_start
+ELF-32: 0012c8 A _edata
+ELF-32: 0012cc A _end
+ELF-32: 0012c8 B common_sym
+ELF-32: 0012c4 D defined_sym
+ELF-32: 0001f0 T global_func
+ELF-32:        ? tls_sym
+
+ELF-64: 200454 A __bss_start
+ELF-64: 200454 A _edata
+ELF-64: 200458 A _end
+ELF-64: 200454 B common_sym
+ELF-64: 200450 D defined_sym
+ELF-64: 0002f0 T global_func
+ELF-64:        ? tls_sym

Modified: llvm/branches/R600/test/Object/objdump-symbol-table.test
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Object/objdump-symbol-table.test?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/test/Object/objdump-symbol-table.test (original)
+++ llvm/branches/R600/test/Object/objdump-symbol-table.test Fri Sep 21 12:29:50 2012
@@ -4,6 +4,8 @@
 RUN:              | FileCheck %s -check-prefix ELF-i386
 RUN: llvm-objdump -t %p/Inputs/trivial-object-test.macho-i386 \
 RUN:              | FileCheck %s -check-prefix macho-i386
+RUN: llvm-objdump -t %p/Inputs/shared-object-test.elf-i386 \
+RUN:              | FileCheck %s -check-prefix ELF-shared
 
 COFF-i386: file format
 COFF-i386: SYMBOL TABLE:
@@ -31,3 +33,9 @@
 macho-i386: 00000000 g     F __TEXT,__text  00000024 _main
 macho-i386: 00000000         *UND*  00000000 _SomeOtherFunction
 macho-i386: 00000000         *UND*  00000000 _puts
+
+ELF-shared: shared-object-test.elf-i386:     file format
+ELF-shared: SYMBOL TABLE:
+ELF-shared: 00000200 l     F .text 00000003 local_func
+ELF-shared: 000012c4 g       .data 00000004 defined_sym
+ELF-shared: 000001f0 g     F .text 00000003 global_func

Added: llvm/branches/R600/test/Other/FileCheck-space.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Other/FileCheck-space.txt?rev=164384&view=auto
==============================================================================
--- llvm/branches/R600/test/Other/FileCheck-space.txt (added)
+++ llvm/branches/R600/test/Other/FileCheck-space.txt Fri Sep 21 12:29:50 2012
@@ -0,0 +1,9 @@
+RUN: printf "a\nb" | FileCheck %s -check-prefix=TEST1
+RUN: echo oo | FileCheck %s -check-prefix=TEST2
+
+Check that CHECK-NEXT without a space after the colon works.
+TEST1:a
+TEST1-NEXT:b
+
+Check that CHECK-NOT without a space after the colon works.
+TEST2-NOT:foo

Modified: llvm/branches/R600/test/Other/lit.local.cfg
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Other/lit.local.cfg?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/test/Other/lit.local.cfg (original)
+++ llvm/branches/R600/test/Other/lit.local.cfg Fri Sep 21 12:29:50 2012
@@ -1 +1 @@
-config.suffixes = ['.ll', '.c', '.cpp']
+config.suffixes = ['.ll', '.c', '.cpp', '.txt']

Added: llvm/branches/R600/test/Transforms/Inline/recursive.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/Inline/recursive.ll?rev=164384&view=auto
==============================================================================
--- llvm/branches/R600/test/Transforms/Inline/recursive.ll (added)
+++ llvm/branches/R600/test/Transforms/Inline/recursive.ll Fri Sep 21 12:29:50 2012
@@ -0,0 +1,38 @@
+; RUN: opt %s -inline -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin10.0"
+
+; rdar://10853263
+
+; Make sure that the callee is still here.
+; CHECK: define i32 @callee
+define i32 @callee(i32 %param) {
+ %yyy = alloca [100000 x i8]
+ %r = bitcast [100000 x i8]* %yyy to i8*
+ call void @foo2(i8* %r)
+ ret i32 4
+}
+
+; CHECK: define i32 @caller
+; CHECK-NEXT: entry:
+; CHECK-NOT: alloca
+; CHECK: ret
+define i32 @caller(i32 %param) {
+entry:
+  %t = call i32 @foo(i32 %param)
+  %cmp = icmp eq i32 %t, -1
+  br i1 %cmp, label %exit, label %cont
+
+cont:
+  %r = call i32 @caller(i32 %t)
+  %f = call i32 @callee(i32 %r)
+  br label %cont
+exit:
+  ret i32 4
+}
+
+declare void @foo2(i8* %in)
+
+declare i32 @foo(i32 %param)
+

Modified: llvm/branches/R600/test/Transforms/InstCombine/div-shift.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/InstCombine/div-shift.ll?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/test/Transforms/InstCombine/div-shift.ll (original)
+++ llvm/branches/R600/test/Transforms/InstCombine/div-shift.ll Fri Sep 21 12:29:50 2012
@@ -21,3 +21,17 @@
   %3 = udiv i64 %x, %2
   ret i64 %3
 }
+
+; PR13250
+define i64 @t3(i64 %x, i32 %y) nounwind  {
+; CHECK: t3
+; CHECK-NOT: udiv
+; CHECK-NEXT: %1 = add i32 %y, 2
+; CHECK-NEXT: %2 = zext i32 %1 to i64
+; CHECK-NEXT: %3 = lshr i64 %x, %2
+; CHECK-NEXT: ret i64 %3
+  %1 = shl i32 4, %y
+  %2 = zext i32 %1 to i64
+  %3 = udiv i64 %x, %2
+  ret i64 %3
+}

Modified: llvm/branches/R600/test/Transforms/SROA/basictest.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/SROA/basictest.ll?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/test/Transforms/SROA/basictest.ll (original)
+++ llvm/branches/R600/test/Transforms/SROA/basictest.ll Fri Sep 21 12:29:50 2012
@@ -754,3 +754,23 @@
   ret void
 }
 
+%opaque = type opaque
+
+define i32 @test19(%opaque* %x) {
+; This input makes the rewriter compute a natural GEP that would step through
+; the opaque type. A check for unsized types used to be missing here, which
+; crashed. Ensure it now behaves reasonably.
+; CHECK: @test19
+; CHECK-NOT: alloca
+; CHECK: ret i32 undef
+
+entry:
+  %a = alloca { i64, i8* }
+  %cast1 = bitcast %opaque* %x to i8*
+  %cast2 = bitcast { i64, i8* }* %a to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %cast2, i8* %cast1, i32 16, i32 1, i1 false)
+  %gep = getelementptr inbounds { i64, i8* }* %a, i32 0, i32 0
+  %val = load i64* %gep
+  ret i32 undef
+}

Added: llvm/branches/R600/test/Transforms/SimplifyCFG/sink-common-code.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/SimplifyCFG/sink-common-code.ll?rev=164384&view=auto
==============================================================================
--- llvm/branches/R600/test/Transforms/SimplifyCFG/sink-common-code.ll (added)
+++ llvm/branches/R600/test/Transforms/SimplifyCFG/sink-common-code.ll Fri Sep 21 12:29:50 2012
@@ -0,0 +1,53 @@
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
+
+define zeroext i1 @test1(i1 zeroext %flag, i32 %blksA, i32 %blksB, i32 %nblks) {
+entry:
+  br i1 %flag, label %if.then, label %if.else
+
+; CHECK: test1
+; CHECK: add
+; CHECK: select
+; CHECK: icmp
+; CHECK-NOT: br
+if.then:
+  %cmp = icmp uge i32 %blksA, %nblks
+  %frombool1 = zext i1 %cmp to i8
+  br label %if.end
+
+if.else:
+  %add = add i32 %nblks, %blksB
+  %cmp2 = icmp ule i32 %add, %blksA
+  %frombool3 = zext i1 %cmp2 to i8
+  br label %if.end
+
+if.end:
+  %obeys.0 = phi i8 [ %frombool1, %if.then ], [ %frombool3, %if.else ]
+  %tobool4 = icmp ne i8 %obeys.0, 0
+  ret i1 %tobool4
+}
+
+define zeroext i1 @test2(i1 zeroext %flag, i32 %blksA, i32 %blksB, i32 %nblks) {
+entry:
+  br i1 %flag, label %if.then, label %if.else
+
+; CHECK: test2
+; CHECK: add
+; CHECK: select
+; CHECK: icmp
+; CHECK-NOT: br
+if.then:
+  %cmp = icmp uge i32 %blksA, %nblks
+  %frombool1 = zext i1 %cmp to i8
+  br label %if.end
+
+if.else:
+  %add = add i32 %nblks, %blksB
+  %cmp2 = icmp uge i32 %blksA, %add
+  %frombool3 = zext i1 %cmp2 to i8
+  br label %if.end
+
+if.end:
+  %obeys.0 = phi i8 [ %frombool1, %if.then ], [ %frombool3, %if.else ]
+  %tobool4 = icmp ne i8 %obeys.0, 0
+  ret i1 %tobool4
+}
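
Both tests check the same rewrite: the two arms compute a zext'ed comparison
against blksA, so SimplifyCFG sinks the common code and replaces the branch
and phi with a select feeding a single icmp. In source-level terms, test1
becomes roughly (illustrative C++; unsigned arithmetic as in the IR):

    bool test1(bool flag, unsigned blksA, unsigned blksB, unsigned nblks) {
      unsigned rhs = flag ? nblks : nblks + blksB;  // the select
      return blksA >= rhs;                          // single icmp, no branch
    }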

Modified: llvm/branches/R600/test/Transforms/SimplifyCFG/switch_to_lookup_table.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/SimplifyCFG/switch_to_lookup_table.ll?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/test/Transforms/SimplifyCFG/switch_to_lookup_table.ll (original)
+++ llvm/branches/R600/test/Transforms/SimplifyCFG/switch_to_lookup_table.ll Fri Sep 21 12:29:50 2012
@@ -15,6 +15,9 @@
 ; The table for @foostring
 ; CHECK: @switch.table3 = private unnamed_addr constant [4 x i8*] [i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str1, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str2, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str3, i64 0, i64 0)]
 
+; The table for @earlyreturncrash
+; CHECK: @switch.table4 = private unnamed_addr constant [4 x i32] [i32 42, i32 9, i32 88, i32 5]
+
 ; A simple int-to-int selection switch.
 ; It is dense enough to be replaced by table lookup.
 ; The result is directly by a ret from an otherwise empty bb,
@@ -138,3 +141,34 @@
 ; CHECK-NEXT: %switch.load = load i8** %switch.gep
 ; CHECK-NEXT: ret i8* %switch.load
 }
+
+; A switch that initializes two values. The first value is returned and the
+; second is unused. This previously made the transformation generate illegal code.
+
+define i32 @earlyreturncrash(i32 %x)  {
+entry:
+  switch i32 %x, label %sw.default [
+    i32 0, label %sw.epilog
+    i32 1, label %sw.bb1
+    i32 2, label %sw.bb2
+    i32 3, label %sw.bb3
+  ]
+
+sw.bb1: br label %sw.epilog
+sw.bb2: br label %sw.epilog
+sw.bb3: br label %sw.epilog
+sw.default: br label %sw.epilog
+
+sw.epilog:
+  %a.0 = phi i32 [ 7, %sw.default ], [ 5, %sw.bb3 ], [ 88, %sw.bb2 ], [ 9, %sw.bb1 ], [ 42, %entry ]
+  %b.0 = phi i32 [ 10, %sw.default ], [ 5, %sw.bb3 ], [ 1, %sw.bb2 ], [ 4, %sw.bb1 ], [ 3, %entry ]
+  ret i32 %a.0
+
+; CHECK: @earlyreturncrash
+; CHECK: switch.lookup:
+; CHECK-NEXT: %switch.gep = getelementptr inbounds [4 x i32]* @switch.table4, i32 0, i32 %switch.tableidx
+; CHECK-NEXT: %switch.load = load i32* %switch.gep
+; CHECK-NEXT: ret i32 %switch.load
+; CHECK: sw.epilog:
+; CHECK-NEXT: ret i32 7
+}
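
The checks pin down both halves of the fix: %a.0 is served from the constant
table @switch.table4 (indexed by the switch value) while the unused %b.0 phi
is dropped rather than miscompiled. The net effect, sketched in C++:

    int earlyreturncrash(unsigned x) {
      static const int table[4] = {42, 9, 88, 5};  // @switch.table4
      return x < 4 ? table[x] : 7;                 // 7 is the default result
    }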

Modified: llvm/branches/R600/tools/lli/lli.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/tools/lli/lli.cpp?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/tools/lli/lli.cpp (original)
+++ llvm/branches/R600/tools/lli/lli.cpp Fri Sep 21 12:29:50 2012
@@ -656,6 +656,9 @@
 
     Target.stop();
   } else {
+    // Trigger compilation separately so that the code regions needing
+    // invalidation are known before execution.
+    (void)EE->getPointerToFunction(EntryFn);
     // Clear instruction cache before code will be executed.
     if (JMM)
       static_cast<LLIMCJITMemoryManager*>(JMM)->invalidateInstructionCache();

Modified: llvm/branches/R600/tools/llvm-stress/llvm-stress.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/tools/llvm-stress/llvm-stress.cpp?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/tools/llvm-stress/llvm-stress.cpp (original)
+++ llvm/branches/R600/tools/llvm-stress/llvm-stress.cpp Fri Sep 21 12:29:50 2012
@@ -126,6 +126,10 @@
   /// C'tor
   Modifier(BasicBlock *Block, PieceTable *PT, Random *R):
     BB(Block),PT(PT),Ran(R),Context(BB->getContext()) {}
+
+  /// Virtual destructor to silence warnings about deleting through a base pointer.
+  virtual ~Modifier() {}
+
   /// Add a new instruction.
   virtual void Act() = 0;
   /// Add N new instructions,

Modified: llvm/branches/R600/unittests/Support/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/unittests/Support/CMakeLists.txt?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/unittests/Support/CMakeLists.txt (original)
+++ llvm/branches/R600/unittests/Support/CMakeLists.txt Fri Sep 21 12:29:50 2012
@@ -17,6 +17,7 @@
   LeakDetectorTest.cpp
   ManagedStatic.cpp
   MathExtrasTest.cpp
+  MemoryTest.cpp
   Path.cpp
   RegexTest.cpp
   SwapByteOrderTest.cpp

Added: llvm/branches/R600/unittests/Support/MemoryTest.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/unittests/Support/MemoryTest.cpp?rev=164384&view=auto
==============================================================================
--- llvm/branches/R600/unittests/Support/MemoryTest.cpp (added)
+++ llvm/branches/R600/unittests/Support/MemoryTest.cpp Fri Sep 21 12:29:50 2012
@@ -0,0 +1,356 @@
+//===- llvm/unittest/Support/MemoryTest.cpp - Mapped memory tests --------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Memory.h"
+#include "llvm/Support/Process.h"
+
+#include "gtest/gtest.h"
+#include <cstdlib>
+
+using namespace llvm;
+using namespace sys;
+
+namespace {
+
+class MappedMemoryTest : public ::testing::TestWithParam<unsigned> {
+public:
+  MappedMemoryTest() {
+    Flags = GetParam();
+    PageSize = sys::Process::GetPageSize();
+  }
+
+protected:
+  // Adds RW flags to permit testing of the resulting memory
+  unsigned getTestableEquivalent(unsigned RequestedFlags) {
+    switch (RequestedFlags) {
+    case Memory::MF_READ:
+    case Memory::MF_WRITE:
+    case Memory::MF_READ|Memory::MF_WRITE:
+      return Memory::MF_READ|Memory::MF_WRITE;
+    case Memory::MF_READ|Memory::MF_EXEC:
+    case Memory::MF_READ|Memory::MF_WRITE|Memory::MF_EXEC:
+    case Memory::MF_EXEC:
+      return Memory::MF_READ|Memory::MF_WRITE|Memory::MF_EXEC;
+    }
+    // Default return in case new enum values are added; some compilers require it.
+    return Memory::MF_READ|Memory::MF_WRITE;
+  }
+
+  // Returns true if the memory blocks overlap
+  bool doesOverlap(MemoryBlock M1, MemoryBlock M2) {
+    if (M1.base() == M2.base())
+      return true;
+
+    if (M1.base() > M2.base())
+      return (unsigned char *)M2.base() + M2.size() > M1.base();
+
+    return (unsigned char *)M1.base() + M1.size() > M2.base();
+  }
+
+  unsigned Flags;
+  size_t   PageSize;
+};
+
+TEST_P(MappedMemoryTest, AllocAndRelease) {
+  error_code EC;
+  MemoryBlock M1 = Memory::allocateMappedMemory(sizeof(int), 0, Flags, EC);
+  EXPECT_EQ(error_code::success(), EC);
+
+  EXPECT_NE((void*)0, M1.base());
+  EXPECT_LE(sizeof(int), M1.size());
+
+  EXPECT_FALSE(Memory::releaseMappedMemory(M1));
+}
+
+TEST_P(MappedMemoryTest, MultipleAllocAndRelease) {
+  error_code EC;
+  MemoryBlock M1 = Memory::allocateMappedMemory(16, 0, Flags, EC);
+  EXPECT_EQ(error_code::success(), EC);
+  MemoryBlock M2 = Memory::allocateMappedMemory(64, 0, Flags, EC);
+  EXPECT_EQ(error_code::success(), EC);
+  MemoryBlock M3 = Memory::allocateMappedMemory(32, 0, Flags, EC);
+  EXPECT_EQ(error_code::success(), EC);
+
+  EXPECT_NE((void*)0, M1.base());
+  EXPECT_LE(16U, M1.size());
+  EXPECT_NE((void*)0, M2.base());
+  EXPECT_LE(64U, M2.size());
+  EXPECT_NE((void*)0, M3.base());
+  EXPECT_LE(32U, M3.size());
+
+  EXPECT_FALSE(doesOverlap(M1, M2));
+  EXPECT_FALSE(doesOverlap(M2, M3));
+  EXPECT_FALSE(doesOverlap(M1, M3));
+
+  EXPECT_FALSE(Memory::releaseMappedMemory(M1));
+  EXPECT_FALSE(Memory::releaseMappedMemory(M3));
+  MemoryBlock M4 = Memory::allocateMappedMemory(16, 0, Flags, EC);
+  EXPECT_EQ(error_code::success(), EC);
+  EXPECT_NE((void*)0, M4.base());
+  EXPECT_LE(16U, M4.size());
+  EXPECT_FALSE(Memory::releaseMappedMemory(M4));
+  EXPECT_FALSE(Memory::releaseMappedMemory(M2));
+}
+
+TEST_P(MappedMemoryTest, BasicWrite) {
+  // This test applies only to writable flag combinations.
+  if (Flags && !(Flags & Memory::MF_WRITE))
+    return;
+
+  error_code EC;
+  MemoryBlock M1 = Memory::allocateMappedMemory(sizeof(int), 0, Flags, EC);
+  EXPECT_EQ(error_code::success(), EC);
+
+  EXPECT_NE((void*)0, M1.base());
+  EXPECT_LE(sizeof(int), M1.size());
+
+  int *a = (int*)M1.base();
+  *a = 1;
+  EXPECT_EQ(1, *a);
+
+  EXPECT_FALSE(Memory::releaseMappedMemory(M1));
+}
+
+TEST_P(MappedMemoryTest, MultipleWrite) {
+  // This test applies only to writable flag combinations.
+  if (Flags && !(Flags & Memory::MF_WRITE))
+    return;
+  error_code EC;
+  MemoryBlock M1 = Memory::allocateMappedMemory(sizeof(int), 0, Flags, EC);
+  EXPECT_EQ(error_code::success(), EC);
+  MemoryBlock M2 = Memory::allocateMappedMemory(8 * sizeof(int), 0, Flags, EC);
+  EXPECT_EQ(error_code::success(), EC);
+  MemoryBlock M3 = Memory::allocateMappedMemory(4 * sizeof(int), 0, Flags, EC);
+  EXPECT_EQ(error_code::success(), EC);
+
+  EXPECT_FALSE(doesOverlap(M1, M2));
+  EXPECT_FALSE(doesOverlap(M2, M3));
+  EXPECT_FALSE(doesOverlap(M1, M3));
+
+  EXPECT_NE((void*)0, M1.base());
+  EXPECT_LE(1U * sizeof(int), M1.size());
+  EXPECT_NE((void*)0, M2.base());
+  EXPECT_LE(8U * sizeof(int), M2.size());
+  EXPECT_NE((void*)0, M3.base());
+  EXPECT_LE(4U * sizeof(int), M3.size());
+
+  int *x = (int*)M1.base();
+  *x = 1;
+
+  int *y = (int*)M2.base();
+  for (int i = 0; i < 8; i++) {
+    y[i] = i;
+  }
+
+  int *z = (int*)M3.base();
+  *z = 42;
+
+  EXPECT_EQ(1, *x);
+  EXPECT_EQ(7, y[7]);
+  EXPECT_EQ(42, *z);
+
+  EXPECT_FALSE(Memory::releaseMappedMemory(M1));
+  EXPECT_FALSE(Memory::releaseMappedMemory(M3));
+
+  MemoryBlock M4 = Memory::allocateMappedMemory(64 * sizeof(int), 0, Flags, EC);
+  EXPECT_EQ(error_code::success(), EC);
+  EXPECT_NE((void*)0, M4.base());
+  EXPECT_LE(64U * sizeof(int), M4.size());
+  x = (int*)M4.base();
+  *x = 4;
+  EXPECT_EQ(4, *x);
+  EXPECT_FALSE(Memory::releaseMappedMemory(M4));
+
+  // Verify that M2 remains unaffected by other activity
+  for (int i = 0; i < 8; i++) {
+    EXPECT_EQ(i, y[i]);
+  }
+  EXPECT_FALSE(Memory::releaseMappedMemory(M2));
+}
+
+TEST_P(MappedMemoryTest, EnabledWrite) {
+  error_code EC;
+  MemoryBlock M1 = Memory::allocateMappedMemory(2 * sizeof(int), 0, Flags, EC);
+  EXPECT_EQ(error_code::success(), EC);
+  MemoryBlock M2 = Memory::allocateMappedMemory(8 * sizeof(int), 0, Flags, EC);
+  EXPECT_EQ(error_code::success(), EC);
+  MemoryBlock M3 = Memory::allocateMappedMemory(4 * sizeof(int), 0, Flags, EC);
+  EXPECT_EQ(error_code::success(), EC);
+
+  EXPECT_NE((void*)0, M1.base());
+  EXPECT_LE(2U * sizeof(int), M1.size());
+  EXPECT_NE((void*)0, M2.base());
+  EXPECT_LE(8U * sizeof(int), M2.size());
+  EXPECT_NE((void*)0, M3.base());
+  EXPECT_LE(4U * sizeof(int), M3.size());
+
+  EXPECT_FALSE(Memory::protectMappedMemory(M1, getTestableEquivalent(Flags)));
+  EXPECT_FALSE(Memory::protectMappedMemory(M2, getTestableEquivalent(Flags)));
+  EXPECT_FALSE(Memory::protectMappedMemory(M3, getTestableEquivalent(Flags)));
+
+  EXPECT_FALSE(doesOverlap(M1, M2));
+  EXPECT_FALSE(doesOverlap(M2, M3));
+  EXPECT_FALSE(doesOverlap(M1, M3));
+
+  int *x = (int*)M1.base();
+  *x = 1;
+  int *y = (int*)M2.base();
+  for (unsigned int i = 0; i < 8; i++) {
+    y[i] = i;
+  }
+  int *z = (int*)M3.base();
+  *z = 42;
+
+  EXPECT_EQ(1, *x);
+  EXPECT_EQ(7, y[7]);
+  EXPECT_EQ(42, *z);
+
+  EXPECT_FALSE(Memory::releaseMappedMemory(M1));
+  EXPECT_FALSE(Memory::releaseMappedMemory(M3));
+  EXPECT_EQ(6, y[6]);
+
+  MemoryBlock M4 = Memory::allocateMappedMemory(16, 0, Flags, EC);
+  EXPECT_EQ(error_code::success(), EC);
+  EXPECT_NE((void*)0, M4.base());
+  EXPECT_LE(16U, M4.size());
+  EXPECT_EQ(error_code::success(), Memory::protectMappedMemory(M4, getTestableEquivalent(Flags)));
+  x = (int*)M4.base();
+  *x = 4;
+  EXPECT_EQ(4, *x);
+  EXPECT_FALSE(Memory::releaseMappedMemory(M4));
+  EXPECT_FALSE(Memory::releaseMappedMemory(M2));
+}
+
+TEST_P(MappedMemoryTest, SuccessiveNear) {
+  error_code EC;
+  MemoryBlock M1 = Memory::allocateMappedMemory(16, 0, Flags, EC);
+  EXPECT_EQ(error_code::success(), EC);
+  MemoryBlock M2 = Memory::allocateMappedMemory(64, &M1, Flags, EC);
+  EXPECT_EQ(error_code::success(), EC);
+  MemoryBlock M3 = Memory::allocateMappedMemory(32, &M2, Flags, EC);
+  EXPECT_EQ(error_code::success(), EC);
+
+  EXPECT_NE((void*)0, M1.base());
+  EXPECT_LE(16U, M1.size());
+  EXPECT_NE((void*)0, M2.base());
+  EXPECT_LE(64U, M2.size());
+  EXPECT_NE((void*)0, M3.base());
+  EXPECT_LE(32U, M3.size());
+
+  EXPECT_FALSE(doesOverlap(M1, M2));
+  EXPECT_FALSE(doesOverlap(M2, M3));
+  EXPECT_FALSE(doesOverlap(M1, M3));
+
+  EXPECT_FALSE(Memory::releaseMappedMemory(M1));
+  EXPECT_FALSE(Memory::releaseMappedMemory(M3));
+  EXPECT_FALSE(Memory::releaseMappedMemory(M2));
+}
+
+TEST_P(MappedMemoryTest, DuplicateNear) {
+  error_code EC;
+  MemoryBlock Near((void*)(3*PageSize), 16);
+  MemoryBlock M1 = Memory::allocateMappedMemory(16, &Near, Flags, EC);
+  EXPECT_EQ(error_code::success(), EC);
+  MemoryBlock M2 = Memory::allocateMappedMemory(64, &Near, Flags, EC);
+  EXPECT_EQ(error_code::success(), EC);
+  MemoryBlock M3 = Memory::allocateMappedMemory(32, &Near, Flags, EC);
+  EXPECT_EQ(error_code::success(), EC);
+
+  EXPECT_NE((void*)0, M1.base());
+  EXPECT_LE(16U, M1.size());
+  EXPECT_NE((void*)0, M2.base());
+  EXPECT_LE(64U, M2.size());
+  EXPECT_NE((void*)0, M3.base());
+  EXPECT_LE(32U, M3.size());
+
+  EXPECT_FALSE(Memory::releaseMappedMemory(M1));
+  EXPECT_FALSE(Memory::releaseMappedMemory(M3));
+  EXPECT_FALSE(Memory::releaseMappedMemory(M2));
+}
+
+TEST_P(MappedMemoryTest, ZeroNear) {
+  error_code EC;
+  MemoryBlock Near(0, 0);
+  MemoryBlock M1 = Memory::allocateMappedMemory(16, &Near, Flags, EC);
+  EXPECT_EQ(error_code::success(), EC);
+  MemoryBlock M2 = Memory::allocateMappedMemory(64, &Near, Flags, EC);
+  EXPECT_EQ(error_code::success(), EC);
+  MemoryBlock M3 = Memory::allocateMappedMemory(32, &Near, Flags, EC);
+  EXPECT_EQ(error_code::success(), EC);
+
+  EXPECT_NE((void*)0, M1.base());
+  EXPECT_LE(16U, M1.size());
+  EXPECT_NE((void*)0, M2.base());
+  EXPECT_LE(64U, M2.size());
+  EXPECT_NE((void*)0, M3.base());
+  EXPECT_LE(32U, M3.size());
+
+  EXPECT_FALSE(doesOverlap(M1, M2));
+  EXPECT_FALSE(doesOverlap(M2, M3));
+  EXPECT_FALSE(doesOverlap(M1, M3));
+
+  EXPECT_FALSE(Memory::releaseMappedMemory(M1));
+  EXPECT_FALSE(Memory::releaseMappedMemory(M3));
+  EXPECT_FALSE(Memory::releaseMappedMemory(M2));
+}
+
+TEST_P(MappedMemoryTest, ZeroSizeNear) {
+  error_code EC;
+  MemoryBlock Near((void*)(4*PageSize), 0);
+  MemoryBlock M1 = Memory::allocateMappedMemory(16, &Near, Flags, EC);
+  EXPECT_EQ(error_code::success(), EC);
+  MemoryBlock M2 = Memory::allocateMappedMemory(64, &Near, Flags, EC);
+  EXPECT_EQ(error_code::success(), EC);
+  MemoryBlock M3 = Memory::allocateMappedMemory(32, &Near, Flags, EC);
+  EXPECT_EQ(error_code::success(), EC);
+
+  EXPECT_NE((void*)0, M1.base());
+  EXPECT_LE(16U, M1.size());
+  EXPECT_NE((void*)0, M2.base());
+  EXPECT_LE(64U, M2.size());
+  EXPECT_NE((void*)0, M3.base());
+  EXPECT_LE(32U, M3.size());
+
+  EXPECT_FALSE(doesOverlap(M1, M2));
+  EXPECT_FALSE(doesOverlap(M2, M3));
+  EXPECT_FALSE(doesOverlap(M1, M3));
+
+  EXPECT_FALSE(Memory::releaseMappedMemory(M1));
+  EXPECT_FALSE(Memory::releaseMappedMemory(M3));
+  EXPECT_FALSE(Memory::releaseMappedMemory(M2));
+}
+
+TEST_P(MappedMemoryTest, UnalignedNear) {
+  error_code EC;
+  MemoryBlock Near((void*)(2*PageSize+5), 0);
+  MemoryBlock M1 = Memory::allocateMappedMemory(15, &Near, Flags, EC);
+  EXPECT_EQ(error_code::success(), EC);
+
+  EXPECT_NE((void*)0, M1.base());
+  EXPECT_LE(sizeof(int), M1.size());
+
+  EXPECT_FALSE(Memory::releaseMappedMemory(M1));
+}
+
+// Note that write-only mappings (Memory::MF_WRITE alone) are not supported
+// on all operating systems and architectures; MF_WRITE may imply
+// MF_READ|MF_WRITE on some platforms.
+unsigned MemoryFlags[] = {
+                           Memory::MF_READ,
+                           Memory::MF_WRITE,
+                           Memory::MF_READ|Memory::MF_WRITE,
+                           Memory::MF_EXEC,
+                           Memory::MF_READ|Memory::MF_EXEC,
+                           Memory::MF_READ|Memory::MF_WRITE|Memory::MF_EXEC
+                         };
+
+INSTANTIATE_TEST_CASE_P(AllocationTests,
+                        MappedMemoryTest,
+                        ::testing::ValuesIn(MemoryFlags));
+
+}  // anonymous namespace
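
Since this is a value-parameterized fixture, every TEST_P above runs once per
entry in MemoryFlags, giving six protection combinations times the number of
tests. A minimal self-contained sketch of the same gtest pattern (hypothetical
names, same macros as used above):

    #include "gtest/gtest.h"

    class FlagTest : public ::testing::TestWithParam<unsigned> {};

    TEST_P(FlagTest, FitsInThreeBits) { EXPECT_LE(GetParam(), 7u); }

    INSTANTIATE_TEST_CASE_P(Demo, FlagTest, ::testing::Values(1u, 2u, 4u));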

Modified: llvm/branches/R600/utils/FileCheck/FileCheck.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/utils/FileCheck/FileCheck.cpp?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/utils/FileCheck/FileCheck.cpp (original)
+++ llvm/branches/R600/utils/FileCheck/FileCheck.cpp Fri Sep 21 12:29:50 2012
@@ -470,7 +470,7 @@
       continue;
     }
 
-    // If C is not a horizontal whitespace, skip it.
+    // If the current char is not horizontal whitespace, copy it to the output as is.
     if (*Ptr != ' ' && *Ptr != '\t') {
       NewFile.push_back(*Ptr);
       continue;
@@ -537,11 +537,11 @@
       Buffer = Buffer.substr(CheckPrefix.size()+1);
     } else if (Buffer.size() > CheckPrefix.size()+6 &&
                memcmp(Buffer.data()+CheckPrefix.size(), "-NEXT:", 6) == 0) {
-      Buffer = Buffer.substr(CheckPrefix.size()+7);
+      Buffer = Buffer.substr(CheckPrefix.size()+6);
       IsCheckNext = true;
     } else if (Buffer.size() > CheckPrefix.size()+5 &&
                memcmp(Buffer.data()+CheckPrefix.size(), "-NOT:", 5) == 0) {
-      Buffer = Buffer.substr(CheckPrefix.size()+6);
+      Buffer = Buffer.substr(CheckPrefix.size()+5);
       IsCheckNot = true;
     } else {
       Buffer = Buffer.substr(1);
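
The two offset changes fix an off-by-one: the pattern begins immediately
after the directive's colon, so the offset is the prefix length plus the
suffix length ("-NEXT:" is 6 characters, "-NOT:" is 5). Skipping one byte
more consumed the first pattern character whenever no space followed the
colon, which is exactly what the new test/Other/FileCheck-space.txt
exercises. The arithmetic, isolated as a C++ sketch (helper name is ours):

    #include <string>

    std::string patternAfter(const std::string &Line, const std::string &Prefix,
                             const std::string &Suffix) {
      // e.g. Prefix "CHECK" (5) + Suffix "-NEXT:" (6): pattern starts at 11.
      return Line.substr(Prefix.size() + Suffix.size());
    }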

Modified: llvm/branches/R600/utils/TableGen/AsmMatcherEmitter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/utils/TableGen/AsmMatcherEmitter.cpp?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/utils/TableGen/AsmMatcherEmitter.cpp (original)
+++ llvm/branches/R600/utils/TableGen/AsmMatcherEmitter.cpp Fri Sep 21 12:29:50 2012
@@ -279,6 +279,15 @@
   }
 };
 
+namespace {
+/// Sort ClassInfo pointers independently of pointer value.
+struct LessClassInfoPtr {
+  bool operator()(const ClassInfo *LHS, const ClassInfo *RHS) const {
+    return *LHS < *RHS;
+  }
+};
+}
+
 /// MatchableInfo - Helper class for storing the necessary information for an
 /// instruction or alias which is capable of being matched.
 struct MatchableInfo {
@@ -599,7 +608,8 @@
   std::vector<OperandMatchEntry> OperandMatchInfo;
 
   /// Map of Register records to their class information.
-  std::map<Record*, ClassInfo*> RegisterClasses;
+  typedef std::map<Record*, ClassInfo*, LessRecordByID> RegisterClassesTy;
+  RegisterClassesTy RegisterClasses;
 
   /// Map of Predicate records to their subtarget information.
   std::map<Record*, SubtargetFeatureInfo*> SubtargetFeatures;
@@ -1239,7 +1249,8 @@
 
   /// Map containing a mask with all operands indices that can be found for
   /// that class inside a instruction.
-  std::map<ClassInfo*, unsigned> OpClassMask;
+  typedef std::map<ClassInfo*, unsigned, LessClassInfoPtr> OpClassMaskTy;
+  OpClassMaskTy OpClassMask;
 
   for (std::vector<MatchableInfo*>::const_iterator it =
        Matchables.begin(), ie = Matchables.end();
@@ -1258,7 +1269,7 @@
     }
 
     // Generate operand match info for each mnemonic/operand class pair.
-    for (std::map<ClassInfo*, unsigned>::iterator iit = OpClassMask.begin(),
+    for (OpClassMaskTy::iterator iit = OpClassMask.begin(),
          iie = OpClassMask.end(); iit != iie; ++iit) {
       unsigned OpMask = iit->second;
       ClassInfo *CI = iit->first;
@@ -2043,7 +2054,7 @@
   OS << "    MatchClassKind OpKind;\n";
   OS << "    switch (Operand.getReg()) {\n";
   OS << "    default: OpKind = InvalidMatchClass; break;\n";
-  for (std::map<Record*, ClassInfo*>::iterator
+  for (AsmMatcherInfo::RegisterClassesTy::iterator
          it = Info.RegisterClasses.begin(), ie = Info.RegisterClasses.end();
        it != ie; ++it)
     OS << "    case " << Info.Target.getName() << "::"

Modified: llvm/branches/R600/utils/TableGen/CodeGenDAGPatterns.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/utils/TableGen/CodeGenDAGPatterns.cpp?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/utils/TableGen/CodeGenDAGPatterns.cpp (original)
+++ llvm/branches/R600/utils/TableGen/CodeGenDAGPatterns.cpp Fri Sep 21 12:29:50 2012
@@ -574,10 +574,6 @@
 //===----------------------------------------------------------------------===//
 // Helpers for working with extended types.
 
-bool RecordPtrCmp::operator()(const Record *LHS, const Record *RHS) const {
-  return LHS->getID() < RHS->getID();
-}
-
 /// Dependent variable map for CodeGenDAGPattern variant generation
 typedef std::map<std::string, int> DepVarMap;
 
@@ -2748,7 +2744,7 @@
   }
 
   // If we can, convert the instructions to be patterns that are matched!
-  for (std::map<Record*, DAGInstruction, RecordPtrCmp>::iterator II =
+  for (std::map<Record*, DAGInstruction, LessRecordByID>::iterator II =
         Instructions.begin(),
        E = Instructions.end(); II != E; ++II) {
     DAGInstruction &TheInst = II->second;
@@ -2804,8 +2800,11 @@
                                            const PatternToMatch &PTM) {
   // Do some sanity checking on the pattern we're about to match.
   std::string Reason;
-  if (!PTM.getSrcPattern()->canPatternMatch(Reason, *this))
-    Pattern->error("Pattern can never match: " + Reason);
+  if (!PTM.getSrcPattern()->canPatternMatch(Reason, *this)) {
+    PrintWarning(Pattern->getRecord()->getLoc(),
+      Twine("Pattern can never match: ") + Reason);
+    return;
+  }
 
   // If the source pattern's root is a complex pattern, that complex pattern
   // must specify the nodes it can potentially match.
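
Two independent changes land in this file. First, the local RecordPtrCmp
is dropped in favor of a shared LessRecordByID comparator; its definition
is not part of this diff, but the deleted body above shows the intended
semantics, presumably something like:

    // Hypothetical shape of the shared comparator -- the real definition
    // lives elsewhere in TableGen and is not shown in this patch.
    struct Record {                      // stand-in for llvm::Record
      unsigned ID;
      unsigned getID() const { return ID; }
    };

    struct LessRecordByID {
      bool operator()(const Record *LHS, const Record *RHS) const {
        return LHS->getID() < RHS->getID();
      }
    };

Second, a source pattern that can never match is now reported with
PrintWarning and skipped, instead of aborting TableGen with a hard
error, so a single unmatchable pattern no longer stops table generation.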

Modified: llvm/branches/R600/utils/TableGen/CodeGenDAGPatterns.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/utils/TableGen/CodeGenDAGPatterns.h?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/utils/TableGen/CodeGenDAGPatterns.h (original)
+++ llvm/branches/R600/utils/TableGen/CodeGenDAGPatterns.h Fri Sep 21 12:29:50 2012
@@ -661,23 +661,18 @@
   unsigned getPatternComplexity(const CodeGenDAGPatterns &CGP) const;
 };
 
-// Deterministic comparison of Record*.
-struct RecordPtrCmp {
-  bool operator()(const Record *LHS, const Record *RHS) const;
-};
-
 class CodeGenDAGPatterns {
   RecordKeeper &Records;
   CodeGenTarget Target;
   std::vector<CodeGenIntrinsic> Intrinsics;
   std::vector<CodeGenIntrinsic> TgtIntrinsics;
 
-  std::map<Record*, SDNodeInfo, RecordPtrCmp> SDNodes;
-  std::map<Record*, std::pair<Record*, std::string>, RecordPtrCmp> SDNodeXForms;
-  std::map<Record*, ComplexPattern, RecordPtrCmp> ComplexPatterns;
-  std::map<Record*, TreePattern*, RecordPtrCmp> PatternFragments;
-  std::map<Record*, DAGDefaultOperand, RecordPtrCmp> DefaultOperands;
-  std::map<Record*, DAGInstruction, RecordPtrCmp> Instructions;
+  std::map<Record*, SDNodeInfo, LessRecordByID> SDNodes;
+  std::map<Record*, std::pair<Record*, std::string>, LessRecordByID> SDNodeXForms;
+  std::map<Record*, ComplexPattern, LessRecordByID> ComplexPatterns;
+  std::map<Record*, TreePattern*, LessRecordByID> PatternFragments;
+  std::map<Record*, DAGDefaultOperand, LessRecordByID> DefaultOperands;
+  std::map<Record*, DAGInstruction, LessRecordByID> Instructions;
 
   // Specific SDNode definitions:
   Record *intrinsic_void_sdnode;
@@ -708,7 +703,7 @@
     return SDNodeXForms.find(R)->second;
   }
 
-  typedef std::map<Record*, NodeXForm, RecordPtrCmp>::const_iterator
+  typedef std::map<Record*, NodeXForm, LessRecordByID>::const_iterator
           nx_iterator;
   nx_iterator nx_begin() const { return SDNodeXForms.begin(); }
   nx_iterator nx_end() const { return SDNodeXForms.end(); }
@@ -758,7 +753,7 @@
     return PatternFragments.find(R)->second;
   }
 
-  typedef std::map<Record*, TreePattern*, RecordPtrCmp>::const_iterator
+  typedef std::map<Record*, TreePattern*, LessRecordByID>::const_iterator
           pf_iterator;
   pf_iterator pf_begin() const { return PatternFragments.begin(); }
   pf_iterator pf_end() const { return PatternFragments.end(); }

Modified: llvm/branches/R600/utils/TableGen/CodeGenSchedule.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/utils/TableGen/CodeGenSchedule.cpp?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/utils/TableGen/CodeGenSchedule.cpp (original)
+++ llvm/branches/R600/utils/TableGen/CodeGenSchedule.cpp Fri Sep 21 12:29:50 2012
@@ -270,6 +270,21 @@
   return 0;
 }
 
+bool CodeGenSchedModels::hasReadOfWrite(Record *WriteDef) const {
+  for (unsigned i = 0, e = SchedReads.size(); i < e; ++i) {
+    Record *ReadDef = SchedReads[i].TheDef;
+    if (!ReadDef || !ReadDef->isSubClassOf("ProcReadAdvance"))
+      continue;
+
+    RecVec ValidWrites = ReadDef->getValueAsListOfDefs("ValidWrites");
+    if (std::find(ValidWrites.begin(), ValidWrites.end(), WriteDef)
+        != ValidWrites.end()) {
+      return true;
+    }
+  }
+  return false;
+}
+
 namespace llvm {
 void splitSchedReadWrites(const RecVec &RWDefs,
                           RecVec &WriteDefs, RecVec &ReadDefs) {
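
The new hasReadOfWrite helper answers one question: is this write record
named in the ValidWrites list of any ProcReadAdvance read? It is a plain
linear scan built on std::find; a trimmed-down version of the same idiom
over ordinary STL containers:

    #include <algorithm>
    #include <vector>

    // True if Needle appears in any of the given lists -- the same
    // shape as scanning each read's ValidWrites for a write record.
    bool referencedByAny(const std::vector<std::vector<int> > &Lists,
                         int Needle) {
      for (unsigned i = 0, e = Lists.size(); i < e; ++i)
        if (std::find(Lists[i].begin(), Lists[i].end(), Needle)
            != Lists[i].end())
          return true;
      return false;
    }

    int main() {
      std::vector<std::vector<int> > Lists(2);
      Lists[0].push_back(1);
      Lists[1].push_back(3);
      return referencedByAny(Lists, 3) ? 0 : 1;
    }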

Modified: llvm/branches/R600/utils/TableGen/CodeGenSchedule.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/utils/TableGen/CodeGenSchedule.h?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/utils/TableGen/CodeGenSchedule.h (original)
+++ llvm/branches/R600/utils/TableGen/CodeGenSchedule.h Fri Sep 21 12:29:50 2012
@@ -284,6 +284,9 @@
 
   unsigned getSchedRWIdx(Record *Def, bool IsRead, unsigned After = 0) const;
 
+  // Return true if the given write record is referenced by a ReadAdvance.
+  bool hasReadOfWrite(Record *WriteDef) const;
+
   // Check if any instructions are assigned to an explicit itinerary class other
   // than NoItinerary.
   bool hasItineraryClasses() const { return NumItineraryClasses > 0; }

Modified: llvm/branches/R600/utils/TableGen/CodeGenTarget.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/utils/TableGen/CodeGenTarget.cpp?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/utils/TableGen/CodeGenTarget.cpp (original)
+++ llvm/branches/R600/utils/TableGen/CodeGenTarget.cpp Fri Sep 21 12:29:50 2012
@@ -68,22 +68,30 @@
   case MVT::x86mmx:   return "MVT::x86mmx";
   case MVT::Glue:     return "MVT::Glue";
   case MVT::isVoid:   return "MVT::isVoid";
+  case MVT::v2i1:     return "MVT::v2i1";
+  case MVT::v4i1:     return "MVT::v4i1";
+  case MVT::v8i1:     return "MVT::v8i1";
+  case MVT::v16i1:    return "MVT::v16i1";
   case MVT::v2i8:     return "MVT::v2i8";
   case MVT::v4i8:     return "MVT::v4i8";
   case MVT::v8i8:     return "MVT::v8i8";
   case MVT::v16i8:    return "MVT::v16i8";
   case MVT::v32i8:    return "MVT::v32i8";
+  case MVT::v1i16:    return "MVT::v1i16";
   case MVT::v2i16:    return "MVT::v2i16";
   case MVT::v4i16:    return "MVT::v4i16";
   case MVT::v8i16:    return "MVT::v8i16";
   case MVT::v16i16:   return "MVT::v16i16";
+  case MVT::v1i32:    return "MVT::v1i32";
   case MVT::v2i32:    return "MVT::v2i32";
   case MVT::v4i32:    return "MVT::v4i32";
   case MVT::v8i32:    return "MVT::v8i32";
+  case MVT::v16i32:   return "MVT::v16i32";
   case MVT::v1i64:    return "MVT::v1i64";
   case MVT::v2i64:    return "MVT::v2i64";
   case MVT::v4i64:    return "MVT::v4i64";
   case MVT::v8i64:    return "MVT::v8i64";
+  case MVT::v16i64:   return "MVT::v16i64";
   case MVT::v2f16:    return "MVT::v2f16";
   case MVT::v2f32:    return "MVT::v2f32";
   case MVT::v4f32:    return "MVT::v4f32";

Modified: llvm/branches/R600/utils/TableGen/SubtargetEmitter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/utils/TableGen/SubtargetEmitter.cpp?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/utils/TableGen/SubtargetEmitter.cpp (original)
+++ llvm/branches/R600/utils/TableGen/SubtargetEmitter.cpp Fri Sep 21 12:29:50 2012
@@ -36,6 +36,7 @@
     std::vector<std::vector<MCSchedClassDesc> > ProcSchedClasses;
     std::vector<MCWriteProcResEntry> WriteProcResources;
     std::vector<MCWriteLatencyEntry> WriteLatencies;
+    std::vector<std::string> WriterNames;
     std::vector<MCReadAdvanceEntry> ReadAdvanceEntries;
 
     // Reserve an invalid entry at index 0
@@ -43,6 +44,7 @@
       ProcSchedClasses.resize(1);
       WriteProcResources.resize(1);
       WriteLatencies.resize(1);
+      WriterNames.push_back("InvalidWrite");
       ReadAdvanceEntries.resize(1);
     }
   };
@@ -774,6 +776,7 @@
     // Sum resources across all operand writes.
     std::vector<MCWriteProcResEntry> WriteProcResources;
     std::vector<MCWriteLatencyEntry> WriteLatencies;
+    std::vector<std::string> WriterNames;
     std::vector<MCReadAdvanceEntry> ReadAdvanceEntries;
     for (IdxIter WI = Writes.begin(), WE = Writes.end(); WI != WE; ++WI) {
       IdxVec WriteSeq;
@@ -782,7 +785,14 @@
       // For each operand, create a latency entry.
       MCWriteLatencyEntry WLEntry;
       WLEntry.Cycles = 0;
-      WLEntry.WriteResourceID = WriteSeq.back();
+      unsigned WriteID = WriteSeq.back();
+      WriterNames.push_back(SchedModels.getSchedWrite(WriteID).Name);
+      // If this Write is not referenced by a ReadAdvance, don't distinguish it
+      // from other WriteLatency entries.
+      if (!SchedModels.hasReadOfWrite(SchedModels.getSchedWrite(WriteID).TheDef)) {
+        WriteID = 0;
+      }
+      WLEntry.WriteResourceID = WriteID;
 
       for (IdxIter WSI = WriteSeq.begin(), WSE = WriteSeq.end();
            WSI != WSE; ++WSI) {
@@ -881,12 +891,22 @@
       std::search(SchedTables.WriteLatencies.begin(),
                   SchedTables.WriteLatencies.end(),
                   WriteLatencies.begin(), WriteLatencies.end());
-    if (WLPos != SchedTables.WriteLatencies.end())
-      SCDesc.WriteLatencyIdx = WLPos - SchedTables.WriteLatencies.begin();
+    if (WLPos != SchedTables.WriteLatencies.end()) {
+      unsigned idx = WLPos - SchedTables.WriteLatencies.begin();
+      SCDesc.WriteLatencyIdx = idx;
+      for (unsigned i = 0, e = WriteLatencies.size(); i < e; ++i)
+        if (SchedTables.WriterNames[idx + i].find(WriterNames[i]) ==
+            std::string::npos) {
+          SchedTables.WriterNames[idx + i] += std::string("_") + WriterNames[i];
+        }
+    }
     else {
       SCDesc.WriteLatencyIdx = SchedTables.WriteLatencies.size();
-      SchedTables.WriteLatencies.insert(WLPos, WriteLatencies.begin(),
-                                     WriteLatencies.end());
+      SchedTables.WriteLatencies.insert(SchedTables.WriteLatencies.end(),
+                                        WriteLatencies.begin(),
+                                        WriteLatencies.end());
+      SchedTables.WriterNames.insert(SchedTables.WriterNames.end(),
+                                     WriterNames.begin(), WriterNames.end());
     }
     // ReadAdvanceEntries must remain in operand order.
     SCDesc.NumReadAdvanceEntries = ReadAdvanceEntries.size();
@@ -935,8 +955,7 @@
        << format("%2d", WLEntry.WriteResourceID) << "}";
     if (WLIdx + 1 < WLEnd)
       OS << ',';
-    OS << " // #" << WLIdx << " "
-       << SchedModels.getSchedWrite(WLEntry.WriteResourceID).Name << '\n';
+    OS << " // #" << WLIdx << " " << SchedTables.WriterNames[WLIdx] << '\n';
   }
   OS << "}; // " << Target << "WriteLatencyTable\n";
 

Modified: llvm/branches/R600/utils/TableGen/X86RecognizableInstr.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/utils/TableGen/X86RecognizableInstr.cpp?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/utils/TableGen/X86RecognizableInstr.cpp (original)
+++ llvm/branches/R600/utils/TableGen/X86RecognizableInstr.cpp Fri Sep 21 12:29:50 2012
@@ -244,7 +244,7 @@
   IsSSE            = (HasOpSizePrefix && (Name.find("16") == Name.npos)) ||
                      (Name.find("CRC32") != Name.npos);
   HasFROperands    = hasFROperands();
-  HasVEX_LPrefix   = has256BitOperands() || Rec->getValueAsBit("hasVEX_L");
+  HasVEX_LPrefix   = Rec->getValueAsBit("hasVEX_L");
 
   // Check for 64-bit inst which does not require REX
   Is32Bit = false;
@@ -479,20 +479,6 @@
   return false;
 }
 
-bool RecognizableInstr::has256BitOperands() const {
-  const std::vector<CGIOperandList::OperandInfo> &OperandList = *Operands;
-  unsigned numOperands = OperandList.size();
-
-  for (unsigned operandIndex = 0; operandIndex < numOperands; ++operandIndex) {
-    const std::string &recName = OperandList[operandIndex].Rec->getName();
-
-    if (!recName.compare("VR256")) {
-      return true;
-    }
-  }
-  return false;
-}
-
 void RecognizableInstr::handleOperand(bool optional, unsigned &operandIndex,
                                       unsigned &physicalOperandIndex,
                                       unsigned &numPhysicalOperands,

Modified: llvm/branches/R600/utils/TableGen/X86RecognizableInstr.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/utils/TableGen/X86RecognizableInstr.h?rev=164384&r1=164383&r2=164384&view=diff
==============================================================================
--- llvm/branches/R600/utils/TableGen/X86RecognizableInstr.h (original)
+++ llvm/branches/R600/utils/TableGen/X86RecognizableInstr.h Fri Sep 21 12:29:50 2012
@@ -127,10 +127,7 @@
 
   /// hasFROperands - Returns true if any operand is a FR operand.
   bool hasFROperands() const;
-  
-  /// has256BitOperands - Returns true if any operand is a 256-bit SSE operand.
-  bool has256BitOperands() const;
-  
+
   /// typeFromString - Translates an operand type from the string provided in
   ///   the LLVM tables to an OperandType for use in the operand specifier.
   ///
