[llvm-branch-commits] [llvm-branch] r165011 - in /llvm/branches/R600: ./ autoconf/ docs/ docs/_themes/llvm-theme/static/ include/llvm/ include/llvm/ADT/ include/llvm/CodeGen/ include/llvm/Config/ include/llvm/ExecutionEngine/ include/llvm/MC/ include/llvm/Object/ include/llvm/Support/ include/llvm/Target/ include/llvm/Transforms/Utils/ lib/Analysis/ lib/Analysis/IPA/ lib/AsmParser/ lib/Bitcode/Reader/ lib/Bitcode/Writer/ lib/CodeGen/ lib/CodeGen/SelectionDAG/ lib/ExecutionEngine/IntelJITEvents/ lib/MC/ lib/Support/ lib/Suppor...
Tom Stellard
thomas.stellard at amd.com
Tue Oct 2 07:15:34 PDT 2012
Author: tstellar
Date: Tue Oct 2 09:15:33 2012
New Revision: 165011
URL: http://llvm.org/viewvc/llvm-project?rev=165011&view=rev
Log:
Merge master branch
Added:
llvm/branches/R600/docs/HowToBuildOnARM.rst
llvm/branches/R600/docs/HowToSubmitABug.rst
llvm/branches/R600/lib/ExecutionEngine/IntelJITEvents/IntelJITEventsWrapper.h
- copied, changed from r165010, llvm/branches/R600/include/llvm/ExecutionEngine/IntelJITEventsWrapper.h
llvm/branches/R600/lib/ExecutionEngine/IntelJITEvents/ittnotify_config.h
llvm/branches/R600/lib/ExecutionEngine/IntelJITEvents/ittnotify_types.h
llvm/branches/R600/lib/ExecutionEngine/IntelJITEvents/jitprofiling.c
llvm/branches/R600/lib/ExecutionEngine/IntelJITEvents/jitprofiling.h
llvm/branches/R600/lib/Target/ARM/ARMScheduleSwift.td
llvm/branches/R600/lib/VMCore/AttributesImpl.h
llvm/branches/R600/test/Analysis/CallGraph/do-nothing-intrinsic.ll
llvm/branches/R600/test/CodeGen/ARM/2012-05-04-vmov.ll
llvm/branches/R600/test/CodeGen/ARM/2012-05-10-PreferVMOVtoVDUP32.ll
llvm/branches/R600/test/CodeGen/ARM/2012-09-25-InlineAsmScalarToVectorConv.ll
llvm/branches/R600/test/CodeGen/ARM/2012-09-25-InlineAsmScalarToVectorConv2.ll
llvm/branches/R600/test/CodeGen/ARM/atomicrmw_minmax.ll
llvm/branches/R600/test/CodeGen/ARM/call-noret.ll
llvm/branches/R600/test/CodeGen/ARM/ifcvt12.ll
llvm/branches/R600/test/CodeGen/ARM/neon-fma.ll
llvm/branches/R600/test/CodeGen/Mips/dsp-r1.ll
llvm/branches/R600/test/CodeGen/Mips/dsp-r2.ll
llvm/branches/R600/test/CodeGen/Mips/vector-load-store.ll
llvm/branches/R600/test/CodeGen/X86/2012-09-28-CGPBug.ll
llvm/branches/R600/test/CodeGen/X86/mulx32.ll
llvm/branches/R600/test/CodeGen/X86/mulx64.ll
llvm/branches/R600/test/CodeGen/X86/shift-bmi2.ll
llvm/branches/R600/test/MC/MachO/i386-large-relocations.s
llvm/branches/R600/test/Transforms/InstCombine/memcpy_chk-1.ll
llvm/branches/R600/test/Transforms/InstCombine/memcpy_chk-2.ll
llvm/branches/R600/test/Transforms/InstCombine/memmove_chk-1.ll
llvm/branches/R600/test/Transforms/InstCombine/memmove_chk-2.ll
llvm/branches/R600/test/Transforms/InstCombine/memset_chk-1.ll
llvm/branches/R600/test/Transforms/InstCombine/memset_chk-2.ll
llvm/branches/R600/test/Transforms/InstCombine/strcpy_chk-1.ll
llvm/branches/R600/test/Transforms/InstCombine/strcpy_chk-2.ll
llvm/branches/R600/test/Transforms/InstCombine/strncpy_chk-1.ll
llvm/branches/R600/test/Transforms/InstCombine/strncpy_chk-2.ll
llvm/branches/R600/test/Transforms/PhaseOrdering/gdce.ll
llvm/branches/R600/test/Transforms/SROA/alignment.ll
Removed:
llvm/branches/R600/docs/HowToSubmitABug.html
llvm/branches/R600/include/llvm/ExecutionEngine/IntelJITEventsWrapper.h
llvm/branches/R600/test/Transforms/InstCombine/memset_chk.ll
llvm/branches/R600/test/Transforms/InstCombine/strcpy_chk.ll
Modified:
llvm/branches/R600/CMakeLists.txt
llvm/branches/R600/autoconf/configure.ac
llvm/branches/R600/configure
llvm/branches/R600/docs/AliasAnalysis.rst
llvm/branches/R600/docs/CMake.rst
llvm/branches/R600/docs/CodeGenerator.rst
llvm/branches/R600/docs/CodingStandards.rst
llvm/branches/R600/docs/_themes/llvm-theme/static/llvm-theme.css
llvm/branches/R600/docs/index.rst
llvm/branches/R600/docs/userguides.rst
llvm/branches/R600/include/llvm/ADT/PackedVector.h
llvm/branches/R600/include/llvm/ADT/SparseBitVector.h
llvm/branches/R600/include/llvm/Attributes.h
llvm/branches/R600/include/llvm/CodeGen/MachineModuleInfoImpls.h
llvm/branches/R600/include/llvm/CodeGen/ValueTypes.h
llvm/branches/R600/include/llvm/Config/config.h.cmake
llvm/branches/R600/include/llvm/Function.h
llvm/branches/R600/include/llvm/IRBuilder.h
llvm/branches/R600/include/llvm/MC/MCExpr.h
llvm/branches/R600/include/llvm/MC/MCObjectStreamer.h
llvm/branches/R600/include/llvm/Object/MachOFormat.h
llvm/branches/R600/include/llvm/Operator.h
llvm/branches/R600/include/llvm/Support/TargetFolder.h
llvm/branches/R600/include/llvm/Target/TargetData.h
llvm/branches/R600/include/llvm/Target/TargetLowering.h
llvm/branches/R600/include/llvm/Transforms/Utils/IntegerDivision.h
llvm/branches/R600/include/llvm/Transforms/Utils/ValueMapper.h
llvm/branches/R600/lib/Analysis/CodeMetrics.cpp
llvm/branches/R600/lib/Analysis/IPA/CallGraph.cpp
llvm/branches/R600/lib/Analysis/InlineCost.cpp
llvm/branches/R600/lib/Analysis/Lint.cpp
llvm/branches/R600/lib/Analysis/MemoryDependenceAnalysis.cpp
llvm/branches/R600/lib/AsmParser/LLParser.cpp
llvm/branches/R600/lib/Bitcode/Reader/BitcodeReader.cpp
llvm/branches/R600/lib/Bitcode/Writer/BitcodeWriter.cpp
llvm/branches/R600/lib/CodeGen/BranchFolding.cpp
llvm/branches/R600/lib/CodeGen/CodePlacementOpt.cpp
llvm/branches/R600/lib/CodeGen/LiveInterval.cpp
llvm/branches/R600/lib/CodeGen/LiveRangeEdit.cpp
llvm/branches/R600/lib/CodeGen/MachineBlockPlacement.cpp
llvm/branches/R600/lib/CodeGen/MachineFunction.cpp
llvm/branches/R600/lib/CodeGen/MachineModuleInfoImpls.cpp
llvm/branches/R600/lib/CodeGen/PrologEpilogInserter.cpp
llvm/branches/R600/lib/CodeGen/RegisterCoalescer.cpp
llvm/branches/R600/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/branches/R600/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
llvm/branches/R600/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
llvm/branches/R600/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/branches/R600/lib/CodeGen/StackProtector.cpp
llvm/branches/R600/lib/CodeGen/TailDuplication.cpp
llvm/branches/R600/lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt
llvm/branches/R600/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp
llvm/branches/R600/lib/ExecutionEngine/IntelJITEvents/Makefile
llvm/branches/R600/lib/MC/MCExpr.cpp
llvm/branches/R600/lib/MC/MCObjectStreamer.cpp
llvm/branches/R600/lib/Support/APFloat.cpp
llvm/branches/R600/lib/Support/Errno.cpp
llvm/branches/R600/lib/Support/Host.cpp
llvm/branches/R600/lib/Support/Unix/Path.inc
llvm/branches/R600/lib/Support/Unix/Signals.inc
llvm/branches/R600/lib/Support/YAMLParser.cpp
llvm/branches/R600/lib/Target/ARM/ARM.h
llvm/branches/R600/lib/Target/ARM/ARM.td
llvm/branches/R600/lib/Target/ARM/ARMBaseInstrInfo.cpp
llvm/branches/R600/lib/Target/ARM/ARMBaseInstrInfo.h
llvm/branches/R600/lib/Target/ARM/ARMBaseRegisterInfo.cpp
llvm/branches/R600/lib/Target/ARM/ARMFastISel.cpp
llvm/branches/R600/lib/Target/ARM/ARMFrameLowering.cpp
llvm/branches/R600/lib/Target/ARM/ARMISelDAGToDAG.cpp
llvm/branches/R600/lib/Target/ARM/ARMISelLowering.cpp
llvm/branches/R600/lib/Target/ARM/ARMInstrFormats.td
llvm/branches/R600/lib/Target/ARM/ARMInstrInfo.cpp
llvm/branches/R600/lib/Target/ARM/ARMInstrInfo.td
llvm/branches/R600/lib/Target/ARM/ARMInstrNEON.td
llvm/branches/R600/lib/Target/ARM/ARMInstrThumb2.td
llvm/branches/R600/lib/Target/ARM/ARMInstrVFP.td
llvm/branches/R600/lib/Target/ARM/ARMMachineFunctionInfo.h
llvm/branches/R600/lib/Target/ARM/ARMRegisterInfo.td
llvm/branches/R600/lib/Target/ARM/ARMSchedule.td
llvm/branches/R600/lib/Target/ARM/ARMSubtarget.cpp
llvm/branches/R600/lib/Target/ARM/ARMSubtarget.h
llvm/branches/R600/lib/Target/ARM/ARMTargetMachine.cpp
llvm/branches/R600/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
llvm/branches/R600/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
llvm/branches/R600/lib/Target/ARM/MLxExpansionPass.cpp
llvm/branches/R600/lib/Target/Hexagon/HexagonISelLowering.cpp
llvm/branches/R600/lib/Target/Mips/CMakeLists.txt
llvm/branches/R600/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
llvm/branches/R600/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
llvm/branches/R600/lib/Target/Mips/Makefile
llvm/branches/R600/lib/Target/Mips/Mips16InstrInfo.cpp
llvm/branches/R600/lib/Target/Mips/Mips16InstrInfo.td
llvm/branches/R600/lib/Target/Mips/Mips16RegisterInfo.cpp
llvm/branches/R600/lib/Target/Mips/MipsAsmPrinter.cpp
llvm/branches/R600/lib/Target/Mips/MipsAsmPrinter.h
llvm/branches/R600/lib/Target/Mips/MipsDSPInstrFormats.td
llvm/branches/R600/lib/Target/Mips/MipsDSPInstrInfo.td
llvm/branches/R600/lib/Target/Mips/MipsISelLowering.cpp
llvm/branches/R600/lib/Target/Mips/MipsISelLowering.h
llvm/branches/R600/lib/Target/Mips/MipsInstrInfo.td
llvm/branches/R600/lib/Target/Mips/MipsMCInstLower.h
llvm/branches/R600/lib/Target/Mips/MipsMachineFunction.h
llvm/branches/R600/lib/Target/Mips/MipsRegisterInfo.td
llvm/branches/R600/lib/Target/Mips/MipsSERegisterInfo.cpp
llvm/branches/R600/lib/Target/Mips/MipsSubtarget.cpp
llvm/branches/R600/lib/Target/PowerPC/PPCFrameLowering.cpp
llvm/branches/R600/lib/Target/PowerPC/PPCISelLowering.cpp
llvm/branches/R600/lib/Target/PowerPC/PPCRegisterInfo.cpp
llvm/branches/R600/lib/Target/TargetData.cpp
llvm/branches/R600/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
llvm/branches/R600/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
llvm/branches/R600/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
llvm/branches/R600/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
llvm/branches/R600/lib/Target/X86/X86AsmPrinter.cpp
llvm/branches/R600/lib/Target/X86/X86FrameLowering.cpp
llvm/branches/R600/lib/Target/X86/X86ISelDAGToDAG.cpp
llvm/branches/R600/lib/Target/X86/X86ISelLowering.cpp
llvm/branches/R600/lib/Target/X86/X86InstrCompiler.td
llvm/branches/R600/lib/Target/X86/X86InstrInfo.cpp
llvm/branches/R600/lib/Target/X86/X86InstrInfo.td
llvm/branches/R600/lib/Target/X86/X86InstrShiftRotate.td
llvm/branches/R600/lib/Target/X86/X86RegisterInfo.cpp
llvm/branches/R600/lib/Target/X86/X86RegisterInfo.h
llvm/branches/R600/lib/Target/X86/X86Subtarget.h
llvm/branches/R600/lib/Transforms/IPO/ArgumentPromotion.cpp
llvm/branches/R600/lib/Transforms/IPO/DeadArgumentElimination.cpp
llvm/branches/R600/lib/Transforms/IPO/GlobalOpt.cpp
llvm/branches/R600/lib/Transforms/IPO/InlineAlways.cpp
llvm/branches/R600/lib/Transforms/IPO/Inliner.cpp
llvm/branches/R600/lib/Transforms/IPO/PassManagerBuilder.cpp
llvm/branches/R600/lib/Transforms/InstCombine/InstCombineCalls.cpp
llvm/branches/R600/lib/Transforms/InstCombine/InstCombineSelect.cpp
llvm/branches/R600/lib/Transforms/Instrumentation/AddressSanitizer.cpp
llvm/branches/R600/lib/Transforms/Scalar/CodeGenPrepare.cpp
llvm/branches/R600/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
llvm/branches/R600/lib/Transforms/Scalar/LoopUnrollPass.cpp
llvm/branches/R600/lib/Transforms/Scalar/LoopUnswitch.cpp
llvm/branches/R600/lib/Transforms/Scalar/SROA.cpp
llvm/branches/R600/lib/Transforms/Utils/IntegerDivision.cpp
llvm/branches/R600/lib/Transforms/Utils/SimplifyCFG.cpp
llvm/branches/R600/lib/Transforms/Utils/ValueMapper.cpp
llvm/branches/R600/lib/VMCore/Attributes.cpp
llvm/branches/R600/lib/VMCore/Function.cpp
llvm/branches/R600/lib/VMCore/IRBuilder.cpp
llvm/branches/R600/lib/VMCore/LLVMContextImpl.cpp
llvm/branches/R600/lib/VMCore/LLVMContextImpl.h
llvm/branches/R600/lib/VMCore/ValueTypes.cpp
llvm/branches/R600/lib/VMCore/Verifier.cpp
llvm/branches/R600/test/CodeGen/ARM/2010-12-07-PEIBug.ll
llvm/branches/R600/test/CodeGen/ARM/avoid-cpsr-rmw.ll
llvm/branches/R600/test/CodeGen/ARM/div.ll
llvm/branches/R600/test/CodeGen/ARM/domain-conv-vmovs.ll
llvm/branches/R600/test/CodeGen/ARM/fabss.ll
llvm/branches/R600/test/CodeGen/ARM/fadds.ll
llvm/branches/R600/test/CodeGen/ARM/fast-isel-pic.ll
llvm/branches/R600/test/CodeGen/ARM/fdivs.ll
llvm/branches/R600/test/CodeGen/ARM/fmuls.ll
llvm/branches/R600/test/CodeGen/ARM/fp_convert.ll
llvm/branches/R600/test/CodeGen/ARM/fsubs.ll
llvm/branches/R600/test/CodeGen/ARM/ifcvt1.ll
llvm/branches/R600/test/CodeGen/ARM/ifcvt5.ll
llvm/branches/R600/test/CodeGen/ARM/ldr_post.ll
llvm/branches/R600/test/CodeGen/ARM/ldr_pre.ll
llvm/branches/R600/test/CodeGen/ARM/mls.ll
llvm/branches/R600/test/CodeGen/ARM/neon_ld2.ll
llvm/branches/R600/test/CodeGen/ARM/opt-shuff-tstore.ll
llvm/branches/R600/test/CodeGen/ARM/reg_sequence.ll
llvm/branches/R600/test/CodeGen/ARM/subreg-remat.ll
llvm/branches/R600/test/CodeGen/Thumb2/cortex-fp.ll
llvm/branches/R600/test/CodeGen/Thumb2/div.ll
llvm/branches/R600/test/CodeGen/Thumb2/thumb2-mla.ll
llvm/branches/R600/test/CodeGen/Thumb2/thumb2-smla.ll
llvm/branches/R600/test/CodeGen/X86/crash.ll
llvm/branches/R600/test/CodeGen/X86/jump_sign.ll
llvm/branches/R600/test/CodeGen/X86/phys_subreg_coalesce-3.ll
llvm/branches/R600/test/CodeGen/X86/ptr-rotate.ll
llvm/branches/R600/test/CodeGen/X86/rot32.ll
llvm/branches/R600/test/CodeGen/X86/rot64.ll
llvm/branches/R600/test/CodeGen/X86/rotate2.ll
llvm/branches/R600/test/CodeGen/X86/targetLoweringGeneric.ll
llvm/branches/R600/test/DebugInfo/bug_null_debuginfo.ll
llvm/branches/R600/test/Other/lint.ll
llvm/branches/R600/test/Transforms/CorrelatedValuePropagation/crash.ll
llvm/branches/R600/test/Transforms/GlobalOpt/load-store-global.ll
llvm/branches/R600/test/Transforms/InstCombine/vec_demanded_elts.ll
llvm/branches/R600/test/Transforms/InstCombine/vec_shuffle.ll
llvm/branches/R600/test/Transforms/LoopUnroll/pr11361.ll
llvm/branches/R600/test/Transforms/LoopUnswitch/2011-06-02-CritSwitch.ll
llvm/branches/R600/test/Transforms/SROA/basictest.ll
llvm/branches/R600/test/Transforms/SROA/phi-and-select.ll
llvm/branches/R600/test/Transforms/SimplifyCFG/switch_to_lookup_table.ll
llvm/branches/R600/tools/lli/CMakeLists.txt
llvm/branches/R600/unittests/ADT/APFloatTest.cpp
llvm/branches/R600/unittests/ExecutionEngine/JIT/CMakeLists.txt
llvm/branches/R600/unittests/ExecutionEngine/JIT/IntelJITEventListenerTest.cpp
llvm/branches/R600/unittests/Transforms/Utils/IntegerDivision.cpp
Modified: llvm/branches/R600/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/CMakeLists.txt?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/CMakeLists.txt (original)
+++ llvm/branches/R600/CMakeLists.txt Tue Oct 2 09:15:33 2012
@@ -115,6 +115,11 @@
set(ENABLE_TIMESTAMPS 1)
endif()
+option(LLVM_ENABLE_BACKTRACES "Enable embedding backtraces on crash." ON)
+if(LLVM_ENABLE_BACKTRACES)
+ set(ENABLE_BACKTRACES 1)
+endif()
+
option(LLVM_ENABLE_FFI "Use libffi to call external functions from the interpreter" OFF)
set(FFI_LIBRARY_DIR "" CACHE PATH "Additional directory, where CMake should search for libffi.so")
set(FFI_INCLUDE_DIR "" CACHE PATH "Additional directory, where CMake should search for ffi.h or ffi/ffi.h")
@@ -172,23 +177,7 @@
if( LLVM_USE_INTEL_JITEVENTS )
# Verify we are on a supported platform
- if( CMAKE_SYSTEM_NAME MATCHES "Windows" OR CMAKE_SYSTEM_NAME MATCHES "Linux" )
- # Directory where Intel Parallel Amplifier XE 2011 is installed.
- if ( WIN32 )
- set(LLVM_INTEL_JITEVENTS_DIR $ENV{VTUNE_AMPLIFIER_XE_2011_DIR})
- else ( WIN32 )
- set(LLVM_INTEL_JITEVENTS_DIR "/opt/intel/vtune_amplifier_xe_2011")
- endif ( WIN32 )
-
- # Set include and library search paths for Intel JIT Events API
- set(LLVM_INTEL_JITEVENTS_INCDIR "${LLVM_INTEL_JITEVENTS_DIR}/include")
-
- if ( CMAKE_SIZEOF_VOID_P EQUAL 8 )
- set(LLVM_INTEL_JITEVENTS_LIBDIR "${LLVM_INTEL_JITEVENTS_DIR}/lib64")
- else ( CMAKE_SIZEOF_VOID_P EQUAL 8 )
- set(LLVM_INTEL_JITEVENTS_LIBDIR "${LLVM_INTEL_JITEVENTS_DIR}/lib32")
- endif ( CMAKE_SIZEOF_VOID_P EQUAL 8 )
- else()
+ if( NOT CMAKE_SYSTEM_NAME MATCHES "Windows" AND NOT CMAKE_SYSTEM_NAME MATCHES "Linux" )
message(FATAL_ERROR
"Intel JIT API support is available on Linux and Windows only.")
endif()
Modified: llvm/branches/R600/autoconf/configure.ac
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/autoconf/configure.ac?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/autoconf/configure.ac (original)
+++ llvm/branches/R600/autoconf/configure.ac Tue Oct 2 09:15:33 2012
@@ -1290,46 +1290,23 @@
dnl Enable support for Intel JIT Events API.
AC_ARG_WITH(intel-jitevents,
- AS_HELP_STRING([--with-intel-jitevents=<vtune-amplifier-dir>],
- [Specify location of run-time support library for Intel JIT API (default=/opt/intel/vtune_amplifier_xe_2011)]),
+ AS_HELP_STRING([--with-intel-jitevents Notify Intel JIT profiling API of generated code]),
[
+ case "$withval" in
+ yes) AC_SUBST(USE_INTEL_JITEVENTS,[1]);;
+ no) AC_SUBST(USE_INTEL_JITEVENTS,[0]);;
+ *) AC_MSG_ERROR([Invalid setting for --with-intel-jitevents. Use "yes" or "no"]);;
+ esac
+
case $llvm_cv_os_type in
Linux|Win32|Cygwin|MingW) ;;
- *)
- AC_MSG_ERROR([
- Intel JIT API support is available on Linux and Windows only."]) ;;
+ *) AC_MSG_ERROR([Intel JIT API support is available on Linux and Windows only.]);;
esac
- AC_SUBST(USE_INTEL_JITEVENTS, [1])
case "$llvm_cv_target_arch" in
- x86) llvm_intel_jitevents_archdir="lib32";;
- x86_64) llvm_intel_jitevents_archdir="lib64";;
- *) echo "Target architecture $llvm_cv_target_arch does not support Intel JIT Events API"
- exit -1;;
- esac
- INTEL_JITEVENTS_INCDIR="/opt/intel/vtune_amplifier_xe_2011/include"
- INTEL_JITEVENTS_LIBDIR="/opt/intel/vtune_amplifier_xe_2011/$llvm_intel_jitevents_archdir"
- case "$withval" in
- /* | [[A-Za-z]]:[[\\/]]*) INTEL_JITEVENTS_INCDIR=$withval/include
- INTEL_JITEVENTS_LIBDIR=$withval/$llvm_intel_jitevents_archdir ;;
- *) ;;
+ x86|x86_64) ;;
+ *) AC_MSG_ERROR([Target architecture $llvm_cv_target_arch does not support Intel JIT Events API.]);;
esac
-
- AC_SUBST(INTEL_JITEVENTS_INCDIR)
- AC_SUBST(INTEL_JITEVENTS_LIBDIR)
-
- LIBS="$LIBS -L${INTEL_JITEVENTS_LIBDIR}"
- CPPFLAGS="$CPPFLAGS -I$INTEL_JITEVENTS_INCDIR"
-
- AC_SEARCH_LIBS(iJIT_IsProfilingActive, jitprofiling, [], [
- echo "Error! Cannot find libjitprofiling.a. Please check path specified in flag --with-intel-jitevents"
- exit -1
- ])
- AC_CHECK_HEADER([jitprofiling.h], [], [
- echo "Error! Cannot find jitprofiling.h. Please check path specified in flag --with-intel-jitevents"
- exit -1
- ])
-
],
[
AC_SUBST(USE_INTEL_JITEVENTS, [0])
Modified: llvm/branches/R600/configure
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/configure?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/configure (original)
+++ llvm/branches/R600/configure Tue Oct 2 09:15:33 2012
@@ -766,8 +766,6 @@
USE_UDIS86
USE_OPROFILE
USE_INTEL_JITEVENTS
-INTEL_JITEVENTS_INCDIR
-INTEL_JITEVENTS_LIBDIR
XML2CONFIG
LIBXML2_LIBS
LIBXML2_INC
@@ -1462,10 +1460,8 @@
--with-udis86=<path> Use udis86 external x86 disassembler library
--with-oprofile=<prefix>
Tell OProfile >= 0.9.4 how to symbolize JIT output
- --with-intel-jitevents=<vtune-amplifier-dir>
- Specify location of run-time support library for
- Intel JIT API
- (default=/opt/intel/vtune_amplifier_xe_2011)
+ --with-intel-jitevents Notify Intel JIT profiling API of generated code
+
Some influential environment variables:
CC C compiler command
@@ -10316,7 +10312,7 @@
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<EOF
-#line 10319 "configure"
+#line 10315 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H
@@ -13576,308 +13572,30 @@
# Check whether --with-intel-jitevents was given.
if test "${with_intel_jitevents+set}" = set; then
withval=$with_intel_jitevents;
+ case "$withval" in
+ yes) USE_INTEL_JITEVENTS=1
+;;
+ no) USE_INTEL_JITEVENTS=0
+;;
+ *) { { echo "$as_me:$LINENO: error: Invalid setting for --with-intel-jitevents. Use \"yes\" or \"no\"" >&5
+echo "$as_me: error: Invalid setting for --with-intel-jitevents. Use \"yes\" or \"no\"" >&2;}
+ { (exit 1); exit 1; }; };;
+ esac
+
case $llvm_cv_os_type in
Linux|Win32|Cygwin|MingW) ;;
- *)
- { { echo "$as_me:$LINENO: error:
- Intel JIT API support is available on Linux and Windows only.\"" >&5
-echo "$as_me: error:
- Intel JIT API support is available on Linux and Windows only.\"" >&2;}
- { (exit 1); exit 1; }; } ;;
+ *) { { echo "$as_me:$LINENO: error: Intel JIT API support is available on Linux and Windows only." >&5
+echo "$as_me: error: Intel JIT API support is available on Linux and Windows only." >&2;}
+ { (exit 1); exit 1; }; };;
esac
- USE_INTEL_JITEVENTS=1
-
case "$llvm_cv_target_arch" in
- x86) llvm_intel_jitevents_archdir="lib32";;
- x86_64) llvm_intel_jitevents_archdir="lib64";;
- *) echo "Target architecture $llvm_cv_target_arch does not support Intel JIT Events API"
- exit -1;;
- esac
- INTEL_JITEVENTS_INCDIR="/opt/intel/vtune_amplifier_xe_2011/include"
- INTEL_JITEVENTS_LIBDIR="/opt/intel/vtune_amplifier_xe_2011/$llvm_intel_jitevents_archdir"
- case "$withval" in
- /* | [A-Za-z]:[\\/]*) INTEL_JITEVENTS_INCDIR=$withval/include
- INTEL_JITEVENTS_LIBDIR=$withval/$llvm_intel_jitevents_archdir ;;
- *) ;;
+ x86|x86_64) ;;
+ *) { { echo "$as_me:$LINENO: error: Target architecture $llvm_cv_target_arch does not support Intel JIT Events API." >&5
+echo "$as_me: error: Target architecture $llvm_cv_target_arch does not support Intel JIT Events API." >&2;}
+ { (exit 1); exit 1; }; };;
esac
-
-
-
- LIBS="$LIBS -L${INTEL_JITEVENTS_LIBDIR}"
- CPPFLAGS="$CPPFLAGS -I$INTEL_JITEVENTS_INCDIR"
-
- { echo "$as_me:$LINENO: checking for library containing iJIT_IsProfilingActive" >&5
-echo $ECHO_N "checking for library containing iJIT_IsProfilingActive... $ECHO_C" >&6; }
-if test "${ac_cv_search_iJIT_IsProfilingActive+set}" = set; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-else
- ac_func_search_save_LIBS=$LIBS
-cat >conftest.$ac_ext <<_ACEOF
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-
-/* Override any GCC internal prototype to avoid an error.
- Use char because int might match the return type of a GCC
- builtin and then its argument prototype would still apply. */
-#ifdef __cplusplus
-extern "C"
-#endif
-char iJIT_IsProfilingActive ();
-int
-main ()
-{
-return iJIT_IsProfilingActive ();
- ;
- return 0;
-}
-_ACEOF
-for ac_lib in '' jitprofiling; do
- if test -z "$ac_lib"; then
- ac_res="none required"
- else
- ac_res=-l$ac_lib
- LIBS="-l$ac_lib $ac_func_search_save_LIBS"
- fi
- rm -f conftest.$ac_objext conftest$ac_exeext
-if { (ac_try="$ac_link"
-case "(($ac_try" in
- *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
- *) ac_try_echo=$ac_try;;
-esac
-eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
- (eval "$ac_link") 2>conftest.er1
- ac_status=$?
- grep -v '^ *+' conftest.er1 >conftest.err
- rm -f conftest.er1
- cat conftest.err >&5
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
- { (case "(($ac_try" in
- *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
- *) ac_try_echo=$ac_try;;
-esac
-eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
- (eval "$ac_try") 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; } &&
- { ac_try='test -s conftest$ac_exeext'
- { (case "(($ac_try" in
- *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
- *) ac_try_echo=$ac_try;;
-esac
-eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
- (eval "$ac_try") 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- ac_cv_search_iJIT_IsProfilingActive=$ac_res
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-
-fi
-
-rm -f core conftest.err conftest.$ac_objext \
- conftest$ac_exeext
- if test "${ac_cv_search_iJIT_IsProfilingActive+set}" = set; then
- break
-fi
-done
-if test "${ac_cv_search_iJIT_IsProfilingActive+set}" = set; then
- :
-else
- ac_cv_search_iJIT_IsProfilingActive=no
-fi
-rm conftest.$ac_ext
-LIBS=$ac_func_search_save_LIBS
-fi
-{ echo "$as_me:$LINENO: result: $ac_cv_search_iJIT_IsProfilingActive" >&5
-echo "${ECHO_T}$ac_cv_search_iJIT_IsProfilingActive" >&6; }
-ac_res=$ac_cv_search_iJIT_IsProfilingActive
-if test "$ac_res" != no; then
- test "$ac_res" = "none required" || LIBS="$ac_res $LIBS"
-
-else
-
- echo "Error! Cannot find libjitprofiling.a. Please check path specified in flag --with-intel-jitevents"
- exit -1
-
-fi
-
- if test "${ac_cv_header_jitprofiling_h+set}" = set; then
- { echo "$as_me:$LINENO: checking for jitprofiling.h" >&5
-echo $ECHO_N "checking for jitprofiling.h... $ECHO_C" >&6; }
-if test "${ac_cv_header_jitprofiling_h+set}" = set; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-fi
-{ echo "$as_me:$LINENO: result: $ac_cv_header_jitprofiling_h" >&5
-echo "${ECHO_T}$ac_cv_header_jitprofiling_h" >&6; }
-else
- # Is the header compilable?
-{ echo "$as_me:$LINENO: checking jitprofiling.h usability" >&5
-echo $ECHO_N "checking jitprofiling.h usability... $ECHO_C" >&6; }
-cat >conftest.$ac_ext <<_ACEOF
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-$ac_includes_default
-#include <jitprofiling.h>
-_ACEOF
-rm -f conftest.$ac_objext
-if { (ac_try="$ac_compile"
-case "(($ac_try" in
- *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
- *) ac_try_echo=$ac_try;;
-esac
-eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
- (eval "$ac_compile") 2>conftest.er1
- ac_status=$?
- grep -v '^ *+' conftest.er1 >conftest.err
- rm -f conftest.er1
- cat conftest.err >&5
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
- { (case "(($ac_try" in
- *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
- *) ac_try_echo=$ac_try;;
-esac
-eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
- (eval "$ac_try") 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; } &&
- { ac_try='test -s conftest.$ac_objext'
- { (case "(($ac_try" in
- *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
- *) ac_try_echo=$ac_try;;
-esac
-eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
- (eval "$ac_try") 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- ac_header_compiler=yes
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
- ac_header_compiler=no
-fi
-
-rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
-{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5
-echo "${ECHO_T}$ac_header_compiler" >&6; }
-
-# Is the header present?
-{ echo "$as_me:$LINENO: checking jitprofiling.h presence" >&5
-echo $ECHO_N "checking jitprofiling.h presence... $ECHO_C" >&6; }
-cat >conftest.$ac_ext <<_ACEOF
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-#include <jitprofiling.h>
-_ACEOF
-if { (ac_try="$ac_cpp conftest.$ac_ext"
-case "(($ac_try" in
- *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
- *) ac_try_echo=$ac_try;;
-esac
-eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
- (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1
- ac_status=$?
- grep -v '^ *+' conftest.er1 >conftest.err
- rm -f conftest.er1
- cat conftest.err >&5
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } >/dev/null; then
- if test -s conftest.err; then
- ac_cpp_err=$ac_c_preproc_warn_flag
- ac_cpp_err=$ac_cpp_err$ac_c_werror_flag
- else
- ac_cpp_err=
- fi
-else
- ac_cpp_err=yes
-fi
-if test -z "$ac_cpp_err"; then
- ac_header_preproc=yes
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
- ac_header_preproc=no
-fi
-
-rm -f conftest.err conftest.$ac_ext
-{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5
-echo "${ECHO_T}$ac_header_preproc" >&6; }
-
-# So? What about this header?
-case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in
- yes:no: )
- { echo "$as_me:$LINENO: WARNING: jitprofiling.h: accepted by the compiler, rejected by the preprocessor!" >&5
-echo "$as_me: WARNING: jitprofiling.h: accepted by the compiler, rejected by the preprocessor!" >&2;}
- { echo "$as_me:$LINENO: WARNING: jitprofiling.h: proceeding with the compiler's result" >&5
-echo "$as_me: WARNING: jitprofiling.h: proceeding with the compiler's result" >&2;}
- ac_header_preproc=yes
- ;;
- no:yes:* )
- { echo "$as_me:$LINENO: WARNING: jitprofiling.h: present but cannot be compiled" >&5
-echo "$as_me: WARNING: jitprofiling.h: present but cannot be compiled" >&2;}
- { echo "$as_me:$LINENO: WARNING: jitprofiling.h: check for missing prerequisite headers?" >&5
-echo "$as_me: WARNING: jitprofiling.h: check for missing prerequisite headers?" >&2;}
- { echo "$as_me:$LINENO: WARNING: jitprofiling.h: see the Autoconf documentation" >&5
-echo "$as_me: WARNING: jitprofiling.h: see the Autoconf documentation" >&2;}
- { echo "$as_me:$LINENO: WARNING: jitprofiling.h: section \"Present But Cannot Be Compiled\"" >&5
-echo "$as_me: WARNING: jitprofiling.h: section \"Present But Cannot Be Compiled\"" >&2;}
- { echo "$as_me:$LINENO: WARNING: jitprofiling.h: proceeding with the preprocessor's result" >&5
-echo "$as_me: WARNING: jitprofiling.h: proceeding with the preprocessor's result" >&2;}
- { echo "$as_me:$LINENO: WARNING: jitprofiling.h: in the future, the compiler will take precedence" >&5
-echo "$as_me: WARNING: jitprofiling.h: in the future, the compiler will take precedence" >&2;}
- ( cat <<\_ASBOX
-## ------------------------------------ ##
-## Report this to http://llvm.org/bugs/ ##
-## ------------------------------------ ##
-_ASBOX
- ) | sed "s/^/$as_me: WARNING: /" >&2
- ;;
-esac
-{ echo "$as_me:$LINENO: checking for jitprofiling.h" >&5
-echo $ECHO_N "checking for jitprofiling.h... $ECHO_C" >&6; }
-if test "${ac_cv_header_jitprofiling_h+set}" = set; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-else
- ac_cv_header_jitprofiling_h=$ac_header_preproc
-fi
-{ echo "$as_me:$LINENO: result: $ac_cv_header_jitprofiling_h" >&5
-echo "${ECHO_T}$ac_cv_header_jitprofiling_h" >&6; }
-
-fi
-if test $ac_cv_header_jitprofiling_h = yes; then
- :
-else
-
- echo "Error! Cannot find jitprofiling.h. Please check path specified in flag --with-intel-jitevents"
- exit -1
-
-fi
-
-
-
-
else
USE_INTEL_JITEVENTS=0
@@ -22308,8 +22026,6 @@
USE_UDIS86!$USE_UDIS86$ac_delim
USE_OPROFILE!$USE_OPROFILE$ac_delim
USE_INTEL_JITEVENTS!$USE_INTEL_JITEVENTS$ac_delim
-INTEL_JITEVENTS_INCDIR!$INTEL_JITEVENTS_INCDIR$ac_delim
-INTEL_JITEVENTS_LIBDIR!$INTEL_JITEVENTS_LIBDIR$ac_delim
XML2CONFIG!$XML2CONFIG$ac_delim
LIBXML2_LIBS!$LIBXML2_LIBS$ac_delim
LIBXML2_INC!$LIBXML2_INC$ac_delim
@@ -22338,7 +22054,7 @@
LTLIBOBJS!$LTLIBOBJS$ac_delim
_ACEOF
- if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 95; then
+ if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 93; then
break
elif $ac_last_try; then
{ { echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5
Modified: llvm/branches/R600/docs/AliasAnalysis.rst
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/docs/AliasAnalysis.rst?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/docs/AliasAnalysis.rst (original)
+++ llvm/branches/R600/docs/AliasAnalysis.rst Tue Oct 2 09:15:33 2012
@@ -230,7 +230,7 @@
.. code-block:: c++
- void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const {
AliasAnalysis::getAnalysisUsage(AU);
// declare your dependencies here.
}
Modified: llvm/branches/R600/docs/CMake.rst
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/docs/CMake.rst?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/docs/CMake.rst (original)
+++ llvm/branches/R600/docs/CMake.rst Tue Oct 2 09:15:33 2012
@@ -273,11 +273,6 @@
**LLVM_USE_INTEL_JITEVENTS**:BOOL
Enable building support for Intel JIT Events API. Defaults to OFF
-**LLVM_INTEL_JITEVENTS_DIR**:PATH
- Path to installation of Intel(R) VTune(TM) Amplifier XE 2011, used to locate
- the ``jitprofiling`` library. Default = ``%VTUNE_AMPLIFIER_XE_2011_DIR%``
- (Windows) | ``/opt/intel/vtune_amplifier_xe_2011`` (Linux)
-
Executing the test suite
========================
Modified: llvm/branches/R600/docs/CodeGenerator.rst
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/docs/CodeGenerator.rst?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/docs/CodeGenerator.rst (original)
+++ llvm/branches/R600/docs/CodeGenerator.rst Tue Oct 2 09:15:33 2012
@@ -390,7 +390,7 @@
MachineInstr *MI = BuildMI(X86::MOV32ri, 1, DestReg).addImm(42);
// Create the same instr, but insert it at the end of a basic block.
- MachineBasicBlock &MBB = ...
+ MachineBasicBlock &MBB = ...
BuildMI(MBB, X86::MOV32ri, 1, DestReg).addImm(42);
// Create the same instr, but insert it before a specified iterator point.
@@ -404,7 +404,7 @@
MI = BuildMI(X86::SAHF, 0);
// Create a self looping branch instruction.
- BuildMI(MBB, X86::JNE, 1).addMBB(&MBB);
+ BuildMI(MBB, X86::JNE, 1).addMBB(&MBB);
The key thing to remember with the ``BuildMI`` functions is that you have to
specify the number of operands that the machine instruction will take. This
Modified: llvm/branches/R600/docs/CodingStandards.rst
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/docs/CodingStandards.rst?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/docs/CodingStandards.rst (original)
+++ llvm/branches/R600/docs/CodingStandards.rst Tue Oct 2 09:15:33 2012
@@ -714,7 +714,7 @@
.. code-block:: c++
inline Value *getOperand(unsigned i) {
- assert(i < Operands.size() && "getOperand() out of range!");
+ assert(i < Operands.size() && "getOperand() out of range!");
return Operands[i];
}
Added: llvm/branches/R600/docs/HowToBuildOnARM.rst
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/docs/HowToBuildOnARM.rst?rev=165011&view=auto
==============================================================================
--- llvm/branches/R600/docs/HowToBuildOnARM.rst (added)
+++ llvm/branches/R600/docs/HowToBuildOnARM.rst Tue Oct 2 09:15:33 2012
@@ -0,0 +1,34 @@
+.. _how_to_build_on_arm:
+
+===================================================================
+How To Build On ARM
+===================================================================
+
+.. sectionauthor:: Wei-Ren Chen (é³éä»») <chenwj at iis.sinica.edu.tw>
+
+Introduction
+============
+
+This document contains information about building/testing LLVM and
+Clang on ARM.
+
+Notes On Building LLVM/Clang on ARM
+=====================================
+Here are some notes on building/testing LLVM/Clang on ARM. Note that
+ARM encompasses a wide variety of CPUs; this advice is primarily based
+on the ARMv6 and ARMv7 architectures and may be inapplicable to older chips.
+
+#. If you are building LLVM/Clang on an ARM board with 1G of memory or less,
+ please use ``gold`` rather then GNU ``ld``.
+ Building LLVM/Clang with ``--enable-optimized``
+ is prefered since it consumes less memory. Otherwise, the building
+ process will very likely fail due to insufficient memory. In any
+ case it is probably a good idea to set up a swap partition.
+
+#. If you want to run ``make
+ check-all`` after building LLVM/Clang, to avoid false alarms (eg, ARCMT
+ failure) please use the following configuration:
+
+ .. code-block:: bash
+
+ $ ../$LLVM_SRC_DIR/configure --with-abi=aapcs
Removed: llvm/branches/R600/docs/HowToSubmitABug.html
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/docs/HowToSubmitABug.html?rev=165010&view=auto
==============================================================================
--- llvm/branches/R600/docs/HowToSubmitABug.html (original)
+++ llvm/branches/R600/docs/HowToSubmitABug.html (removed)
@@ -1,345 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
- "http://www.w3.org/TR/html4/strict.dtd">
-<html>
-<head>
- <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
- <title>How to submit an LLVM bug report</title>
- <link rel="stylesheet" href="_static/llvm.css" type="text/css">
-</head>
-<body>
-
-<h1>
- How to submit an LLVM bug report
-</h1>
-
-<table class="layout" style="width: 90%" >
-<tr class="layout">
- <td class="left">
-<ol>
- <li><a href="#introduction">Introduction - Got bugs?</a></li>
- <li><a href="#crashers">Crashing Bugs</a>
- <ul>
- <li><a href="#front-end">Front-end bugs</a>
- <li><a href="#ct_optimizer">Compile-time optimization bugs</a>
- <li><a href="#ct_codegen">Code generator bugs</a>
- </ul></li>
- <li><a href="#miscompilations">Miscompilations</a></li>
- <li><a href="#codegen">Incorrect code generation (JIT and LLC)</a></li>
-</ol>
-<div class="doc_author">
- <p>Written by <a href="mailto:sabre at nondot.org">Chris Lattner</a> and
- <a href="http://misha.brukman.net">Misha Brukman</a></p>
-</div>
-</td>
-</tr>
-</table>
-
-<!-- *********************************************************************** -->
-<h2>
- <a name="introduction">Introduction - Got bugs?</a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>If you're working with LLVM and run into a bug, we definitely want to know
-about it. This document describes what you can do to increase the odds of
-getting it fixed quickly.</p>
-
-<p>Basically you have to do two things at a minimum. First, decide whether the
-bug <a href="#crashers">crashes the compiler</a> (or an LLVM pass), or if the
-compiler is <a href="#miscompilations">miscompiling</a> the program (i.e., the
-compiler successfully produces an executable, but it doesn't run right). Based
-on
-what type of bug it is, follow the instructions in the linked section to narrow
-down the bug so that the person who fixes it will be able to find the problem
-more easily.</p>
-
-<p>Once you have a reduced test-case, go to <a
-href="http://llvm.org/bugs/enter_bug.cgi">the LLVM Bug Tracking
-System</a> and fill out the form with the necessary details (note that you don't
-need to pick a category, just use the "new-bugs" category if you're not sure).
-The bug description should contain the following
-information:</p>
-
-<ul>
- <li>All information necessary to reproduce the problem.</li>
- <li>The reduced test-case that triggers the bug.</li>
- <li>The location where you obtained LLVM (if not from our Subversion
- repository).</li>
-</ul>
-
-<p>Thanks for helping us make LLVM better!</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
- <a name="crashers">Crashing Bugs</a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>More often than not, bugs in the compiler cause it to crash—often due
-to an assertion failure of some sort. The most important
-piece of the puzzle is to figure out if it is crashing in the GCC front-end
-or if it is one of the LLVM libraries (e.g. the optimizer or code generator)
-that has problems.</p>
-
-<p>To figure out which component is crashing (the front-end,
-optimizer or code generator), run the
-<tt><b>llvm-gcc</b></tt> command line as you were when the crash occurred, but
-with the following extra command line options:</p>
-
-<ul>
- <li><tt><b>-O0 -emit-llvm</b></tt>: If <tt>llvm-gcc</tt> still crashes when
- passed these options (which disable the optimizer and code generator), then
- the crash is in the front-end. Jump ahead to the section on <a
- href="#front-end">front-end bugs</a>.</li>
-
- <li><tt><b>-emit-llvm</b></tt>: If <tt>llvm-gcc</tt> crashes with this option
- (which disables the code generator), you found an optimizer bug. Jump ahead
- to <a href="#ct_optimizer"> compile-time optimization bugs</a>.</li>
-
- <li>Otherwise, you have a code generator crash. Jump ahead to <a
- href="#ct_codegen">code generator bugs</a>.</li>
-
-</ul>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="front-end">Front-end bugs</a>
-</h3>
-
-<div>
-
-<p>If the problem is in the front-end, you should re-run the same
-<tt>llvm-gcc</tt> command that resulted in the crash, but add the
-<tt>-save-temps</tt> option. The compiler will crash again, but it will leave
-behind a <tt><i>foo</i>.i</tt> file (containing preprocessed C source code) and
-possibly <tt><i>foo</i>.s</tt> for each
-compiled <tt><i>foo</i>.c</tt> file. Send us the <tt><i>foo</i>.i</tt> file,
-along with the options you passed to llvm-gcc, and a brief description of the
-error it caused.</p>
-
-<p>The <a href="http://delta.tigris.org/">delta</a> tool helps to reduce the
-preprocessed file down to the smallest amount of code that still replicates the
-problem. You're encouraged to use delta to reduce the code to make the
-developers' lives easier. <a
-href="http://gcc.gnu.org/wiki/A_guide_to_testcase_reduction">This website</a>
-has instructions on the best way to use delta.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="ct_optimizer">Compile-time optimization bugs</a>
-</h3>
-
-<div>
-
-<p>If you find that a bug crashes in the optimizer, compile your test-case to a
-<tt>.bc</tt> file by passing "<tt><b>-emit-llvm -O0 -c -o foo.bc</b></tt>".
-Then run:</p>
-
-<div class="doc_code">
-<p><tt><b>opt</b> -std-compile-opts -debug-pass=Arguments foo.bc
- -disable-output</tt></p>
-</div>
-
-<p>This command should do two things: it should print out a list of passes, and
-then it should crash in the same way as llvm-gcc. If it doesn't crash, please
-follow the instructions for a <a href="#front-end">front-end bug</a>.</p>
-
-<p>If this does crash, then you should be able to debug this with the following
-bugpoint command:</p>
-
-<div class="doc_code">
-<p><tt><b>bugpoint</b> foo.bc <list of passes printed by
-<b>opt</b>></tt></p>
-</div>
-
-<p>Please run this, then file a bug with the instructions and reduced .bc files
-that bugpoint emits. If something goes wrong with bugpoint, please submit the
-"foo.bc" file and the list of passes printed by <b>opt</b>.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="ct_codegen">Code generator bugs</a>
-</h3>
-
-<div>
-
-<p>If you find a bug that crashes llvm-gcc in the code generator, compile your
-source file to a .bc file by passing "<tt><b>-emit-llvm -c -o foo.bc</b></tt>"
-to llvm-gcc (in addition to the options you already pass). Once your have
-foo.bc, one of the following commands should fail:</p>
-
-<ol>
-<li><tt><b>llc</b> foo.bc</tt></li>
-<li><tt><b>llc</b> foo.bc -relocation-model=pic</tt></li>
-<li><tt><b>llc</b> foo.bc -relocation-model=static</tt></li>
-</ol>
-
-<p>If none of these crash, please follow the instructions for a
-<a href="#front-end">front-end bug</a>. If one of these do crash, you should
-be able to reduce this with one of the following bugpoint command lines (use
-the one corresponding to the command above that failed):</p>
-
-<ol>
-<li><tt><b>bugpoint</b> -run-llc foo.bc</tt></li>
-<li><tt><b>bugpoint</b> -run-llc foo.bc --tool-args
- -relocation-model=pic</tt></li>
-<li><tt><b>bugpoint</b> -run-llc foo.bc --tool-args
- -relocation-model=static</tt></li>
-</ol>
-
-<p>Please run this, then file a bug with the instructions and reduced .bc file
-that bugpoint emits. If something goes wrong with bugpoint, please submit the
-"foo.bc" file and the option that llc crashes with.</p>
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
- <a name="miscompilations">Miscompilations</a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>If llvm-gcc successfully produces an executable, but that executable doesn't
-run right, this is either a bug in the code or a bug in the
-compiler. The first thing to check is to make sure it is not using undefined
-behavior (e.g. reading a variable before it is defined). In particular, check
-to see if the program <a href="http://valgrind.org/">valgrind</a>s clean,
-passes purify, or some other memory checker tool. Many of the "LLVM bugs" that
-we have chased down ended up being bugs in the program being compiled, not
- LLVM.</p>
-
-<p>Once you determine that the program itself is not buggy, you should choose
-which code generator you wish to compile the program with (e.g. LLC or the JIT)
-and optionally a series of LLVM passes to run. For example:</p>
-
-<div class="doc_code">
-<p><tt>
-<b>bugpoint</b> -run-llc [... optzn passes ...] file-to-test.bc --args -- [program arguments]</tt></p>
-</div>
-
-<p><tt>bugpoint</tt> will try to narrow down your list of passes to the one pass
-that causes an error, and simplify the bitcode file as much as it can to assist
-you. It will print a message letting you know how to reproduce the resulting
-error.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
- <a name="codegen">Incorrect code generation</a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Similarly to debugging incorrect compilation by mis-behaving passes, you can
-debug incorrect code generation by either LLC or the JIT, using
-<tt>bugpoint</tt>. The process <tt>bugpoint</tt> follows in this case is to try
-to narrow the code down to a function that is miscompiled by one or the other
-method, but since for correctness, the entire program must be run,
-<tt>bugpoint</tt> will compile the code it deems to not be affected with the C
-Backend, and then link in the shared object it generates.</p>
-
-<p>To debug the JIT:</p>
-
-<div class="doc_code">
-<pre>
-bugpoint -run-jit -output=[correct output file] [bitcode file] \
- --tool-args -- [arguments to pass to lli] \
- --args -- [program arguments]
-</pre>
-</div>
-
-<p>Similarly, to debug the LLC, one would run:</p>
-
-<div class="doc_code">
-<pre>
-bugpoint -run-llc -output=[correct output file] [bitcode file] \
- --tool-args -- [arguments to pass to llc] \
- --args -- [program arguments]
-</pre>
-</div>
-
-<p><b>Special note:</b> if you are debugging MultiSource or SPEC tests that
-already exist in the <tt>llvm/test</tt> hierarchy, there is an easier way to
-debug the JIT, LLC, and CBE, using the pre-written Makefile targets, which
-will pass the program options specified in the Makefiles:</p>
-
-<div class="doc_code">
-<p><tt>
-cd llvm/test/../../program<br>
-make bugpoint-jit
-</tt></p>
-</div>
-
-<p>At the end of a successful <tt>bugpoint</tt> run, you will be presented
-with two bitcode files: a <em>safe</em> file which can be compiled with the C
-backend and the <em>test</em> file which either LLC or the JIT
-mis-codegenerates, and thus causes the error.</p>
-
-<p>To reproduce the error that <tt>bugpoint</tt> found, it is sufficient to do
-the following:</p>
-
-<ol>
-
-<li><p>Regenerate the shared object from the safe bitcode file:</p>
-
-<div class="doc_code">
-<p><tt>
-<b>llc</b> -march=c safe.bc -o safe.c<br>
-<b>gcc</b> -shared safe.c -o safe.so
-</tt></p>
-</div></li>
-
-<li><p>If debugging LLC, compile test bitcode native and link with the shared
- object:</p>
-
-<div class="doc_code">
-<p><tt>
-<b>llc</b> test.bc -o test.s<br>
-<b>gcc</b> test.s safe.so -o test.llc<br>
-./test.llc [program options]
-</tt></p>
-</div></li>
-
-<li><p>If debugging the JIT, load the shared object and supply the test
- bitcode:</p>
-
-<div class="doc_code">
-<p><tt><b>lli</b> -load=safe.so test.bc [program options]</tt></p>
-</div></li>
-
-</ol>
-
-</div>
-
-<!-- *********************************************************************** -->
-<hr>
-<address>
- <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
- src="http://jigsaw.w3.org/css-validator/images/vcss-blue" alt="Valid CSS"></a>
- <a href="http://validator.w3.org/check/referer"><img
- src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
-
- <a href="mailto:sabre at nondot.org">Chris Lattner</a><br>
- <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a>
- <br>
- Last modified: $Date$
-</address>
-
-</body>
-</html>
Added: llvm/branches/R600/docs/HowToSubmitABug.rst
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/docs/HowToSubmitABug.rst?rev=165011&view=auto
==============================================================================
--- llvm/branches/R600/docs/HowToSubmitABug.rst (added)
+++ llvm/branches/R600/docs/HowToSubmitABug.rst Tue Oct 2 09:15:33 2012
@@ -0,0 +1,233 @@
+.. _how-to-submit-a-bug-report:
+
+================================
+How to submit an LLVM bug report
+================================
+
+.. sectionauthor:: Chris Lattner <sabre at nondot.org> and Misha Brukman <http://misha.brukman.net>
+
+Introduction - Got bugs?
+========================
+
+
+If you're working with LLVM and run into a bug, we definitely want to know
+about it. This document describes what you can do to increase the odds of
+getting it fixed quickly.
+
+Basically you have to do two things at a minimum. First, decide whether
+the bug `crashes the compiler`_ (or an LLVM pass), or if the
+compiler is `miscompiling`_ the program (i.e., the
+compiler successfully produces an executable, but it doesn't run right).
+Based on what type of bug it is, follow the instructions in the linked
+section to narrow down the bug so that the person who fixes it will be able
+to find the problem more easily.
+
+Once you have a reduced test-case, go to `the LLVM Bug Tracking System
+<http://llvm.org/bugs/enter_bug.cgi>`_ and fill out the form with the
+necessary details (note that you don't need to pick a category, just use
+the "new-bugs" category if you're not sure). The bug description should
+contain the following information:
+
+* All information necessary to reproduce the problem.
+* The reduced test-case that triggers the bug.
+* The location where you obtained LLVM (if not from our Subversion
+ repository).
+
+Thanks for helping us make LLVM better!
+
+.. _crashes the compiler:
+
+Crashing Bugs
+=============
+
+More often than not, bugs in the compiler cause it to crash---often due to
+an assertion failure of some sort. The most important piece of the puzzle
+is to figure out if it is crashing in the GCC front-end or if it is one of
+the LLVM libraries (e.g. the optimizer or code generator) that has
+problems.
+
+To figure out which component is crashing (the front-end, optimizer or code
+generator), run the ``llvm-gcc`` command line as you were when the crash
+occurred, but with the following extra command line options:
+
+* ``-O0 -emit-llvm``: If ``llvm-gcc`` still crashes when passed these
+ options (which disable the optimizer and code generator), then the crash
+ is in the front-end. Jump ahead to the section on :ref:`front-end bugs
+ <front-end>`.
+
+* ``-emit-llvm``: If ``llvm-gcc`` crashes with this option (which disables
+ the code generator), you found an optimizer bug. Jump ahead to
+ `compile-time optimization bugs`_.
+
+* Otherwise, you have a code generator crash. Jump ahead to `code
+ generator bugs`_.
+
+.. _front-end bug:
+.. _front-end:
+
+Front-end bugs
+--------------
+
+If the problem is in the front-end, you should re-run the same ``llvm-gcc``
+command that resulted in the crash, but add the ``-save-temps`` option.
+The compiler will crash again, but it will leave behind a ``foo.i`` file
+(containing preprocessed C source code) and possibly ``foo.s`` for each
+compiled ``foo.c`` file. Send us the ``foo.i`` file, along with the options
+you passed to ``llvm-gcc``, and a brief description of the error it caused.
+
+The `delta <http://delta.tigris.org/>`_ tool helps to reduce the
+preprocessed file down to the smallest amount of code that still replicates
+the problem. You're encouraged to use delta to reduce the code to make the
+developers' lives easier. `This website
+<http://gcc.gnu.org/wiki/A_guide_to_testcase_reduction>`_ has instructions
+on the best way to use delta.
+
+.. _compile-time optimization bugs:
+
+Compile-time optimization bugs
+------------------------------
+
+If you find that a bug crashes in the optimizer, compile your test-case to a
+``.bc`` file by passing "``-emit-llvm -O0 -c -o foo.bc``".
+Then run:
+
+.. code-block:: bash
+
+ opt -std-compile-opts -debug-pass=Arguments foo.bc -disable-output
+
+This command should do two things: it should print out a list of passes, and
+then it should crash in the same way as llvm-gcc. If it doesn't crash, please
+follow the instructions for a `front-end bug`_.
+
+If this does crash, then you should be able to debug this with the following
+bugpoint command:
+
+.. code-block:: bash
+
+ bugpoint foo.bc <list of passes printed by opt>
+
+Please run this, then file a bug with the instructions and reduced .bc
+files that bugpoint emits. If something goes wrong with bugpoint, please
+submit the "foo.bc" file and the list of passes printed by ``opt``.
+
+.. _code generator bugs:
+
+Code generator bugs
+-------------------
+
+If you find a bug that crashes llvm-gcc in the code generator, compile your
+source file to a .bc file by passing "``-emit-llvm -c -o foo.bc``" to
+llvm-gcc (in addition to the options you already pass). Once your have
+foo.bc, one of the following commands should fail:
+
+#. ``llc foo.bc``
+#. ``llc foo.bc -relocation-model=pic``
+#. ``llc foo.bc -relocation-model=static``
+
+If none of these crash, please follow the instructions for a `front-end
+bug`_. If one of these do crash, you should be able to reduce this with
+one of the following bugpoint command lines (use the one corresponding to
+the command above that failed):
+
+#. ``bugpoint -run-llc foo.bc``
+#. ``bugpoint -run-llc foo.bc --tool-args -relocation-model=pic``
+#. ``bugpoint -run-llc foo.bc --tool-args -relocation-model=static``
+
+Please run this, then file a bug with the instructions and reduced .bc file
+that bugpoint emits. If something goes wrong with bugpoint, please submit
+the "foo.bc" file and the option that llc crashes with.
+
+.. _miscompiling:
+
+Miscompilations
+===============
+
+If llvm-gcc successfully produces an executable, but that executable
+doesn't run right, this is either a bug in the code or a bug in the
+compiler. The first thing to check is to make sure it is not using
+undefined behavior (e.g. reading a variable before it is defined). In
+particular, check to see if the program `valgrind
+<http://valgrind.org/>`_'s clean, passes purify, or some other memory
+checker tool. Many of the "LLVM bugs" that we have chased down ended up
+being bugs in the program being compiled, not LLVM.
+
+Once you determine that the program itself is not buggy, you should choose
+which code generator you wish to compile the program with (e.g. LLC or the JIT)
+and optionally a series of LLVM passes to run. For example:
+
+.. code-block:: bash
+
+ bugpoint -run-llc [... optzn passes ...] file-to-test.bc --args -- [program arguments]
+
+bugpoint will try to narrow down your list of passes to the one pass that
+causes an error, and simplify the bitcode file as much as it can to assist
+you. It will print a message letting you know how to reproduce the
+resulting error.
+
+Incorrect code generation
+=========================
+
+Similarly to debugging incorrect compilation by mis-behaving passes, you
+can debug incorrect code generation by either LLC or the JIT, using
+``bugpoint``. The process ``bugpoint`` follows in this case is to try to
+narrow the code down to a function that is miscompiled by one or the other
+method, but since for correctness, the entire program must be run,
+``bugpoint`` will compile the code it deems to not be affected with the C
+Backend, and then link in the shared object it generates.
+
+To debug the JIT:
+
+.. code-block:: bash
+
+ bugpoint -run-jit -output=[correct output file] [bitcode file] \
+ --tool-args -- [arguments to pass to lli] \
+ --args -- [program arguments]
+
+Similarly, to debug the LLC, one would run:
+
+.. code-block:: bash
+
+ bugpoint -run-llc -output=[correct output file] [bitcode file] \
+ --tool-args -- [arguments to pass to llc] \
+ --args -- [program arguments]
+
+**Special note:** if you are debugging MultiSource or SPEC tests that
+already exist in the ``llvm/test`` hierarchy, there is an easier way to
+debug the JIT, LLC, and CBE, using the pre-written Makefile targets, which
+will pass the program options specified in the Makefiles:
+
+.. code-block:: bash
+
+ cd llvm/test/../../program
+ make bugpoint-jit
+
+At the end of a successful ``bugpoint`` run, you will be presented
+with two bitcode files: a *safe* file which can be compiled with the C
+backend and the *test* file which either LLC or the JIT
+mis-codegenerates, and thus causes the error.
+
+To reproduce the error that ``bugpoint`` found, it is sufficient to do
+the following:
+
+#. Regenerate the shared object from the safe bitcode file:
+
+ .. code-block:: bash
+
+ llc -march=c safe.bc -o safe.c
+ gcc -shared safe.c -o safe.so
+
+#. If debugging LLC, compile test bitcode native and link with the shared
+ object:
+
+ .. code-block:: bash
+
+ llc test.bc -o test.s
+ gcc test.s safe.so -o test.llc
+ ./test.llc [program options]
+
+#. If debugging the JIT, load the shared object and supply the test
+ bitcode:
+
+ .. code-block:: bash
+
+ lli -load=safe.so test.bc [program options]
Modified: llvm/branches/R600/docs/_themes/llvm-theme/static/llvm-theme.css
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/docs/_themes/llvm-theme/static/llvm-theme.css?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/docs/_themes/llvm-theme/static/llvm-theme.css (original)
+++ llvm/branches/R600/docs/_themes/llvm-theme/static/llvm-theme.css Tue Oct 2 09:15:33 2012
@@ -18,7 +18,6 @@
font-family: 'Lucida Grande', 'Lucida Sans Unicode', 'Geneva',
'Verdana', sans-serif;
font-size: 14px;
- letter-spacing: -0.01em;
line-height: 150%;
text-align: center;
background-color: #BFD1D4;
@@ -239,7 +238,6 @@
font-family: 'Consolas', 'Deja Vu Sans Mono',
'Bitstream Vera Sans Mono', monospace;
font-size: 0.95em;
- letter-spacing: 0.01em;
}
:not(a.reference) > tt {
@@ -274,7 +272,6 @@
font-family: 'Consolas', 'Deja Vu Sans Mono',
'Bitstream Vera Sans Mono', monospace;
font-size: 0.95em;
- letter-spacing: 0.015em;
line-height: 120%;
padding: 0.5em;
border: 1px solid #ccc;
Modified: llvm/branches/R600/docs/index.rst
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/docs/index.rst?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/docs/index.rst (original)
+++ llvm/branches/R600/docs/index.rst Tue Oct 2 09:15:33 2012
@@ -15,43 +15,43 @@
Similarly, documentation is broken down into several high-level groupings
targeted at different audiences:
- * **Design & Overview**
+* **Design & Overview**
- Several introductory papers and presentations are available at
- :ref:`design_and_overview`.
+ Several introductory papers and presentations are available at
+ :ref:`design_and_overview`.
- * **Publications**
+* **Publications**
- The list of `publications <http://llvm.org/pubs>`_ based on LLVM.
+ The list of `publications <http://llvm.org/pubs>`_ based on LLVM.
- * **User Guides**
+* **User Guides**
- Those new to the LLVM system should first vist the :ref:`userguides`.
+ Those new to the LLVM system should first vist the :ref:`userguides`.
- NOTE: If you are a user who is only interested in using LLVM-based
- compilers, you should look into `Clang <http://clang.llvm.org>`_ or
- `DragonEgg <http://dragonegg.llvm.org>`_ instead. The documentation here is
- intended for users who have a need to work with the intermediate LLVM
- representation.
+ NOTE: If you are a user who is only interested in using LLVM-based
+ compilers, you should look into `Clang <http://clang.llvm.org>`_ or
+ `DragonEgg <http://dragonegg.llvm.org>`_ instead. The documentation here is
+ intended for users who have a need to work with the intermediate LLVM
+ representation.
- * **API Clients**
+* **API Clients**
- Developers of applications which use LLVM as a library should visit the
- :ref:`programming`.
+ Developers of applications which use LLVM as a library should visit the
+ :ref:`programming`.
- * **Subsystems**
+* **Subsystems**
- API clients and LLVM developers may be interested in the
- :ref:`subsystems` documentation.
+ API clients and LLVM developers may be interested in the
+ :ref:`subsystems` documentation.
- * **Development Process**
+* **Development Process**
- Additional documentation on the LLVM project can be found at
- :ref:`development_process`.
+ Additional documentation on the LLVM project can be found at
+ :ref:`development_process`.
- * **Mailing Lists**
+* **Mailing Lists**
- For more information, consider consulting the LLVM :ref:`mailing_lists`.
+ For more information, consider consulting the LLVM :ref:`mailing_lists`.
.. toctree::
:maxdepth: 2
Modified: llvm/branches/R600/docs/userguides.rst
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/docs/userguides.rst?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/docs/userguides.rst (original)
+++ llvm/branches/R600/docs/userguides.rst Tue Oct 2 09:15:33 2012
@@ -7,6 +7,7 @@
:hidden:
CMake
+ HowToBuildOnARM
CommandGuide/index
DeveloperPolicy
GettingStartedVS
@@ -15,6 +16,7 @@
Packaging
HowToAddABuilder
yaml2obj
+ HowToSubmitABug
* `The LLVM Getting Started Guide <GettingStarted.html>`_
@@ -26,7 +28,11 @@
An addendum to the main Getting Started guide for those using the `CMake
build system <http://www.cmake.org>`_.
-
+
+* :ref:`how_to_build_on_arm`
+
+ Notes on building and testing LLVM/Clang on ARM.
+
* `Getting Started with the LLVM System using Microsoft Visual Studio
<GettingStartedVS.html>`_
@@ -59,7 +65,7 @@
This describes new features, known bugs, and other limitations.
-* `How to Submit A Bug Report <HowToSubmitABug.html>`_
+* :ref:`how-to-submit-a-bug-report`
Instructions for properly submitting information about any bugs you run into
in the LLVM system.
Modified: llvm/branches/R600/include/llvm/ADT/PackedVector.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/include/llvm/ADT/PackedVector.h?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/include/llvm/ADT/PackedVector.h (original)
+++ llvm/branches/R600/include/llvm/ADT/PackedVector.h Tue Oct 2 09:15:33 2012
@@ -19,32 +19,32 @@
namespace llvm {
-template <typename T, unsigned BitNum, bool isSigned>
+template <typename T, unsigned BitNum, typename BitVectorTy, bool isSigned>
class PackedVectorBase;
// This won't be necessary if we can specialize members without specializing
// the parent template.
-template <typename T, unsigned BitNum>
-class PackedVectorBase<T, BitNum, false> {
+template <typename T, unsigned BitNum, typename BitVectorTy>
+class PackedVectorBase<T, BitNum, BitVectorTy, false> {
protected:
- static T getValue(const llvm::BitVector &Bits, unsigned Idx) {
+ static T getValue(const BitVectorTy &Bits, unsigned Idx) {
T val = T();
for (unsigned i = 0; i != BitNum; ++i)
val = T(val | ((Bits[(Idx << (BitNum-1)) + i] ? 1UL : 0UL) << i));
return val;
}
- static void setValue(llvm::BitVector &Bits, unsigned Idx, T val) {
+ static void setValue(BitVectorTy &Bits, unsigned Idx, T val) {
assert((val >> BitNum) == 0 && "value is too big");
for (unsigned i = 0; i != BitNum; ++i)
Bits[(Idx << (BitNum-1)) + i] = val & (T(1) << i);
}
};
-template <typename T, unsigned BitNum>
-class PackedVectorBase<T, BitNum, true> {
+template <typename T, unsigned BitNum, typename BitVectorTy>
+class PackedVectorBase<T, BitNum, BitVectorTy, true> {
protected:
- static T getValue(const llvm::BitVector &Bits, unsigned Idx) {
+ static T getValue(const BitVectorTy &Bits, unsigned Idx) {
T val = T();
for (unsigned i = 0; i != BitNum-1; ++i)
val = T(val | ((Bits[(Idx << (BitNum-1)) + i] ? 1UL : 0UL) << i));
@@ -53,7 +53,7 @@
return val;
}
- static void setValue(llvm::BitVector &Bits, unsigned Idx, T val) {
+ static void setValue(BitVectorTy &Bits, unsigned Idx, T val) {
if (val < 0) {
val = ~val;
Bits.set((Idx << (BitNum-1)) + BitNum-1);
@@ -71,11 +71,12 @@
/// @endcode
/// will create a vector accepting values -2, -1, 0, 1. Any other value will hit
/// an assertion.
-template <typename T, unsigned BitNum>
-class PackedVector : public PackedVectorBase<T, BitNum,
+template <typename T, unsigned BitNum, typename BitVectorTy = BitVector>
+class PackedVector : public PackedVectorBase<T, BitNum, BitVectorTy,
std::numeric_limits<T>::is_signed> {
- llvm::BitVector Bits;
- typedef PackedVectorBase<T, BitNum, std::numeric_limits<T>::is_signed> base;
+ BitVectorTy Bits;
+ typedef PackedVectorBase<T, BitNum, BitVectorTy,
+ std::numeric_limits<T>::is_signed> base;
public:
class reference {
Modified: llvm/branches/R600/include/llvm/ADT/SparseBitVector.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/include/llvm/ADT/SparseBitVector.h?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/include/llvm/ADT/SparseBitVector.h (original)
+++ llvm/branches/R600/include/llvm/ADT/SparseBitVector.h Tue Oct 2 09:15:33 2012
@@ -262,6 +262,22 @@
}
};
+template <unsigned ElementSize>
+struct ilist_traits<SparseBitVectorElement<ElementSize> >
+ : public ilist_default_traits<SparseBitVectorElement<ElementSize> > {
+ typedef SparseBitVectorElement<ElementSize> Element;
+
+ Element *createSentinel() const { return static_cast<Element *>(&Sentinel); }
+ static void destroySentinel(Element *) {}
+
+ Element *provideInitialHead() const { return createSentinel(); }
+ Element *ensureHead(Element *) const { return createSentinel(); }
+ static void noteHead(Element *, Element *) {}
+
+private:
+ mutable ilist_half_node<Element> Sentinel;
+};
+
template <unsigned ElementSize = 128>
class SparseBitVector {
typedef ilist<SparseBitVectorElement<ElementSize> > ElementList;
Modified: llvm/branches/R600/include/llvm/Attributes.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/include/llvm/Attributes.h?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/include/llvm/Attributes.h (original)
+++ llvm/branches/R600/include/llvm/Attributes.h Tue Oct 2 09:15:33 2012
@@ -22,6 +22,7 @@
namespace llvm {
+class LLVMContext;
class Type;
namespace Attribute {
@@ -96,16 +97,160 @@
#undef DECLARE_LLVM_ATTRIBUTE
+/// Note that uwtable is about the ABI or the user mandating an entry in the
+/// unwind table. The nounwind attribute is about an exception passing by the
+/// function.
+/// In a theoretical system that uses tables for profiling and sjlj for
+/// exceptions, they would be fully independent. In a normal system that
+/// uses tables for both, the semantics are:
+/// nil = Needs an entry because an exception might pass by.
+/// nounwind = No need for an entry
+/// uwtable = Needs an entry because the ABI says so and because
+/// an exception might pass by.
+/// uwtable + nounwind = Needs an entry because the ABI says so.
+
+/// @brief Attributes that only apply to function parameters.
+const AttrConst ParameterOnly = {ByVal_i | Nest_i |
+ StructRet_i | NoCapture_i};
+
+/// @brief Attributes that may be applied to the function itself. These cannot
+/// be used on return values or function parameters.
+const AttrConst FunctionOnly = {NoReturn_i | NoUnwind_i | ReadNone_i |
+ ReadOnly_i | NoInline_i | AlwaysInline_i | OptimizeForSize_i |
+ StackProtect_i | StackProtectReq_i | NoRedZone_i | NoImplicitFloat_i |
+ Naked_i | InlineHint_i | StackAlignment_i |
+ UWTable_i | NonLazyBind_i | ReturnsTwice_i | AddressSafety_i};
+
+/// @brief Parameter attributes that do not apply to vararg call arguments.
+const AttrConst VarArgsIncompatible = {StructRet_i};
+
+/// @brief Attributes that are mutually incompatible.
+const AttrConst MutuallyIncompatible[5] = {
+ {ByVal_i | Nest_i | StructRet_i},
+ {ByVal_i | Nest_i | InReg_i },
+ {ZExt_i | SExt_i},
+ {ReadNone_i | ReadOnly_i},
+ {NoInline_i | AlwaysInline_i}
+};
+
} // namespace Attribute
+/// AttributeImpl - The internal representation of the Attributes class. This is
+/// uniquified.
+class AttributesImpl;
+
/// Attributes - A bitset of attributes.
class Attributes {
// Currently, we need less than 64 bits.
uint64_t Bits;
+
+ explicit Attributes(AttributesImpl *A);
public:
- Attributes() : Bits(0) { }
- explicit Attributes(uint64_t Val) : Bits(Val) { }
- /*implicit*/ Attributes(Attribute::AttrConst Val) : Bits(Val.v) { }
+ Attributes() : Bits(0) {}
+ explicit Attributes(uint64_t Val) : Bits(Val) {}
+ /*implicit*/ Attributes(Attribute::AttrConst Val) : Bits(Val.v) {}
+
+ class Builder {
+ friend class Attributes;
+ uint64_t Bits;
+ public:
+ Builder() : Bits(0) {}
+ Builder(const Attributes &A) : Bits(A.Bits) {}
+
+ void addZExtAttr() {
+ Bits |= Attribute::ZExt_i;
+ }
+ void addSExtAttr() {
+ Bits |= Attribute::SExt_i;
+ }
+ void addNoReturnAttr() {
+ Bits |= Attribute::NoReturn_i;
+ }
+ void addInRegAttr() {
+ Bits |= Attribute::InReg_i;
+ }
+ void addStructRetAttr() {
+ Bits |= Attribute::StructRet_i;
+ }
+ void addNoUnwindAttr() {
+ Bits |= Attribute::NoUnwind_i;
+ }
+ void addNoAliasAttr() {
+ Bits |= Attribute::NoAlias_i;
+ }
+ void addByValAttr() {
+ Bits |= Attribute::ByVal_i;
+ }
+ void addNestAttr() {
+ Bits |= Attribute::Nest_i;
+ }
+ void addReadNoneAttr() {
+ Bits |= Attribute::ReadNone_i;
+ }
+ void addReadOnlyAttr() {
+ Bits |= Attribute::ReadOnly_i;
+ }
+ void addNoInlineAttr() {
+ Bits |= Attribute::NoInline_i;
+ }
+ void addAlwaysInlineAttr() {
+ Bits |= Attribute::AlwaysInline_i;
+ }
+ void addOptimizeForSizeAttr() {
+ Bits |= Attribute::OptimizeForSize_i;
+ }
+ void addStackProtectAttr() {
+ Bits |= Attribute::StackProtect_i;
+ }
+ void addStackProtectReqAttr() {
+ Bits |= Attribute::StackProtectReq_i;
+ }
+ void addNoCaptureAttr() {
+ Bits |= Attribute::NoCapture_i;
+ }
+ void addNoRedZoneAttr() {
+ Bits |= Attribute::NoRedZone_i;
+ }
+ void addNoImplicitFloatAttr() {
+ Bits |= Attribute::NoImplicitFloat_i;
+ }
+ void addNakedAttr() {
+ Bits |= Attribute::Naked_i;
+ }
+ void addInlineHintAttr() {
+ Bits |= Attribute::InlineHint_i;
+ }
+ void addReturnsTwiceAttr() {
+ Bits |= Attribute::ReturnsTwice_i;
+ }
+ void addUWTableAttr() {
+ Bits |= Attribute::UWTable_i;
+ }
+ void addNonLazyBindAttr() {
+ Bits |= Attribute::NonLazyBind_i;
+ }
+ void addAddressSafetyAttr() {
+ Bits |= Attribute::AddressSafety_i;
+ }
+ void addAlignmentAttr(unsigned Align) {
+ if (Align == 0) return;
+ assert(isPowerOf2_32(Align) && "Alignment must be a power of two.");
+ assert(Align <= 0x40000000 && "Alignment too large.");
+ Bits |= (Log2_32(Align) + 1) << 16;
+ }
+ void addStackAlignmentAttr(unsigned Align) {
+ // Default alignment, allow the target to define how to align it.
+ if (Align == 0) return;
+
+ assert(isPowerOf2_32(Align) && "Alignment must be a power of two.");
+ assert(Align <= 0x100 && "Alignment too large.");
+ Bits |= (Log2_32(Align) + 1) << 26;
+ }
+ };
+
+ /// get - Return a uniquified Attributes object. This takes the uniquified
+ /// value from the Builder and wraps it in the Attributes class.
+ static Attributes get(LLVMContext &Context, Builder &B);
// Attribute query methods.
// FIXME: StackAlignment & Alignment attributes have no predicate methods.
@@ -198,20 +343,12 @@
return Bits & Attribute::AddressSafety_i;
}
- uint64_t getRawAlignment() const {
- return Bits & Attribute::Alignment_i;
- }
- uint64_t getRawStackAlignment() const {
- return Bits & Attribute::StackAlignment_i;
- }
-
/// This returns the alignment field of an attribute as a byte alignment
/// value.
unsigned getAlignment() const {
if (!hasAlignmentAttr())
return 0;
-
- return 1U << ((getRawAlignment() >> 16) - 1);
+ return 1U << (((Bits & Attribute::Alignment_i) >> 16) - 1);
}
/// This returns the stack alignment field of an attribute as a byte alignment
@@ -219,32 +356,7 @@
unsigned getStackAlignment() const {
if (!hasStackAlignmentAttr())
return 0;
-
- return 1U << ((getRawStackAlignment() >> 26) - 1);
- }
-
- /// This turns an int alignment (a power of 2, normally) into the form used
- /// internally in Attributes.
- static Attributes constructAlignmentFromInt(unsigned i) {
- // Default alignment, allow the target to define how to align it.
- if (i == 0)
- return Attribute::None;
-
- assert(isPowerOf2_32(i) && "Alignment must be a power of two.");
- assert(i <= 0x40000000 && "Alignment too large.");
- return Attributes((Log2_32(i)+1) << 16);
- }
-
- /// This turns an int stack alignment (which must be a power of 2) into the
- /// form used internally in Attributes.
- static Attributes constructStackAlignmentFromInt(unsigned i) {
- // Default alignment, allow the target to define how to align it.
- if (i == 0)
- return Attribute::None;
-
- assert(isPowerOf2_32(i) && "Alignment must be a power of two.");
- assert(i <= 0x100 && "Alignment too large.");
- return Attributes((Log2_32(i)+1) << 26);
+ return 1U << (((Bits & Attribute::StackAlignment_i) >> 26) - 1);
}
// This is a "safe bool() operator".
@@ -276,107 +388,86 @@
Attributes operator ~ () const { return Attributes(~Bits); }
uint64_t Raw() const { return Bits; }
- /// The set of Attributes set in Attributes is converted to a string of
- /// equivalent mnemonics. This is, presumably, for writing out the mnemonics
- /// for the assembly writer.
- /// @brief Convert attribute bits to text
- std::string getAsString() const;
-};
+ /// This turns an int alignment (a power of 2, normally) into the form used
+ /// internally in Attributes.
+ static Attributes constructAlignmentFromInt(unsigned i) {
+ // Default alignment, allow the target to define how to align it.
+ if (i == 0)
+ return Attribute::None;
-namespace Attribute {
+ assert(isPowerOf2_32(i) && "Alignment must be a power of two.");
+ assert(i <= 0x40000000 && "Alignment too large.");
+ return Attributes((Log2_32(i)+1) << 16);
+ }
-/// Note that uwtable is about the ABI or the user mandating an entry in the
-/// unwind table. The nounwind attribute is about an exception passing by the
-/// function.
-/// In a theoretical system that uses tables for profiling and sjlj for
-/// exceptions, they would be fully independent. In a normal system that
-/// uses tables for both, the semantics are:
-/// nil = Needs an entry because an exception might pass by.
-/// nounwind = No need for an entry
-/// uwtable = Needs an entry because the ABI says so and because
-/// an exception might pass by.
-/// uwtable + nounwind = Needs an entry because the ABI says so.
+ /// This turns an int stack alignment (which must be a power of 2) into the
+ /// form used internally in Attributes.
+ static Attributes constructStackAlignmentFromInt(unsigned i) {
+ // Default alignment, allow the target to define how to align it.
+ if (i == 0)
+ return Attribute::None;
-/// @brief Attributes that only apply to function parameters.
-const AttrConst ParameterOnly = {ByVal_i | Nest_i |
- StructRet_i | NoCapture_i};
+ assert(isPowerOf2_32(i) && "Alignment must be a power of two.");
+ assert(i <= 0x100 && "Alignment too large.");
+ return Attributes((Log2_32(i)+1) << 26);
+ }
-/// @brief Attributes that may be applied to the function itself. These cannot
-/// be used on return values or function parameters.
-const AttrConst FunctionOnly = {NoReturn_i | NoUnwind_i | ReadNone_i |
- ReadOnly_i | NoInline_i | AlwaysInline_i | OptimizeForSize_i |
- StackProtect_i | StackProtectReq_i | NoRedZone_i | NoImplicitFloat_i |
- Naked_i | InlineHint_i | StackAlignment_i |
- UWTable_i | NonLazyBind_i | ReturnsTwice_i | AddressSafety_i};
+ /// @brief Which attributes cannot be applied to a type.
+ static Attributes typeIncompatible(Type *Ty);
-/// @brief Parameter attributes that do not apply to vararg call arguments.
-const AttrConst VarArgsIncompatible = {StructRet_i};
+ /// This returns an integer containing an encoding of all the LLVM attributes
+ /// found in the given attribute bitset. Any change to this encoding is a
+ /// breaking change to bitcode compatibility.
+ static uint64_t encodeLLVMAttributesForBitcode(Attributes Attrs) {
+ // FIXME: It doesn't make sense to store the alignment information as an
+ // expanded out value, we should store it as a log2 value. However, we
+ // can't just change that here without breaking bitcode compatibility. If
+ // this ever becomes a problem in practice, we should introduce new tag
+ // numbers in the bitcode file and have those tags use a more efficiently
+ // encoded alignment field.
+
+ // Store the alignment in the bitcode as a 16-bit raw value instead of a
+ // 5-bit log2 encoded value. Shift the bits above the alignment up by 11
+ // bits.
+ uint64_t EncodedAttrs = Attrs.Raw() & 0xffff;
+ if (Attrs.hasAlignmentAttr())
+ EncodedAttrs |= (1ULL << 16) <<
+ (((Attrs.Bits & Attribute::Alignment_i) - 1) >> 16);
+ EncodedAttrs |= (Attrs.Raw() & (0xfffULL << 21)) << 11;
+ return EncodedAttrs;
+ }
+
+ /// This returns an attribute bitset containing the LLVM attributes that have
+ /// been decoded from the given integer. This function must stay in sync with
+ /// 'encodeLLVMAttributesForBitcode'.
+ static Attributes decodeLLVMAttributesForBitcode(uint64_t EncodedAttrs) {
+ // The alignment is stored as a 16-bit raw value from bits 31--16. We shift
+ // the bits above 31 down by 11 bits.
+ unsigned Alignment = (EncodedAttrs & (0xffffULL << 16)) >> 16;
+ assert((!Alignment || isPowerOf2_32(Alignment)) &&
+ "Alignment must be a power of two.");
+
+ Attributes Attrs(EncodedAttrs & 0xffff);
+ if (Alignment)
+ Attrs |= Attributes::constructAlignmentFromInt(Alignment);
+ Attrs |= Attributes((EncodedAttrs & (0xfffULL << 32)) >> 11);
+ return Attrs;
+ }
-/// @brief Attributes that are mutually incompatible.
-const AttrConst MutuallyIncompatible[5] = {
- {ByVal_i | Nest_i | StructRet_i},
- {ByVal_i | Nest_i | InReg_i },
- {ZExt_i | SExt_i},
- {ReadNone_i | ReadOnly_i},
- {NoInline_i | AlwaysInline_i}
+ /// The set of Attributes set in Attributes is converted to a string of
+ /// equivalent mnemonics. This is, presumably, for writing out the mnemonics
+ /// for the assembly writer.
+ /// @brief Convert attribute bits to text
+ std::string getAsString() const;
};
-/// @brief Which attributes cannot be applied to a type.
-Attributes typeIncompatible(Type *Ty);
-
-/// This returns an integer containing an encoding of all the
-/// LLVM attributes found in the given attribute bitset. Any
-/// change to this encoding is a breaking change to bitcode
-/// compatibility.
-inline uint64_t encodeLLVMAttributesForBitcode(Attributes Attrs) {
- // FIXME: It doesn't make sense to store the alignment information as an
- // expanded out value, we should store it as a log2 value. However, we can't
- // just change that here without breaking bitcode compatibility. If this ever
- // becomes a problem in practice, we should introduce new tag numbers in the
- // bitcode file and have those tags use a more efficiently encoded alignment
- // field.
-
- // Store the alignment in the bitcode as a 16-bit raw value instead of a
- // 5-bit log2 encoded value. Shift the bits above the alignment up by
- // 11 bits.
-
- uint64_t EncodedAttrs = Attrs.Raw() & 0xffff;
- if (Attrs.hasAlignmentAttr())
- EncodedAttrs |= (1ull << 16) <<
- ((Attrs.getRawAlignment() - 1) >> 16);
- EncodedAttrs |= (Attrs.Raw() & (0xfffull << 21)) << 11;
-
- return EncodedAttrs;
-}
-
-/// This returns an attribute bitset containing the LLVM attributes
-/// that have been decoded from the given integer. This function
-/// must stay in sync with 'encodeLLVMAttributesForBitcode'.
-inline Attributes decodeLLVMAttributesForBitcode(uint64_t EncodedAttrs) {
- // The alignment is stored as a 16-bit raw value from bits 31--16.
- // We shift the bits above 31 down by 11 bits.
-
- unsigned Alignment = (EncodedAttrs & (0xffffull << 16)) >> 16;
- assert((!Alignment || isPowerOf2_32(Alignment)) &&
- "Alignment must be a power of two.");
-
- Attributes Attrs(EncodedAttrs & 0xffff);
- if (Alignment)
- Attrs |= Attributes::constructAlignmentFromInt(Alignment);
- Attrs |= Attributes((EncodedAttrs & (0xfffull << 32)) >> 11);
-
- return Attrs;
-}
-
-} // end namespace Attribute
-
/// This is just a pair of values to associate a set of attributes
/// with an index.
struct AttributeWithIndex {
- Attributes Attrs; ///< The attributes that are set, or'd together.
- unsigned Index; ///< Index of the parameter for which the attributes apply.
- ///< Index 0 is used for return value attributes.
- ///< Index ~0U is used for function attributes.
+ Attributes Attrs; ///< The attributes that are set, or'd together.
+ unsigned Index; ///< Index of the parameter for which the attributes apply.
+ ///< Index 0 is used for return value attributes.
+ ///< Index ~0U is used for function attributes.
static AttributeWithIndex get(unsigned Idx, Attributes Attrs) {
AttributeWithIndex P;
Modified: llvm/branches/R600/include/llvm/CodeGen/MachineModuleInfoImpls.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/include/llvm/CodeGen/MachineModuleInfoImpls.h?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/include/llvm/CodeGen/MachineModuleInfoImpls.h (original)
+++ llvm/branches/R600/include/llvm/CodeGen/MachineModuleInfoImpls.h Tue Oct 2 09:15:33 2012
@@ -38,7 +38,7 @@
/// this GV is external.
DenseMap<MCSymbol*, StubValueTy> HiddenGVStubs;
- virtual void Anchor(); // Out of line virtual method.
+ virtual void anchor(); // Out of line virtual method.
public:
MachineModuleInfoMachO(const MachineModuleInfo &) {}
@@ -76,7 +76,7 @@
/// mode.
DenseMap<MCSymbol*, StubValueTy> GVStubs;
- virtual void Anchor(); // Out of line virtual method.
+ virtual void anchor(); // Out of line virtual method.
public:
MachineModuleInfoELF(const MachineModuleInfo &) {}
Modified: llvm/branches/R600/include/llvm/CodeGen/ValueTypes.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/include/llvm/CodeGen/ValueTypes.h?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/include/llvm/CodeGen/ValueTypes.h (original)
+++ llvm/branches/R600/include/llvm/CodeGen/ValueTypes.h Tue Oct 2 09:15:33 2012
@@ -181,6 +181,18 @@
SimpleTy <= MVT::LAST_VECTOR_VALUETYPE);
}
+ /// is16BitVector - Return true if this is a 16-bit vector type.
+ bool is16BitVector() const {
+ return (SimpleTy == MVT::v2i8 || SimpleTy == MVT::v1i16 ||
+ SimpleTy == MVT::v16i1);
+ }
+
+ /// is32BitVector - Return true if this is a 32-bit vector type.
+ bool is32BitVector() const {
+ return (SimpleTy == MVT::v4i8 || SimpleTy == MVT::v2i16 ||
+ SimpleTy == MVT::v1i32);
+ }
+
/// is64BitVector - Return true if this is a 64-bit vector type.
bool is64BitVector() const {
return (SimpleTy == MVT::v8i8 || SimpleTy == MVT::v4i16 ||
@@ -563,19 +575,12 @@
/// is16BitVector - Return true if this is a 16-bit vector type.
bool is16BitVector() const {
- if (!isSimple())
- return isExtended16BitVector();
-
- return (V == MVT::v2i8 || V==MVT::v1i16 || V == MVT::v16i1);
+ return isSimple() ? V.is16BitVector() : isExtended16BitVector();
}
/// is32BitVector - Return true if this is a 32-bit vector type.
bool is32BitVector() const {
- if (!isSimple())
- return isExtended32BitVector();
-
- return (V == MVT::v4i8 || V==MVT::v2i16
- || V==MVT::v1i32);
+ return isSimple() ? V.is32BitVector() : isExtended32BitVector();
}
/// is64BitVector - Return true if this is a 64-bit vector type.
Modified: llvm/branches/R600/include/llvm/Config/config.h.cmake
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/include/llvm/Config/config.h.cmake?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/include/llvm/Config/config.h.cmake (original)
+++ llvm/branches/R600/include/llvm/Config/config.h.cmake Tue Oct 2 09:15:33 2012
@@ -17,6 +17,9 @@
/* Default <path> to all compiler invocations for --sysroot=<path>. */
#undef DEFAULT_SYSROOT
+/* Define if you want backtraces on crash */
+#cmakedefine ENABLE_BACKTRACES
+
/* Define if position independent code is enabled */
#cmakedefine ENABLE_PIC
Removed: llvm/branches/R600/include/llvm/ExecutionEngine/IntelJITEventsWrapper.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/include/llvm/ExecutionEngine/IntelJITEventsWrapper.h?rev=165010&view=auto
==============================================================================
--- llvm/branches/R600/include/llvm/ExecutionEngine/IntelJITEventsWrapper.h (original)
+++ llvm/branches/R600/include/llvm/ExecutionEngine/IntelJITEventsWrapper.h (removed)
@@ -1,102 +0,0 @@
-//===-- IntelJITEventsWrapper.h - Intel JIT Events API Wrapper --*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines a wrapper for the Intel JIT Events API. It allows for the
-// implementation of the jitprofiling library to be swapped with an alternative
-// implementation (for testing). To include this file, you must have the
-// jitprofiling.h header available; it is available in Intel(R) VTune(TM)
-// Amplifier XE 2011.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef INTEL_JIT_EVENTS_WRAPPER_H
-#define INTEL_JIT_EVENTS_WRAPPER_H
-
-#include <jitprofiling.h>
-
-namespace llvm {
-
-class IntelJITEventsWrapper {
- // Function pointer types for testing implementation of Intel jitprofiling
- // library
- typedef int (*NotifyEventPtr)(iJIT_JVM_EVENT, void*);
- typedef void (*RegisterCallbackExPtr)(void *, iJIT_ModeChangedEx );
- typedef iJIT_IsProfilingActiveFlags (*IsProfilingActivePtr)(void);
- typedef void (*FinalizeThreadPtr)(void);
- typedef void (*FinalizeProcessPtr)(void);
- typedef unsigned int (*GetNewMethodIDPtr)(void);
-
- NotifyEventPtr NotifyEventFunc;
- RegisterCallbackExPtr RegisterCallbackExFunc;
- IsProfilingActivePtr IsProfilingActiveFunc;
- FinalizeThreadPtr FinalizeThreadFunc;
- FinalizeProcessPtr FinalizeProcessFunc;
- GetNewMethodIDPtr GetNewMethodIDFunc;
-
-public:
- bool isAmplifierRunning() {
- return iJIT_IsProfilingActive() == iJIT_SAMPLING_ON;
- }
-
- IntelJITEventsWrapper()
- : NotifyEventFunc(::iJIT_NotifyEvent),
- RegisterCallbackExFunc(::iJIT_RegisterCallbackEx),
- IsProfilingActiveFunc(::iJIT_IsProfilingActive),
- FinalizeThreadFunc(::FinalizeThread),
- FinalizeProcessFunc(::FinalizeProcess),
- GetNewMethodIDFunc(::iJIT_GetNewMethodID) {
- }
-
- IntelJITEventsWrapper(NotifyEventPtr NotifyEventImpl,
- RegisterCallbackExPtr RegisterCallbackExImpl,
- IsProfilingActivePtr IsProfilingActiveImpl,
- FinalizeThreadPtr FinalizeThreadImpl,
- FinalizeProcessPtr FinalizeProcessImpl,
- GetNewMethodIDPtr GetNewMethodIDImpl)
- : NotifyEventFunc(NotifyEventImpl),
- RegisterCallbackExFunc(RegisterCallbackExImpl),
- IsProfilingActiveFunc(IsProfilingActiveImpl),
- FinalizeThreadFunc(FinalizeThreadImpl),
- FinalizeProcessFunc(FinalizeProcessImpl),
- GetNewMethodIDFunc(GetNewMethodIDImpl) {
- }
-
- // Sends an event anncouncing that a function has been emitted
- // return values are event-specific. See Intel documentation for details.
- int iJIT_NotifyEvent(iJIT_JVM_EVENT EventType, void *EventSpecificData) {
- if (!NotifyEventFunc)
- return -1;
- return NotifyEventFunc(EventType, EventSpecificData);
- }
-
- // Registers a callback function to receive notice of profiling state changes
- void iJIT_RegisterCallbackEx(void *UserData,
- iJIT_ModeChangedEx NewModeCallBackFuncEx) {
- if (RegisterCallbackExFunc)
- RegisterCallbackExFunc(UserData, NewModeCallBackFuncEx);
- }
-
- // Returns the current profiler mode
- iJIT_IsProfilingActiveFlags iJIT_IsProfilingActive(void) {
- if (!IsProfilingActiveFunc)
- return iJIT_NOTHING_RUNNING;
- return IsProfilingActiveFunc();
- }
-
- // Generates a locally unique method ID for use in code registration
- unsigned int iJIT_GetNewMethodID(void) {
- if (!GetNewMethodIDFunc)
- return -1;
- return GetNewMethodIDFunc();
- }
-};
-
-} //namespace llvm
-
-#endif //INTEL_JIT_EVENTS_WRAPPER_H
Modified: llvm/branches/R600/include/llvm/Function.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/include/llvm/Function.h?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/include/llvm/Function.h (original)
+++ llvm/branches/R600/include/llvm/Function.h Tue Oct 2 09:15:33 2012
@@ -168,10 +168,10 @@
///
void setAttributes(const AttrListPtr &attrs) { AttributeList = attrs; }
- /// hasFnAttr - Return true if this function has the given attribute.
- bool hasFnAttr(Attributes N) const {
- // Function Attributes are stored at ~0 index
- return AttributeList.paramHasAttr(~0U, N);
+ /// getFnAttributes - Return the function attributes for querying.
+ ///
+ Attributes getFnAttributes() const {
+ return AttributeList.getFnAttributes();
}
/// addFnAttr - Add function attributes to this function.
@@ -195,6 +195,11 @@
void setGC(const char *Str);
void clearGC();
+ /// getParamAttributes - Return the parameter attributes for querying.
+ Attributes getParamAttributes(unsigned Idx) const {
+ return AttributeList.getParamAttributes(Idx);
+ }
+
/// @brief Determine whether the function has the given attribute.
bool paramHasAttr(unsigned i, Attributes attr) const {
return AttributeList.paramHasAttr(i, attr);
@@ -213,7 +218,7 @@
/// @brief Determine if the function does not access memory.
bool doesNotAccessMemory() const {
- return hasFnAttr(Attribute::ReadNone);
+ return getFnAttributes().hasReadNoneAttr();
}
void setDoesNotAccessMemory(bool DoesNotAccessMemory = true) {
if (DoesNotAccessMemory) addFnAttr(Attribute::ReadNone);
@@ -222,7 +227,7 @@
/// @brief Determine if the function does not access or only reads memory.
bool onlyReadsMemory() const {
- return doesNotAccessMemory() || hasFnAttr(Attribute::ReadOnly);
+ return doesNotAccessMemory() || getFnAttributes().hasReadOnlyAttr();
}
void setOnlyReadsMemory(bool OnlyReadsMemory = true) {
if (OnlyReadsMemory) addFnAttr(Attribute::ReadOnly);
@@ -231,7 +236,7 @@
/// @brief Determine if the function cannot return.
bool doesNotReturn() const {
- return hasFnAttr(Attribute::NoReturn);
+ return getFnAttributes().hasNoReturnAttr();
}
void setDoesNotReturn(bool DoesNotReturn = true) {
if (DoesNotReturn) addFnAttr(Attribute::NoReturn);
@@ -240,7 +245,7 @@
/// @brief Determine if the function cannot unwind.
bool doesNotThrow() const {
- return hasFnAttr(Attribute::NoUnwind);
+ return getFnAttributes().hasNoUnwindAttr();
}
void setDoesNotThrow(bool DoesNotThrow = true) {
if (DoesNotThrow) addFnAttr(Attribute::NoUnwind);
@@ -250,7 +255,7 @@
/// @brief True if the ABI mandates (or the user requested) that this
/// function be in a unwind table.
bool hasUWTable() const {
- return hasFnAttr(Attribute::UWTable);
+ return getFnAttributes().hasUWTableAttr();
}
void setHasUWTable(bool HasUWTable = true) {
if (HasUWTable)
@@ -267,13 +272,14 @@
/// @brief Determine if the function returns a structure through first
/// pointer argument.
bool hasStructRetAttr() const {
- return paramHasAttr(1, Attribute::StructRet);
+ return getParamAttributes(1).hasStructRetAttr();
}
/// @brief Determine if the parameter does not alias other parameters.
/// @param n The parameter to check. 1 is the first parameter, 0 is the return
bool doesNotAlias(unsigned n) const {
- return paramHasAttr(n, Attribute::NoAlias);
+ return n != 0 ? getParamAttributes(n).hasNoAliasAttr() :
+ AttributeList.getRetAttributes().hasNoAliasAttr();
}
void setDoesNotAlias(unsigned n, bool DoesNotAlias = true) {
if (DoesNotAlias) addAttribute(n, Attribute::NoAlias);
@@ -283,7 +289,7 @@
/// @brief Determine if the parameter can be captured.
/// @param n The parameter to check. 1 is the first parameter, 0 is the return
bool doesNotCapture(unsigned n) const {
- return paramHasAttr(n, Attribute::NoCapture);
+ return getParamAttributes(n).hasNoCaptureAttr();
}
void setDoesNotCapture(unsigned n, bool DoesNotCapture = true) {
if (DoesNotCapture) addAttribute(n, Attribute::NoCapture);
Modified: llvm/branches/R600/include/llvm/IRBuilder.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/include/llvm/IRBuilder.h?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/include/llvm/IRBuilder.h (original)
+++ llvm/branches/R600/include/llvm/IRBuilder.h Tue Oct 2 09:15:33 2012
@@ -285,12 +285,15 @@
/// If the pointers aren't i8*, they will be converted. If a TBAA tag is
/// specified, it will be added to the instruction.
CallInst *CreateMemCpy(Value *Dst, Value *Src, uint64_t Size, unsigned Align,
- bool isVolatile = false, MDNode *TBAATag = 0) {
- return CreateMemCpy(Dst, Src, getInt64(Size), Align, isVolatile, TBAATag);
+ bool isVolatile = false, MDNode *TBAATag = 0,
+ MDNode *TBAAStructTag = 0) {
+ return CreateMemCpy(Dst, Src, getInt64(Size), Align, isVolatile, TBAATag,
+ TBAAStructTag);
}
CallInst *CreateMemCpy(Value *Dst, Value *Src, Value *Size, unsigned Align,
- bool isVolatile = false, MDNode *TBAATag = 0);
+ bool isVolatile = false, MDNode *TBAATag = 0,
+ MDNode *TBAAStructTag = 0);
/// CreateMemMove - Create and insert a memmove between the specified
/// pointers. If the pointers aren't i8*, they will be converted. If a TBAA
@@ -810,6 +813,31 @@
StoreInst *CreateStore(Value *Val, Value *Ptr, bool isVolatile = false) {
return Insert(new StoreInst(Val, Ptr, isVolatile));
}
+ // Provided to resolve 'CreateAlignedLoad(Ptr, Align, "...")' correctly,
+ // instead of converting the string to 'bool' for the isVolatile parameter.
+ LoadInst *CreateAlignedLoad(Value *Ptr, unsigned Align, const char *Name) {
+ LoadInst *LI = CreateLoad(Ptr, Name);
+ LI->setAlignment(Align);
+ return LI;
+ }
+ LoadInst *CreateAlignedLoad(Value *Ptr, unsigned Align,
+ const Twine &Name = "") {
+ LoadInst *LI = CreateLoad(Ptr, Name);
+ LI->setAlignment(Align);
+ return LI;
+ }
+ LoadInst *CreateAlignedLoad(Value *Ptr, unsigned Align, bool isVolatile,
+ const Twine &Name = "") {
+ LoadInst *LI = CreateLoad(Ptr, isVolatile, Name);
+ LI->setAlignment(Align);
+ return LI;
+ }
+ StoreInst *CreateAlignedStore(Value *Val, Value *Ptr, unsigned Align,
+ bool isVolatile = false) {
+ StoreInst *SI = CreateStore(Val, Ptr, isVolatile);
+ SI->setAlignment(Align);
+ return SI;
+ }
FenceInst *CreateFence(AtomicOrdering Ordering,
SynchronizationScope SynchScope = CrossThread) {
return Insert(new FenceInst(Context, Ordering, SynchScope));
@@ -970,6 +998,30 @@
Value *CreateSExt(Value *V, Type *DestTy, const Twine &Name = "") {
return CreateCast(Instruction::SExt, V, DestTy, Name);
}
+ /// CreateZExtOrTrunc - Create a ZExt or Trunc from the integer value V to
+ /// DestTy. Return the value untouched if the type of V is already DestTy.
+ Value *CreateZExtOrTrunc(Value *V, IntegerType *DestTy,
+ const Twine &Name = "") {
+ assert(isa<IntegerType>(V->getType()) && "Can only zero extend integers!");
+ IntegerType *IntTy = cast<IntegerType>(V->getType());
+ if (IntTy->getBitWidth() < DestTy->getBitWidth())
+ return CreateZExt(V, DestTy, Name);
+ if (IntTy->getBitWidth() > DestTy->getBitWidth())
+ return CreateTrunc(V, DestTy, Name);
+ return V;
+ }
+ /// CreateSExtOrTrunc - Create a SExt or Trunc from the integer value V to
+ /// DestTy. Return the value untouched if the type of V is already DestTy.
+ Value *CreateSExtOrTrunc(Value *V, IntegerType *DestTy,
+ const Twine &Name = "") {
+ assert(isa<IntegerType>(V->getType()) && "Can only sign extend integers!");
+ IntegerType *IntTy = cast<IntegerType>(V->getType());
+ if (IntTy->getBitWidth() < DestTy->getBitWidth())
+ return CreateSExt(V, DestTy, Name);
+ if (IntTy->getBitWidth() > DestTy->getBitWidth())
+ return CreateTrunc(V, DestTy, Name);
+ return V;
+ }
Value *CreateFPToUI(Value *V, Type *DestTy, const Twine &Name = ""){
return CreateCast(Instruction::FPToUI, V, DestTy, Name);
}
Modified: llvm/branches/R600/include/llvm/MC/MCExpr.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/include/llvm/MC/MCExpr.h?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/include/llvm/MC/MCExpr.h (original)
+++ llvm/branches/R600/include/llvm/MC/MCExpr.h Tue Oct 2 09:15:33 2012
@@ -446,7 +446,7 @@
/// NOTE: All subclasses are required to have trivial destructors because
/// MCExprs are bump pointer allocated and not destructed.
class MCTargetExpr : public MCExpr {
- virtual void Anchor();
+ virtual void anchor();
protected:
MCTargetExpr() : MCExpr(Target) {}
virtual ~MCTargetExpr() {}
Modified: llvm/branches/R600/include/llvm/MC/MCObjectStreamer.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/include/llvm/MC/MCObjectStreamer.h?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/include/llvm/MC/MCObjectStreamer.h (original)
+++ llvm/branches/R600/include/llvm/MC/MCObjectStreamer.h Tue Oct 2 09:15:33 2012
@@ -81,6 +81,8 @@
const MCSymbol *Label);
virtual void EmitGPRel32Value(const MCExpr *Value);
virtual void EmitGPRel64Value(const MCExpr *Value);
+ virtual void EmitFill(uint64_t NumBytes, uint8_t FillValue,
+ unsigned AddrSpace);
virtual void FinishImpl();
/// @}
Modified: llvm/branches/R600/include/llvm/Object/MachOFormat.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/include/llvm/Object/MachOFormat.h?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/include/llvm/Object/MachOFormat.h (original)
+++ llvm/branches/R600/include/llvm/Object/MachOFormat.h Tue Oct 2 09:15:33 2012
@@ -61,7 +61,10 @@
CSARM_V6 = 6,
CSARM_V5TEJ = 7,
CSARM_XSCALE = 8,
- CSARM_V7 = 9
+ CSARM_V7 = 9,
+ CSARM_V7F = 10,
+ CSARM_V7S = 11,
+ CSARM_V7K = 12
};
/// \brief PowerPC Machine Subtypes.
Modified: llvm/branches/R600/include/llvm/Operator.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/include/llvm/Operator.h?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/include/llvm/Operator.h (original)
+++ llvm/branches/R600/include/llvm/Operator.h Tue Oct 2 09:15:33 2012
@@ -35,7 +35,9 @@
void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
void *operator new(size_t s) LLVM_DELETED_FUNCTION;
Operator() LLVM_DELETED_FUNCTION;
- ~Operator() LLVM_DELETED_FUNCTION;
+ // NOTE: cannot use LLVM_DELETED_FUNCTION because gcc errors when deleting
+ // an override of a non-deleted function.
+ ~Operator();
public:
/// getOpcode - Return the opcode for this Instruction or ConstantExpr.
@@ -77,7 +79,7 @@
};
private:
- ~OverflowingBinaryOperator() LLVM_DELETED_FUNCTION;
+ ~OverflowingBinaryOperator(); // DO NOT IMPLEMENT
friend class BinaryOperator;
friend class ConstantExpr;
@@ -131,7 +133,7 @@
};
private:
- ~PossiblyExactOperator() LLVM_DELETED_FUNCTION;
+ ~PossiblyExactOperator(); // DO NOT IMPLEMENT
friend class BinaryOperator;
friend class ConstantExpr;
@@ -168,7 +170,7 @@
/// information about relaxed accuracy requirements attached to them.
class FPMathOperator : public Operator {
private:
- ~FPMathOperator() LLVM_DELETED_FUNCTION;
+ ~FPMathOperator(); // DO NOT IMPLEMENT
public:
Modified: llvm/branches/R600/include/llvm/Support/TargetFolder.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/include/llvm/Support/TargetFolder.h?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/include/llvm/Support/TargetFolder.h (original)
+++ llvm/branches/R600/include/llvm/Support/TargetFolder.h Tue Oct 2 09:15:33 2012
@@ -177,7 +177,14 @@
return Fold(ConstantExpr::getIntegerCast(C, DestTy, isSigned));
}
Constant *CreatePointerCast(Constant *C, Type *DestTy) const {
- return ConstantExpr::getPointerCast(C, DestTy);
+ if (C->getType() == DestTy)
+ return C; // avoid calling Fold
+ return Fold(ConstantExpr::getPointerCast(C, DestTy));
+ }
+ Constant *CreateFPCast(Constant *C, Type *DestTy) const {
+ if (C->getType() == DestTy)
+ return C; // avoid calling Fold
+ return Fold(ConstantExpr::getFPCast(C, DestTy));
}
Constant *CreateBitCast(Constant *C, Type *DestTy) const {
return CreateCast(Instruction::BitCast, C, DestTy);
Modified: llvm/branches/R600/include/llvm/Target/TargetData.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/include/llvm/Target/TargetData.h?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/include/llvm/Target/TargetData.h (original)
+++ llvm/branches/R600/include/llvm/Target/TargetData.h Tue Oct 2 09:15:33 2012
@@ -53,10 +53,10 @@
/// @note The unusual order of elements in the structure attempts to reduce
/// padding and make the structure slightly more cache friendly.
struct TargetAlignElem {
- AlignTypeEnum AlignType : 8; ///< Alignment type (AlignTypeEnum)
- unsigned ABIAlign; ///< ABI alignment for this type/bitw
- unsigned PrefAlign; ///< Pref. alignment for this type/bitw
- uint32_t TypeBitWidth; ///< Type bit width
+ unsigned AlignType : 8; ///< Alignment type (AlignTypeEnum)
+ unsigned TypeBitWidth : 24; ///< Type bit width
+ unsigned ABIAlign : 16; ///< ABI alignment for this type/bitw
+ unsigned PrefAlign : 16; ///< Pref. alignment for this type/bitw
/// Initializer
static TargetAlignElem get(AlignTypeEnum align_type, unsigned abi_align,
Modified: llvm/branches/R600/include/llvm/Target/TargetLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/include/llvm/Target/TargetLowering.h?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/include/llvm/Target/TargetLowering.h (original)
+++ llvm/branches/R600/include/llvm/Target/TargetLowering.h Tue Oct 2 09:15:33 2012
@@ -712,6 +712,12 @@
return SupportJumpTables;
}
+ /// getMinimumJumpTableEntries - return integer threshold on number of
+ /// blocks to use jump tables rather than if sequence.
+ int getMinimumJumpTableEntries() const {
+ return MinimumJumpTableEntries;
+ }
+
/// getStackPointerRegisterToSaveRestore - If a physical register, this
/// specifies the register that llvm.savestack/llvm.restorestack should save
/// and restore.
@@ -1032,6 +1038,12 @@
SupportJumpTables = Val;
}
+ /// setMinimumJumpTableEntries - Indicate the number of blocks to generate
+ /// jump tables rather than if sequence.
+ void setMinimumJumpTableEntries(int Val) {
+ MinimumJumpTableEntries = Val;
+ }
+
/// setStackPointerRegisterToSaveRestore - If set to a physical register, this
/// specifies the register that llvm.savestack/llvm.restorestack should save
/// and restore.
@@ -1826,6 +1838,9 @@
/// If it's not true, then each jumptable must be lowered into if-then-else's.
bool SupportJumpTables;
+ /// MinimumJumpTableEntries - Number of blocks threshold to use jump tables.
+ int MinimumJumpTableEntries;
+
/// BooleanContents - Information about the contents of the high-bits in
/// boolean values held in a type wider than i1. See getBooleanContents.
BooleanContent BooleanContents;
Modified: llvm/branches/R600/include/llvm/Transforms/Utils/IntegerDivision.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/include/llvm/Transforms/Utils/IntegerDivision.h?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/include/llvm/Transforms/Utils/IntegerDivision.h (original)
+++ llvm/branches/R600/include/llvm/Transforms/Utils/IntegerDivision.h Tue Oct 2 09:15:33 2012
@@ -23,6 +23,16 @@
namespace llvm {
+ /// Generate code to calculate the remainder of two integers, replacing Rem
+ /// with the generated code. This currently generates code using the udiv
+ /// expansion, but future work includes generating more specialized code,
+ /// e.g. when more information about the operands are known. Currently only
+ /// implements 32bit scalar division (due to udiv's limitation), but future
+ /// work is removing this limitation.
+ ///
+ /// @brief Replace Rem with generated code.
+ bool expandRemainder(BinaryOperator *Rem);
+
/// Generate code to divide two integers, replacing Div with the generated
/// code. This currently generates code similarly to compiler-rt's
/// implementations, but future work includes generating more specialized code
Modified: llvm/branches/R600/include/llvm/Transforms/Utils/ValueMapper.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/include/llvm/Transforms/Utils/ValueMapper.h?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/include/llvm/Transforms/Utils/ValueMapper.h (original)
+++ llvm/branches/R600/include/llvm/Transforms/Utils/ValueMapper.h Tue Oct 2 09:15:33 2012
@@ -25,7 +25,7 @@
/// ValueMapTypeRemapper - This is a class that can be implemented by clients
/// to remap types when cloning constants and instructions.
class ValueMapTypeRemapper {
- virtual void Anchor(); // Out of line method.
+ virtual void anchor(); // Out of line method.
public:
virtual ~ValueMapTypeRemapper() {}
Modified: llvm/branches/R600/lib/Analysis/CodeMetrics.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Analysis/CodeMetrics.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Analysis/CodeMetrics.cpp (original)
+++ llvm/branches/R600/lib/Analysis/CodeMetrics.cpp Tue Oct 2 09:15:33 2012
@@ -196,7 +196,7 @@
// as volatile if they are live across a setjmp call, and they probably
// won't do this in callers.
exposesReturnsTwice = F->callsFunctionThatReturnsTwice() &&
- !F->hasFnAttr(Attribute::ReturnsTwice);
+ !F->getFnAttributes().hasReturnsTwiceAttr();
// Look at the size of the callee.
for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
Modified: llvm/branches/R600/lib/Analysis/IPA/CallGraph.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Analysis/IPA/CallGraph.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Analysis/IPA/CallGraph.cpp (original)
+++ llvm/branches/R600/lib/Analysis/IPA/CallGraph.cpp Tue Oct 2 09:15:33 2012
@@ -141,12 +141,13 @@
for (BasicBlock::iterator II = BB->begin(), IE = BB->end();
II != IE; ++II) {
CallSite CS(cast<Value>(II));
- if (CS && !isa<IntrinsicInst>(II)) {
+ if (CS) {
const Function *Callee = CS.getCalledFunction();
- if (Callee)
- Node->addCalledFunction(CS, getOrInsertFunction(Callee));
- else
+ if (!Callee)
+ // Indirect calls of intrinsics are not allowed so no need to check.
Node->addCalledFunction(CS, CallsExternalNode);
+ else if (!Callee->isIntrinsic())
+ Node->addCalledFunction(CS, getOrInsertFunction(Callee));
}
}
}
Modified: llvm/branches/R600/lib/Analysis/InlineCost.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Analysis/InlineCost.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Analysis/InlineCost.cpp (original)
+++ llvm/branches/R600/lib/Analysis/InlineCost.cpp Tue Oct 2 09:15:33 2012
@@ -128,7 +128,7 @@
public:
CallAnalyzer(const TargetData *TD, Function &Callee, int Threshold)
: TD(TD), F(Callee), Threshold(Threshold), Cost(0),
- AlwaysInline(F.hasFnAttr(Attribute::AlwaysInline)),
+ AlwaysInline(F.getFnAttributes().hasAlwaysInlineAttr()),
IsCallerRecursive(false), IsRecursiveCall(false),
ExposesReturnsTwice(false), HasDynamicAlloca(false), AllocatedSize(0),
NumInstructions(0), NumVectorInstructions(0),
@@ -613,7 +613,7 @@
bool CallAnalyzer::visitCallSite(CallSite CS) {
if (CS.isCall() && cast<CallInst>(CS.getInstruction())->canReturnTwice() &&
- !F.hasFnAttr(Attribute::ReturnsTwice)) {
+ !F.getFnAttributes().hasReturnsTwiceAttr()) {
// This aborts the entire analysis.
ExposesReturnsTwice = true;
return false;
@@ -1043,7 +1043,7 @@
// something else. Don't inline functions marked noinline or call sites
// marked noinline.
if (!Callee || Callee->mayBeOverridden() ||
- Callee->hasFnAttr(Attribute::NoInline) || CS.isNoInline())
+ Callee->getFnAttributes().hasNoInlineAttr() || CS.isNoInline())
return llvm::InlineCost::getNever();
DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
Modified: llvm/branches/R600/lib/Analysis/Lint.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Analysis/Lint.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Analysis/Lint.cpp (original)
+++ llvm/branches/R600/lib/Analysis/Lint.cpp Tue Oct 2 09:15:33 2012
@@ -411,27 +411,51 @@
"Undefined behavior: Branch to non-blockaddress", &I);
}
+ // Check for buffer overflows and misalignment.
if (TD) {
- if (Align == 0 && Ty && Ty->isSized())
- Align = TD->getABITypeAlignment(Ty);
-
- if (Align != 0) {
- int64_t Offset = 0;
- if (Value *Base = GetPointerBaseWithConstantOffset(Ptr, Offset, *TD)) {
- unsigned BaseAlign = 0;
- if (AllocaInst *AI = dyn_cast<AllocaInst>(Base)) {
- BaseAlign = AI->getAlignment();
- if (BaseAlign == 0 && AI->getAllocatedType()->isSized())
- BaseAlign = TD->getABITypeAlignment(AI->getAllocatedType());
- } else if (GlobalValue *GV = dyn_cast<GlobalVariable>(Base)) {
+ // Only handles memory references that read/write something simple like an
+ // alloca instruction or a global variable.
+ int64_t Offset = 0;
+ if (Value *Base = GetPointerBaseWithConstantOffset(Ptr, Offset, *TD)) {
+ // OK, so the access is to a constant offset from Ptr. Check that Ptr is
+ // something we can handle and if so extract the size of this base object
+ // along with its alignment.
+ uint64_t BaseSize = AliasAnalysis::UnknownSize;
+ unsigned BaseAlign = 0;
+
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(Base)) {
+ Type *ATy = AI->getAllocatedType();
+ if (!AI->isArrayAllocation() && ATy->isSized())
+ BaseSize = TD->getTypeAllocSize(ATy);
+ BaseAlign = AI->getAlignment();
+ if (BaseAlign == 0 && ATy->isSized())
+ BaseAlign = TD->getABITypeAlignment(ATy);
+ } else if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Base)) {
+ // If the global may be defined differently in another compilation unit
+ // then don't warn about funky memory accesses.
+ if (GV->hasDefinitiveInitializer()) {
+ Type *GTy = GV->getType()->getElementType();
+ if (GTy->isSized())
+ BaseSize = TD->getTypeAllocSize(GTy);
BaseAlign = GV->getAlignment();
- if (BaseAlign == 0 && GV->getType()->getElementType()->isSized())
- BaseAlign = TD->getABITypeAlignment(GV->getType()->getElementType());
+ if (BaseAlign == 0 && GTy->isSized())
+ BaseAlign = TD->getABITypeAlignment(GTy);
}
- Assert1((!BaseAlign || Align <= MinAlign(BaseAlign, Offset)),
- "Undefined behavior: Memory reference address is misaligned",
- &I);
}
+
+ // Accesses from before the start or after the end of the object are not
+ // defined.
+ Assert1(Size == AliasAnalysis::UnknownSize ||
+ BaseSize == AliasAnalysis::UnknownSize ||
+ (Offset >= 0 && Offset + Size <= BaseSize),
+ "Undefined behavior: Buffer overflow", &I);
+
+ // Accesses that say that the memory is more aligned than it is are not
+ // defined.
+ if (Align == 0 && Ty && Ty->isSized())
+ Align = TD->getABITypeAlignment(Ty);
+ Assert1(!BaseAlign || Align <= MinAlign(BaseAlign, Offset),
+ "Undefined behavior: Memory reference address is misaligned", &I);
}
}
}
Modified: llvm/branches/R600/lib/Analysis/MemoryDependenceAnalysis.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Analysis/MemoryDependenceAnalysis.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Analysis/MemoryDependenceAnalysis.cpp (original)
+++ llvm/branches/R600/lib/Analysis/MemoryDependenceAnalysis.cpp Tue Oct 2 09:15:33 2012
@@ -327,7 +327,7 @@
return 0;
if (LIOffs+NewLoadByteSize > MemLocEnd &&
- LI->getParent()->getParent()->hasFnAttr(Attribute::AddressSafety)) {
+ LI->getParent()->getParent()->getFnAttributes().hasAddressSafetyAttr()){
// We will be reading past the location accessed by the original program.
// While this is safe in a regular build, Address Safety analysis tools
// may start reporting false warnings. So, don't do widening.
Modified: llvm/branches/R600/lib/AsmParser/LLParser.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/AsmParser/LLParser.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/AsmParser/LLParser.cpp (original)
+++ llvm/branches/R600/lib/AsmParser/LLParser.cpp Tue Oct 2 09:15:33 2012
@@ -919,23 +919,13 @@
bool LLParser::ParseOptionalAttrs(Attributes &Attrs, unsigned AttrKind) {
Attrs = Attribute::None;
LocTy AttrLoc = Lex.getLoc();
+ bool HaveError = false;
while (1) {
- switch (Lex.getKind()) {
+ lltok::Kind Token = Lex.getKind();
+ switch (Token) {
default: // End of attributes.
- if (AttrKind != 2 && (Attrs & Attribute::FunctionOnly))
- return Error(AttrLoc, "invalid use of function-only attribute");
-
- // As a hack, we allow "align 2" on functions as a synonym for
- // "alignstack 2".
- if (AttrKind == 2 &&
- (Attrs & ~(Attribute::FunctionOnly | Attribute::Alignment)))
- return Error(AttrLoc, "invalid use of attribute on a function");
-
- if (AttrKind != 0 && (Attrs & Attribute::ParameterOnly))
- return Error(AttrLoc, "invalid use of parameter-only attribute");
-
- return false;
+ return HaveError;
case lltok::kw_zeroext: Attrs |= Attribute::ZExt; break;
case lltok::kw_signext: Attrs |= Attribute::SExt; break;
case lltok::kw_inreg: Attrs |= Attribute::InReg; break;
@@ -980,6 +970,51 @@
}
}
+
+ // Perform some error checking.
+ switch (Token) {
+ default:
+ if (AttrKind == 2)
+ HaveError |= Error(AttrLoc, "invalid use of attribute on a function");
+ break;
+ case lltok::kw_align:
+ // As a hack, we allow "align 2" on functions as a synonym for
+ // "alignstack 2".
+ break;
+
+ // Parameter Only:
+ case lltok::kw_sret:
+ case lltok::kw_nocapture:
+ case lltok::kw_byval:
+ case lltok::kw_nest:
+ if (AttrKind != 0)
+ HaveError |= Error(AttrLoc, "invalid use of parameter-only attribute");
+ break;
+
+ // Function Only:
+ case lltok::kw_noreturn:
+ case lltok::kw_nounwind:
+ case lltok::kw_readnone:
+ case lltok::kw_readonly:
+ case lltok::kw_noinline:
+ case lltok::kw_alwaysinline:
+ case lltok::kw_optsize:
+ case lltok::kw_ssp:
+ case lltok::kw_sspreq:
+ case lltok::kw_noredzone:
+ case lltok::kw_noimplicitfloat:
+ case lltok::kw_naked:
+ case lltok::kw_inlinehint:
+ case lltok::kw_alignstack:
+ case lltok::kw_uwtable:
+ case lltok::kw_nonlazybind:
+ case lltok::kw_returns_twice:
+ case lltok::kw_address_safety:
+ if (AttrKind != 2)
+ HaveError |= Error(AttrLoc, "invalid use of function-only attribute");
+ break;
+ }
+
Lex.Lex();
}
}
Modified: llvm/branches/R600/lib/Bitcode/Reader/BitcodeReader.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Bitcode/Reader/BitcodeReader.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Bitcode/Reader/BitcodeReader.cpp (original)
+++ llvm/branches/R600/lib/Bitcode/Reader/BitcodeReader.cpp Tue Oct 2 09:15:33 2012
@@ -477,7 +477,7 @@
for (unsigned i = 0, e = Record.size(); i != e; i += 2) {
Attributes ReconstitutedAttr =
- Attribute::decodeLLVMAttributesForBitcode(Record[i+1]);
+ Attributes::decodeLLVMAttributesForBitcode(Record[i+1]);
Record[i+1] = ReconstitutedAttr.Raw();
}
Modified: llvm/branches/R600/lib/Bitcode/Writer/BitcodeWriter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Bitcode/Writer/BitcodeWriter.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Bitcode/Writer/BitcodeWriter.cpp (original)
+++ llvm/branches/R600/lib/Bitcode/Writer/BitcodeWriter.cpp Tue Oct 2 09:15:33 2012
@@ -177,7 +177,7 @@
for (unsigned i = 0, e = A.getNumSlots(); i != e; ++i) {
const AttributeWithIndex &PAWI = A.getSlot(i);
Record.push_back(PAWI.Index);
- Record.push_back(Attribute::encodeLLVMAttributesForBitcode(PAWI.Attrs));
+ Record.push_back(Attributes::encodeLLVMAttributesForBitcode(PAWI.Attrs));
}
Stream.EmitRecord(bitc::PARAMATTR_CODE_ENTRY, Record);
Modified: llvm/branches/R600/lib/CodeGen/BranchFolding.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/CodeGen/BranchFolding.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/CodeGen/BranchFolding.cpp (original)
+++ llvm/branches/R600/lib/CodeGen/BranchFolding.cpp Tue Oct 2 09:15:33 2012
@@ -574,7 +574,7 @@
// instructions that would be deleted in the merge.
MachineFunction *MF = MBB1->getParent();
if (EffectiveTailLen >= 2 &&
- MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize) &&
+ MF->getFunction()->getFnAttributes().hasOptimizeForSizeAttr() &&
(I1 == MBB1->begin() || I2 == MBB2->begin()))
return true;
Modified: llvm/branches/R600/lib/CodeGen/CodePlacementOpt.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/CodeGen/CodePlacementOpt.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/CodeGen/CodePlacementOpt.cpp (original)
+++ llvm/branches/R600/lib/CodeGen/CodePlacementOpt.cpp Tue Oct 2 09:15:33 2012
@@ -373,7 +373,7 @@
///
bool CodePlacementOpt::AlignLoops(MachineFunction &MF) {
const Function *F = MF.getFunction();
- if (F->hasFnAttr(Attribute::OptimizeForSize))
+ if (F->getFnAttributes().hasOptimizeForSizeAttr())
return false;
unsigned Align = TLI->getPrefLoopAlignment();
Modified: llvm/branches/R600/lib/CodeGen/LiveInterval.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/CodeGen/LiveInterval.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/CodeGen/LiveInterval.cpp (original)
+++ llvm/branches/R600/lib/CodeGen/LiveInterval.cpp Tue Oct 2 09:15:33 2012
@@ -427,7 +427,7 @@
// If we have to apply a mapping to our base interval assignment, rewrite it
// now.
- if (MustMapCurValNos) {
+ if (MustMapCurValNos && !empty()) {
// Map the first live range.
iterator OutIt = begin();
Modified: llvm/branches/R600/lib/CodeGen/LiveRangeEdit.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/CodeGen/LiveRangeEdit.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/CodeGen/LiveRangeEdit.cpp (original)
+++ llvm/branches/R600/lib/CodeGen/LiveRangeEdit.cpp Tue Oct 2 09:15:33 2012
@@ -87,7 +87,7 @@
// We can't remat physreg uses, unless it is a constant.
if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
- if (MRI.isConstantPhysReg(MO.getReg(), VRM->getMachineFunction()))
+ if (MRI.isConstantPhysReg(MO.getReg(), *OrigMI->getParent()->getParent()))
continue;
return false;
}
Modified: llvm/branches/R600/lib/CodeGen/MachineBlockPlacement.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/CodeGen/MachineBlockPlacement.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/CodeGen/MachineBlockPlacement.cpp (original)
+++ llvm/branches/R600/lib/CodeGen/MachineBlockPlacement.cpp Tue Oct 2 09:15:33 2012
@@ -1013,7 +1013,7 @@
// exclusively on the loop info here so that we can align backedges in
// unnatural CFGs and backedges that were introduced purely because of the
// loop rotations done during this layout pass.
- if (F.getFunction()->hasFnAttr(Attribute::OptimizeForSize))
+ if (F.getFunction()->getFnAttributes().hasOptimizeForSizeAttr())
return;
unsigned Align = TLI->getPrefLoopAlignment();
if (!Align)
Modified: llvm/branches/R600/lib/CodeGen/MachineFunction.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/CodeGen/MachineFunction.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/CodeGen/MachineFunction.cpp (original)
+++ llvm/branches/R600/lib/CodeGen/MachineFunction.cpp Tue Oct 2 09:15:33 2012
@@ -59,13 +59,13 @@
RegInfo = 0;
MFInfo = 0;
FrameInfo = new (Allocator) MachineFrameInfo(*TM.getFrameLowering());
- if (Fn->hasFnAttr(Attribute::StackAlignment))
+ if (Fn->getFnAttributes().hasStackAlignmentAttr())
FrameInfo->ensureMaxAlignment(Fn->getAttributes().
getFnAttributes().getStackAlignment());
ConstantPool = new (Allocator) MachineConstantPool(TM.getTargetData());
Alignment = TM.getTargetLowering()->getMinFunctionAlignment();
// FIXME: Shouldn't use pref alignment if explicit alignment is set on Fn.
- if (!Fn->hasFnAttr(Attribute::OptimizeForSize))
+ if (!Fn->getFnAttributes().hasOptimizeForSizeAttr())
Alignment = std::max(Alignment,
TM.getTargetLowering()->getPrefFunctionAlignment());
FunctionNumber = FunctionNum;
Modified: llvm/branches/R600/lib/CodeGen/MachineModuleInfoImpls.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/CodeGen/MachineModuleInfoImpls.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/CodeGen/MachineModuleInfoImpls.cpp (original)
+++ llvm/branches/R600/lib/CodeGen/MachineModuleInfoImpls.cpp Tue Oct 2 09:15:33 2012
@@ -21,8 +21,8 @@
//===----------------------------------------------------------------------===//
// Out of line virtual method.
-void MachineModuleInfoMachO::Anchor() {}
-void MachineModuleInfoELF::Anchor() {}
+void MachineModuleInfoMachO::anchor() {}
+void MachineModuleInfoELF::anchor() {}
static int SortSymbolPair(const void *LHS, const void *RHS) {
typedef std::pair<MCSymbol*, MachineModuleInfoImpl::StubValueTy> PairTy;
Modified: llvm/branches/R600/lib/CodeGen/PrologEpilogInserter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/CodeGen/PrologEpilogInserter.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/CodeGen/PrologEpilogInserter.cpp (original)
+++ llvm/branches/R600/lib/CodeGen/PrologEpilogInserter.cpp Tue Oct 2 09:15:33 2012
@@ -96,7 +96,7 @@
placeCSRSpillsAndRestores(Fn);
// Add the code to save and restore the callee saved registers
- if (!F->hasFnAttr(Attribute::Naked))
+ if (!F->getFnAttributes().hasNakedAttr())
insertCSRSpillsAndRestores(Fn);
// Allow the target machine to make final modifications to the function
@@ -111,7 +111,7 @@
// called functions. Because of this, calculateCalleeSavedRegisters()
// must be called before this function in order to set the AdjustsStack
// and MaxCallFrameSize variables.
- if (!F->hasFnAttr(Attribute::Naked))
+ if (!F->getFnAttributes().hasNakedAttr())
insertPrologEpilogCode(Fn);
// Replace all MO_FrameIndex operands with physical register references
@@ -221,7 +221,7 @@
return;
// In Naked functions we aren't going to save any registers.
- if (Fn.getFunction()->hasFnAttr(Attribute::Naked))
+ if (Fn.getFunction()->getFnAttributes().hasNakedAttr())
return;
std::vector<CalleeSavedInfo> CSI;
Modified: llvm/branches/R600/lib/CodeGen/RegisterCoalescer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/CodeGen/RegisterCoalescer.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/CodeGen/RegisterCoalescer.cpp (original)
+++ llvm/branches/R600/lib/CodeGen/RegisterCoalescer.cpp Tue Oct 2 09:15:33 2012
@@ -70,7 +70,7 @@
// Temporary option for testing new coalescer algo.
static cl::opt<bool>
-NewCoalescer("new-coalescer", cl::Hidden,
+NewCoalescer("new-coalescer", cl::Hidden, cl::init(true),
cl::desc("Use new coalescer algorithm"));
namespace {
@@ -1732,6 +1732,12 @@
case CR_Replace:
// This value takes precedence over the value in Other.LI.
LIS->pruneValue(&Other.LI, Def, &EndPoints);
+ // Remove <def,read-undef> flags. This def is now a partial redef.
+ if (!Def.isBlock())
+ for (MIOperands MO(Indexes->getInstructionFromIndex(Def));
+ MO.isValid(); ++MO)
+ if (MO->isReg() && MO->isDef() && MO->getReg() == LI.reg)
+ MO->setIsUndef(false);
DEBUG(dbgs() << "\t\tpruned " << PrintReg(Other.LI.reg) << " at " << Def
<< ": " << Other.LI << '\n');
break;
Modified: llvm/branches/R600/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/branches/R600/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Tue Oct 2 09:15:33 2012
@@ -2999,7 +2999,7 @@
SDValue ShAmt = DAG.getConstant(16, getShiftAmountTy(VT));
if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
return DAG.getNode(ISD::ROTL, N->getDebugLoc(), VT, BSwap, ShAmt);
- else if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
+ if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
return DAG.getNode(ISD::ROTR, N->getDebugLoc(), VT, BSwap, ShAmt);
return DAG.getNode(ISD::OR, N->getDebugLoc(), VT,
DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, BSwap, ShAmt),
@@ -3217,11 +3217,8 @@
if ((LShVal + RShVal) != OpSizeInBits)
return 0;
- SDValue Rot;
- if (HasROTL)
- Rot = DAG.getNode(ISD::ROTL, DL, VT, LHSShiftArg, LHSShiftAmt);
- else
- Rot = DAG.getNode(ISD::ROTR, DL, VT, LHSShiftArg, RHSShiftAmt);
+ SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
+ LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);
// If there is an AND of either shifted operand, apply it to the result.
if (LHSMask.getNode() || RHSMask.getNode()) {
@@ -3254,12 +3251,8 @@
if (ConstantSDNode *SUBC =
dyn_cast<ConstantSDNode>(RHSShiftAmt.getOperand(0))) {
if (SUBC->getAPIntValue() == OpSizeInBits) {
- if (HasROTL)
- return DAG.getNode(ISD::ROTL, DL, VT,
- LHSShiftArg, LHSShiftAmt).getNode();
- else
- return DAG.getNode(ISD::ROTR, DL, VT,
- LHSShiftArg, RHSShiftAmt).getNode();
+ return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
+ HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode();
}
}
}
@@ -3271,25 +3264,21 @@
if (ConstantSDNode *SUBC =
dyn_cast<ConstantSDNode>(LHSShiftAmt.getOperand(0))) {
if (SUBC->getAPIntValue() == OpSizeInBits) {
- if (HasROTR)
- return DAG.getNode(ISD::ROTR, DL, VT,
- LHSShiftArg, RHSShiftAmt).getNode();
- else
- return DAG.getNode(ISD::ROTL, DL, VT,
- LHSShiftArg, LHSShiftAmt).getNode();
+ return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT, LHSShiftArg,
+ HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode();
}
}
}
// Look for sign/zext/any-extended or truncate cases:
- if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND
- || LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND
- || LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND
- || LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
- (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND
- || RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND
- || RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND
- || RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
+ if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
+ LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
+ LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
+ LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
+ (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
+ RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
+ RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
+ RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
SDValue LExtOp0 = LHSShiftAmt.getOperand(0);
SDValue RExtOp0 = RHSShiftAmt.getOperand(0);
if (RExtOp0.getOpcode() == ISD::SUB &&
@@ -4428,20 +4417,18 @@
// If the desired elements are smaller or larger than the source
// elements we can use a matching integer vector type and then
// truncate/sign extend
- else {
- EVT MatchingElementType =
- EVT::getIntegerVT(*DAG.getContext(),
- N0VT.getScalarType().getSizeInBits());
- EVT MatchingVectorType =
- EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
- N0VT.getVectorNumElements());
+ EVT MatchingElementType =
+ EVT::getIntegerVT(*DAG.getContext(),
+ N0VT.getScalarType().getSizeInBits());
+ EVT MatchingVectorType =
+ EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
+ N0VT.getVectorNumElements());
- if (SVT == MatchingVectorType) {
- SDValue VsetCC = DAG.getSetCC(N->getDebugLoc(), MatchingVectorType,
- N0.getOperand(0), N0.getOperand(1),
- cast<CondCodeSDNode>(N0.getOperand(2))->get());
- return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT);
- }
+ if (SVT == MatchingVectorType) {
+ SDValue VsetCC = DAG.getSetCC(N->getDebugLoc(), MatchingVectorType,
+ N0.getOperand(0), N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT);
}
}
@@ -5251,13 +5238,12 @@
// if the source is smaller than the dest, we still need an extend
return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT,
N0.getOperand(0));
- else if (N0.getOperand(0).getValueType().bitsGT(VT))
+ if (N0.getOperand(0).getValueType().bitsGT(VT))
// if the source is larger than the dest, than we just need the truncate
return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0.getOperand(0));
- else
- // if the source and dest are the same type, we can drop both the extend
- // and the truncate.
- return N0.getOperand(0);
+ // if the source and dest are the same type, we can drop both the extend
+ // and the truncate.
+ return N0.getOperand(0);
}
// Fold extract-and-trunc into a narrow extract. For example:
Modified: llvm/branches/R600/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/CodeGen/SelectionDAG/SelectionDAG.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (original)
+++ llvm/branches/R600/lib/CodeGen/SelectionDAG/SelectionDAG.cpp Tue Oct 2 09:15:33 2012
@@ -136,13 +136,11 @@
// constants are.
SDValue NotZero = N->getOperand(i);
unsigned EltSize = N->getValueType(0).getVectorElementType().getSizeInBits();
- if (isa<ConstantSDNode>(NotZero)) {
- if (cast<ConstantSDNode>(NotZero)->getAPIntValue().countTrailingOnes() <
- EltSize)
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(NotZero)) {
+ if (CN->getAPIntValue().countTrailingOnes() < EltSize)
return false;
- } else if (isa<ConstantFPSDNode>(NotZero)) {
- if (cast<ConstantFPSDNode>(NotZero)->getValueAPF()
- .bitcastToAPInt().countTrailingOnes() < EltSize)
+ } else if (ConstantFPSDNode *CFPN = dyn_cast<ConstantFPSDNode>(NotZero)) {
+ if (CFPN->getValueAPF().bitcastToAPInt().countTrailingOnes() < EltSize)
return false;
} else
return false;
@@ -179,11 +177,11 @@
// Do not accept build_vectors that aren't all constants or which have non-0
// elements.
SDValue Zero = N->getOperand(i);
- if (isa<ConstantSDNode>(Zero)) {
- if (!cast<ConstantSDNode>(Zero)->isNullValue())
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Zero)) {
+ if (!CN->isNullValue())
return false;
- } else if (isa<ConstantFPSDNode>(Zero)) {
- if (!cast<ConstantFPSDNode>(Zero)->getValueAPF().isPosZero())
+ } else if (ConstantFPSDNode *CFPN = dyn_cast<ConstantFPSDNode>(Zero)) {
+ if (!CFPN->getValueAPF().isPosZero())
return false;
} else
return false;
@@ -3521,7 +3519,7 @@
bool DstAlignCanChange = false;
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
- bool OptSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);
+ bool OptSize = MF.getFunction()->getFnAttributes().hasOptimizeForSizeAttr();
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
@@ -3614,7 +3612,7 @@
bool DstAlignCanChange = false;
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
- bool OptSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);
+ bool OptSize = MF.getFunction()->getFnAttributes().hasOptimizeForSizeAttr();
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
@@ -3692,7 +3690,7 @@
bool DstAlignCanChange = false;
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
- bool OptSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);
+ bool OptSize = MF.getFunction()->getFnAttributes().hasOptimizeForSizeAttr();
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
Modified: llvm/branches/R600/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (original)
+++ llvm/branches/R600/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp Tue Oct 2 09:15:33 2012
@@ -89,7 +89,7 @@
static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
const SDValue *Parts, unsigned NumParts,
- EVT PartVT, EVT ValueVT);
+ EVT PartVT, EVT ValueVT, const Value *V);
/// getCopyFromParts - Create a value that contains the specified legal parts
/// combined into the value they represent. If the parts combine to a type
@@ -99,9 +99,11 @@
static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL,
const SDValue *Parts,
unsigned NumParts, EVT PartVT, EVT ValueVT,
+ const Value *V,
ISD::NodeType AssertOp = ISD::DELETED_NODE) {
if (ValueVT.isVector())
- return getCopyFromPartsVector(DAG, DL, Parts, NumParts, PartVT, ValueVT);
+ return getCopyFromPartsVector(DAG, DL, Parts, NumParts,
+ PartVT, ValueVT, V);
assert(NumParts > 0 && "No parts to assemble!");
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -125,9 +127,9 @@
if (RoundParts > 2) {
Lo = getCopyFromParts(DAG, DL, Parts, RoundParts / 2,
- PartVT, HalfVT);
+ PartVT, HalfVT, V);
Hi = getCopyFromParts(DAG, DL, Parts + RoundParts / 2,
- RoundParts / 2, PartVT, HalfVT);
+ RoundParts / 2, PartVT, HalfVT, V);
} else {
Lo = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[0]);
Hi = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[1]);
@@ -143,7 +145,7 @@
unsigned OddParts = NumParts - RoundParts;
EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits);
Hi = getCopyFromParts(DAG, DL,
- Parts + RoundParts, OddParts, PartVT, OddVT);
+ Parts + RoundParts, OddParts, PartVT, OddVT, V);
// Combine the round and odd parts.
Lo = Val;
@@ -172,7 +174,7 @@
assert(ValueVT.isFloatingPoint() && PartVT.isInteger() &&
!PartVT.isVector() && "Unexpected split");
EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
- Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT);
+ Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT, V);
}
}
@@ -210,14 +212,14 @@
llvm_unreachable("Unknown mismatch!");
}
-/// getCopyFromParts - Create a value that contains the specified legal parts
-/// combined into the value they represent. If the parts combine to a type
-/// larger then ValueVT then AssertOp can be used to specify whether the extra
-/// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT
-/// (ISD::AssertSext).
+/// getCopyFromPartsVector - Create a value that contains the specified legal
+/// parts combined into the value they represent. If the parts combine to a
+/// type larger then ValueVT then AssertOp can be used to specify whether the
+/// extra bits are known to be zero (ISD::AssertZext) or sign extended from
+/// ValueVT (ISD::AssertSext).
static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
const SDValue *Parts, unsigned NumParts,
- EVT PartVT, EVT ValueVT) {
+ EVT PartVT, EVT ValueVT, const Value *V) {
assert(ValueVT.isVector() && "Not a vector value");
assert(NumParts > 0 && "No parts to assemble!");
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -243,7 +245,7 @@
// as appropriate.
for (unsigned i = 0; i != NumParts; ++i)
Ops[i] = getCopyFromParts(DAG, DL, &Parts[i], 1,
- PartVT, IntermediateVT);
+ PartVT, IntermediateVT, V);
} else if (NumParts > 0) {
// If the intermediate type was expanded, build the intermediate
// operands from the parts.
@@ -252,7 +254,7 @@
unsigned Factor = NumParts / NumIntermediates;
for (unsigned i = 0; i != NumIntermediates; ++i)
Ops[i] = getCopyFromParts(DAG, DL, &Parts[i * Factor], Factor,
- PartVT, IntermediateVT);
+ PartVT, IntermediateVT, V);
}
// Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the
@@ -300,8 +302,19 @@
return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
// Handle cases such as i8 -> <1 x i1>
- assert(ValueVT.getVectorNumElements() == 1 &&
- "Only trivial scalar-to-vector conversions should get here!");
+ if (ValueVT.getVectorNumElements() != 1) {
+ LLVMContext &Ctx = *DAG.getContext();
+ Twine ErrMsg("non-trivial scalar-to-vector conversion");
+ if (const Instruction *I = dyn_cast_or_null<Instruction>(V)) {
+ if (const CallInst *CI = dyn_cast<CallInst>(I))
+ if (isa<InlineAsm>(CI->getCalledValue()))
+ ErrMsg = ErrMsg + ", possible invalid constraint for vector type";
+ Ctx.emitError(I, ErrMsg);
+ } else {
+ Ctx.emitError(ErrMsg);
+ }
+ report_fatal_error("Cannot handle scalar-to-vector conversion!");
+ }
if (ValueVT.getVectorNumElements() == 1 &&
ValueVT.getVectorElementType() != PartVT) {
@@ -313,25 +326,22 @@
return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT, Val);
}
-
-
-
static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc dl,
SDValue Val, SDValue *Parts, unsigned NumParts,
- EVT PartVT);
+ EVT PartVT, const Value *V);
/// getCopyToParts - Create a series of nodes that contain the specified value
/// split into legal parts. If the parts contain more bits than Val, then, for
/// integers, ExtendKind can be used to specify how to generate the extra bits.
static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL,
SDValue Val, SDValue *Parts, unsigned NumParts,
- EVT PartVT,
+ EVT PartVT, const Value *V,
ISD::NodeType ExtendKind = ISD::ANY_EXTEND) {
EVT ValueVT = Val.getValueType();
// Handle the vector case separately.
if (ValueVT.isVector())
- return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT);
+ return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
unsigned PartBits = PartVT.getSizeInBits();
@@ -383,7 +393,19 @@
"Failed to tile the value with PartVT!");
if (NumParts == 1) {
- assert(PartVT == ValueVT && "Type conversion failed!");
+ if (PartVT != ValueVT) {
+ LLVMContext &Ctx = *DAG.getContext();
+ Twine ErrMsg("scalar-to-vector conversion failed");
+ if (const Instruction *I = dyn_cast_or_null<Instruction>(V)) {
+ if (const CallInst *CI = dyn_cast<CallInst>(I))
+ if (isa<InlineAsm>(CI->getCalledValue()))
+ ErrMsg = ErrMsg + ", possible invalid constraint for vector type";
+ Ctx.emitError(I, ErrMsg);
+ } else {
+ Ctx.emitError(ErrMsg);
+ }
+ }
+
Parts[0] = Val;
return;
}
@@ -398,7 +420,7 @@
unsigned OddParts = NumParts - RoundParts;
SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val,
DAG.getIntPtrConstant(RoundBits));
- getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT);
+ getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT, V);
if (TLI.isBigEndian())
// The odd parts were reversed by getCopyToParts - unreverse them.
@@ -444,7 +466,7 @@
/// value split into legal parts.
static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,
SDValue Val, SDValue *Parts, unsigned NumParts,
- EVT PartVT) {
+ EVT PartVT, const Value *V) {
EVT ValueVT = Val.getValueType();
assert(ValueVT.isVector() && "Not a vector");
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -530,7 +552,7 @@
// If the register was not expanded, promote or copy the value,
// as appropriate.
for (unsigned i = 0; i != NumParts; ++i)
- getCopyToParts(DAG, DL, Ops[i], &Parts[i], 1, PartVT);
+ getCopyToParts(DAG, DL, Ops[i], &Parts[i], 1, PartVT, V);
} else if (NumParts > 0) {
// If the intermediate type was expanded, split each the value into
// legal parts.
@@ -538,13 +560,10 @@
"Must expand into a divisible number of parts!");
unsigned Factor = NumParts / NumIntermediates;
for (unsigned i = 0; i != NumIntermediates; ++i)
- getCopyToParts(DAG, DL, Ops[i], &Parts[i*Factor], Factor, PartVT);
+ getCopyToParts(DAG, DL, Ops[i], &Parts[i*Factor], Factor, PartVT, V);
}
}
-
-
-
namespace {
/// RegsForValue - This struct represents the registers (physical or virtual)
/// that a particular set of values is assigned, and the type information
@@ -622,14 +641,15 @@
/// If the Flag pointer is NULL, no flag is used.
SDValue getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo,
DebugLoc dl,
- SDValue &Chain, SDValue *Flag) const;
+ SDValue &Chain, SDValue *Flag,
+ const Value *V = 0) const;
/// getCopyToRegs - Emit a series of CopyToReg nodes that copies the
/// specified value into the registers specified by this object. This uses
/// Chain/Flag as the input and updates them for the output Chain/Flag.
/// If the Flag pointer is NULL, no flag is used.
void getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
- SDValue &Chain, SDValue *Flag) const;
+ SDValue &Chain, SDValue *Flag, const Value *V) const;
/// AddInlineAsmOperands - Add this value to the specified inlineasm node
/// operand list. This adds the code marker, matching input operand index
@@ -648,7 +668,8 @@
SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
FunctionLoweringInfo &FuncInfo,
DebugLoc dl,
- SDValue &Chain, SDValue *Flag) const {
+ SDValue &Chain, SDValue *Flag,
+ const Value *V) const {
// A Value with type {} or [0 x %t] needs no registers.
if (ValueVTs.empty())
return SDValue();
@@ -722,7 +743,7 @@
}
Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(),
- NumRegs, RegisterVT, ValueVT);
+ NumRegs, RegisterVT, ValueVT, V);
Part += NumRegs;
Parts.clear();
}
@@ -737,7 +758,8 @@
/// Chain/Flag as the input and updates them for the output Chain/Flag.
/// If the Flag pointer is NULL, no flag is used.
void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
- SDValue &Chain, SDValue *Flag) const {
+ SDValue &Chain, SDValue *Flag,
+ const Value *V) const {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// Get the list of the values's legal parts.
@@ -749,7 +771,7 @@
EVT RegisterVT = RegVTs[Value];
getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value),
- &Parts[Part], NumParts, RegisterVT);
+ &Parts[Part], NumParts, RegisterVT, V);
Part += NumParts;
}
@@ -994,7 +1016,7 @@
unsigned InReg = It->second;
RegsForValue RFV(*DAG.getContext(), TLI, InReg, V->getType());
SDValue Chain = DAG.getEntryNode();
- N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL);
+ N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL, V);
resolveDanglingDebugInfo(V, N);
return N;
}
@@ -1149,7 +1171,7 @@
unsigned InReg = FuncInfo.InitializeRegForValue(Inst);
RegsForValue RFV(*DAG.getContext(), TLI, InReg, Inst->getType());
SDValue Chain = DAG.getEntryNode();
- return RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL);
+ return RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL, V);
}
llvm_unreachable("Can't get register for value!");
@@ -1218,7 +1240,7 @@
SmallVector<SDValue, 4> Parts(NumParts);
getCopyToParts(DAG, getCurDebugLoc(),
SDValue(RetOp.getNode(), RetOp.getResNo() + j),
- &Parts[0], NumParts, PartVT, ExtendKind);
+ &Parts[0], NumParts, PartVT, &I, ExtendKind);
// 'inreg' on function refers to return value
ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
@@ -2093,7 +2115,7 @@
for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I)
TSize += I->size();
- if (!areJTsAllowed(TLI) || TSize.ult(4))
+ if (!areJTsAllowed(TLI) || TSize.ult(TLI.getMinimumJumpTableEntries()))
return false;
APInt Range = ComputeRange(First, Last);
@@ -2565,9 +2587,10 @@
if (handleSmallSwitchRange(CR, WorkList, SV, Default, SwitchMBB))
continue;
- // If the switch has more than 5 blocks, and at least 40% dense, and the
+ // If the switch has more than N blocks, and is at least 40% dense, and the
// target supports indirect branches, then emit a jump table rather than
// lowering the switch to a binary tree of conditional branches.
+ // N defaults to 4 and is controlled via TLS.getMinimumJumpTableEntries().
if (handleJTSwitchCase(CR, WorkList, SV, Default, SwitchMBB))
continue;
@@ -4377,7 +4400,7 @@
return DAG.getConstantFP(1.0, LHS.getValueType());
const Function *F = DAG.getMachineFunction().getFunction();
- if (!F->hasFnAttr(Attribute::OptimizeForSize) ||
+ if (!F->getFnAttributes().hasOptimizeForSizeAttr() ||
// If optimizing for size, don't insert too many multiplies. This
// inserts up to 5 multiplies.
CountPopulation_32(Val)+Log2_32(Val) < 7) {
@@ -6222,7 +6245,7 @@
// Use the produced MatchedRegs object to
MatchedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(),
- Chain, &Flag);
+ Chain, &Flag, CS.getInstruction());
MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse,
true, OpInfo.getMatchedOperand(),
DAG, AsmNodeOperands);
@@ -6304,7 +6327,7 @@
}
OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(),
- Chain, &Flag);
+ Chain, &Flag, CS.getInstruction());
OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, false, 0,
DAG, AsmNodeOperands);
@@ -6335,7 +6358,7 @@
// and set it as the value of the call.
if (!RetValRegs.Regs.empty()) {
SDValue Val = RetValRegs.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(),
- Chain, &Flag);
+ Chain, &Flag, CS.getInstruction());
// FIXME: Why don't we do this for inline asms with MRVs?
if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) {
@@ -6375,7 +6398,7 @@
RegsForValue &OutRegs = IndirectStoresToEmit[i].first;
const Value *Ptr = IndirectStoresToEmit[i].second;
SDValue OutVal = OutRegs.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(),
- Chain, &Flag);
+ Chain, &Flag, IA);
StoresToEmit.push_back(std::make_pair(OutVal, Ptr));
}
@@ -6490,7 +6513,7 @@
ExtendKind = ISD::ZERO_EXTEND;
getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts,
- PartVT, ExtendKind);
+ PartVT, CLI.CS ? CLI.CS->getInstruction() : 0, ExtendKind);
for (unsigned j = 0; j != NumParts; ++j) {
// if it isn't first piece, alignment must be 1
@@ -6571,7 +6594,7 @@
unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT);
ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg],
- NumRegs, RegisterVT, VT,
+ NumRegs, RegisterVT, VT, NULL,
AssertOp));
CurReg += NumRegs;
}
@@ -6610,7 +6633,7 @@
RegsForValue RFV(V->getContext(), TLI, Reg, V->getType());
SDValue Chain = DAG.getEntryNode();
- RFV.getCopyToRegs(Op, DAG, getCurDebugLoc(), Chain, 0);
+ RFV.getCopyToRegs(Op, DAG, getCurDebugLoc(), Chain, 0, V);
PendingExports.push_back(Chain);
}
@@ -6752,7 +6775,7 @@
EVT RegVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
ISD::NodeType AssertOp = ISD::DELETED_NODE;
SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1,
- RegVT, VT, AssertOp);
+ RegVT, VT, NULL, AssertOp);
MachineFunction& MF = SDB->DAG.getMachineFunction();
MachineRegisterInfo& RegInfo = MF.getRegInfo();
@@ -6793,7 +6816,7 @@
ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i],
NumParts, PartVT, VT,
- AssertOp));
+ NULL, AssertOp));
}
i += NumParts;
Modified: llvm/branches/R600/lib/CodeGen/SelectionDAG/TargetLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/CodeGen/SelectionDAG/TargetLowering.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/CodeGen/SelectionDAG/TargetLowering.cpp (original)
+++ llvm/branches/R600/lib/CodeGen/SelectionDAG/TargetLowering.cpp Tue Oct 2 09:15:33 2012
@@ -613,6 +613,7 @@
ShouldFoldAtomicFences = false;
InsertFencesForAtomic = false;
SupportJumpTables = true;
+ MinimumJumpTableEntries = 4;
InitLibcallNames(LibcallRoutineNames);
InitCmpLibcallCCs(CmpLibcallCCs);
Modified: llvm/branches/R600/lib/CodeGen/StackProtector.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/CodeGen/StackProtector.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/CodeGen/StackProtector.cpp (original)
+++ llvm/branches/R600/lib/CodeGen/StackProtector.cpp Tue Oct 2 09:15:33 2012
@@ -137,10 +137,10 @@
/// add a guard variable to functions that call alloca, and functions with
/// buffers larger than SSPBufferSize bytes.
bool StackProtector::RequiresStackProtector() const {
- if (F->hasFnAttr(Attribute::StackProtectReq))
+ if (F->getFnAttributes().hasStackProtectReqAttr())
return true;
- if (!F->hasFnAttr(Attribute::StackProtect))
+ if (!F->getFnAttributes().hasStackProtectAttr())
return false;
for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) {
Modified: llvm/branches/R600/lib/CodeGen/TailDuplication.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/CodeGen/TailDuplication.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/CodeGen/TailDuplication.cpp (original)
+++ llvm/branches/R600/lib/CodeGen/TailDuplication.cpp Tue Oct 2 09:15:33 2012
@@ -552,7 +552,7 @@
// compensate for the duplication.
unsigned MaxDuplicateCount;
if (TailDuplicateSize.getNumOccurrences() == 0 &&
- MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize))
+ MF.getFunction()->getFnAttributes().hasOptimizeForSizeAttr())
MaxDuplicateCount = 1;
else
MaxDuplicateCount = TailDuplicateSize;
Modified: llvm/branches/R600/lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt (original)
+++ llvm/branches/R600/lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt Tue Oct 2 09:15:33 2012
@@ -1,11 +1,6 @@
-
-include_directories( ${LLVM_INTEL_JITEVENTS_INCDIR} ${CMAKE_CURRENT_SOURCE_DIR}/.. )
-
-set(system_libs
- ${system_libs}
- jitprofiling
- )
+include_directories( ${CMAKE_CURRENT_SOURCE_DIR}/.. )
add_llvm_library(LLVMIntelJITEvents
IntelJITEventListener.cpp
+ jitprofiling.c
)
Modified: llvm/branches/R600/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp (original)
+++ llvm/branches/R600/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp Tue Oct 2 09:15:33 2012
@@ -22,12 +22,12 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/ExecutionEngine/IntelJITEventsWrapper.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Errno.h"
#include "llvm/Support/ValueHandle.h"
#include "EventListenerCommon.h"
+#include "IntelJITEventsWrapper.h"
using namespace llvm;
using namespace llvm::jitprofiling;
@@ -37,13 +37,13 @@
class IntelJITEventListener : public JITEventListener {
typedef DenseMap<void*, unsigned int> MethodIDMap;
- IntelJITEventsWrapper& Wrapper;
+ OwningPtr<IntelJITEventsWrapper> Wrapper;
MethodIDMap MethodIDs;
FilenameCache Filenames;
public:
- IntelJITEventListener(IntelJITEventsWrapper& libraryWrapper)
- : Wrapper(libraryWrapper) {
+ IntelJITEventListener(IntelJITEventsWrapper* libraryWrapper) {
+ Wrapper.reset(libraryWrapper);
}
~IntelJITEventListener() {
@@ -94,7 +94,7 @@
void IntelJITEventListener::NotifyFunctionEmitted(
const Function &F, void *FnStart, size_t FnSize,
const EmittedFunctionDetails &Details) {
- iJIT_Method_Load FunctionMessage = FunctionDescToIntelJITFormat(Wrapper,
+ iJIT_Method_Load FunctionMessage = FunctionDescToIntelJITFormat(*Wrapper,
F.getName().data(),
reinterpret_cast<uint64_t>(FnStart),
FnSize);
@@ -151,15 +151,15 @@
FunctionMessage.line_number_table = 0;
}
- Wrapper.iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED,
- &FunctionMessage);
+ Wrapper->iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED,
+ &FunctionMessage);
MethodIDs[FnStart] = FunctionMessage.method_id;
}
void IntelJITEventListener::NotifyFreeingMachineCode(void *FnStart) {
MethodIDMap::iterator I = MethodIDs.find(FnStart);
if (I != MethodIDs.end()) {
- Wrapper.iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_UNLOAD_START, &I->second);
+ Wrapper->iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_UNLOAD_START, &I->second);
MethodIDs.erase(I);
}
}
@@ -168,15 +168,13 @@
namespace llvm {
JITEventListener *JITEventListener::createIntelJITEventListener() {
- static OwningPtr<IntelJITEventsWrapper> JITProfilingWrapper(
- new IntelJITEventsWrapper);
- return new IntelJITEventListener(*JITProfilingWrapper);
+ return new IntelJITEventListener(new IntelJITEventsWrapper);
}
// for testing
JITEventListener *JITEventListener::createIntelJITEventListener(
IntelJITEventsWrapper* TestImpl) {
- return new IntelJITEventListener(*TestImpl);
+ return new IntelJITEventListener(TestImpl);
}
} // namespace llvm
Copied: llvm/branches/R600/lib/ExecutionEngine/IntelJITEvents/IntelJITEventsWrapper.h (from r165010, llvm/branches/R600/include/llvm/ExecutionEngine/IntelJITEventsWrapper.h)
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/ExecutionEngine/IntelJITEvents/IntelJITEventsWrapper.h?p2=llvm/branches/R600/lib/ExecutionEngine/IntelJITEvents/IntelJITEventsWrapper.h&p1=llvm/branches/R600/include/llvm/ExecutionEngine/IntelJITEventsWrapper.h&r1=165010&r2=165011&rev=165011&view=diff
==============================================================================
--- llvm/branches/R600/include/llvm/ExecutionEngine/IntelJITEventsWrapper.h (original)
+++ llvm/branches/R600/lib/ExecutionEngine/IntelJITEvents/IntelJITEventsWrapper.h Tue Oct 2 09:15:33 2012
@@ -18,7 +18,7 @@
#ifndef INTEL_JIT_EVENTS_WRAPPER_H
#define INTEL_JIT_EVENTS_WRAPPER_H
-#include <jitprofiling.h>
+#include "jitprofiling.h"
namespace llvm {
Modified: llvm/branches/R600/lib/ExecutionEngine/IntelJITEvents/Makefile
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/ExecutionEngine/IntelJITEvents/Makefile?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/ExecutionEngine/IntelJITEvents/Makefile (original)
+++ llvm/branches/R600/lib/ExecutionEngine/IntelJITEvents/Makefile Tue Oct 2 09:15:33 2012
@@ -11,7 +11,8 @@
include $(LEVEL)/Makefile.config
-SOURCES := IntelJITEventListener.cpp
-CPPFLAGS += -I$(INTEL_JITEVENTS_INCDIR) -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+SOURCES := IntelJITEventListener.cpp \
+ jitprofiling.c
+CPPFLAGS += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
include $(LLVM_SRC_ROOT)/Makefile.rules
Added: llvm/branches/R600/lib/ExecutionEngine/IntelJITEvents/ittnotify_config.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/ExecutionEngine/IntelJITEvents/ittnotify_config.h?rev=165011&view=auto
==============================================================================
--- llvm/branches/R600/lib/ExecutionEngine/IntelJITEvents/ittnotify_config.h (added)
+++ llvm/branches/R600/lib/ExecutionEngine/IntelJITEvents/ittnotify_config.h Tue Oct 2 09:15:33 2012
@@ -0,0 +1,449 @@
+/*===-- ittnotify_config.h - JIT Profiling API internal config-----*- C -*-===*
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===*
+ *
+ * This file provides Intel(R) Performance Analyzer JIT (Just-In-Time)
+ * Profiling API internal config.
+ *
+ *===----------------------------------------------------------------------===*/
+#ifndef _ITTNOTIFY_CONFIG_H_
+#define _ITTNOTIFY_CONFIG_H_
+
+/** @cond exclude_from_documentation */
+#ifndef ITT_OS_WIN
+# define ITT_OS_WIN 1
+#endif /* ITT_OS_WIN */
+
+#ifndef ITT_OS_LINUX
+# define ITT_OS_LINUX 2
+#endif /* ITT_OS_LINUX */
+
+#ifndef ITT_OS_MAC
+# define ITT_OS_MAC 3
+#endif /* ITT_OS_MAC */
+
+#ifndef ITT_OS
+# if defined WIN32 || defined _WIN32
+# define ITT_OS ITT_OS_WIN
+# elif defined( __APPLE__ ) && defined( __MACH__ )
+# define ITT_OS ITT_OS_MAC
+# else
+# define ITT_OS ITT_OS_LINUX
+# endif
+#endif /* ITT_OS */
+
+#ifndef ITT_PLATFORM_WIN
+# define ITT_PLATFORM_WIN 1
+#endif /* ITT_PLATFORM_WIN */
+
+#ifndef ITT_PLATFORM_POSIX
+# define ITT_PLATFORM_POSIX 2
+#endif /* ITT_PLATFORM_POSIX */
+
+#ifndef ITT_PLATFORM
+# if ITT_OS==ITT_OS_WIN
+# define ITT_PLATFORM ITT_PLATFORM_WIN
+# else
+# define ITT_PLATFORM ITT_PLATFORM_POSIX
+# endif /* _WIN32 */
+#endif /* ITT_PLATFORM */
+
+#if defined(_UNICODE) && !defined(UNICODE)
+#define UNICODE
+#endif
+
+#include <stddef.h>
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#include <tchar.h>
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#include <stdint.h>
+#if defined(UNICODE) || defined(_UNICODE)
+#include <wchar.h>
+#endif /* UNICODE || _UNICODE */
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+#ifndef CDECL
+# if ITT_PLATFORM==ITT_PLATFORM_WIN
+# define CDECL __cdecl
+# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+# if defined _M_X64 || defined _M_AMD64 || defined __x86_64__
+# define CDECL /* not actual on x86_64 platform */
+# else /* _M_X64 || _M_AMD64 || __x86_64__ */
+# define CDECL __attribute__ ((cdecl))
+# endif /* _M_X64 || _M_AMD64 || __x86_64__ */
+# endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* CDECL */
+
+#ifndef STDCALL
+# if ITT_PLATFORM==ITT_PLATFORM_WIN
+# define STDCALL __stdcall
+# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+# if defined _M_X64 || defined _M_AMD64 || defined __x86_64__
+# define STDCALL /* not supported on x86_64 platform */
+# else /* _M_X64 || _M_AMD64 || __x86_64__ */
+# define STDCALL __attribute__ ((stdcall))
+# endif /* _M_X64 || _M_AMD64 || __x86_64__ */
+# endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* STDCALL */
+
+#define ITTAPI CDECL
+#define LIBITTAPI CDECL
+
+/* TODO: Temporary for compatibility! */
+#define ITTAPI_CALL CDECL
+#define LIBITTAPI_CALL CDECL
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+/* use __forceinline (VC++ specific) */
+#define ITT_INLINE __forceinline
+#define ITT_INLINE_ATTRIBUTE /* nothing */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+/*
+ * Generally, functions are not inlined unless optimization is specified.
+ * For functions declared inline, this attribute inlines the function even
+ * if no optimization level was specified.
+ */
+#ifdef __STRICT_ANSI__
+#define ITT_INLINE static
+#else /* __STRICT_ANSI__ */
+#define ITT_INLINE static inline
+#endif /* __STRICT_ANSI__ */
+#define ITT_INLINE_ATTRIBUTE __attribute__ ((always_inline))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+/** @endcond */
+
+#ifndef ITT_ARCH_IA32
+# define ITT_ARCH_IA32 1
+#endif /* ITT_ARCH_IA32 */
+
+#ifndef ITT_ARCH_IA32E
+# define ITT_ARCH_IA32E 2
+#endif /* ITT_ARCH_IA32E */
+
+#ifndef ITT_ARCH_IA64
+# define ITT_ARCH_IA64 3
+#endif /* ITT_ARCH_IA64 */
+
+#ifndef ITT_ARCH
+# if defined _M_X64 || defined _M_AMD64 || defined __x86_64__
+# define ITT_ARCH ITT_ARCH_IA32E
+# elif defined _M_IA64 || defined __ia64
+# define ITT_ARCH ITT_ARCH_IA64
+# else
+# define ITT_ARCH ITT_ARCH_IA32
+# endif
+#endif
+
+#ifdef __cplusplus
+# define ITT_EXTERN_C extern "C"
+#else
+# define ITT_EXTERN_C /* nothing */
+#endif /* __cplusplus */
+
+#define ITT_TO_STR_AUX(x) #x
+#define ITT_TO_STR(x) ITT_TO_STR_AUX(x)
+
+#define __ITT_BUILD_ASSERT(expr, suffix) do { \
+ static char __itt_build_check_##suffix[(expr) ? 1 : -1]; \
+ __itt_build_check_##suffix[0] = 0; \
+} while(0)
+#define _ITT_BUILD_ASSERT(expr, suffix) __ITT_BUILD_ASSERT((expr), suffix)
+#define ITT_BUILD_ASSERT(expr) _ITT_BUILD_ASSERT((expr), __LINE__)
+
+#define ITT_MAGIC { 0xED, 0xAB, 0xAB, 0xEC, 0x0D, 0xEE, 0xDA, 0x30 }
+
+/* Replace with snapshot date YYYYMMDD for promotion build. */
+#define API_VERSION_BUILD 20111111
+
+#ifndef API_VERSION_NUM
+#define API_VERSION_NUM 0.0.0
+#endif /* API_VERSION_NUM */
+
+#define API_VERSION "ITT-API-Version " ITT_TO_STR(API_VERSION_NUM) \
+ " (" ITT_TO_STR(API_VERSION_BUILD) ")"
+
+/* OS communication functions */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#include <windows.h>
+typedef HMODULE lib_t;
+typedef DWORD TIDT;
+typedef CRITICAL_SECTION mutex_t;
+#define MUTEX_INITIALIZER { 0 }
+#define strong_alias(name, aliasname) /* empty for Windows */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#include <dlfcn.h>
+#if defined(UNICODE) || defined(_UNICODE)
+#include <wchar.h>
+#endif /* UNICODE */
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE 1 /* need for PTHREAD_MUTEX_RECURSIVE */
+#endif /* _GNU_SOURCE */
+#include <pthread.h>
+typedef void* lib_t;
+typedef pthread_t TIDT;
+typedef pthread_mutex_t mutex_t;
+#define MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER
+#define _strong_alias(name, aliasname) \
+ extern __typeof (name) aliasname __attribute__ ((alias (#name)));
+#define strong_alias(name, aliasname) _strong_alias(name, aliasname)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_get_proc(lib, name) GetProcAddress(lib, name)
+#define __itt_mutex_init(mutex) InitializeCriticalSection(mutex)
+#define __itt_mutex_lock(mutex) EnterCriticalSection(mutex)
+#define __itt_mutex_unlock(mutex) LeaveCriticalSection(mutex)
+#define __itt_load_lib(name) LoadLibraryA(name)
+#define __itt_unload_lib(handle) FreeLibrary(handle)
+#define __itt_system_error() (int)GetLastError()
+#define __itt_fstrcmp(s1, s2) lstrcmpA(s1, s2)
+#define __itt_fstrlen(s) lstrlenA(s)
+#define __itt_fstrcpyn(s1, s2, l) lstrcpynA(s1, s2, l)
+#define __itt_fstrdup(s) _strdup(s)
+#define __itt_thread_id() GetCurrentThreadId()
+#define __itt_thread_yield() SwitchToThread()
+#ifndef ITT_SIMPLE_INIT
+ITT_INLINE long
+__itt_interlocked_increment(volatile long* ptr) ITT_INLINE_ATTRIBUTE;
+ITT_INLINE long __itt_interlocked_increment(volatile long* ptr)
+{
+ return InterlockedIncrement(ptr);
+}
+#endif /* ITT_SIMPLE_INIT */
+#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
+#define __itt_get_proc(lib, name) dlsym(lib, name)
+#define __itt_mutex_init(mutex) {\
+ pthread_mutexattr_t mutex_attr; \
+ int error_code = pthread_mutexattr_init(&mutex_attr); \
+ if (error_code) \
+ __itt_report_error(__itt_error_system, "pthread_mutexattr_init", \
+ error_code); \
+ error_code = pthread_mutexattr_settype(&mutex_attr, \
+ PTHREAD_MUTEX_RECURSIVE); \
+ if (error_code) \
+ __itt_report_error(__itt_error_system, "pthread_mutexattr_settype", \
+ error_code); \
+ error_code = pthread_mutex_init(mutex, &mutex_attr); \
+ if (error_code) \
+ __itt_report_error(__itt_error_system, "pthread_mutex_init", \
+ error_code); \
+ error_code = pthread_mutexattr_destroy(&mutex_attr); \
+ if (error_code) \
+ __itt_report_error(__itt_error_system, "pthread_mutexattr_destroy", \
+ error_code); \
+}
+#define __itt_mutex_lock(mutex) pthread_mutex_lock(mutex)
+#define __itt_mutex_unlock(mutex) pthread_mutex_unlock(mutex)
+#define __itt_load_lib(name) dlopen(name, RTLD_LAZY)
+#define __itt_unload_lib(handle) dlclose(handle)
+#define __itt_system_error() errno
+#define __itt_fstrcmp(s1, s2) strcmp(s1, s2)
+#define __itt_fstrlen(s) strlen(s)
+#define __itt_fstrcpyn(s1, s2, l) strncpy(s1, s2, l)
+#define __itt_fstrdup(s) strdup(s)
+#define __itt_thread_id() pthread_self()
+#define __itt_thread_yield() sched_yield()
+#if ITT_ARCH==ITT_ARCH_IA64
+#ifdef __INTEL_COMPILER
+#define __TBB_machine_fetchadd4(addr, val) __fetchadd4_acq((void *)addr, val)
+#else /* __INTEL_COMPILER */
+/* TODO: Add Support for not Intel compilers for IA64 */
+#endif /* __INTEL_COMPILER */
+#else /* ITT_ARCH!=ITT_ARCH_IA64 */
+ITT_INLINE long
+__TBB_machine_fetchadd4(volatile void* ptr, long addend) ITT_INLINE_ATTRIBUTE;
+ITT_INLINE long __TBB_machine_fetchadd4(volatile void* ptr, long addend)
+{
+ long result;
+ __asm__ __volatile__("lock\nxadd %0,%1"
+ : "=r"(result),"=m"(*(long*)ptr)
+ : "0"(addend), "m"(*(long*)ptr)
+ : "memory");
+ return result;
+}
+#endif /* ITT_ARCH==ITT_ARCH_IA64 */
+#ifndef ITT_SIMPLE_INIT
+ITT_INLINE long
+__itt_interlocked_increment(volatile long* ptr) ITT_INLINE_ATTRIBUTE;
+ITT_INLINE long __itt_interlocked_increment(volatile long* ptr)
+{
+ return __TBB_machine_fetchadd4(ptr, 1) + 1L;
+}
+#endif /* ITT_SIMPLE_INIT */
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+typedef enum {
+ __itt_collection_normal = 0,
+ __itt_collection_paused = 1
+} __itt_collection_state;
+
+typedef enum {
+ __itt_thread_normal = 0,
+ __itt_thread_ignored = 1
+} __itt_thread_state;
+
+#pragma pack(push, 8)
+
+typedef struct ___itt_thread_info
+{
+ const char* nameA; /*!< Copy of original name in ASCII. */
+#if defined(UNICODE) || defined(_UNICODE)
+ const wchar_t* nameW; /*!< Copy of original name in UNICODE. */
+#else /* UNICODE || _UNICODE */
+ void* nameW;
+#endif /* UNICODE || _UNICODE */
+ TIDT tid;
+ __itt_thread_state state; /*!< Thread state (paused or normal) */
+ int extra1; /*!< Reserved to the runtime */
+ void* extra2; /*!< Reserved to the runtime */
+ struct ___itt_thread_info* next;
+} __itt_thread_info;
+
+#include "ittnotify_types.h" /* For __itt_group_id definition */
+
+typedef struct ___itt_api_info_20101001
+{
+ const char* name;
+ void** func_ptr;
+ void* init_func;
+ __itt_group_id group;
+} __itt_api_info_20101001;
+
+typedef struct ___itt_api_info
+{
+ const char* name;
+ void** func_ptr;
+ void* init_func;
+ void* null_func;
+ __itt_group_id group;
+} __itt_api_info;
+
+struct ___itt_domain;
+struct ___itt_string_handle;
+
+typedef struct ___itt_global
+{
+ unsigned char magic[8];
+ unsigned long version_major;
+ unsigned long version_minor;
+ unsigned long version_build;
+ volatile long api_initialized;
+ volatile long mutex_initialized;
+ volatile long atomic_counter;
+ mutex_t mutex;
+ lib_t lib;
+ void* error_handler;
+ const char** dll_path_ptr;
+ __itt_api_info* api_list_ptr;
+ struct ___itt_global* next;
+ /* Joinable structures below */
+ __itt_thread_info* thread_list;
+ struct ___itt_domain* domain_list;
+ struct ___itt_string_handle* string_list;
+ __itt_collection_state state;
+} __itt_global;
+
+#pragma pack(pop)
+
+#define NEW_THREAD_INFO_W(gptr,h,h_tail,t,s,n) { \
+ h = (__itt_thread_info*)malloc(sizeof(__itt_thread_info)); \
+ if (h != NULL) { \
+ h->tid = t; \
+ h->nameA = NULL; \
+ h->nameW = n ? _wcsdup(n) : NULL; \
+ h->state = s; \
+ h->extra1 = 0; /* reserved */ \
+ h->extra2 = NULL; /* reserved */ \
+ h->next = NULL; \
+ if (h_tail == NULL) \
+ (gptr)->thread_list = h; \
+ else \
+ h_tail->next = h; \
+ } \
+}
+
+#define NEW_THREAD_INFO_A(gptr,h,h_tail,t,s,n) { \
+ h = (__itt_thread_info*)malloc(sizeof(__itt_thread_info)); \
+ if (h != NULL) { \
+ h->tid = t; \
+ h->nameA = n ? __itt_fstrdup(n) : NULL; \
+ h->nameW = NULL; \
+ h->state = s; \
+ h->extra1 = 0; /* reserved */ \
+ h->extra2 = NULL; /* reserved */ \
+ h->next = NULL; \
+ if (h_tail == NULL) \
+ (gptr)->thread_list = h; \
+ else \
+ h_tail->next = h; \
+ } \
+}
+
+#define NEW_DOMAIN_W(gptr,h,h_tail,name) { \
+ h = (__itt_domain*)malloc(sizeof(__itt_domain)); \
+ if (h != NULL) { \
+ h->flags = 0; /* domain is disabled by default */ \
+ h->nameA = NULL; \
+ h->nameW = name ? _wcsdup(name) : NULL; \
+ h->extra1 = 0; /* reserved */ \
+ h->extra2 = NULL; /* reserved */ \
+ h->next = NULL; \
+ if (h_tail == NULL) \
+ (gptr)->domain_list = h; \
+ else \
+ h_tail->next = h; \
+ } \
+}
+
+#define NEW_DOMAIN_A(gptr,h,h_tail,name) { \
+ h = (__itt_domain*)malloc(sizeof(__itt_domain)); \
+ if (h != NULL) { \
+ h->flags = 0; /* domain is disabled by default */ \
+ h->nameA = name ? __itt_fstrdup(name) : NULL; \
+ h->nameW = NULL; \
+ h->extra1 = 0; /* reserved */ \
+ h->extra2 = NULL; /* reserved */ \
+ h->next = NULL; \
+ if (h_tail == NULL) \
+ (gptr)->domain_list = h; \
+ else \
+ h_tail->next = h; \
+ } \
+}
+
+#define NEW_STRING_HANDLE_W(gptr,h,h_tail,name) { \
+ h = (__itt_string_handle*)malloc(sizeof(__itt_string_handle)); \
+ if (h != NULL) { \
+ h->strA = NULL; \
+ h->strW = name ? _wcsdup(name) : NULL; \
+ h->extra1 = 0; /* reserved */ \
+ h->extra2 = NULL; /* reserved */ \
+ h->next = NULL; \
+ if (h_tail == NULL) \
+ (gptr)->string_list = h; \
+ else \
+ h_tail->next = h; \
+ } \
+}
+
+#define NEW_STRING_HANDLE_A(gptr,h,h_tail,name) { \
+ h = (__itt_string_handle*)malloc(sizeof(__itt_string_handle)); \
+ if (h != NULL) { \
+ h->strA = name ? __itt_fstrdup(name) : NULL; \
+ h->strW = NULL; \
+ h->extra1 = 0; /* reserved */ \
+ h->extra2 = NULL; /* reserved */ \
+ h->next = NULL; \
+ if (h_tail == NULL) \
+ (gptr)->string_list = h; \
+ else \
+ h_tail->next = h; \
+ } \
+}
+
+#endif /* _ITTNOTIFY_CONFIG_H_ */
Added: llvm/branches/R600/lib/ExecutionEngine/IntelJITEvents/ittnotify_types.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/ExecutionEngine/IntelJITEvents/ittnotify_types.h?rev=165011&view=auto
==============================================================================
--- llvm/branches/R600/lib/ExecutionEngine/IntelJITEvents/ittnotify_types.h (added)
+++ llvm/branches/R600/lib/ExecutionEngine/IntelJITEvents/ittnotify_types.h Tue Oct 2 09:15:33 2012
@@ -0,0 +1,63 @@
+//===-- ittnotify_types.h - Intel(R) Performance Analyzer JIT (Just-In-Time) Profiling API internal types. ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+#ifndef _ITTNOTIFY_TYPES_H_
+#define _ITTNOTIFY_TYPES_H_
+
+typedef enum ___itt_group_id
+{
+ __itt_group_none = 0,
+ __itt_group_legacy = 1<<0,
+ __itt_group_control = 1<<1,
+ __itt_group_thread = 1<<2,
+ __itt_group_mark = 1<<3,
+ __itt_group_sync = 1<<4,
+ __itt_group_fsync = 1<<5,
+ __itt_group_jit = 1<<6,
+ __itt_group_model = 1<<7,
+ __itt_group_splitter_min = 1<<7,
+ __itt_group_counter = 1<<8,
+ __itt_group_frame = 1<<9,
+ __itt_group_stitch = 1<<10,
+ __itt_group_heap = 1<<11,
+ __itt_group_splitter_max = 1<<12,
+ __itt_group_structure = 1<<12,
+ __itt_group_suppress = 1<<13,
+ __itt_group_all = -1
+} __itt_group_id;
+
+#pragma pack(push, 8)
+
+typedef struct ___itt_group_list
+{
+ __itt_group_id id;
+ const char* name;
+} __itt_group_list;
+
+#pragma pack(pop)
+
+#define ITT_GROUP_LIST(varname) \
+ static __itt_group_list varname[] = { \
+ { __itt_group_all, "all" }, \
+ { __itt_group_control, "control" }, \
+ { __itt_group_thread, "thread" }, \
+ { __itt_group_mark, "mark" }, \
+ { __itt_group_sync, "sync" }, \
+ { __itt_group_fsync, "fsync" }, \
+ { __itt_group_jit, "jit" }, \
+ { __itt_group_model, "model" }, \
+ { __itt_group_counter, "counter" }, \
+ { __itt_group_frame, "frame" }, \
+ { __itt_group_stitch, "stitch" }, \
+ { __itt_group_heap, "heap" }, \
+ { __itt_group_structure, "structure" }, \
+ { __itt_group_suppress, "suppress" }, \
+ { __itt_group_none, NULL } \
+ }
+
+#endif /* _ITTNOTIFY_TYPES_H_ */
Added: llvm/branches/R600/lib/ExecutionEngine/IntelJITEvents/jitprofiling.c
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/ExecutionEngine/IntelJITEvents/jitprofiling.c?rev=165011&view=auto
==============================================================================
--- llvm/branches/R600/lib/ExecutionEngine/IntelJITEvents/jitprofiling.c (added)
+++ llvm/branches/R600/lib/ExecutionEngine/IntelJITEvents/jitprofiling.c Tue Oct 2 09:15:33 2012
@@ -0,0 +1,476 @@
+/*===-- jitprofiling.c - JIT (Just-In-Time) Profiling API----------*- C -*-===*
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===*
+ *
+ * This file provides Intel(R) Performance Analyzer JIT (Just-In-Time)
+ * Profiling API implementation.
+ *
+ *===----------------------------------------------------------------------===*/
+#include "ittnotify_config.h"
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#include <windows.h>
+#pragma optimize("", off)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#include <pthread.h>
+#include <dlfcn.h>
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#include <malloc.h>
+#include <stdlib.h>
+
+#include "jitprofiling.h"
+
+static const char rcsid[] = "\n@(#) $Revision: 243501 $\n";
+
+#define DLL_ENVIRONMENT_VAR "VS_PROFILER"
+
+#ifndef NEW_DLL_ENVIRONMENT_VAR
+#if ITT_ARCH==ITT_ARCH_IA32
+#define NEW_DLL_ENVIRONMENT_VAR "INTEL_JIT_PROFILER32"
+#else
+#define NEW_DLL_ENVIRONMENT_VAR "INTEL_JIT_PROFILER64"
+#endif
+#endif /* NEW_DLL_ENVIRONMENT_VAR */
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define DEFAULT_DLLNAME "JitPI.dll"
+HINSTANCE m_libHandle = NULL;
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define DEFAULT_DLLNAME "libJitPI.so"
+void* m_libHandle = NULL;
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/* default location of JIT profiling agent on Android */
+#define ANDROID_JIT_AGENT_PATH "/data/intel/libittnotify.so"
+
+/* the function pointers */
+typedef unsigned int(*TPInitialize)(void);
+static TPInitialize FUNC_Initialize=NULL;
+
+typedef unsigned int(*TPNotify)(unsigned int, void*);
+static TPNotify FUNC_NotifyEvent=NULL;
+
+static iJIT_IsProfilingActiveFlags executionMode = iJIT_NOTHING_RUNNING;
+
+/* end collector dll part. */
+
+/* loadiJIT_Funcs() : this function is called just in the beginning
+ * and is responsible to load the functions from BistroJavaCollector.dll
+ * result:
+ * on success: the functions loads, iJIT_DLL_is_missing=0, return value = 1
+ * on failure: the functions are NULL, iJIT_DLL_is_missing=1, return value = 0
+ */
+static int loadiJIT_Funcs(void);
+
+/* global representing whether the BistroJavaCollector can't be loaded */
+static int iJIT_DLL_is_missing = 0;
+
+/* Virtual stack - the struct is used as a virtual stack for each thread.
+ * Every thread initializes with a stack of size INIT_TOP_STACK.
+ * Every method entry decreases from the current stack point,
+ * and when a thread stack reaches its top of stack (return from the global
+ * function), the top of stack and the current stack increase. Notice that
+ * when returning from a function the stack pointer is the address of
+ * the function return.
+*/
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+static DWORD threadLocalStorageHandle = 0;
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+static pthread_key_t threadLocalStorageHandle = (pthread_key_t)0;
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+#define INIT_TOP_Stack 10000
+
+typedef struct
+{
+ unsigned int TopStack;
+ unsigned int CurrentStack;
+} ThreadStack, *pThreadStack;
+
+/* end of virtual stack. */
+
+/*
+ * The function for reporting virtual-machine related events to VTune.
+ * Note: when reporting iJVM_EVENT_TYPE_ENTER_NIDS, there is no need to fill
+ * in the stack_id field in the iJIT_Method_NIDS structure, as VTune fills it.
+ * The return value in iJVM_EVENT_TYPE_ENTER_NIDS &&
+ * iJVM_EVENT_TYPE_LEAVE_NIDS events will be 0 in case of failure.
+ * in iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED event
+ * it will be -1 if EventSpecificData == 0 otherwise it will be 0.
+*/
+
+ITT_EXTERN_C int JITAPI
+iJIT_NotifyEvent(iJIT_JVM_EVENT event_type, void *EventSpecificData)
+{
+ int ReturnValue;
+
+ /*
+ * This section is for debugging outside of VTune.
+ * It creates the environment variables that indicates call graph mode.
+ * If running outside of VTune remove the remark.
+ *
+ *
+ * static int firstTime = 1;
+ * char DoCallGraph[12] = "DoCallGraph";
+ * if (firstTime)
+ * {
+ * firstTime = 0;
+ * SetEnvironmentVariable( "BISTRO_COLLECTORS_DO_CALLGRAPH", DoCallGraph);
+ * }
+ *
+ * end of section.
+ */
+
+ /* initialization part - the functions have not been loaded yet. This part
+ * will load the functions, and check if we are in Call Graph mode.
+ * (for special treatment).
+ */
+ if (!FUNC_NotifyEvent)
+ {
+ if (iJIT_DLL_is_missing)
+ return 0;
+
+ /* load the Function from the DLL */
+ if (!loadiJIT_Funcs())
+ return 0;
+
+ /* Call Graph initialization. */
+ }
+
+ /* If the event is method entry/exit, check that in the current mode
+ * VTune is allowed to receive it
+ */
+ if ((event_type == iJVM_EVENT_TYPE_ENTER_NIDS ||
+ event_type == iJVM_EVENT_TYPE_LEAVE_NIDS) &&
+ (executionMode != iJIT_CALLGRAPH_ON))
+ {
+ return 0;
+ }
+ /* This section is performed when method enter event occurs.
+ * It updates the virtual stack, or creates it if this is the first
+ * method entry in the thread. The stack pointer is decreased.
+ */
+ if (event_type == iJVM_EVENT_TYPE_ENTER_NIDS)
+ {
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ pThreadStack threadStack =
+ (pThreadStack)TlsGetValue (threadLocalStorageHandle);
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ pThreadStack threadStack =
+ (pThreadStack)pthread_getspecific(threadLocalStorageHandle);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+ /* check for use of reserved method IDs */
+ if ( ((piJIT_Method_NIDS) EventSpecificData)->method_id <= 999 )
+ return 0;
+
+ if (!threadStack)
+ {
+ /* initialize the stack. */
+ threadStack = (pThreadStack) calloc (sizeof(ThreadStack), 1);
+ threadStack->TopStack = INIT_TOP_Stack;
+ threadStack->CurrentStack = INIT_TOP_Stack;
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ TlsSetValue(threadLocalStorageHandle,(void*)threadStack);
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ pthread_setspecific(threadLocalStorageHandle,(void*)threadStack);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ }
+
+ /* decrease the stack. */
+ ((piJIT_Method_NIDS) EventSpecificData)->stack_id =
+ (threadStack->CurrentStack)--;
+ }
+
+ /* This section is performed when method leave event occurs
+ * It updates the virtual stack.
+ * Increases the stack pointer.
+ * If the stack pointer reached the top (left the global function)
+ * increase the pointer and the top pointer.
+ */
+ if (event_type == iJVM_EVENT_TYPE_LEAVE_NIDS)
+ {
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ pThreadStack threadStack =
+ (pThreadStack)TlsGetValue (threadLocalStorageHandle);
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ pThreadStack threadStack =
+ (pThreadStack)pthread_getspecific(threadLocalStorageHandle);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+ /* check for use of reserved method IDs */
+ if ( ((piJIT_Method_NIDS) EventSpecificData)->method_id <= 999 )
+ return 0;
+
+ if (!threadStack)
+ {
+ /* Error: first report in this thread is method exit */
+ exit (1);
+ }
+
+ ((piJIT_Method_NIDS) EventSpecificData)->stack_id =
+ ++(threadStack->CurrentStack) + 1;
+
+ if (((piJIT_Method_NIDS) EventSpecificData)->stack_id
+ > threadStack->TopStack)
+ ((piJIT_Method_NIDS) EventSpecificData)->stack_id =
+ (unsigned int)-1;
+ }
+
+ if (event_type == iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED)
+ {
+ /* check for use of reserved method IDs */
+ if ( ((piJIT_Method_Load) EventSpecificData)->method_id <= 999 )
+ return 0;
+ }
+
+ ReturnValue = (int)FUNC_NotifyEvent(event_type, EventSpecificData);
+
+ return ReturnValue;
+}
+
+/* The new mode call back routine */
+ITT_EXTERN_C void JITAPI
+iJIT_RegisterCallbackEx(void *userdata, iJIT_ModeChangedEx
+ NewModeCallBackFuncEx)
+{
+ /* is it already missing... or the load of functions from the DLL failed */
+ if (iJIT_DLL_is_missing || !loadiJIT_Funcs())
+ {
+ /* then do not bother with notifications */
+ NewModeCallBackFuncEx(userdata, iJIT_NO_NOTIFICATIONS);
+ /* Error: could not load JIT functions. */
+ return;
+ }
+ /* nothing to do with the callback */
+}
+
+/*
+ * This function allows the user to query in which mode, if at all,
+ *VTune is running
+ */
+ITT_EXTERN_C iJIT_IsProfilingActiveFlags JITAPI iJIT_IsProfilingActive()
+{
+ if (!iJIT_DLL_is_missing)
+ {
+ loadiJIT_Funcs();
+ }
+
+ return executionMode;
+}
+
+/* this function loads the collector dll (BistroJavaCollector)
+ * and the relevant functions.
+ * on success: all functions load, iJIT_DLL_is_missing = 0, return value = 1
+ * on failure: all functions are NULL, iJIT_DLL_is_missing = 1, return value = 0
+ */
+static int loadiJIT_Funcs()
+{
+ static int bDllWasLoaded = 0;
+ char *dllName = (char*)rcsid; /* !! Just to avoid unused code elimination */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ DWORD dNameLength = 0;
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+ if(bDllWasLoaded)
+ {
+ /* dll was already loaded, no need to do it for the second time */
+ return 1;
+ }
+
+ /* Assumes that the DLL will not be found */
+ iJIT_DLL_is_missing = 1;
+ FUNC_NotifyEvent = NULL;
+
+ if (m_libHandle)
+ {
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ FreeLibrary(m_libHandle);
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ dlclose(m_libHandle);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ m_libHandle = NULL;
+ }
+
+ /* Try to get the dll name from the environment */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ dNameLength = GetEnvironmentVariableA(NEW_DLL_ENVIRONMENT_VAR, NULL, 0);
+ if (dNameLength)
+ {
+ DWORD envret = 0;
+ dllName = (char*)malloc(sizeof(char) * (dNameLength + 1));
+ envret = GetEnvironmentVariableA(NEW_DLL_ENVIRONMENT_VAR,
+ dllName, dNameLength);
+ if (envret)
+ {
+ /* Try to load the dll from the PATH... */
+ m_libHandle = LoadLibraryExA(dllName,
+ NULL, LOAD_WITH_ALTERED_SEARCH_PATH);
+ }
+ free(dllName);
+ } else {
+ /* Try to use old VS_PROFILER variable */
+ dNameLength = GetEnvironmentVariableA(DLL_ENVIRONMENT_VAR, NULL, 0);
+ if (dNameLength)
+ {
+ DWORD envret = 0;
+ dllName = (char*)malloc(sizeof(char) * (dNameLength + 1));
+ envret = GetEnvironmentVariableA(DLL_ENVIRONMENT_VAR,
+ dllName, dNameLength);
+ if (envret)
+ {
+ /* Try to load the dll from the PATH... */
+ m_libHandle = LoadLibraryA(dllName);
+ }
+ free(dllName);
+ }
+ }
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ dllName = getenv(NEW_DLL_ENVIRONMENT_VAR);
+ if (!dllName)
+ dllName = getenv(DLL_ENVIRONMENT_VAR);
+#ifdef ANDROID
+ if (!dllName)
+ dllName = ANDROID_JIT_AGENT_PATH;
+#endif
+ if (dllName)
+ {
+ /* Try to load the dll from the PATH... */
+ m_libHandle = dlopen(dllName, RTLD_LAZY);
+ }
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+ if (!m_libHandle)
+ {
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ m_libHandle = LoadLibraryA(DEFAULT_DLLNAME);
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ m_libHandle = dlopen(DEFAULT_DLLNAME, RTLD_LAZY);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ }
+
+ /* if the dll wasn't loaded - exit. */
+ if (!m_libHandle)
+ {
+ iJIT_DLL_is_missing = 1; /* don't try to initialize
+ * JIT agent the second time
+ */
+ return 0;
+ }
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ FUNC_NotifyEvent = (TPNotify)GetProcAddress(m_libHandle, "NotifyEvent");
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ FUNC_NotifyEvent = (TPNotify)dlsym(m_libHandle, "NotifyEvent");
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ if (!FUNC_NotifyEvent)
+ {
+ FUNC_Initialize = NULL;
+ return 0;
+ }
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ FUNC_Initialize = (TPInitialize)GetProcAddress(m_libHandle, "Initialize");
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ FUNC_Initialize = (TPInitialize)dlsym(m_libHandle, "Initialize");
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ if (!FUNC_Initialize)
+ {
+ FUNC_NotifyEvent = NULL;
+ return 0;
+ }
+
+ executionMode = (iJIT_IsProfilingActiveFlags)FUNC_Initialize();
+
+ bDllWasLoaded = 1;
+ iJIT_DLL_is_missing = 0; /* DLL is ok. */
+
+ /*
+ * Call Graph mode: init the thread local storage
+ * (need to store the virtual stack there).
+ */
+ if ( executionMode == iJIT_CALLGRAPH_ON )
+ {
+ /* Allocate a thread local storage slot for the thread "stack" */
+ if (!threadLocalStorageHandle)
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ threadLocalStorageHandle = TlsAlloc();
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ pthread_key_create(&threadLocalStorageHandle, NULL);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ }
+
+ return 1;
+}
+
+/*
+ * This function should be called by the user whenever a thread ends,
+ * to free the thread "virtual stack" storage
+ */
+ITT_EXTERN_C void JITAPI FinalizeThread()
+{
+ if (threadLocalStorageHandle)
+ {
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ pThreadStack threadStack =
+ (pThreadStack)TlsGetValue (threadLocalStorageHandle);
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ pThreadStack threadStack =
+ (pThreadStack)pthread_getspecific(threadLocalStorageHandle);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ if (threadStack)
+ {
+ free (threadStack);
+ threadStack = NULL;
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ TlsSetValue (threadLocalStorageHandle, threadStack);
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ pthread_setspecific(threadLocalStorageHandle, threadStack);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ }
+ }
+}
+
+/*
+ * This function should be called by the user when the process ends,
+ * to free the local storage index
+*/
+ITT_EXTERN_C void JITAPI FinalizeProcess()
+{
+ if (m_libHandle)
+ {
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ FreeLibrary(m_libHandle);
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ dlclose(m_libHandle);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ m_libHandle = NULL;
+ }
+
+ if (threadLocalStorageHandle)
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ TlsFree (threadLocalStorageHandle);
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ pthread_key_delete(threadLocalStorageHandle);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+}
+
+/*
+ * This function should be called by the user for any method once.
+ * The function will return a unique method ID, the user should maintain
+ * the ID for each method
+ */
+ITT_EXTERN_C unsigned int JITAPI iJIT_GetNewMethodID()
+{
+ static unsigned int methodID = 0x100000;
+
+ if (methodID == 0)
+ return 0; /* ERROR : this is not a valid value */
+
+ return methodID++;
+}
Added: llvm/branches/R600/lib/ExecutionEngine/IntelJITEvents/jitprofiling.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/ExecutionEngine/IntelJITEvents/jitprofiling.h?rev=165011&view=auto
==============================================================================
--- llvm/branches/R600/lib/ExecutionEngine/IntelJITEvents/jitprofiling.h (added)
+++ llvm/branches/R600/lib/ExecutionEngine/IntelJITEvents/jitprofiling.h Tue Oct 2 09:15:33 2012
@@ -0,0 +1,254 @@
+/*===-- jitprofiling.h - JIT Profiling API-------------------------*- C -*-===*
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===*
+ *
+ * This file provides Intel(R) Performance Analyzer JIT (Just-In-Time)
+ * Profiling API declaration.
+ *
+ *===----------------------------------------------------------------------===*/
+#ifndef __JITPROFILING_H__
+#define __JITPROFILING_H__
+
+/*
+ * Various constants used by functions
+ */
+
+/* event notification */
+typedef enum iJIT_jvm_event
+{
+
+ /* shutdown */
+
+ /*
+ * Program exiting EventSpecificData NA
+ */
+ iJVM_EVENT_TYPE_SHUTDOWN = 2,
+
+ /* JIT profiling */
+
+ /*
+ * issued after method code jitted into memory but before code is executed
+ * EventSpecificData is an iJIT_Method_Load
+ */
+ iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED=13,
+
+ /* issued before unload. Method code will no longer be executed, but code
+ * and info are still in memory. The VTune profiler may capture method
+ * code only at this point EventSpecificData is iJIT_Method_Id
+ */
+ iJVM_EVENT_TYPE_METHOD_UNLOAD_START,
+
+ /* Method Profiling */
+
+ /* method name, Id and stack is supplied
+ * issued when a method is about to be entered EventSpecificData is
+ * iJIT_Method_NIDS
+ */
+ iJVM_EVENT_TYPE_ENTER_NIDS = 19,
+
+ /* method name, Id and stack is supplied
+ * issued when a method is about to be left EventSpecificData is
+ * iJIT_Method_NIDS
+ */
+ iJVM_EVENT_TYPE_LEAVE_NIDS
+} iJIT_JVM_EVENT;
+
+typedef enum _iJIT_ModeFlags
+{
+ /* No need to Notify VTune, since VTune is not running */
+ iJIT_NO_NOTIFICATIONS = 0x0000,
+
+ /* when turned on the jit must call
+ * iJIT_NotifyEvent
+ * (
+ * iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED,
+ * )
+ * for all the method already jitted
+ */
+ iJIT_BE_NOTIFY_ON_LOAD = 0x0001,
+
+ /* when turned on the jit must call
+ * iJIT_NotifyEvent
+ * (
+ * iJVM_EVENT_TYPE_METHOD_UNLOAD_FINISHED,
+ * ) for all the method that are unloaded
+ */
+ iJIT_BE_NOTIFY_ON_UNLOAD = 0x0002,
+
+ /* when turned on the jit must instrument all
+ * the currently jited code with calls on
+ * method entries
+ */
+ iJIT_BE_NOTIFY_ON_METHOD_ENTRY = 0x0004,
+
+ /* when turned on the jit must instrument all
+ * the currently jited code with calls
+ * on method exit
+ */
+ iJIT_BE_NOTIFY_ON_METHOD_EXIT = 0x0008
+
+} iJIT_ModeFlags;
+
+
+ /* Flags used by iJIT_IsProfilingActive() */
+typedef enum _iJIT_IsProfilingActiveFlags
+{
+ /* No profiler is running. Currently not used */
+ iJIT_NOTHING_RUNNING = 0x0000,
+
+ /* Sampling is running. This is the default value
+ * returned by iJIT_IsProfilingActive()
+ */
+ iJIT_SAMPLING_ON = 0x0001,
+
+ /* Call Graph is running */
+ iJIT_CALLGRAPH_ON = 0x0002
+
+} iJIT_IsProfilingActiveFlags;
+
+/* Enumerator for the environment of methods*/
+typedef enum _iJDEnvironmentType
+{
+ iJDE_JittingAPI = 2
+} iJDEnvironmentType;
+
+/**********************************
+ * Data structures for the events *
+ **********************************/
+
+/* structure for the events:
+ * iJVM_EVENT_TYPE_METHOD_UNLOAD_START
+ */
+
+typedef struct _iJIT_Method_Id
+{
+ /* Id of the method (same as the one passed in
+ * the iJIT_Method_Load struct
+ */
+ unsigned int method_id;
+
+} *piJIT_Method_Id, iJIT_Method_Id;
+
+
+/* structure for the events:
+ * iJVM_EVENT_TYPE_ENTER_NIDS,
+ * iJVM_EVENT_TYPE_LEAVE_NIDS,
+ * iJVM_EVENT_TYPE_EXCEPTION_OCCURRED_NIDS
+ */
+
+typedef struct _iJIT_Method_NIDS
+{
+ /* unique method ID */
+ unsigned int method_id;
+
+ /* NOTE: no need to fill this field, it's filled by VTune */
+ unsigned int stack_id;
+
+ /* method name (just the method, without the class) */
+ char* method_name;
+} *piJIT_Method_NIDS, iJIT_Method_NIDS;
+
+/* structures for the events:
+ * iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED
+ */
+
+typedef struct _LineNumberInfo
+{
+ /* x86 Offset from the begining of the method*/
+ unsigned int Offset;
+
+ /* source line number from the begining of the source file */
+ unsigned int LineNumber;
+
+} *pLineNumberInfo, LineNumberInfo;
+
+typedef struct _iJIT_Method_Load
+{
+ /* unique method ID - can be any unique value, (except 0 - 999) */
+ unsigned int method_id;
+
+ /* method name (can be with or without the class and signature, in any case
+ * the class name will be added to it)
+ */
+ char* method_name;
+
+ /* virtual address of that method - This determines the method range for the
+ * iJVM_EVENT_TYPE_ENTER/LEAVE_METHOD_ADDR events
+ */
+ void* method_load_address;
+
+ /* Size in memory - Must be exact */
+ unsigned int method_size;
+
+ /* Line Table size in number of entries - Zero if none */
+ unsigned int line_number_size;
+
+ /* Pointer to the begining of the line numbers info array */
+ pLineNumberInfo line_number_table;
+
+ /* unique class ID */
+ unsigned int class_id;
+
+ /* class file name */
+ char* class_file_name;
+
+ /* source file name */
+ char* source_file_name;
+
+ /* bits supplied by the user for saving in the JIT file */
+ void* user_data;
+
+ /* the size of the user data buffer */
+ unsigned int user_data_size;
+
+ /* NOTE: no need to fill this field, it's filled by VTune */
+ iJDEnvironmentType env;
+
+} *piJIT_Method_Load, iJIT_Method_Load;
+
+/* API Functions */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef CDECL
+# if defined WIN32 || defined _WIN32
+# define CDECL __cdecl
+# else /* defined WIN32 || defined _WIN32 */
+# if defined _M_X64 || defined _M_AMD64 || defined __x86_64__
+# define CDECL /* not actual on x86_64 platform */
+# else /* _M_X64 || _M_AMD64 || __x86_64__ */
+# define CDECL __attribute__ ((cdecl))
+# endif /* _M_X64 || _M_AMD64 || __x86_64__ */
+# endif /* defined WIN32 || defined _WIN32 */
+#endif /* CDECL */
+
+#define JITAPI CDECL
+
+/* called when the settings are changed with new settings */
+typedef void (*iJIT_ModeChangedEx)(void *UserData, iJIT_ModeFlags Flags);
+
+int JITAPI iJIT_NotifyEvent(iJIT_JVM_EVENT event_type, void *EventSpecificData);
+
+/* The new mode call back routine */
+void JITAPI iJIT_RegisterCallbackEx(void *userdata,
+ iJIT_ModeChangedEx NewModeCallBackFuncEx);
+
+iJIT_IsProfilingActiveFlags JITAPI iJIT_IsProfilingActive(void);
+
+void JITAPI FinalizeThread(void);
+
+void JITAPI FinalizeProcess(void);
+
+unsigned int JITAPI iJIT_GetNewMethodID(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __JITPROFILING_H__ */
Modified: llvm/branches/R600/lib/MC/MCExpr.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/MC/MCExpr.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/MC/MCExpr.cpp (original)
+++ llvm/branches/R600/lib/MC/MCExpr.cpp Tue Oct 2 09:15:33 2012
@@ -267,7 +267,7 @@
/* *** */
-void MCTargetExpr::Anchor() {}
+void MCTargetExpr::anchor() {}
/* *** */
Modified: llvm/branches/R600/lib/MC/MCObjectStreamer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/MC/MCObjectStreamer.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/MC/MCObjectStreamer.cpp (original)
+++ llvm/branches/R600/lib/MC/MCObjectStreamer.cpp Tue Oct 2 09:15:33 2012
@@ -270,6 +270,14 @@
DF->getContents().resize(DF->getContents().size() + 8, 0);
}
+void MCObjectStreamer::EmitFill(uint64_t NumBytes, uint8_t FillValue,
+ unsigned AddrSpace) {
+ assert(AddrSpace == 0 && "Address space must be 0!");
+ // FIXME: A MCFillFragment would be more memory efficient but MCExpr has
+ // problems evaluating expressions across multiple fragments.
+ getOrCreateDataFragment()->getContents().append(NumBytes, FillValue);
+}
+
void MCObjectStreamer::FinishImpl() {
// Dump out the dwarf file & directory tables and line tables.
const MCSymbol *LineSectionSymbol = NULL;
Modified: llvm/branches/R600/lib/Support/APFloat.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Support/APFloat.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Support/APFloat.cpp (original)
+++ llvm/branches/R600/lib/Support/APFloat.cpp Tue Oct 2 09:15:33 2012
@@ -1775,7 +1775,7 @@
// If the exponent is large enough, we know that this value is already
// integral, and the arithmetic below would potentially cause it to saturate
// to +/-Inf. Bail out early instead.
- if (exponent+1 >= (int)semanticsPrecision(*semantics))
+ if (category == fcNormal && exponent+1 >= (int)semanticsPrecision(*semantics))
return opOK;
// The algorithm here is quite simple: we add 2^(p-1), where p is the
Modified: llvm/branches/R600/lib/Support/Errno.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Support/Errno.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Support/Errno.cpp (original)
+++ llvm/branches/R600/lib/Support/Errno.cpp Tue Oct 2 09:15:33 2012
@@ -13,6 +13,7 @@
#include "llvm/Support/Errno.h"
#include "llvm/Config/config.h" // Get autoconf configuration settings
+#include "llvm/Support/raw_ostream.h"
#if HAVE_STRING_H
#include <string.h>
@@ -39,7 +40,7 @@
const int MaxErrStrLen = 2000;
char buffer[MaxErrStrLen];
buffer[0] = '\0';
- char* str = buffer;
+ std::string str;
#ifdef HAVE_STRERROR_R
// strerror_r is thread-safe.
if (errnum)
@@ -49,6 +50,7 @@
str = strerror_r(errnum,buffer,MaxErrStrLen-1);
# else
strerror_r(errnum,buffer,MaxErrStrLen-1);
+ str = buffer;
# endif
#elif HAVE_DECL_STRERROR_S // "Windows Secure API"
if (errnum)
@@ -58,12 +60,13 @@
// the buffer as fast as possible to minimize impact
// of collision of strerror in multiple threads.
if (errnum)
- strncpy(buffer,strerror(errnum),MaxErrStrLen-1);
- buffer[MaxErrStrLen-1] = '\0';
+ str = strerror(errnum);
#else
// Strange that this system doesn't even have strerror
// but, oh well, just use a generic message
- sprintf(buffer, "Error #%d", errnum);
+ raw_string_ostream stream(str);
+ stream << "Error #" << errnum;
+ stream.flush();
#endif
return str;
}
Modified: llvm/branches/R600/lib/Support/Host.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Support/Host.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Support/Host.cpp (original)
+++ llvm/branches/R600/lib/Support/Host.cpp Tue Oct 2 09:15:33 2012
@@ -234,6 +234,8 @@
case 37: // Intel Core i7, laptop version.
case 44: // Intel Core i7 processor and Intel Xeon processor. All
// processors are manufactured using the 32 nm process.
+ case 46: // Nehalem EX
+ case 47: // Westmere EX
return "corei7";
// SandyBridge:
Modified: llvm/branches/R600/lib/Support/Unix/Path.inc
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Support/Unix/Path.inc?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Support/Unix/Path.inc (original)
+++ llvm/branches/R600/lib/Support/Unix/Path.inc Tue Oct 2 09:15:33 2012
@@ -261,7 +261,8 @@
}
#if defined(__FreeBSD__) || defined (__NetBSD__) || defined(__Bitrig__) || \
- defined(__OpenBSD__) || defined(__minix) || defined(__FreeBSD_kernel__)
+ defined(__OpenBSD__) || defined(__minix) || defined(__FreeBSD_kernel__) || \
+ defined(__linux__) || defined(__CYGWIN__)
static int
test_dir(char buf[PATH_MAX], char ret[PATH_MAX],
const char *dir, const char *bin)
@@ -337,9 +338,17 @@
return Path(exe_path);
#elif defined(__linux__) || defined(__CYGWIN__)
char exe_path[MAXPATHLEN];
- ssize_t len = readlink("/proc/self/exe", exe_path, sizeof(exe_path));
- if (len >= 0)
- return Path(StringRef(exe_path, len));
+ StringRef aPath("/proc/self/exe");
+ if (sys::fs::exists(aPath)) {
+ // /proc is not always mounted under Linux (chroot for example).
+ ssize_t len = readlink(aPath.str().c_str(), exe_path, sizeof(exe_path));
+ if (len >= 0)
+ return Path(StringRef(exe_path, len));
+ } else {
+ // Fall back to the classical detection.
+ if (getprogpath(exe_path, argv0) != NULL)
+ return Path(exe_path);
+ }
#elif defined(HAVE_DLFCN_H)
// Use dladdr to get executable path if available.
Dl_info DLInfo;
Modified: llvm/branches/R600/lib/Support/Unix/Signals.inc
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Support/Unix/Signals.inc?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Support/Unix/Signals.inc (original)
+++ llvm/branches/R600/lib/Support/Unix/Signals.inc Tue Oct 2 09:15:33 2012
@@ -243,7 +243,7 @@
// On glibc systems we have the 'backtrace' function, which works nicely, but
// doesn't demangle symbols.
static void PrintStackTrace(void *) {
-#if defined(HAVE_BACKTRACE) && defined(ENABLE_BACKTRACE)
+#if defined(HAVE_BACKTRACE) && defined(ENABLE_BACKTRACES)
static void* StackTrace[256];
// Use backtrace() to output a backtrace on Linux systems with glibc.
int depth = backtrace(StackTrace,
Modified: llvm/branches/R600/lib/Support/YAMLParser.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Support/YAMLParser.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Support/YAMLParser.cpp (original)
+++ llvm/branches/R600/lib/Support/YAMLParser.cpp Tue Oct 2 09:15:33 2012
@@ -903,6 +903,7 @@
void Scanner::skip(uint32_t Distance) {
Current += Distance;
Column += Distance;
+ assert(Current <= End && "Skipped past the end");
}
bool Scanner::isBlankOrBreak(StringRef::iterator Position) {
@@ -1239,6 +1240,12 @@
}
}
}
+
+ if (Current == End) {
+ setError("Expected quote at end of scalar", Current);
+ return false;
+ }
+
skip(1); // Skip ending quote.
Token T;
T.Kind = Token::TK_Scalar;
Modified: llvm/branches/R600/lib/Target/ARM/ARM.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/ARM/ARM.h?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/ARM/ARM.h (original)
+++ llvm/branches/R600/lib/Target/ARM/ARM.h Tue Oct 2 09:15:33 2012
@@ -37,6 +37,7 @@
FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false);
FunctionPass *createARMExpandPseudoPass();
+FunctionPass *createARMGlobalBaseRegPass();
FunctionPass *createARMGlobalMergePass(const TargetLowering* tli);
FunctionPass *createARMConstantIslandPass();
FunctionPass *createMLxExpansionPass();
Modified: llvm/branches/R600/lib/Target/ARM/ARM.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/ARM/ARM.td?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/ARM/ARM.td (original)
+++ llvm/branches/R600/lib/Target/ARM/ARM.td Tue Oct 2 09:15:33 2012
@@ -32,9 +32,6 @@
def FeatureVFP3 : SubtargetFeature<"vfp3", "HasVFPv3", "true",
"Enable VFP3 instructions",
[FeatureVFP2]>;
-def FeatureVFP4 : SubtargetFeature<"vfp4", "HasVFPv4", "true",
- "Enable VFP4 instructions",
- [FeatureVFP3]>;
def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true",
"Enable NEON instructions",
[FeatureVFP3]>;
@@ -44,10 +41,16 @@
"Does not support ARM mode execution">;
def FeatureFP16 : SubtargetFeature<"fp16", "HasFP16", "true",
"Enable half-precision floating point">;
+def FeatureVFP4 : SubtargetFeature<"vfp4", "HasVFPv4", "true",
+ "Enable VFP4 instructions",
+ [FeatureVFP3, FeatureFP16]>;
def FeatureD16 : SubtargetFeature<"d16", "HasD16", "true",
"Restrict VFP3 to 16 double registers">;
def FeatureHWDiv : SubtargetFeature<"hwdiv", "HasHardwareDivide", "true",
"Enable divide instructions">;
+def FeatureHWDivARM : SubtargetFeature<"hwdiv-arm",
+ "HasHardwareDivideInARM", "true",
+ "Enable divide instructions in ARM mode">;
def FeatureT2XtPk : SubtargetFeature<"t2xtpk", "HasT2ExtractPack", "true",
"Enable Thumb2 extract and pack instructions">;
def FeatureDB : SubtargetFeature<"db", "HasDataBarrier", "true",
@@ -139,6 +142,13 @@
[FeatureVMLxForwarding,
FeatureT2XtPk, FeatureFP16,
FeatureAvoidPartialCPSR]>;
+def ProcSwift : SubtargetFeature<"swift", "ARMProcFamily", "Swift",
+ "Swift ARM processors",
+ [FeatureNEONForFP, FeatureT2XtPk,
+ FeatureVFP4, FeatureMP, FeatureHWDiv,
+ FeatureHWDivARM, FeatureAvoidPartialCPSR,
+ FeatureHasSlowFPVMLx]>;
+
// FIXME: It has not been determined if A15 has these features.
def ProcA15 : SubtargetFeature<"a15", "ARMProcFamily", "CortexA15",
"Cortex-A15 ARM processors",
@@ -236,6 +246,12 @@
FeatureT2XtPk, FeatureVFP4,
FeatureVFPOnlySP, FeatureMClass]>;
+// Swift uArch Processors.
+def : ProcessorModel<"swift", SwiftModel,
+ [ProcSwift, HasV7Ops, FeatureNEON,
+ FeatureDB, FeatureDSPThumb2,
+ FeatureHasRAS]>;
+
//===----------------------------------------------------------------------===//
// Register File Description
//===----------------------------------------------------------------------===//
Modified: llvm/branches/R600/lib/Target/ARM/ARMBaseInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/ARM/ARMBaseInstrInfo.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/ARM/ARMBaseInstrInfo.cpp (original)
+++ llvm/branches/R600/lib/Target/ARM/ARMBaseInstrInfo.cpp Tue Oct 2 09:15:33 2012
@@ -49,6 +49,11 @@
WidenVMOVS("widen-vmovs", cl::Hidden, cl::init(true),
cl::desc("Widen ARM vmovs to vmovd when possible"));
+static cl::opt<unsigned>
+SwiftPartialUpdateClearance("swift-partial-update-clearance",
+ cl::Hidden, cl::init(12),
+ cl::desc("Clearance before partial register updates"));
+
/// ARM_MLxEntry - Record information about MLA / MLS instructions.
struct ARM_MLxEntry {
uint16_t MLxOpc; // MLA / MLS opcode
@@ -1389,7 +1394,6 @@
case ARM::VLDRD:
case ARM::VLDRS:
case ARM::t2LDRi8:
- case ARM::t2LDRDi8:
case ARM::t2LDRSHi8:
case ARM::t2LDRi12:
case ARM::t2LDRSHi12:
@@ -1528,6 +1532,14 @@
return (TCycles + FCycles + TExtra + FExtra) <= UnpredCost;
}
+bool
+ARMBaseInstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
+ MachineBasicBlock &FMBB) const {
+ // Reduce false anti-dependencies to let Swift's out-of-order execution
+ // engine do its thing.
+ return Subtarget.isSwift();
+}
+
/// getInstrPredicate - If instruction is predicated, returns its predicate
/// condition, otherwise returns AL. It also returns the condition code
/// register by reference.
@@ -2342,6 +2354,229 @@
return true;
}
+static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData,
+ const MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ default: {
+ const MCInstrDesc &Desc = MI->getDesc();
+ int UOps = ItinData->getNumMicroOps(Desc.getSchedClass());
+ assert(UOps >= 0 && "bad # UOps");
+ return UOps;
+ }
+
+ case ARM::LDRrs:
+ case ARM::LDRBrs:
+ case ARM::STRrs:
+ case ARM::STRBrs: {
+ unsigned ShOpVal = MI->getOperand(3).getImm();
+ bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
+ unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
+ if (!isSub &&
+ (ShImm == 0 ||
+ ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
+ ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
+ return 1;
+ return 2;
+ }
+
+ case ARM::LDRH:
+ case ARM::STRH: {
+ if (!MI->getOperand(2).getReg())
+ return 1;
+
+ unsigned ShOpVal = MI->getOperand(3).getImm();
+ bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
+ unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
+ if (!isSub &&
+ (ShImm == 0 ||
+ ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
+ ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
+ return 1;
+ return 2;
+ }
+
+ case ARM::LDRSB:
+ case ARM::LDRSH:
+ return (ARM_AM::getAM3Op(MI->getOperand(3).getImm()) == ARM_AM::sub) ? 3:2;
+
+ case ARM::LDRSB_POST:
+ case ARM::LDRSH_POST: {
+ unsigned Rt = MI->getOperand(0).getReg();
+ unsigned Rm = MI->getOperand(3).getReg();
+ return (Rt == Rm) ? 4 : 3;
+ }
+
+ case ARM::LDR_PRE_REG:
+ case ARM::LDRB_PRE_REG: {
+ unsigned Rt = MI->getOperand(0).getReg();
+ unsigned Rm = MI->getOperand(3).getReg();
+ if (Rt == Rm)
+ return 3;
+ unsigned ShOpVal = MI->getOperand(4).getImm();
+ bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
+ unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
+ if (!isSub &&
+ (ShImm == 0 ||
+ ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
+ ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
+ return 2;
+ return 3;
+ }
+
+ case ARM::STR_PRE_REG:
+ case ARM::STRB_PRE_REG: {
+ unsigned ShOpVal = MI->getOperand(4).getImm();
+ bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
+ unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
+ if (!isSub &&
+ (ShImm == 0 ||
+ ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
+ ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
+ return 2;
+ return 3;
+ }
+
+ case ARM::LDRH_PRE:
+ case ARM::STRH_PRE: {
+ unsigned Rt = MI->getOperand(0).getReg();
+ unsigned Rm = MI->getOperand(3).getReg();
+ if (!Rm)
+ return 2;
+ if (Rt == Rm)
+ return 3;
+ return (ARM_AM::getAM3Op(MI->getOperand(4).getImm()) == ARM_AM::sub)
+ ? 3 : 2;
+ }
+
+ case ARM::LDR_POST_REG:
+ case ARM::LDRB_POST_REG:
+ case ARM::LDRH_POST: {
+ unsigned Rt = MI->getOperand(0).getReg();
+ unsigned Rm = MI->getOperand(3).getReg();
+ return (Rt == Rm) ? 3 : 2;
+ }
+
+ case ARM::LDR_PRE_IMM:
+ case ARM::LDRB_PRE_IMM:
+ case ARM::LDR_POST_IMM:
+ case ARM::LDRB_POST_IMM:
+ case ARM::STRB_POST_IMM:
+ case ARM::STRB_POST_REG:
+ case ARM::STRB_PRE_IMM:
+ case ARM::STRH_POST:
+ case ARM::STR_POST_IMM:
+ case ARM::STR_POST_REG:
+ case ARM::STR_PRE_IMM:
+ return 2;
+
+ case ARM::LDRSB_PRE:
+ case ARM::LDRSH_PRE: {
+ unsigned Rm = MI->getOperand(3).getReg();
+ if (Rm == 0)
+ return 3;
+ unsigned Rt = MI->getOperand(0).getReg();
+ if (Rt == Rm)
+ return 4;
+ unsigned ShOpVal = MI->getOperand(4).getImm();
+ bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
+ unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
+ if (!isSub &&
+ (ShImm == 0 ||
+ ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
+ ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
+ return 3;
+ return 4;
+ }
+
+ case ARM::LDRD: {
+ unsigned Rt = MI->getOperand(0).getReg();
+ unsigned Rn = MI->getOperand(2).getReg();
+ unsigned Rm = MI->getOperand(3).getReg();
+ if (Rm)
+ return (ARM_AM::getAM3Op(MI->getOperand(4).getImm()) == ARM_AM::sub) ?4:3;
+ return (Rt == Rn) ? 3 : 2;
+ }
+
+ case ARM::STRD: {
+ unsigned Rm = MI->getOperand(3).getReg();
+ if (Rm)
+ return (ARM_AM::getAM3Op(MI->getOperand(4).getImm()) == ARM_AM::sub) ?4:3;
+ return 2;
+ }
+
+ case ARM::LDRD_POST:
+ case ARM::t2LDRD_POST:
+ return 3;
+
+ case ARM::STRD_POST:
+ case ARM::t2STRD_POST:
+ return 4;
+
+ case ARM::LDRD_PRE: {
+ unsigned Rt = MI->getOperand(0).getReg();
+ unsigned Rn = MI->getOperand(3).getReg();
+ unsigned Rm = MI->getOperand(4).getReg();
+ if (Rm)
+ return (ARM_AM::getAM3Op(MI->getOperand(5).getImm()) == ARM_AM::sub) ?5:4;
+ return (Rt == Rn) ? 4 : 3;
+ }
+
+ case ARM::t2LDRD_PRE: {
+ unsigned Rt = MI->getOperand(0).getReg();
+ unsigned Rn = MI->getOperand(3).getReg();
+ return (Rt == Rn) ? 4 : 3;
+ }
+
+ case ARM::STRD_PRE: {
+ unsigned Rm = MI->getOperand(4).getReg();
+ if (Rm)
+ return (ARM_AM::getAM3Op(MI->getOperand(5).getImm()) == ARM_AM::sub) ?5:4;
+ return 3;
+ }
+
+ case ARM::t2STRD_PRE:
+ return 3;
+
+ case ARM::t2LDR_POST:
+ case ARM::t2LDRB_POST:
+ case ARM::t2LDRB_PRE:
+ case ARM::t2LDRSBi12:
+ case ARM::t2LDRSBi8:
+ case ARM::t2LDRSBpci:
+ case ARM::t2LDRSBs:
+ case ARM::t2LDRH_POST:
+ case ARM::t2LDRH_PRE:
+ case ARM::t2LDRSBT:
+ case ARM::t2LDRSB_POST:
+ case ARM::t2LDRSB_PRE:
+ case ARM::t2LDRSH_POST:
+ case ARM::t2LDRSH_PRE:
+ case ARM::t2LDRSHi12:
+ case ARM::t2LDRSHi8:
+ case ARM::t2LDRSHpci:
+ case ARM::t2LDRSHs:
+ return 2;
+
+ case ARM::t2LDRDi8: {
+ unsigned Rt = MI->getOperand(0).getReg();
+ unsigned Rn = MI->getOperand(2).getReg();
+ return (Rt == Rn) ? 3 : 2;
+ }
+
+ case ARM::t2STRB_POST:
+ case ARM::t2STRB_PRE:
+ case ARM::t2STRBs:
+ case ARM::t2STRDi8:
+ case ARM::t2STRH_POST:
+ case ARM::t2STRH_PRE:
+ case ARM::t2STRHs:
+ case ARM::t2STR_POST:
+ case ARM::t2STR_PRE:
+ case ARM::t2STRs:
+ return 2;
+ }
+}
+
// Return the number of 32-bit words loaded by LDM or stored by STM. If this
// can't be easily determined return 0 (missing MachineMemOperand).
//
@@ -2382,8 +2617,12 @@
const MCInstrDesc &Desc = MI->getDesc();
unsigned Class = Desc.getSchedClass();
int ItinUOps = ItinData->getNumMicroOps(Class);
- if (ItinUOps >= 0)
+ if (ItinUOps >= 0) {
+ if (Subtarget.isSwift() && (Desc.mayLoad() || Desc.mayStore()))
+ return getNumMicroOpsSwiftLdSt(ItinData, MI);
+
return ItinUOps;
+ }
unsigned Opc = MI->getOpcode();
switch (Opc) {
@@ -2452,7 +2691,43 @@
case ARM::t2STMIA_UPD:
case ARM::t2STMDB_UPD: {
unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands() + 1;
- if (Subtarget.isCortexA8()) {
+ if (Subtarget.isSwift()) {
+ // rdar://8402126
+ int UOps = 1 + NumRegs; // One for address computation, one for each ld / st.
+ switch (Opc) {
+ default: break;
+ case ARM::VLDMDIA_UPD:
+ case ARM::VLDMDDB_UPD:
+ case ARM::VLDMSIA_UPD:
+ case ARM::VLDMSDB_UPD:
+ case ARM::VSTMDIA_UPD:
+ case ARM::VSTMDDB_UPD:
+ case ARM::VSTMSIA_UPD:
+ case ARM::VSTMSDB_UPD:
+ case ARM::LDMIA_UPD:
+ case ARM::LDMDA_UPD:
+ case ARM::LDMDB_UPD:
+ case ARM::LDMIB_UPD:
+ case ARM::STMIA_UPD:
+ case ARM::STMDA_UPD:
+ case ARM::STMDB_UPD:
+ case ARM::STMIB_UPD:
+ case ARM::tLDMIA_UPD:
+ case ARM::tSTMIA_UPD:
+ case ARM::t2LDMIA_UPD:
+ case ARM::t2LDMDB_UPD:
+ case ARM::t2STMIA_UPD:
+ case ARM::t2STMDB_UPD:
+ ++UOps; // One for base register writeback.
+ break;
+ case ARM::LDMIA_RET:
+ case ARM::tPOP_RET:
+ case ARM::t2LDMIA_RET:
+ UOps += 2; // One for base reg wb, one for write to pc.
+ break;
+ }
+ return UOps;
+ } else if (Subtarget.isCortexA8()) {
if (NumRegs < 4)
return 2;
// 4 registers would be issued: 2, 2.
@@ -2461,7 +2736,7 @@
if (NumRegs % 2)
++A8UOps;
return A8UOps;
- } else if (Subtarget.isLikeA9()) {
+ } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
int A9UOps = (NumRegs / 2);
// If there are odd number of registers or if it's not 64-bit aligned,
// then it takes an extra AGU (Address Generation Unit) cycle.
@@ -2494,7 +2769,7 @@
DefCycle = RegNo / 2 + 1;
if (RegNo % 2)
++DefCycle;
- } else if (Subtarget.isLikeA9()) {
+ } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
DefCycle = RegNo;
bool isSLoad = false;
@@ -2538,7 +2813,7 @@
DefCycle = 1;
// Result latency is issue cycle + 2: E2.
DefCycle += 2;
- } else if (Subtarget.isLikeA9()) {
+ } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
DefCycle = (RegNo / 2);
// If there are odd number of registers or if it's not 64-bit aligned,
// then it takes an extra AGU (Address Generation Unit) cycle.
@@ -2569,7 +2844,7 @@
UseCycle = RegNo / 2 + 1;
if (RegNo % 2)
++UseCycle;
- } else if (Subtarget.isLikeA9()) {
+ } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
UseCycle = RegNo;
bool isSStore = false;
@@ -2610,7 +2885,7 @@
UseCycle = 2;
// Read in E3.
UseCycle += 2;
- } else if (Subtarget.isLikeA9()) {
+ } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
UseCycle = (RegNo / 2);
// If there are odd number of registers or if it's not 64-bit aligned,
// then it takes an extra AGU (Address Generation Unit) cycle.
@@ -2820,6 +3095,37 @@
break;
}
}
+ } else if (Subtarget.isSwift()) {
+ // FIXME: Properly handle all of the latency adjustments for address
+ // writeback.
+ switch (DefMCID->getOpcode()) {
+ default: break;
+ case ARM::LDRrs:
+ case ARM::LDRBrs: {
+ unsigned ShOpVal = DefMI->getOperand(3).getImm();
+ bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
+ unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
+ if (!isSub &&
+ (ShImm == 0 ||
+ ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
+ ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
+ Adjust -= 2;
+ else if (!isSub &&
+ ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
+ --Adjust;
+ break;
+ }
+ case ARM::t2LDRs:
+ case ARM::t2LDRBs:
+ case ARM::t2LDRHs:
+ case ARM::t2LDRSHs: {
+ // Thumb2 mode: lsl only.
+ unsigned ShAmt = DefMI->getOperand(3).getImm();
+ if (ShAmt == 0 || ShAmt == 1 || ShAmt == 2 || ShAmt == 3)
+ Adjust -= 2;
+ break;
+ }
+ }
}
if (DefAlign < 8 && Subtarget.isLikeA9()) {
@@ -2996,7 +3302,7 @@
// instructions).
if (Latency > 0 && Subtarget.isThumb2()) {
const MachineFunction *MF = DefMI->getParent()->getParent();
- if (MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize))
+ if (MF->getFunction()->getFnAttributes().hasOptimizeForSizeAttr())
--Latency;
}
return Latency;
@@ -3046,7 +3352,7 @@
if (!UseNode->isMachineOpcode()) {
int Latency = ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx);
- if (Subtarget.isLikeA9())
+ if (Subtarget.isLikeA9() || Subtarget.isSwift())
return Latency <= 2 ? 1 : Latency - 1;
else
return Latency <= 3 ? 1 : Latency - 2;
@@ -3090,6 +3396,33 @@
break;
}
}
+ } else if (DefIdx == 0 && Latency > 2 && Subtarget.isSwift()) {
+ // FIXME: Properly handle all of the latency adjustments for address
+ // writeback.
+ switch (DefMCID.getOpcode()) {
+ default: break;
+ case ARM::LDRrs:
+ case ARM::LDRBrs: {
+ unsigned ShOpVal =
+ cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
+ unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
+ if (ShImm == 0 ||
+ ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
+ ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
+ Latency -= 2;
+ else if (ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
+ --Latency;
+ break;
+ }
+ case ARM::t2LDRs:
+ case ARM::t2LDRBs:
+ case ARM::t2LDRHs:
+ case ARM::t2LDRSHs: {
+ // Thumb2 mode: lsl 0-3 only.
+ Latency -= 2;
+ break;
+ }
+ }
}
if (DefAlign < 8 && Subtarget.isLikeA9())
@@ -3658,6 +3991,122 @@
}
+//===----------------------------------------------------------------------===//
+// Partial register updates
+//===----------------------------------------------------------------------===//
+//
+// Swift renames NEON registers with 64-bit granularity. That means any
+// instruction writing an S-reg implicitly reads the containing D-reg. The
+// problem is mostly avoided by translating f32 operations to v2f32 operations
+// on D-registers, but f32 loads are still a problem.
+//
+// These instructions can load an f32 into a NEON register:
+//
+// VLDRS - Only writes S, partial D update.
+// VLD1LNd32 - Writes all D-regs, explicit partial D update, 2 uops.
+// VLD1DUPd32 - Writes all D-regs, no partial reg update, 2 uops.
+//
+// FCONSTD can be used as a dependency-breaking instruction.
+
+
+unsigned ARMBaseInstrInfo::
+getPartialRegUpdateClearance(const MachineInstr *MI,
+ unsigned OpNum,
+ const TargetRegisterInfo *TRI) const {
+ // Only Swift has partial register update problems.
+ if (!SwiftPartialUpdateClearance || !Subtarget.isSwift())
+ return 0;
+
+ assert(TRI && "Need TRI instance");
+
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ if (MO.readsReg())
+ return 0;
+ unsigned Reg = MO.getReg();
+ int UseOp = -1;
+
+ switch(MI->getOpcode()) {
+ // Normal instructions writing only an S-register.
+ case ARM::VLDRS:
+ case ARM::FCONSTS:
+ case ARM::VMOVSR:
+ // rdar://problem/8791586
+ case ARM::VMOVv8i8:
+ case ARM::VMOVv4i16:
+ case ARM::VMOVv2i32:
+ case ARM::VMOVv2f32:
+ case ARM::VMOVv1i64:
+ UseOp = MI->findRegisterUseOperandIdx(Reg, false, TRI);
+ break;
+
+ // Explicitly reads the dependency.
+ case ARM::VLD1LNd32:
+ UseOp = 1;
+ break;
+ default:
+ return 0;
+ }
+
+ // If this instruction actually reads a value from Reg, there is no unwanted
+ // dependency.
+ if (UseOp != -1 && MI->getOperand(UseOp).readsReg())
+ return 0;
+
+ // We must be able to clobber the whole D-reg.
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ // Virtual register must be a foo:ssub_0<def,undef> operand.
+ if (!MO.getSubReg() || MI->readsVirtualRegister(Reg))
+ return 0;
+ } else if (ARM::SPRRegClass.contains(Reg)) {
+ // Physical register: MI must define the full D-reg.
+ unsigned DReg = TRI->getMatchingSuperReg(Reg, ARM::ssub_0,
+ &ARM::DPRRegClass);
+ if (!DReg || !MI->definesRegister(DReg, TRI))
+ return 0;
+ }
+
+ // MI has an unwanted D-register dependency.
+ // Avoid defs in the previous N instructrions.
+ return SwiftPartialUpdateClearance;
+}
+
+// Break a partial register dependency after getPartialRegUpdateClearance
+// returned non-zero.
+void ARMBaseInstrInfo::
+breakPartialRegDependency(MachineBasicBlock::iterator MI,
+ unsigned OpNum,
+ const TargetRegisterInfo *TRI) const {
+ assert(MI && OpNum < MI->getDesc().getNumDefs() && "OpNum is not a def");
+ assert(TRI && "Need TRI instance");
+
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ unsigned Reg = MO.getReg();
+ assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ "Can't break virtual register dependencies.");
+ unsigned DReg = Reg;
+
+ // If MI defines an S-reg, find the corresponding D super-register.
+ if (ARM::SPRRegClass.contains(Reg)) {
+ DReg = ARM::D0 + (Reg - ARM::S0) / 2;
+ assert(TRI->isSuperRegister(Reg, DReg) && "Register enums broken");
+ }
+
+ assert(ARM::DPRRegClass.contains(DReg) && "Can only break D-reg deps");
+ assert(MI->definesRegister(DReg, TRI) && "MI doesn't clobber full D-reg");
+
+ // FIXME: In some cases, VLDRS can be changed to a VLD1DUPd32 which defines
+ // the full D-register by loading the same value to both lanes. The
+ // instruction is micro-coded with 2 uops, so don't do this until we can
+ // properly schedule micro-coded instuctions. The dispatcher stalls cause
+ // too big regressions.
+
+ // Insert the dependency-breaking FCONSTD before MI.
+ // 96 is the encoding of 0.5, but the actual value doesn't matter here.
+ AddDefaultPred(BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+ get(ARM::FCONSTD), DReg).addImm(96));
+ MI->addRegisterKilled(DReg, TRI, true);
+}
+
bool ARMBaseInstrInfo::hasNOP() const {
return (Subtarget.getFeatureBits() & ARM::HasV6T2Ops) != 0;
}
Modified: llvm/branches/R600/lib/Target/ARM/ARMBaseInstrInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/ARM/ARMBaseInstrInfo.h?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/ARM/ARMBaseInstrInfo.h (original)
+++ llvm/branches/R600/lib/Target/ARM/ARMBaseInstrInfo.h Tue Oct 2 09:15:33 2012
@@ -182,10 +182,13 @@
virtual bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
unsigned NumCycles,
const BranchProbability
- &Probability) const {
+ &Probability) const {
return NumCycles == 1;
}
+ virtual bool isProfitableToUnpredicate(MachineBasicBlock &TMBB,
+ MachineBasicBlock &FMBB) const;
+
/// analyzeCompare - For a comparison instruction, return the source registers
/// in SrcReg and SrcReg2 if having two register operands, and the value it
/// compares against in CmpValue. Return true if the comparison instruction
@@ -235,6 +238,10 @@
getExecutionDomain(const MachineInstr *MI) const;
void setExecutionDomain(MachineInstr *MI, unsigned Domain) const;
+ unsigned getPartialRegUpdateClearance(const MachineInstr*, unsigned,
+ const TargetRegisterInfo*) const;
+ void breakPartialRegDependency(MachineBasicBlock::iterator, unsigned,
+ const TargetRegisterInfo *TRI) const;
/// Get the number of addresses by LDM or VLDM or zero for unknown.
unsigned getNumLDMAddresses(const MachineInstr *MI) const;
Modified: llvm/branches/R600/lib/Target/ARM/ARMBaseRegisterInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/ARM/ARMBaseRegisterInfo.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/ARM/ARMBaseRegisterInfo.cpp (original)
+++ llvm/branches/R600/lib/Target/ARM/ARMBaseRegisterInfo.cpp Tue Oct 2 09:15:33 2012
@@ -562,7 +562,7 @@
const Function *F = MF.getFunction();
unsigned StackAlign = MF.getTarget().getFrameLowering()->getStackAlignment();
bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) ||
- F->hasFnAttr(Attribute::StackAlignment));
+ F->getFnAttributes().hasStackAlignmentAttr());
return requiresRealignment && canRealignStack(MF);
}
Modified: llvm/branches/R600/lib/Target/ARM/ARMFastISel.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/ARM/ARMFastISel.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/ARM/ARMFastISel.cpp (original)
+++ llvm/branches/R600/lib/Target/ARM/ARMFastISel.cpp Tue Oct 2 09:15:33 2012
@@ -194,6 +194,7 @@
unsigned ARMMoveToFPReg(EVT VT, unsigned SrcReg);
unsigned ARMMoveToIntReg(EVT VT, unsigned SrcReg);
unsigned ARMSelectCallOp(bool UseReg);
+ unsigned ARMLowerPICELF(const GlobalValue *GV, unsigned Align, EVT VT);
// Call handling routines.
private:
@@ -648,6 +649,9 @@
Align = TD.getTypeAllocSize(GV->getType());
}
+ if (Subtarget->isTargetELF() && RelocM == Reloc::PIC_)
+ return ARMLowerPICELF(GV, Align, VT);
+
// Grab index.
unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 :
(Subtarget->isThumb() ? 4 : 8);
@@ -2796,6 +2800,47 @@
return true;
}
+unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV,
+ unsigned Align, EVT VT) {
+ bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility();
+ ARMConstantPoolConstant *CPV =
+ ARMConstantPoolConstant::Create(GV, UseGOTOFF ? ARMCP::GOTOFF : ARMCP::GOT);
+ unsigned Idx = MCP.getConstantPoolIndex(CPV, Align);
+
+ unsigned Opc;
+ unsigned DestReg1 = createResultReg(TLI.getRegClassFor(VT));
+ // Load value.
+ if (isThumb2) {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::t2LDRpci), DestReg1)
+ .addConstantPoolIndex(Idx));
+ Opc = UseGOTOFF ? ARM::t2ADDrr : ARM::t2LDRs;
+ } else {
+ // The extra immediate is for addrmode2.
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
+ DL, TII.get(ARM::LDRcp), DestReg1)
+ .addConstantPoolIndex(Idx).addImm(0));
+ Opc = UseGOTOFF ? ARM::ADDrr : ARM::LDRrs;
+ }
+
+ unsigned GlobalBaseReg = AFI->getGlobalBaseReg();
+ if (GlobalBaseReg == 0) {
+ GlobalBaseReg = MRI.createVirtualRegister(TLI.getRegClassFor(VT));
+ AFI->setGlobalBaseReg(GlobalBaseReg);
+ }
+
+ unsigned DestReg2 = createResultReg(TLI.getRegClassFor(VT));
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
+ DL, TII.get(Opc), DestReg2)
+ .addReg(DestReg1)
+ .addReg(GlobalBaseReg);
+ if (!UseGOTOFF)
+ MIB.addImm(0);
+ AddOptionalDefs(MIB);
+
+ return DestReg2;
+}
+
namespace llvm {
FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo) {
Modified: llvm/branches/R600/lib/Target/ARM/ARMFrameLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/ARM/ARMFrameLowering.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/ARM/ARMFrameLowering.cpp (original)
+++ llvm/branches/R600/lib/Target/ARM/ARMFrameLowering.cpp Tue Oct 2 09:15:33 2012
@@ -1151,7 +1151,7 @@
return;
// Naked functions don't spill callee-saved registers.
- if (MF.getFunction()->hasFnAttr(Attribute::Naked))
+ if (MF.getFunction()->getFnAttributes().hasNakedAttr())
return;
// We are planning to use NEON instructions vst1 / vld1.
Modified: llvm/branches/R600/lib/Target/ARM/ARMISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/ARM/ARMISelDAGToDAG.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/ARM/ARMISelDAGToDAG.cpp (original)
+++ llvm/branches/R600/lib/Target/ARM/ARMISelDAGToDAG.cpp Tue Oct 2 09:15:33 2012
@@ -335,7 +335,9 @@
if (!CheckVMLxHazard)
return true;
- if (!Subtarget->isCortexA8() && !Subtarget->isLikeA9())
+
+ if (!Subtarget->isCortexA8() && !Subtarget->isLikeA9() &&
+ !Subtarget->isSwift())
return true;
if (!N->hasOneUse())
@@ -373,12 +375,13 @@
bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
ARM_AM::ShiftOpc ShOpcVal,
unsigned ShAmt) {
- if (!Subtarget->isLikeA9())
+ if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
return true;
if (Shift.hasOneUse())
return true;
// R << 2 is free.
- return ShOpcVal == ARM_AM::lsl && ShAmt == 2;
+ return ShOpcVal == ARM_AM::lsl &&
+ (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
}
bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
@@ -485,7 +488,7 @@
bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
SDValue &Opc) {
if (N.getOpcode() == ISD::MUL &&
- (!Subtarget->isLikeA9() || N.hasOneUse())) {
+ ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
// X * [3,5,9] -> X + X * [2,4,8] etc.
int RHSC = (int)RHS->getZExtValue();
@@ -549,7 +552,8 @@
// Try matching (R shl C) + (R).
if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
- !(Subtarget->isLikeA9() || N.getOperand(0).hasOneUse())) {
+ !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
+ N.getOperand(0).hasOneUse())) {
ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
if (ShOpcVal != ARM_AM::no_shift) {
// Check to see if the RHS of the shift is a constant, if not, we can't
@@ -583,7 +587,7 @@
SDValue &Offset,
SDValue &Opc) {
if (N.getOpcode() == ISD::MUL &&
- (!Subtarget->isLikeA9() || N.hasOneUse())) {
+ (!(Subtarget->isLikeA9() || Subtarget->isSwift()) || N.hasOneUse())) {
if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
// X * [3,5,9] -> X + X * [2,4,8] etc.
int RHSC = (int)RHS->getZExtValue();
@@ -649,7 +653,7 @@
}
}
- if (Subtarget->isLikeA9() && !N.hasOneUse()) {
+ if ((Subtarget->isLikeA9() || Subtarget->isSwift()) && !N.hasOneUse()) {
// Compute R +/- (R << N) and reuse it.
Base = N;
Offset = CurDAG->getRegister(0, MVT::i32);
@@ -687,7 +691,8 @@
// Try matching (R shl C) + (R).
if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
- !(Subtarget->isLikeA9() || N.getOperand(0).hasOneUse())) {
+ !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
+ N.getOperand(0).hasOneUse())) {
ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
if (ShOpcVal != ARM_AM::no_shift) {
// Check to see if the RHS of the shift is a constant, if not, we can't
Modified: llvm/branches/R600/lib/Target/ARM/ARMISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/ARM/ARMISelLowering.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/ARM/ARMISelLowering.cpp (original)
+++ llvm/branches/R600/lib/Target/ARM/ARMISelLowering.cpp Tue Oct 2 09:15:33 2012
@@ -635,9 +635,9 @@
if (!Subtarget->hasV6Ops())
setOperationAction(ISD::BSWAP, MVT::i32, Expand);
- // These are expanded into libcalls.
- if (!Subtarget->hasDivide() || !Subtarget->isThumb2()) {
- // v7M has a hardware divider
+ if (!(Subtarget->hasDivide() && Subtarget->isThumb2()) &&
+ !(Subtarget->hasDivideInARMMode() && !Subtarget->isThumb())) {
+ // These are expanded into libcalls if the cpu doesn't have HW divider.
setOperationAction(ISD::SDIV, MVT::i32, Expand);
setOperationAction(ISD::UDIV, MVT::i32, Expand);
}
@@ -5594,7 +5594,7 @@
// ldrex dest, ptr
// (sign extend dest, if required)
// cmp dest, incr
- // cmov.cond scratch2, dest, incr
+ // cmov.cond scratch2, incr, dest
// strex scratch, scratch2, ptr
// cmp scratch, #0
// bne- loopMBB
@@ -5617,7 +5617,7 @@
AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
.addReg(oldval).addReg(incr));
BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr), scratch2)
- .addReg(oldval).addReg(incr).addImm(Cond).addReg(ARM::CPSR);
+ .addReg(incr).addReg(oldval).addImm(Cond).addReg(ARM::CPSR);
MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2).addReg(ptr);
if (strOpc == ARM::t2STREX)
@@ -6326,7 +6326,7 @@
UnitSize = 2;
} else {
// Check whether we can use NEON instructions.
- if (!MF->getFunction()->hasFnAttr(Attribute::NoImplicitFloat) &&
+ if (!MF->getFunction()->getFnAttributes().hasNoImplicitFloatAttr() &&
Subtarget->hasNEON()) {
if ((Align % 16 == 0) && SizeVal >= 16) {
ldrOpc = ARM::VLD1q32wb_fixed;
@@ -9060,7 +9060,7 @@
// See if we can use NEON instructions for this...
if (IsZeroVal &&
- !F->hasFnAttr(Attribute::NoImplicitFloat) &&
+ !F->getFnAttributes().hasNoImplicitFloatAttr() &&
Subtarget->hasNEON()) {
if (memOpAlign(SrcAlign, DstAlign, 16) && Size >= 16) {
return MVT::v4i32;
Modified: llvm/branches/R600/lib/Target/ARM/ARMInstrFormats.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/ARM/ARMInstrFormats.td?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/ARM/ARMInstrFormats.td (original)
+++ llvm/branches/R600/lib/Target/ARM/ARMInstrFormats.td Tue Oct 2 09:15:33 2012
@@ -846,6 +846,23 @@
let Inst{3-0} = Rm;
}
+// Division instructions.
+class ADivA1I<bits<3> opcod, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, 4, IndexModeNone, ArithMiscFrm, itin,
+ opc, asm, "", pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{27-23} = 0b01110;
+ let Inst{22-20} = opcod;
+ let Inst{19-16} = Rd;
+ let Inst{15-12} = 0b1111;
+ let Inst{11-8} = Rm;
+ let Inst{7-4} = 0b0001;
+ let Inst{3-0} = Rn;
+}
+
// PKH instructions
def PKHLSLAsmOperand : ImmAsmOperand {
let Name = "PKHLSLImm";
@@ -893,6 +910,10 @@
class ARMV5TEPat<dag pattern, dag result> : Pat<pattern, result> {
list<Predicate> Predicates = [IsARM, HasV5TE];
}
+// ARMV5MOPat - Same as ARMV5TEPat with UseMulOps.
+class ARMV5MOPat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsARM, HasV5TE, UseMulOps];
+}
class ARMV6Pat<dag pattern, dag result> : Pat<pattern, result> {
list<Predicate> Predicates = [IsARM, HasV6];
}
Modified: llvm/branches/R600/lib/Target/ARM/ARMInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/ARM/ARMInstrInfo.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/ARM/ARMInstrInfo.cpp (original)
+++ llvm/branches/R600/lib/Target/ARM/ARMInstrInfo.cpp Tue Oct 2 09:15:33 2012
@@ -13,13 +13,17 @@
#include "ARMInstrInfo.h"
#include "ARM.h"
+#include "ARMConstantPoolValue.h"
#include "ARMMachineFunctionInfo.h"
+#include "ARMTargetMachine.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInst.h"
using namespace llvm;
@@ -84,3 +88,61 @@
return 0;
}
+
+namespace {
+ /// ARMCGBR - Create Global Base Reg pass. This initializes the PIC
+ /// global base register for ARM ELF.
+ struct ARMCGBR : public MachineFunctionPass {
+ static char ID;
+ ARMCGBR() : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF) {
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ if (AFI->getGlobalBaseReg() == 0)
+ return false;
+
+ const ARMTargetMachine *TM =
+ static_cast<const ARMTargetMachine *>(&MF.getTarget());
+ if (TM->getRelocationModel() != Reloc::PIC_)
+ return false;
+
+ LLVMContext* Context = &MF.getFunction()->getContext();
+ GlobalValue *GV = new GlobalVariable(Type::getInt32Ty(*Context), false,
+ GlobalValue::ExternalLinkage, 0,
+ "_GLOBAL_OFFSET_TABLE_");
+ unsigned Id = AFI->createPICLabelUId();
+ ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(GV, Id);
+ unsigned Align = TM->getTargetData()->getPrefTypeAlignment(GV->getType());
+ unsigned Idx = MF.getConstantPool()->getConstantPoolIndex(CPV, Align);
+
+ MachineBasicBlock &FirstMBB = MF.front();
+ MachineBasicBlock::iterator MBBI = FirstMBB.begin();
+ DebugLoc DL = FirstMBB.findDebugLoc(MBBI);
+ unsigned GlobalBaseReg = AFI->getGlobalBaseReg();
+ unsigned Opc = TM->getSubtarget<ARMSubtarget>().isThumb2() ?
+ ARM::t2LDRpci : ARM::LDRcp;
+ const TargetInstrInfo &TII = *TM->getInstrInfo();
+ MachineInstrBuilder MIB = BuildMI(FirstMBB, MBBI, DL,
+ TII.get(Opc), GlobalBaseReg)
+ .addConstantPoolIndex(Idx);
+ if (Opc == ARM::LDRcp)
+ MIB.addImm(0);
+ AddDefaultPred(MIB);
+
+ return true;
+ }
+
+ virtual const char *getPassName() const {
+ return "ARM PIC Global Base Reg Initialization";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ };
+}
+
+char ARMCGBR::ID = 0;
+FunctionPass*
+llvm::createARMGlobalBaseRegPass() { return new ARMCGBR(); }
Modified: llvm/branches/R600/lib/Target/ARM/ARMInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/ARM/ARMInstrInfo.td?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/ARM/ARMInstrInfo.td (original)
+++ llvm/branches/R600/lib/Target/ARM/ARMInstrInfo.td Tue Oct 2 09:15:33 2012
@@ -207,6 +207,8 @@
AssemblerPredicate<"FeatureFP16","half-float">;
def HasDivide : Predicate<"Subtarget->hasDivide()">,
AssemblerPredicate<"FeatureHWDiv", "divide">;
+def HasDivideInARM : Predicate<"Subtarget->hasDivideInARMMode()">,
+ AssemblerPredicate<"FeatureHWDivARM">;
def HasT2ExtractPack : Predicate<"Subtarget->hasT2ExtractPack()">,
AssemblerPredicate<"FeatureT2XtPk",
"pack/extract">;
@@ -242,6 +244,7 @@
def UseMovt : Predicate<"Subtarget->useMovt()">;
def DontUseMovt : Predicate<"!Subtarget->useMovt()">;
def UseFPVMLx : Predicate<"Subtarget->useFPVMLx()">;
+def UseMulOps : Predicate<"Subtarget->useMulOps()">;
// Prefer fused MAC for fp mul + add over fp VMLA / VMLS if they are available.
// But only select them if more precision in FP computation is allowed.
@@ -252,6 +255,20 @@
def DontUseFusedMAC : Predicate<"!Subtarget->hasVFP4() || "
"Subtarget->isTargetDarwin()">;
+// VGETLNi32 is microcoded on Swift - prefer VMOV.
+def HasFastVGETLNi32 : Predicate<"!Subtarget->isSwift()">;
+def HasSlowVGETLNi32 : Predicate<"Subtarget->isSwift()">;
+
+// VDUP.32 is microcoded on Swift - prefer VMOV.
+def HasFastVDUP32 : Predicate<"!Subtarget->isSwift()">;
+def HasSlowVDUP32 : Predicate<"Subtarget->isSwift()">;
+
+// Cortex-A9 prefers VMOVSR to VMOVDRR even when using NEON for scalar FP, as
+// this allows more effective execution domain optimization. See
+// setExecutionDomain().
+def UseVMOVSR : Predicate<"Subtarget->isCortexA9() || !Subtarget->useNEONForSinglePrecisionFP()">;
+def DontUseVMOVSR : Predicate<"!Subtarget->isCortexA9() && Subtarget->useNEONForSinglePrecisionFP()">;
+
def IsLE : Predicate<"TLI.isLittleEndian()">;
def IsBE : Predicate<"TLI.isBigEndian()">;
@@ -3446,13 +3463,13 @@
4, IIC_iMUL32,
[(set GPRnopc:$Rd, (mul GPRnopc:$Rn, GPRnopc:$Rm))],
(MUL GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p, cc_out:$s)>,
- Requires<[IsARM, NoV6]>;
+ Requires<[IsARM, NoV6, UseMulOps]>;
}
def MLA : AsMul1I32<0b0000001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
IIC_iMAC32, "mla", "\t$Rd, $Rn, $Rm, $Ra",
[(set GPR:$Rd, (add (mul GPR:$Rn, GPR:$Rm), GPR:$Ra))]>,
- Requires<[IsARM, HasV6]> {
+ Requires<[IsARM, HasV6, UseMulOps]> {
bits<4> Ra;
let Inst{15-12} = Ra;
}
@@ -3468,7 +3485,7 @@
def MLS : AMul1I<0b0000011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
IIC_iMAC32, "mls", "\t$Rd, $Rn, $Rm, $Ra",
[(set GPR:$Rd, (sub GPR:$Ra, (mul GPR:$Rn, GPR:$Rm)))]>,
- Requires<[IsARM, HasV6T2]> {
+ Requires<[IsARM, HasV6T2, UseMulOps]> {
bits<4> Rd;
bits<4> Rm;
bits<4> Rn;
@@ -3574,7 +3591,7 @@
(ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
IIC_iMAC32, "smmla", "\t$Rd, $Rn, $Rm, $Ra",
[(set GPR:$Rd, (add (mulhs GPR:$Rn, GPR:$Rm), GPR:$Ra))]>,
- Requires<[IsARM, HasV6]>;
+ Requires<[IsARM, HasV6, UseMulOps]>;
def SMMLAR : AMul2Ia <0b0111010, 0b0011, (outs GPR:$Rd),
(ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
@@ -3584,7 +3601,7 @@
def SMMLS : AMul2Ia <0b0111010, 0b1101, (outs GPR:$Rd),
(ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
IIC_iMAC32, "smmls", "\t$Rd, $Rn, $Rm, $Ra", []>,
- Requires<[IsARM, HasV6]>;
+ Requires<[IsARM, HasV6, UseMulOps]>;
def SMMLSR : AMul2Ia <0b0111010, 0b1111, (outs GPR:$Rd),
(ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
@@ -3638,7 +3655,7 @@
[(set GPRnopc:$Rd, (add GPR:$Ra,
(opnode (sext_inreg GPRnopc:$Rn, i16),
(sext_inreg GPRnopc:$Rm, i16))))]>,
- Requires<[IsARM, HasV5TE]>;
+ Requires<[IsARM, HasV5TE, UseMulOps]>;
def BT : AMulxyIa<0b0001000, 0b10, (outs GPRnopc:$Rd),
(ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
@@ -3646,7 +3663,7 @@
[(set GPRnopc:$Rd,
(add GPR:$Ra, (opnode (sext_inreg GPRnopc:$Rn, i16),
(sra GPRnopc:$Rm, (i32 16)))))]>,
- Requires<[IsARM, HasV5TE]>;
+ Requires<[IsARM, HasV5TE, UseMulOps]>;
def TB : AMulxyIa<0b0001000, 0b01, (outs GPRnopc:$Rd),
(ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
@@ -3654,7 +3671,7 @@
[(set GPRnopc:$Rd,
(add GPR:$Ra, (opnode (sra GPRnopc:$Rn, (i32 16)),
(sext_inreg GPRnopc:$Rm, i16))))]>,
- Requires<[IsARM, HasV5TE]>;
+ Requires<[IsARM, HasV5TE, UseMulOps]>;
def TT : AMulxyIa<0b0001000, 0b11, (outs GPRnopc:$Rd),
(ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
@@ -3662,7 +3679,7 @@
[(set GPRnopc:$Rd,
(add GPR:$Ra, (opnode (sra GPRnopc:$Rn, (i32 16)),
(sra GPRnopc:$Rm, (i32 16)))))]>,
- Requires<[IsARM, HasV5TE]>;
+ Requires<[IsARM, HasV5TE, UseMulOps]>;
def WB : AMulxyIa<0b0001001, 0b00, (outs GPRnopc:$Rd),
(ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
@@ -3670,7 +3687,7 @@
[(set GPRnopc:$Rd,
(add GPR:$Ra, (sra (opnode GPRnopc:$Rn,
(sext_inreg GPRnopc:$Rm, i16)), (i32 16))))]>,
- Requires<[IsARM, HasV5TE]>;
+ Requires<[IsARM, HasV5TE, UseMulOps]>;
def WT : AMulxyIa<0b0001001, 0b10, (outs GPRnopc:$Rd),
(ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
@@ -3678,7 +3695,7 @@
[(set GPRnopc:$Rd,
(add GPR:$Ra, (sra (opnode GPRnopc:$Rn,
(sra GPRnopc:$Rm, (i32 16))), (i32 16))))]>,
- Requires<[IsARM, HasV5TE]>;
+ Requires<[IsARM, HasV5TE, UseMulOps]>;
}
}
@@ -3781,6 +3798,19 @@
defm SMUS : AI_sdml<1, "smus">;
//===----------------------------------------------------------------------===//
+// Division Instructions (ARMv7-A with virtualization extension)
+//
+def SDIV : ADivA1I<0b001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iDIV,
+ "sdiv", "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (sdiv GPR:$Rn, GPR:$Rm))]>,
+ Requires<[IsARM, HasDivideInARM]>;
+
+def UDIV : ADivA1I<0b011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iDIV,
+ "udiv", "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (udiv GPR:$Rn, GPR:$Rm))]>,
+ Requires<[IsARM, HasDivideInARM]>;
+
+//===----------------------------------------------------------------------===//
// Misc. Arithmetic Instructions.
//
@@ -4831,32 +4861,32 @@
def : ARMV5TEPat<(sra (mul GPR:$a, sext_16_node:$b), (i32 16)),
(SMULWB GPR:$a, GPR:$b)>;
-def : ARMV5TEPat<(add GPR:$acc,
+def : ARMV5MOPat<(add GPR:$acc,
(mul (sra (shl GPR:$a, (i32 16)), (i32 16)),
(sra (shl GPR:$b, (i32 16)), (i32 16)))),
(SMLABB GPR:$a, GPR:$b, GPR:$acc)>;
-def : ARMV5TEPat<(add GPR:$acc,
+def : ARMV5MOPat<(add GPR:$acc,
(mul sext_16_node:$a, sext_16_node:$b)),
(SMLABB GPR:$a, GPR:$b, GPR:$acc)>;
-def : ARMV5TEPat<(add GPR:$acc,
+def : ARMV5MOPat<(add GPR:$acc,
(mul (sra (shl GPR:$a, (i32 16)), (i32 16)),
(sra GPR:$b, (i32 16)))),
(SMLABT GPR:$a, GPR:$b, GPR:$acc)>;
-def : ARMV5TEPat<(add GPR:$acc,
+def : ARMV5MOPat<(add GPR:$acc,
(mul sext_16_node:$a, (sra GPR:$b, (i32 16)))),
(SMLABT GPR:$a, GPR:$b, GPR:$acc)>;
-def : ARMV5TEPat<(add GPR:$acc,
+def : ARMV5MOPat<(add GPR:$acc,
(mul (sra GPR:$a, (i32 16)),
(sra (shl GPR:$b, (i32 16)), (i32 16)))),
(SMLATB GPR:$a, GPR:$b, GPR:$acc)>;
-def : ARMV5TEPat<(add GPR:$acc,
+def : ARMV5MOPat<(add GPR:$acc,
(mul (sra GPR:$a, (i32 16)), sext_16_node:$b)),
(SMLATB GPR:$a, GPR:$b, GPR:$acc)>;
-def : ARMV5TEPat<(add GPR:$acc,
+def : ARMV5MOPat<(add GPR:$acc,
(sra (mul GPR:$a, (sra (shl GPR:$b, (i32 16)), (i32 16))),
(i32 16))),
(SMLAWB GPR:$a, GPR:$b, GPR:$acc)>;
-def : ARMV5TEPat<(add GPR:$acc,
+def : ARMV5MOPat<(add GPR:$acc,
(sra (mul GPR:$a, sext_16_node:$b), (i32 16))),
(SMLAWB GPR:$a, GPR:$b, GPR:$acc)>;
Modified: llvm/branches/R600/lib/Target/ARM/ARMInstrNEON.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/ARM/ARMInstrNEON.td?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/ARM/ARMInstrNEON.td (original)
+++ llvm/branches/R600/lib/Target/ARM/ARMInstrNEON.td Tue Oct 2 09:15:33 2012
@@ -5043,7 +5043,8 @@
(outs GPR:$R), (ins DPR:$V, VectorIndex32:$lane),
IIC_VMOVSI, "vmov", "32", "$R, $V$lane",
[(set GPR:$R, (extractelt (v2i32 DPR:$V),
- imm:$lane))]> {
+ imm:$lane))]>,
+ Requires<[HasNEON, HasFastVGETLNi32]> {
let Inst{21} = lane{0};
}
// def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td
@@ -5066,7 +5067,16 @@
def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
(VGETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src,
(DSubReg_i32_reg imm:$lane))),
- (SubReg_i32_lane imm:$lane))>;
+ (SubReg_i32_lane imm:$lane))>,
+ Requires<[HasNEON, HasFastVGETLNi32]>;
+def : Pat<(extractelt (v2i32 DPR:$src), imm:$lane),
+ (COPY_TO_REGCLASS
+ (i32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>,
+ Requires<[HasNEON, HasSlowVGETLNi32]>;
+def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
+ (COPY_TO_REGCLASS
+ (i32 (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>,
+ Requires<[HasNEON, HasSlowVGETLNi32]>;
def : Pat<(extractelt (v2f32 DPR:$src1), imm:$src2),
(EXTRACT_SUBREG (v2f32 (COPY_TO_REGCLASS (v2f32 DPR:$src1),DPR_VFP2)),
(SSubReg_f32_reg imm:$src2))>;
@@ -5175,14 +5185,23 @@
def VDUP8d : VDUPD<0b11101100, 0b00, "8", v8i8>;
def VDUP16d : VDUPD<0b11101000, 0b01, "16", v4i16>;
-def VDUP32d : VDUPD<0b11101000, 0b00, "32", v2i32>;
+def VDUP32d : VDUPD<0b11101000, 0b00, "32", v2i32>,
+ Requires<[HasNEON, HasFastVDUP32]>;
def VDUP8q : VDUPQ<0b11101110, 0b00, "8", v16i8>;
def VDUP16q : VDUPQ<0b11101010, 0b01, "16", v8i16>;
def VDUP32q : VDUPQ<0b11101010, 0b00, "32", v4i32>;
-def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32d GPR:$R)>;
+// NEONvdup patterns for uarchs with fast VDUP.32.
+def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32d GPR:$R)>,
+ Requires<[HasNEON,HasFastVDUP32]>;
def : Pat<(v4f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32q GPR:$R)>;
+// NEONvdup patterns for uarchs with slow VDUP.32 - use VMOVDRR instead.
+def : Pat<(v2i32 (NEONvdup (i32 GPR:$R))), (VMOVDRR GPR:$R, GPR:$R)>,
+ Requires<[HasNEON,HasSlowVDUP32]>;
+def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VMOVDRR GPR:$R, GPR:$R)>,
+ Requires<[HasNEON,HasSlowVDUP32]>;
+
// VDUP : Vector Duplicate Lane (from scalar to all elements)
class VDUPLND<bits<4> op19_16, string OpcodeStr, string Dt,
@@ -5619,6 +5638,11 @@
def : N2VSPat<arm_sitof, VCVTs2fd>;
def : N2VSPat<arm_uitof, VCVTu2fd>;
+// Prefer VMOVDRR for i32 -> f32 bitcasts, it can write all DPR registers.
+def : Pat<(f32 (bitconvert GPR:$a)),
+ (EXTRACT_SUBREG (VMOVDRR GPR:$a, GPR:$a), ssub_0)>,
+ Requires<[HasNEON, DontUseVMOVSR]>;
+
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//===----------------------------------------------------------------------===//
Modified: llvm/branches/R600/lib/Target/ARM/ARMInstrThumb2.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/ARM/ARMInstrThumb2.td?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/ARM/ARMInstrThumb2.td (original)
+++ llvm/branches/R600/lib/Target/ARM/ARMInstrThumb2.td Tue Oct 2 09:15:33 2012
@@ -2396,7 +2396,8 @@
def t2MLA: T2FourReg<
(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
"mla", "\t$Rd, $Rn, $Rm, $Ra",
- [(set rGPR:$Rd, (add (mul rGPR:$Rn, rGPR:$Rm), rGPR:$Ra))]> {
+ [(set rGPR:$Rd, (add (mul rGPR:$Rn, rGPR:$Rm), rGPR:$Ra))]>,
+ Requires<[IsThumb2, UseMulOps]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b000;
@@ -2406,7 +2407,8 @@
def t2MLS: T2FourReg<
(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
"mls", "\t$Rd, $Rn, $Rm, $Ra",
- [(set rGPR:$Rd, (sub rGPR:$Ra, (mul rGPR:$Rn, rGPR:$Rm)))]> {
+ [(set rGPR:$Rd, (sub rGPR:$Ra, (mul rGPR:$Rn, rGPR:$Rm)))]>,
+ Requires<[IsThumb2, UseMulOps]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b000;
@@ -2475,7 +2477,7 @@
(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
"smmla", "\t$Rd, $Rn, $Rm, $Ra",
[(set rGPR:$Rd, (add (mulhs rGPR:$Rm, rGPR:$Rn), rGPR:$Ra))]>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b101;
@@ -2496,7 +2498,7 @@
(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
"smmls", "\t$Rd, $Rn, $Rm, $Ra",
[(set rGPR:$Rd, (sub rGPR:$Ra, (mulhs rGPR:$Rn, rGPR:$Rm)))]>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b110;
@@ -2601,7 +2603,7 @@
[(set rGPR:$Rd, (add rGPR:$Ra,
(opnode (sext_inreg rGPR:$Rn, i16),
(sext_inreg rGPR:$Rm, i16))))]>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -2614,7 +2616,7 @@
!strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm, $Ra",
[(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sext_inreg rGPR:$Rn, i16),
(sra rGPR:$Rm, (i32 16)))))]>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -2627,7 +2629,7 @@
!strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm, $Ra",
[(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sra rGPR:$Rn, (i32 16)),
(sext_inreg rGPR:$Rm, i16))))]>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -2640,7 +2642,7 @@
!strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm, $Ra",
[(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sra rGPR:$Rn, (i32 16)),
(sra rGPR:$Rm, (i32 16)))))]>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -2653,7 +2655,7 @@
!strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm, $Ra",
[(set rGPR:$Rd, (add rGPR:$Ra, (sra (opnode rGPR:$Rn,
(sext_inreg rGPR:$Rm, i16)), (i32 16))))]>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b011;
@@ -2666,7 +2668,7 @@
!strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm, $Ra",
[(set rGPR:$Rd, (add rGPR:$Ra, (sra (opnode rGPR:$Rn,
(sra rGPR:$Rm, (i32 16))), (i32 16))))]>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b011;
@@ -2760,7 +2762,7 @@
// Division Instructions.
// Signed and unsigned division on v7-M
//
-def t2SDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUi,
+def t2SDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iDIV,
"sdiv", "\t$Rd, $Rn, $Rm",
[(set rGPR:$Rd, (sdiv rGPR:$Rn, rGPR:$Rm))]>,
Requires<[HasDivide, IsThumb2]> {
@@ -2771,7 +2773,7 @@
let Inst{7-4} = 0b1111;
}
-def t2UDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUi,
+def t2UDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iDIV,
"udiv", "\t$Rd, $Rn, $Rm",
[(set rGPR:$Rd, (udiv rGPR:$Rn, rGPR:$Rm))]>,
Requires<[HasDivide, IsThumb2]> {
Modified: llvm/branches/R600/lib/Target/ARM/ARMInstrVFP.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/ARM/ARMInstrVFP.td?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/ARM/ARMInstrVFP.td (original)
+++ llvm/branches/R600/lib/Target/ARM/ARMInstrVFP.td Tue Oct 2 09:15:33 2012
@@ -523,10 +523,12 @@
let D = VFPNeonDomain;
}
+// Bitcast i32 -> f32. NEON prefers to use VMOVDRR.
def VMOVSR : AVConv4I<0b11100000, 0b1010,
(outs SPR:$Sn), (ins GPR:$Rt),
IIC_fpMOVIS, "vmov", "\t$Sn, $Rt",
- [(set SPR:$Sn, (bitconvert GPR:$Rt))]> {
+ [(set SPR:$Sn, (bitconvert GPR:$Rt))]>,
+ Requires<[HasVFP2, UseVMOVSR]> {
// Instruction operands.
bits<5> Sn;
bits<4> Rt;
Modified: llvm/branches/R600/lib/Target/ARM/ARMMachineFunctionInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/ARM/ARMMachineFunctionInfo.h?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/ARM/ARMMachineFunctionInfo.h (original)
+++ llvm/branches/R600/lib/Target/ARM/ARMMachineFunctionInfo.h Tue Oct 2 09:15:33 2012
@@ -108,6 +108,11 @@
/// pass.
DenseMap<unsigned, unsigned> CPEClones;
+ /// GlobalBaseReg - keeps track of the virtual register initialized for
+ /// use as the global base register. This is used for PIC in some PIC
+ /// relocation models.
+ unsigned GlobalBaseReg;
+
public:
ARMFunctionInfo() :
isThumb(false),
@@ -119,7 +124,7 @@
GPRCS1Frames(0), GPRCS2Frames(0), DPRCSFrames(0),
NumAlignedDPRCS2Regs(0),
JumpTableUId(0), PICLabelUId(0),
- VarArgsFrameIndex(0), HasITBlocks(false) {}
+ VarArgsFrameIndex(0), HasITBlocks(false), GlobalBaseReg(0) {}
explicit ARMFunctionInfo(MachineFunction &MF) :
isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()),
@@ -130,7 +135,7 @@
GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
GPRCS1Frames(32), GPRCS2Frames(32), DPRCSFrames(32),
JumpTableUId(0), PICLabelUId(0),
- VarArgsFrameIndex(0), HasITBlocks(false) {}
+ VarArgsFrameIndex(0), HasITBlocks(false), GlobalBaseReg(0) {}
bool isThumbFunction() const { return isThumb; }
bool isThumb1OnlyFunction() const { return isThumb && !hasThumb2; }
@@ -249,6 +254,9 @@
bool hasITBlocks() const { return HasITBlocks; }
void setHasITBlocks(bool h) { HasITBlocks = h; }
+ unsigned getGlobalBaseReg() const { return GlobalBaseReg; }
+ void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; }
+
void recordCPEClone(unsigned CPIdx, unsigned CPCloneIdx) {
if (!CPEClones.insert(std::make_pair(CPCloneIdx, CPIdx)).second)
assert(0 && "Duplicate entries!");
Modified: llvm/branches/R600/lib/Target/ARM/ARMRegisterInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/ARM/ARMRegisterInfo.td?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/ARM/ARMRegisterInfo.td (original)
+++ llvm/branches/R600/lib/Target/ARM/ARMRegisterInfo.td Tue Oct 2 09:15:33 2012
@@ -247,11 +247,16 @@
}
// Scalar single precision floating point register class..
-def SPR : RegisterClass<"ARM", [f32], 32, (sequence "S%u", 0, 31)>;
+// FIXME: Allocation order changed to s0, s2, s4, ... as a quick hack to
+// avoid partial-write dependencies on D registers (S registers are
+// renamed as portions of D registers).
+def SPR : RegisterClass<"ARM", [f32], 32, (add (decimate
+ (sequence "S%u", 0, 31), 2),
+ (sequence "S%u", 0, 31))>;
// Subset of SPR which can be used as a source of NEON scalars for 16-bit
// operations
-def SPR_8 : RegisterClass<"ARM", [f32], 32, (trunc SPR, 16)>;
+def SPR_8 : RegisterClass<"ARM", [f32], 32, (sequence "S%u", 0, 15)>;
// Scalar double precision floating point / generic 64-bit vector register
// class.
Modified: llvm/branches/R600/lib/Target/ARM/ARMSchedule.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/ARM/ARMSchedule.td?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/ARM/ARMSchedule.td (original)
+++ llvm/branches/R600/lib/Target/ARM/ARMSchedule.td Tue Oct 2 09:15:33 2012
@@ -55,6 +55,7 @@
def IIC_iMAC32 : InstrItinClass;
def IIC_iMUL64 : InstrItinClass;
def IIC_iMAC64 : InstrItinClass;
+def IIC_iDIV : InstrItinClass;
def IIC_iLoad_i : InstrItinClass;
def IIC_iLoad_r : InstrItinClass;
def IIC_iLoad_si : InstrItinClass;
@@ -261,3 +262,4 @@
include "ARMScheduleV6.td"
include "ARMScheduleA8.td"
include "ARMScheduleA9.td"
+include "ARMScheduleSwift.td"
Added: llvm/branches/R600/lib/Target/ARM/ARMScheduleSwift.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/ARM/ARMScheduleSwift.td?rev=165011&view=auto
==============================================================================
--- llvm/branches/R600/lib/Target/ARM/ARMScheduleSwift.td (added)
+++ llvm/branches/R600/lib/Target/ARM/ARMScheduleSwift.td Tue Oct 2 09:15:33 2012
@@ -0,0 +1,1085 @@
+//=- ARMScheduleSwift.td - Swift Scheduling Definitions -*- tablegen -*----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the Swift processor..
+//
+//===----------------------------------------------------------------------===//
+
+// ===---------------------------------------------------------------------===//
+// This section contains legacy support for itineraries. This is
+// required until SD and PostRA schedulers are replaced by MachineScheduler.
+
+def SW_DIS0 : FuncUnit;
+def SW_DIS1 : FuncUnit;
+def SW_DIS2 : FuncUnit;
+
+def SW_ALU0 : FuncUnit;
+def SW_ALU1 : FuncUnit;
+def SW_LS : FuncUnit;
+def SW_IDIV : FuncUnit;
+def SW_FDIV : FuncUnit;
+
+// FIXME: Need bypasses.
+// FIXME: Model the multiple stages of IIC_iMOVix2, IIC_iMOVix2addpc, and
+// IIC_iMOVix2ld better.
+// FIXME: Model the special immediate shifts that are not microcoded.
+// FIXME: Do we need to model the fact that uses of r15 in a micro-op force it
+// to issue on pipe 1?
+// FIXME: Model the pipelined behavior of CMP / TST instructions.
+// FIXME: Better model the microcode stages of multiply instructions, especially
+// conditional variants.
+// FIXME: Add preload instruction when it is documented.
+// FIXME: Model non-pipelined nature of FP div / sqrt unit.
+
+def SwiftItineraries : ProcessorItineraries<
+ [SW_DIS0, SW_DIS1, SW_DIS2, SW_ALU0, SW_ALU1, SW_LS, SW_IDIV, SW_FDIV], [], [
+ //
+ // Move instructions, unconditional
+ InstrItinData<IIC_iMOVi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1]>,
+ InstrItinData<IIC_iMOVr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1]>,
+ InstrItinData<IIC_iMOVsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1]>,
+ InstrItinData<IIC_iMOVsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1]>,
+ InstrItinData<IIC_iMOVix2 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [2]>,
+ InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [3]>,
+ InstrItinData<IIC_iMOVix2ld,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>,
+ InstrStage<1, [SW_LS]>],
+ [5]>,
+ //
+ // MVN instructions
+ InstrItinData<IIC_iMVNi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1]>,
+ InstrItinData<IIC_iMVNr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1]>,
+ InstrItinData<IIC_iMVNsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1]>,
+ InstrItinData<IIC_iMVNsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1]>,
+ //
+ // No operand cycles
+ InstrItinData<IIC_iALUx , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>]>,
+ //
+ // Binary Instructions that produce a result
+ InstrItinData<IIC_iALUi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1, 1]>,
+ InstrItinData<IIC_iALUr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_iALUsi, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [2, 1, 1]>,
+ InstrItinData<IIC_iALUsir,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [2, 1, 1]>,
+ InstrItinData<IIC_iALUsr, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [2, 1, 1, 1]>,
+ //
+ // Bitwise Instructions that produce a result
+ InstrItinData<IIC_iBITi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1, 1]>,
+ InstrItinData<IIC_iBITr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_iBITsi, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [2, 1, 1]>,
+ InstrItinData<IIC_iBITsr, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [2, 1, 1, 1]>,
+ //
+ // Unary Instructions that produce a result
+
+ // CLZ, RBIT, etc.
+ InstrItinData<IIC_iUNAr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1, 1]>,
+
+ // BFC, BFI, UBFX, SBFX
+ InstrItinData<IIC_iUNAsi, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [2, 1]>,
+
+ //
+ // Zero and sign extension instructions
+ InstrItinData<IIC_iEXTr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1, 1]>,
+ InstrItinData<IIC_iEXTAr, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_iEXTAsr,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1, 1, 1, 1]>,
+ //
+ // Compare instructions
+ InstrItinData<IIC_iCMPi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1]>,
+ InstrItinData<IIC_iCMPr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1, 1]>,
+ InstrItinData<IIC_iCMPsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<2, [SW_ALU0, SW_ALU1]>],
+ [1, 1]>,
+ InstrItinData<IIC_iCMPsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<2, [SW_ALU0, SW_ALU1]>],
+ [1, 1, 1]>,
+ //
+ // Test instructions
+ InstrItinData<IIC_iTSTi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1]>,
+ InstrItinData<IIC_iTSTr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1, 1]>,
+ InstrItinData<IIC_iTSTsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<2, [SW_ALU0, SW_ALU1]>],
+ [1, 1]>,
+ InstrItinData<IIC_iTSTsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<2, [SW_ALU0, SW_ALU1]>],
+ [1, 1, 1]>,
+ //
+ // Move instructions, conditional
+ // FIXME: Correctly model the extra input dep on the destination.
+ InstrItinData<IIC_iCMOVi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1]>,
+ InstrItinData<IIC_iCMOVr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1, 1]>,
+ InstrItinData<IIC_iCMOVsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1, 1]>,
+ InstrItinData<IIC_iCMOVsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [2, 1, 1]>,
+ InstrItinData<IIC_iCMOVix2, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [2]>,
+
+ // Integer multiply pipeline
+ //
+ InstrItinData<IIC_iMUL16 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [3, 1, 1]>,
+ InstrItinData<IIC_iMAC16 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [3, 1, 1, 1]>,
+ InstrItinData<IIC_iMUL32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1, 1]>,
+ InstrItinData<IIC_iMAC32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1, 1, 1]>,
+ InstrItinData<IIC_iMUL64 , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0], 1>,
+ InstrStage<1, [SW_ALU0], 3>,
+ InstrStage<1, [SW_ALU0]>],
+ [5, 5, 1, 1]>,
+ InstrItinData<IIC_iMAC64 , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0], 1>,
+ InstrStage<1, [SW_ALU0], 1>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 3>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [5, 6, 1, 1]>,
+ //
+ // Integer divide
+ InstrItinData<IIC_iDIV , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0], 0>,
+ InstrStage<14, [SW_IDIV]>],
+ [14, 1, 1]>,
+
+ // Integer load pipeline
+ // FIXME: The timings are some rough approximations
+ //
+ // Immediate offset
+ InstrItinData<IIC_iLoad_i , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [3, 1]>,
+ InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [3, 1]>,
+ InstrItinData<IIC_iLoad_d_i , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_LS], 1>,
+ InstrStage<1, [SW_LS]>],
+ [3, 4, 1]>,
+ //
+ // Register offset
+ InstrItinData<IIC_iLoad_r , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [3, 1, 1]>,
+ InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [3, 1, 1]>,
+ InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_LS], 1>,
+ InstrStage<1, [SW_LS], 3>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [3, 4, 1, 1]>,
+ //
+ // Scaled register offset
+ InstrItinData<IIC_iLoad_si , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
+ InstrStage<1, [SW_LS]>],
+ [5, 1, 1]>,
+ InstrItinData<IIC_iLoad_bh_si,[InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
+ InstrStage<1, [SW_LS]>],
+ [5, 1, 1]>,
+ //
+ // Immediate offset with update
+ InstrItinData<IIC_iLoad_iu , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS]>],
+ [3, 1, 1]>,
+ InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS]>],
+ [3, 1, 1]>,
+ //
+ // Register offset with update
+ InstrItinData<IIC_iLoad_ru , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0], 1>,
+ InstrStage<1, [SW_LS]>],
+ [3, 1, 1, 1]>,
+ InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0], 1>,
+ InstrStage<1, [SW_LS]>],
+ [3, 1, 1, 1]>,
+ InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 0>,
+ InstrStage<1, [SW_LS], 3>,
+ InstrStage<1, [SW_LS], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [3, 4, 1, 1]>,
+ //
+ // Scaled register offset with update
+ InstrItinData<IIC_iLoad_siu , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
+ InstrStage<1, [SW_LS], 3>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [5, 3, 1, 1]>,
+ InstrItinData<IIC_iLoad_bh_siu,[InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
+ InstrStage<1, [SW_LS], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [5, 3, 1, 1]>,
+ //
+ // Load multiple, def is the 5th operand.
+ // FIXME: This assumes 3 to 4 registers.
+ InstrItinData<IIC_iLoad_m , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 1, 1, 3], [], -1>, // dynamic uops
+
+ //
+ // Load multiple + update, defs are the 1st and 5th operands.
+ InstrItinData<IIC_iLoad_mu , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 0>,
+ InstrStage<1, [SW_LS], 3>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [2, 1, 1, 1, 3], [], -1>, // dynamic uops
+ //
+ // Load multiple plus branch
+ InstrItinData<IIC_iLoad_mBr, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 1, 1, 3], [], -1>, // dynamic uops
+ //
+ // Pop, def is the 3rd operand.
+ InstrItinData<IIC_iPop , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 3], [], -1>, // dynamic uops
+ //
+ // Pop + branch, def is the 3rd operand.
+ InstrItinData<IIC_iPop_Br, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 3], [], -1>, // dynamic uops
+
+ //
+ // iLoadi + iALUr for t2LDRpci_pic.
+ InstrItinData<IIC_iLoadiALU, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS], 3>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [4, 1]>,
+
+ // Integer store pipeline
+ ///
+ // Immediate offset
+ InstrItinData<IIC_iStore_i , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1]>,
+ InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1]>,
+ InstrItinData<IIC_iStore_d_i, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_LS], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1]>,
+ //
+ // Register offset
+ InstrItinData<IIC_iStore_r , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_iStore_d_r, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_LS], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 1]>,
+ //
+ // Scaled register offset
+ InstrItinData<IIC_iStore_si , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_si,[InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 1]>,
+ //
+ // Immediate offset with update
+ InstrItinData<IIC_iStore_iu , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 1]>,
+ //
+ // Register offset with update
+ InstrItinData<IIC_iStore_ru , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 1, 1]>,
+ InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 1, 1]>,
+ //
+ // Scaled register offset with update
+ InstrItinData<IIC_iStore_siu, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
+ InstrStage<1, [SW_LS], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>],
+ [3, 1, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_siu, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
+ InstrStage<1, [SW_LS], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>],
+ [3, 1, 1, 1]>,
+ //
+ // Store multiple
+ InstrItinData<IIC_iStore_m , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS], 1>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS], 1>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS]>],
+ [], [], -1>, // dynamic uops
+ //
+ // Store multiple + update
+ InstrItinData<IIC_iStore_mu, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS], 1>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS], 1>,
+ InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
+ InstrStage<1, [SW_LS]>],
+ [2], [], -1>, // dynamic uops
+
+ //
+ // Preload
+ InstrItinData<IIC_Preload, [InstrStage<1, [SW_DIS0], 0>], [1, 1]>,
+
+ // Branch
+ //
+ // no delay slots, so the latency of a branch is unimportant
+ InstrItinData<IIC_Br , [InstrStage<1, [SW_DIS0], 0>]>,
+
+ // FP Special Register to Integer Register File Move
+ InstrItinData<IIC_fpSTAT , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [1]>,
+ //
+ // Single-precision FP Unary
+ //
+ // Most floating-point moves get issued on ALU0.
+ InstrItinData<IIC_fpUNA32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2, 1]>,
+ //
+ // Double-precision FP Unary
+ InstrItinData<IIC_fpUNA64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2, 1]>,
+
+ //
+ // Single-precision FP Compare
+ InstrItinData<IIC_fpCMP32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [1, 1]>,
+ //
+ // Double-precision FP Compare
+ InstrItinData<IIC_fpCMP64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [1, 1]>,
+ //
+ // Single to Double FP Convert
+ InstrItinData<IIC_fpCVTSD , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1]>,
+ //
+ // Double to Single FP Convert
+ InstrItinData<IIC_fpCVTDS , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1]>,
+
+ //
+ // Single to Half FP Convert
+ InstrItinData<IIC_fpCVTSH , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU1], 4>,
+ InstrStage<1, [SW_ALU1]>],
+ [6, 1]>,
+ //
+ // Half to Single FP Convert
+ InstrItinData<IIC_fpCVTHS , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1]>,
+
+ //
+ // Single-Precision FP to Integer Convert
+ InstrItinData<IIC_fpCVTSI , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1]>,
+ //
+ // Double-Precision FP to Integer Convert
+ InstrItinData<IIC_fpCVTDI , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1]>,
+ //
+ // Integer to Single-Precision FP Convert
+ InstrItinData<IIC_fpCVTIS , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1]>,
+ //
+ // Integer to Double-Precision FP Convert
+ InstrItinData<IIC_fpCVTID , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1]>,
+ //
+ // Single-precision FP ALU
+ InstrItinData<IIC_fpALU32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2, 1, 1]>,
+ //
+ // Double-precision FP ALU
+ InstrItinData<IIC_fpALU64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2, 1, 1]>,
+ //
+ // Single-precision FP Multiply
+ InstrItinData<IIC_fpMUL32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1, 1]>,
+ //
+ // Double-precision FP Multiply
+ InstrItinData<IIC_fpMUL64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [6, 1, 1]>,
+ //
+ // Single-precision FP MAC
+ InstrItinData<IIC_fpMAC32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [8, 1, 1]>,
+ //
+ // Double-precision FP MAC
+ InstrItinData<IIC_fpMAC64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [12, 1, 1]>,
+ //
+ // Single-precision Fused FP MAC
+ InstrItinData<IIC_fpFMAC32, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [8, 1, 1]>,
+ //
+ // Double-precision Fused FP MAC
+ InstrItinData<IIC_fpFMAC64, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [12, 1, 1]>,
+ //
+ // Single-precision FP DIV
+ InstrItinData<IIC_fpDIV32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1], 0>,
+ InstrStage<15, [SW_FDIV]>],
+ [17, 1, 1]>,
+ //
+ // Double-precision FP DIV
+ InstrItinData<IIC_fpDIV64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1], 0>,
+ InstrStage<30, [SW_FDIV]>],
+ [32, 1, 1]>,
+ //
+ // Single-precision FP SQRT
+ InstrItinData<IIC_fpSQRT32, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1], 0>,
+ InstrStage<15, [SW_FDIV]>],
+ [17, 1]>,
+ //
+ // Double-precision FP SQRT
+ InstrItinData<IIC_fpSQRT64, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1], 0>,
+ InstrStage<30, [SW_FDIV]>],
+ [32, 1, 1]>,
+
+ //
+ // Integer to Single-precision Move
+ InstrItinData<IIC_fpMOVIS, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_LS], 4>,
+ InstrStage<1, [SW_ALU0]>],
+ [6, 1]>,
+ //
+ // Integer to Double-precision Move
+ InstrItinData<IIC_fpMOVID, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [4, 1]>,
+ //
+ // Single-precision to Integer Move
+ InstrItinData<IIC_fpMOVSI, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [3, 1]>,
+ //
+ // Double-precision to Integer Move
+ InstrItinData<IIC_fpMOVDI, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_LS], 3>,
+ InstrStage<1, [SW_LS]>],
+ [3, 4, 1]>,
+ //
+ // Single-precision FP Load
+ InstrItinData<IIC_fpLoad32, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [4, 1]>,
+ //
+ // Double-precision FP Load
+ InstrItinData<IIC_fpLoad64, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [4, 1]>,
+ //
+ // FP Load Multiple
+ // FIXME: Assumes a single Q register.
+ InstrItinData<IIC_fpLoad_m, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 1, 4], [], -1>, // dynamic uops
+ //
+ // FP Load Multiple + update
+ // FIXME: Assumes a single Q register.
+ InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_LS], 4>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [2, 1, 1, 1, 4], [], -1>, // dynamic uops
+ //
+ // Single-precision FP Store
+ InstrItinData<IIC_fpStore32,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1]>,
+ //
+ // Double-precision FP Store
+ InstrItinData<IIC_fpStore64,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1]>,
+ //
+ // FP Store Multiple
+ // FIXME: Assumes a single Q register.
+ InstrItinData<IIC_fpStore_m,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [1, 1, 1], [], -1>, // dynamic uops
+ //
+ // FP Store Multiple + update
+ // FIXME: Assumes a single Q register.
+ InstrItinData<IIC_fpStore_mu,[InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_LS], 4>,
+ InstrStage<1, [SW_ALU0, SW_ALU1]>],
+ [2, 1, 1, 1], [], -1>, // dynamic uops
+ // NEON
+ //
+ // Double-register Integer Unary
+ InstrItinData<IIC_VUNAiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1]>,
+ //
+ // Quad-register Integer Unary
+ InstrItinData<IIC_VUNAiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1]>,
+ //
+ // Double-register Integer Q-Unary
+ InstrItinData<IIC_VQUNAiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1]>,
+ //
+ // Quad-register Integer CountQ-Unary
+ InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1]>,
+ //
+ // Double-register Integer Binary
+ InstrItinData<IIC_VBINiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2, 1, 1]>,
+ //
+ // Quad-register Integer Binary
+ InstrItinData<IIC_VBINiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2, 1, 1]>,
+ //
+ // Double-register Integer Subtract
+ InstrItinData<IIC_VSUBiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2, 1, 1]>,
+ //
+ // Quad-register Integer Subtract
+ InstrItinData<IIC_VSUBiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2, 1, 1]>,
+ //
+ // Double-register Integer Shift
+ InstrItinData<IIC_VSHLiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2, 1, 1]>,
+ //
+ // Quad-register Integer Shift
+ InstrItinData<IIC_VSHLiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2, 1, 1]>,
+ //
+ // Double-register Integer Shift (4 cycle)
+ InstrItinData<IIC_VSHLi4D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1, 1]>,
+ //
+ // Quad-register Integer Shift (4 cycle)
+ InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1, 1]>,
+ //
+ // Double-register Integer Binary (4 cycle)
+ InstrItinData<IIC_VBINi4D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1, 1]>,
+ //
+ // Quad-register Integer Binary (4 cycle)
+ InstrItinData<IIC_VBINi4Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1, 1]>,
+ //
+ // Double-register Integer Subtract (4 cycle)
+ InstrItinData<IIC_VSUBi4D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1, 1]>,
+ //
+ // Quad-register Integer Subtract (4 cycle)
+ InstrItinData<IIC_VSUBi4Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1, 1]>,
+
+ //
+ // Double-register Integer Count
+ InstrItinData<IIC_VCNTiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2, 1, 1]>,
+ //
+ // Quad-register Integer Count
+ InstrItinData<IIC_VCNTiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2, 1, 1]>,
+ //
+ // Double-register Absolute Difference and Accumulate
+ InstrItinData<IIC_VABAD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1, 1, 1]>,
+ //
+ // Quad-register Absolute Difference and Accumulate
+ InstrItinData<IIC_VABAQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1, 1, 1]>,
+ //
+ // Double-register Integer Pair Add Long
+ InstrItinData<IIC_VPALiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1, 1]>,
+ //
+ // Quad-register Integer Pair Add Long
+ InstrItinData<IIC_VPALiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1, 1]>,
+
+ //
+ // Double-register Integer Multiply (.8, .16)
+ InstrItinData<IIC_VMULi16D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1, 1]>,
+ //
+ // Quad-register Integer Multiply (.8, .16)
+ InstrItinData<IIC_VMULi16Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1, 1]>,
+
+ //
+ // Double-register Integer Multiply (.32)
+ InstrItinData<IIC_VMULi32D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1, 1]>,
+ //
+ // Quad-register Integer Multiply (.32)
+ InstrItinData<IIC_VMULi32Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1, 1]>,
+ //
+ // Double-register Integer Multiply-Accumulate (.8, .16)
+ InstrItinData<IIC_VMACi16D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1, 1, 1]>,
+ //
+ // Double-register Integer Multiply-Accumulate (.32)
+ InstrItinData<IIC_VMACi32D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1, 1, 1]>,
+ //
+ // Quad-register Integer Multiply-Accumulate (.8, .16)
+ InstrItinData<IIC_VMACi16Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1, 1, 1]>,
+ //
+ // Quad-register Integer Multiply-Accumulate (.32)
+ InstrItinData<IIC_VMACi32Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1, 1, 1]>,
+
+ //
+ // Move
+ InstrItinData<IIC_VMOV, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2, 1]>,
+ //
+ // Move Immediate
+ InstrItinData<IIC_VMOVImm, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2]>,
+ //
+ // Double-register Permute Move
+ InstrItinData<IIC_VMOVD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [2, 1]>,
+ //
+ // Quad-register Permute Move
+ InstrItinData<IIC_VMOVQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [2, 1]>,
+ //
+ // Integer to Single-precision Move
+ InstrItinData<IIC_VMOVIS , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_LS], 4>,
+ InstrStage<1, [SW_ALU0]>],
+ [6, 1]>,
+ //
+ // Integer to Double-precision Move
+ InstrItinData<IIC_VMOVID , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [4, 1, 1]>,
+ //
+ // Single-precision to Integer Move
+ InstrItinData<IIC_VMOVSI , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_LS]>],
+ [3, 1]>,
+ //
+ // Double-precision to Integer Move
+ InstrItinData<IIC_VMOVDI , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_LS], 3>,
+ InstrStage<1, [SW_LS]>],
+ [3, 4, 1]>,
+ //
+ // Integer to Lane Move
+ // FIXME: I think this is correct, but it is not clear from the tuning guide.
+ InstrItinData<IIC_VMOVISL , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_LS], 4>,
+ InstrStage<1, [SW_ALU0]>],
+ [6, 1]>,
+
+ //
+ // Vector narrow move
+ InstrItinData<IIC_VMOVN, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [2, 1]>,
+ //
+ // Double-register FP Unary
+ // FIXME: VRECPE / VRSQRTE has a longer latency than VABS, which is used here,
+ // and they issue on a different pipeline.
+ InstrItinData<IIC_VUNAD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2, 1]>,
+ //
+ // Quad-register FP Unary
+ // FIXME: VRECPE / VRSQRTE has a longer latency than VABS, which is used here,
+ // and they issue on a different pipeline.
+ InstrItinData<IIC_VUNAQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2, 1]>,
+ //
+ // Double-register FP Binary
+ // FIXME: We're using this itin for many instructions.
+ InstrItinData<IIC_VBIND, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1, 1]>,
+
+ //
+ // VPADD, etc.
+ InstrItinData<IIC_VPBIND, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1, 1]>,
+ //
+ // Double-register FP VMUL
+ InstrItinData<IIC_VFMULD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1, 1]>,
+ //
+ // Quad-register FP Binary
+ InstrItinData<IIC_VBINQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1, 1]>,
+ //
+ // Quad-register FP VMUL
+ InstrItinData<IIC_VFMULQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1, 1]>,
+ //
+ // Double-register FP Multiple-Accumulate
+ InstrItinData<IIC_VMACD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [8, 1, 1]>,
+ //
+ // Quad-register FP Multiple-Accumulate
+ InstrItinData<IIC_VMACQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [8, 1, 1]>,
+ //
+ // Double-register Fused FP Multiple-Accumulate
+ InstrItinData<IIC_VFMACD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [8, 1, 1]>,
+ //
+ // Quad-register FusedF P Multiple-Accumulate
+ InstrItinData<IIC_VFMACQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [8, 1, 1]>,
+ //
+ // Double-register Reciprical Step
+ InstrItinData<IIC_VRECSD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [8, 1, 1]>,
+ //
+ // Quad-register Reciprical Step
+ InstrItinData<IIC_VRECSQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [8, 1, 1]>,
+ //
+ // Double-register Permute
+ // FIXME: The latencies are unclear from the documentation.
+ InstrItinData<IIC_VPERMD, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1]>],
+ [3, 4, 3, 4]>,
+ //
+ // Quad-register Permute
+ // FIXME: The latencies are unclear from the documentation.
+ InstrItinData<IIC_VPERMQ, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1]>],
+ [3, 4, 3, 4]>,
+ //
+ // Quad-register Permute (3 cycle issue on A9)
+ InstrItinData<IIC_VPERMQ3, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1]>],
+ [3, 4, 3, 4]>,
+
+ //
+ // Double-register VEXT
+ InstrItinData<IIC_VEXTD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [2, 1, 1]>,
+ //
+ // Quad-register VEXT
+ InstrItinData<IIC_VEXTQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [2, 1, 1]>,
+ //
+ // VTB
+ InstrItinData<IIC_VTB1, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [2, 1, 1]>,
+ InstrItinData<IIC_VTB2, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1, 3, 3]>,
+ InstrItinData<IIC_VTB3, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1]>],
+ [6, 1, 3, 5, 5]>,
+ InstrItinData<IIC_VTB4, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1]>],
+ [8, 1, 3, 5, 7, 7]>,
+ //
+ // VTBX
+ InstrItinData<IIC_VTBX1, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [2, 1, 1]>,
+ InstrItinData<IIC_VTBX2, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1, 3, 3]>,
+ InstrItinData<IIC_VTBX3, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1]>],
+ [6, 1, 3, 5, 5]>,
+ InstrItinData<IIC_VTBX4, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1]>],
+ [8, 1, 3, 5, 7, 7]>
+]>;
+
+// ===---------------------------------------------------------------------===//
+// This following definitions describe the simple machine model which
+// will replace itineraries.
+
+// Swift machine model for scheduling and other instruction cost heuristics.
+def SwiftModel : SchedMachineModel {
+ let IssueWidth = 3; // 3 micro-ops are dispatched per cycle.
+ let MinLatency = 0; // Data dependencies are allowed within dispatch groups.
+ let LoadLatency = 3;
+
+ let Itineraries = SwiftItineraries;
+}
+
+// TODO: Add Swift processor and scheduler resources.
Modified: llvm/branches/R600/lib/Target/ARM/ARMSubtarget.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/ARM/ARMSubtarget.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/ARM/ARMSubtarget.cpp (original)
+++ llvm/branches/R600/lib/Target/ARM/ARMSubtarget.cpp Tue Oct 2 09:15:33 2012
@@ -32,6 +32,10 @@
DarwinUseMOVT("arm-darwin-use-movt", cl::init(true), cl::Hidden);
static cl::opt<bool>
+UseFusedMulOps("arm-use-mulops",
+ cl::init(true), cl::Hidden);
+
+static cl::opt<bool>
StrictAlign("arm-strict-align", cl::Hidden,
cl::desc("Disallow all unaligned memory accesses"));
@@ -50,6 +54,7 @@
, HasVFPv4(false)
, HasNEON(false)
, UseNEONForSinglePrecisionFP(false)
+ , UseMulOps(UseFusedMulOps)
, SlowFPVMLx(false)
, HasVMLxForwarding(false)
, SlowFPBrcc(false)
@@ -64,6 +69,7 @@
, HasFP16(false)
, HasD16(false)
, HasHardwareDivide(false)
+ , HasHardwareDivideInARM(false)
, HasT2ExtractPack(false)
, HasDataBarrier(false)
, Pref32BitThumb(false)
Modified: llvm/branches/R600/lib/Target/ARM/ARMSubtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/ARM/ARMSubtarget.h?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/ARM/ARMSubtarget.h (original)
+++ llvm/branches/R600/lib/Target/ARM/ARMSubtarget.h Tue Oct 2 09:15:33 2012
@@ -30,7 +30,7 @@
class ARMSubtarget : public ARMGenSubtargetInfo {
protected:
enum ARMProcFamilyEnum {
- Others, CortexA8, CortexA9, CortexA15
+ Others, CortexA8, CortexA9, CortexA15, Swift
};
/// ARMProcFamily - ARM processor family: Cortex-A8, Cortex-A9, and others.
@@ -57,6 +57,10 @@
/// determine if NEON should actually be used.
bool UseNEONForSinglePrecisionFP;
+ /// UseMulOps - True if non-microcoded fused integer multiply-add and
+ /// multiply-subtract instructions should be used.
+ bool UseMulOps;
+
/// SlowFPVMLx - If the VFP2 / NEON instructions are available, indicates
/// whether the FP VML[AS] instructions are slow (if so, don't use them).
bool SlowFPVMLx;
@@ -107,6 +111,9 @@
/// HasHardwareDivide - True if subtarget supports [su]div
bool HasHardwareDivide;
+ /// HasHardwareDivideInARM - True if subtarget supports [su]div in ARM mode
+ bool HasHardwareDivideInARM;
+
/// HasT2ExtractPack - True if subtarget supports thumb2 extract/pack
/// instructions.
bool HasT2ExtractPack;
@@ -200,6 +207,7 @@
bool isCortexA8() const { return ARMProcFamily == CortexA8; }
bool isCortexA9() const { return ARMProcFamily == CortexA9; }
bool isCortexA15() const { return ARMProcFamily == CortexA15; }
+ bool isSwift() const { return ARMProcFamily == Swift; }
bool isCortexM3() const { return CPUString == "cortex-m3"; }
bool isLikeA9() const { return isCortexA9() || isCortexA15(); }
@@ -213,8 +221,10 @@
return hasNEON() && UseNEONForSinglePrecisionFP; }
bool hasDivide() const { return HasHardwareDivide; }
+ bool hasDivideInARMMode() const { return HasHardwareDivideInARM; }
bool hasT2ExtractPack() const { return HasT2ExtractPack; }
bool hasDataBarrier() const { return HasDataBarrier; }
+ bool useMulOps() const { return UseMulOps; }
bool useFPVMLx() const { return !SlowFPVMLx; }
bool hasVMLxForwarding() const { return HasVMLxForwarding; }
bool isFPBrccSlow() const { return SlowFPBrcc; }
Modified: llvm/branches/R600/lib/Target/ARM/ARMTargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/ARM/ARMTargetMachine.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/ARM/ARMTargetMachine.cpp (original)
+++ llvm/branches/R600/lib/Target/ARM/ARMTargetMachine.cpp Tue Oct 2 09:15:33 2012
@@ -143,6 +143,11 @@
bool ARMPassConfig::addInstSelector() {
addPass(createARMISelDag(getARMTargetMachine(), getOptLevel()));
+
+ const ARMSubtarget *Subtarget = &getARMSubtarget();
+ if (Subtarget->isTargetELF() && !Subtarget->isThumb1Only() &&
+ TM->Options.EnableFastISel)
+ addPass(createARMGlobalBaseRegPass());
return false;
}
Modified: llvm/branches/R600/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp (original)
+++ llvm/branches/R600/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp Tue Oct 2 09:15:33 2012
@@ -687,6 +687,15 @@
else if (TheTriple.getArchName() == "armv6" ||
TheTriple.getArchName() == "thumbv6")
return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V6);
+ else if (TheTriple.getArchName() == "armv7f" ||
+ TheTriple.getArchName() == "thumbv7f")
+ return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V7F);
+ else if (TheTriple.getArchName() == "armv7k" ||
+ TheTriple.getArchName() == "thumbv7k")
+ return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V7K);
+ else if (TheTriple.getArchName() == "armv7s" ||
+ TheTriple.getArchName() == "thumbv7s")
+ return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V7S);
return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V7);
}
Modified: llvm/branches/R600/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp (original)
+++ llvm/branches/R600/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp Tue Oct 2 09:15:33 2012
@@ -71,6 +71,14 @@
else
// Use CPU to figure out the exact features.
ARMArchFeature = "+v7";
+ } else if (Len >= Idx+2 && TT[Idx+1] == 's') {
+ if (NoCPU)
+ // v7s: FeatureNEON, FeatureDB, FeatureDSPThumb2, FeatureT2XtPk
+ // Swift
+ ARMArchFeature = "+v7,+swift,+neon,+db,+t2dsp,+t2xtpk";
+ else
+ // Use CPU to figure out the exact features.
+ ARMArchFeature = "+v7";
} else {
// v7 CPUs have lots of different feature sets. If no CPU is specified,
// then assume v7a (e.g. cortex-a8) feature set. Otherwise, return
Modified: llvm/branches/R600/lib/Target/ARM/MLxExpansionPass.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/ARM/MLxExpansionPass.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/ARM/MLxExpansionPass.cpp (original)
+++ llvm/branches/R600/lib/Target/ARM/MLxExpansionPass.cpp Tue Oct 2 09:15:33 2012
@@ -52,6 +52,7 @@
MachineRegisterInfo *MRI;
bool isLikeA9;
+ bool isSwift;
unsigned MIIdx;
MachineInstr* LastMIs[4];
SmallPtrSet<MachineInstr*, 4> IgnoreStall;
@@ -60,6 +61,7 @@
void pushStack(MachineInstr *MI);
MachineInstr *getAccDefMI(MachineInstr *MI) const;
unsigned getDefReg(MachineInstr *MI) const;
+ bool hasLoopHazard(MachineInstr *MI) const;
bool hasRAWHazard(unsigned Reg, MachineInstr *MI) const;
bool FindMLxHazard(MachineInstr *MI);
void ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI,
@@ -135,6 +137,50 @@
return Reg;
}
+/// hasLoopHazard - Check whether an MLx instruction is chained to itself across
+/// a single-MBB loop.
+bool MLxExpansion::hasLoopHazard(MachineInstr *MI) const {
+ unsigned Reg = MI->getOperand(1).getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ return false;
+
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineInstr *DefMI = MRI->getVRegDef(Reg);
+ while (true) {
+outer_continue:
+ if (DefMI->getParent() != MBB)
+ break;
+
+ if (DefMI->isPHI()) {
+ for (unsigned i = 1, e = DefMI->getNumOperands(); i < e; i += 2) {
+ if (DefMI->getOperand(i + 1).getMBB() == MBB) {
+ unsigned SrcReg = DefMI->getOperand(i).getReg();
+ if (TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+ DefMI = MRI->getVRegDef(SrcReg);
+ goto outer_continue;
+ }
+ }
+ }
+ } else if (DefMI->isCopyLike()) {
+ Reg = DefMI->getOperand(1).getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ DefMI = MRI->getVRegDef(Reg);
+ continue;
+ }
+ } else if (DefMI->isInsertSubreg()) {
+ Reg = DefMI->getOperand(2).getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ DefMI = MRI->getVRegDef(Reg);
+ continue;
+ }
+ }
+
+ break;
+ }
+
+ return DefMI == MI;
+}
+
bool MLxExpansion::hasRAWHazard(unsigned Reg, MachineInstr *MI) const {
// FIXME: Detect integer instructions properly.
const MCInstrDesc &MCID = MI->getDesc();
@@ -149,6 +195,19 @@
return false;
}
+static bool isFpMulInstruction(unsigned Opcode) {
+ switch (Opcode) {
+ case ARM::VMULS:
+ case ARM::VMULfd:
+ case ARM::VMULfq:
+ case ARM::VMULD:
+ case ARM::VMULslfd:
+ case ARM::VMULslfq:
+ return true;
+ default:
+ return false;
+ }
+}
bool MLxExpansion::FindMLxHazard(MachineInstr *MI) {
if (NumExpand >= ExpandLimit)
@@ -171,6 +230,12 @@
return true;
}
+ // On Swift, we mostly care about hazards from multiplication instructions
+ // writing the accumulator and the pipelining of loop iterations by out-of-
+ // order execution.
+ if (isSwift)
+ return isFpMulInstruction(DefMI->getOpcode()) || hasLoopHazard(MI);
+
if (IgnoreStall.count(MI))
return false;
@@ -316,7 +381,8 @@
TRI = Fn.getTarget().getRegisterInfo();
MRI = &Fn.getRegInfo();
const ARMSubtarget *STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
- isLikeA9 = STI->isLikeA9();
+ isLikeA9 = STI->isLikeA9() || STI->isSwift();
+ isSwift = STI->isSwift();
bool Modified = false;
for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
Modified: llvm/branches/R600/lib/Target/Hexagon/HexagonISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/Hexagon/HexagonISelLowering.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/Hexagon/HexagonISelLowering.cpp (original)
+++ llvm/branches/R600/lib/Target/Hexagon/HexagonISelLowering.cpp Tue Oct 2 09:15:33 2012
@@ -1350,6 +1350,8 @@
} else {
setOperationAction(ISD::BR_JT, MVT::Other, Expand);
}
+ // Increase jump tables cutover to 5, was 4.
+ setMinimumJumpTableEntries(5);
setOperationAction(ISD::BR_CC, MVT::i32, Expand);
Modified: llvm/branches/R600/lib/Target/Mips/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/Mips/CMakeLists.txt?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/Mips/CMakeLists.txt (original)
+++ llvm/branches/R600/lib/Target/Mips/CMakeLists.txt Tue Oct 2 09:15:33 2012
@@ -11,6 +11,7 @@
tablegen(LLVM MipsGenSubtargetInfo.inc -gen-subtarget)
tablegen(LLVM MipsGenEDInfo.inc -gen-enhanced-disassembly-info)
tablegen(LLVM MipsGenAsmMatcher.inc -gen-asm-matcher)
+tablegen(LLVM MipsGenMCPseudoLowering.inc -gen-pseudo-lowering)
add_public_tablegen_target(MipsCommonTableGen)
add_llvm_target(MipsCodeGen
Modified: llvm/branches/R600/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/Mips/Disassembler/MipsDisassembler.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/Mips/Disassembler/MipsDisassembler.cpp (original)
+++ llvm/branches/R600/lib/Target/Mips/Disassembler/MipsDisassembler.cpp Tue Oct 2 09:15:33 2012
@@ -108,6 +108,11 @@
uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeDSPRegsRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+
static DecodeStatus DecodeFGR64RegisterClass(MCInst &Inst,
unsigned RegNo,
uint64_t Address,
@@ -138,6 +143,11 @@
uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeACRegsRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+
static DecodeStatus DecodeBranchTarget(MCInst &Inst,
unsigned Offset,
uint64_t Address,
@@ -346,6 +356,13 @@
return MCDisassembler::Success;
}
+static DecodeStatus DecodeDSPRegsRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return DecodeCPURegsRegisterClass(Inst, RegNo, Address, Decoder);
+}
+
static DecodeStatus DecodeFGR64RegisterClass(MCInst &Inst,
unsigned RegNo,
uint64_t Address,
@@ -463,6 +480,18 @@
return MCDisassembler::Success;
}
+static DecodeStatus DecodeACRegsRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ if (RegNo >= 4)
+ return MCDisassembler::Fail;
+
+ unsigned Reg = getReg(Decoder, Mips::ACRegsRegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Reg));
+ return MCDisassembler::Success;
+}
+
static DecodeStatus DecodeBranchTarget(MCInst &Inst,
unsigned Offset,
uint64_t Address,
Modified: llvm/branches/R600/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h (original)
+++ llvm/branches/R600/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h Tue Oct 2 09:15:33 2012
@@ -122,14 +122,16 @@
{
switch (RegEnum) {
case Mips::ZERO: case Mips::ZERO_64: case Mips::F0: case Mips::D0_64:
- case Mips::D0: case Mips::FCC0:
+ case Mips::D0: case Mips::FCC0: case Mips::AC0:
return 0;
case Mips::AT: case Mips::AT_64: case Mips::F1: case Mips::D1_64:
+ case Mips::AC1:
return 1;
case Mips::V0: case Mips::V0_64: case Mips::F2: case Mips::D2_64:
- case Mips::D1:
+ case Mips::D1: case Mips::AC2:
return 2;
case Mips::V1: case Mips::V1_64: case Mips::F3: case Mips::D3_64:
+ case Mips::AC3:
return 3;
case Mips::A0: case Mips::A0_64: case Mips::F4: case Mips::D4_64:
case Mips::D2:
Modified: llvm/branches/R600/lib/Target/Mips/Makefile
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/Mips/Makefile?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/Mips/Makefile (original)
+++ llvm/branches/R600/lib/Target/Mips/Makefile Tue Oct 2 09:15:33 2012
@@ -17,7 +17,7 @@
MipsGenDAGISel.inc MipsGenCallingConv.inc \
MipsGenSubtargetInfo.inc MipsGenMCCodeEmitter.inc \
MipsGenEDInfo.inc MipsGenDisassemblerTables.inc \
- MipsGenAsmMatcher.inc
+ MipsGenMCPseudoLowering.inc MipsGenAsmMatcher.inc
DIRS = InstPrinter Disassembler AsmParser TargetInfo MCTargetDesc
Modified: llvm/branches/R600/lib/Target/Mips/Mips16InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/Mips/Mips16InstrInfo.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/Mips/Mips16InstrInfo.cpp (original)
+++ llvm/branches/R600/lib/Target/Mips/Mips16InstrInfo.cpp Tue Oct 2 09:15:33 2012
@@ -84,7 +84,15 @@
unsigned SrcReg, bool isKill, int FI,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
- assert(false && "Implement this function.");
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+ MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOStore);
+ unsigned Opc = 0;
+ if (Mips::CPU16RegsRegClass.hasSubClassEq(RC))
+ Opc = Mips::SwRxSpImmX16;
+ assert(Opc && "Register class not handled!");
+ BuildMI(MBB, I, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO);
}
void Mips16InstrInfo::
@@ -92,7 +100,16 @@
unsigned DestReg, int FI,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
- assert(false && "Implement this function.");
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+ MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOLoad);
+ unsigned Opc = 0;
+
+ if (Mips::CPU16RegsRegClass.hasSubClassEq(RC))
+ Opc = Mips::LwRxSpImmX16;
+ assert(Opc && "Register class not handled!");
+ BuildMI(MBB, I, DL, get(Opc), DestReg).addFrameIndex(FI).addImm(0)
+ .addMemOperand(MMO);
}
bool Mips16InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
Modified: llvm/branches/R600/lib/Target/Mips/Mips16InstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/Mips/Mips16InstrInfo.td?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/Mips/Mips16InstrInfo.td (original)
+++ llvm/branches/R600/lib/Target/Mips/Mips16InstrInfo.td Tue Oct 2 09:15:33 2012
@@ -29,10 +29,35 @@
//
// I8_MOV32R instruction format (used only by MOV32R instruction)
//
+
class FI8_MOV32R16_ins<string asmstr, InstrItinClass itin>:
FI8_MOV32R16<(outs CPURegs:$r32), (ins CPU16Regs:$rz),
!strconcat(asmstr, "\t$r32, $rz"), [], itin>;
+
+//
+// RR-type instruction format
+//
+
+class FRR16_ins<bits<5> f, string asmstr, InstrItinClass itin> :
+ FRR16<f, (outs CPU16Regs:$rx), (ins CPU16Regs:$ry),
+ !strconcat(asmstr, "\t$rx, $ry"), [], itin> {
+}
+
+class FRxRxRy16_ins<bits<5> f, string asmstr,
+ InstrItinClass itin> :
+ FRR16<f, (outs CPU16Regs:$rz), (ins CPU16Regs:$rx, CPU16Regs:$ry),
+ !strconcat(asmstr, "\t$rz, $ry"),
+ [], itin> {
+ let Constraints = "$rx = $rz";
+}
+
+let rx=0 in
+class FRR16_JALRC_RA_only_ins<bits<1> nd_, bits<1> l_,
+ string asmstr, InstrItinClass itin>:
+ FRR16_JALRC<nd_, l_, 1, (outs), (ins), !strconcat(asmstr, "\t $$ra"),
+ [], itin> ;
+
//
// EXT-RI instruction format
//
@@ -56,30 +81,14 @@
!strconcat(asmstr, "\t$rx, $imm"), [], itin> {
let Constraints = "$rx_ = $rx";
}
-
-
-//
-// RR-type instruction format
-//
-
-class FRR16_ins<bits<5> f, string asmstr, InstrItinClass itin> :
- FRR16<f, (outs CPU16Regs:$rx), (ins CPU16Regs:$ry),
- !strconcat(asmstr, "\t$rx, $ry"), [], itin> {
-}
-
-class FRxRxRy16_ins<bits<5> f, string asmstr,
- InstrItinClass itin> :
- FRR16<f, (outs CPU16Regs:$rz), (ins CPU16Regs:$rx, CPU16Regs:$ry),
- !strconcat(asmstr, "\t$rz, $ry"),
- [], itin> {
- let Constraints = "$rx = $rz";
+// this has an explicit sp argument that we ignore to work around a problem
+// in the compiler
+class FEXT_RI16_SP_explicit_ins<bits<5> _op, string asmstr,
+ InstrItinClass itin>:
+ FEXT_RI16<_op, (outs CPU16Regs:$rx), (ins CPUSPReg:$ry, simm16:$imm),
+ !strconcat(asmstr, "\t$rx, $imm ( $ry ); "), [], itin> {
}
-let rx=0 in
-class FRR16_JALRC_RA_only_ins<bits<1> nd_, bits<1> l_,
- string asmstr, InstrItinClass itin>:
- FRR16_JALRC<nd_, l_, 1, (outs), (ins), !strconcat(asmstr, "\t $$ra"),
- [], itin> ;
//
// EXT-RRI instruction format
@@ -122,6 +131,13 @@
bit neverHasSideEffects = 1;
}
+class MayLoad {
+ bit mayLoad = 1;
+}
+
+class MayStore {
+ bit mayStore = 1;
+}
//
// Format: ADDIU rx, immediate MIPS16e
@@ -169,28 +185,30 @@
// Purpose: Load Byte (Extended)
// To load a byte from memory as a signed value.
//
-def LbRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10011, "lb", mem16, IIAlu>;
+def LbRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10011, "lb", mem16, IILoad>, MayLoad;
//
// Format: LBU ry, offset(rx) MIPS16e
// Purpose: Load Byte Unsigned (Extended)
// To load a byte from memory as a unsigned value.
//
-def LbuRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10100, "lbu", mem16, IIAlu>;
+def LbuRxRyOffMemX16:
+ FEXT_RRI16_mem_ins<0b10100, "lbu", mem16, IILoad>, MayLoad;
//
// Format: LH ry, offset(rx) MIPS16e
// Purpose: Load Halfword signed (Extended)
// To load a halfword from memory as a signed value.
//
-def LhRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10100, "lh", mem16, IIAlu>;
+def LhRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10100, "lh", mem16, IILoad>, MayLoad;
//
// Format: LHU ry, offset(rx) MIPS16e
// Purpose: Load Halfword unsigned (Extended)
// To load a halfword from memory as an unsigned value.
//
-def LhuRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10100, "lhu", mem16, IIAlu>;
+def LhuRxRyOffMemX16:
+ FEXT_RRI16_mem_ins<0b10100, "lhu", mem16, IILoad>, MayLoad;
//
// Format: LI rx, immediate MIPS16e
@@ -204,7 +222,13 @@
// Purpose: Load Word (Extended)
// To load a word from memory as a signed value.
//
-def LwRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10011, "lw", mem16, IIAlu>;
+def LwRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10011, "lw", mem16, IILoad>, MayLoad;
+
+// Format: LW rx, offset(sp) MIPS16e
+// Purpose: Load Word (SP-Relative, Extended)
+// To load an SP-relative word from memory as a signed value.
+//
+def LwRxSpImmX16: FEXT_RI16_SP_explicit_ins<0b10110, "lw", IILoad>, MayLoad;
//
// Format: MOVE r32, rz MIPS16e
@@ -257,7 +281,7 @@
let ra=1, s=0,s0=1,s1=1 in
def RestoreRaF16:
FI8_SVRS16<0b1, (outs), (ins uimm16:$frame_size),
- "restore \t$$ra, $$s0, $$s1, $frame_size", [], IILoad > {
+ "restore \t$$ra, $$s0, $$s1, $frame_size", [], IILoad >, MayLoad {
let isCodeGenOnly = 1;
}
@@ -271,7 +295,7 @@
let ra=1, s=1,s0=1,s1=1 in
def SaveRaF16:
FI8_SVRS16<0b1, (outs), (ins uimm16:$frame_size),
- "save \t$$ra, $$s0, $$s1, $frame_size", [], IILoad > {
+ "save \t$$ra, $$s0, $$s1, $frame_size", [], IIStore >, MayStore {
let isCodeGenOnly = 1;
}
//
@@ -279,14 +303,16 @@
// Purpose: Store Byte (Extended)
// To store a byte to memory.
//
-def SbRxRyOffMemX16: FEXT_RRI16_mem2_ins<0b11000, "sb", mem16, IIAlu>;
+def SbRxRyOffMemX16:
+ FEXT_RRI16_mem2_ins<0b11000, "sb", mem16, IIStore>, MayStore;
//
// Format: SH ry, offset(rx) MIPS16e
// Purpose: Store Halfword (Extended)
// To store a halfword to memory.
//
-def ShRxRyOffMemX16: FEXT_RRI16_mem2_ins<0b11001, "sh", mem16, IIAlu>;
+def ShRxRyOffMemX16:
+ FEXT_RRI16_mem2_ins<0b11001, "sh", mem16, IIStore>, MayStore;
//
// Format: SLL rx, ry, sa MIPS16e
@@ -350,9 +376,18 @@
// Purpose: Store Word (Extended)
// To store a word to memory.
//
-def SwRxRyOffMemX16: FEXT_RRI16_mem2_ins<0b11011, "sw", mem16, IIAlu>;
+def SwRxRyOffMemX16:
+ FEXT_RRI16_mem2_ins<0b11011, "sw", mem16, IIStore>, MayStore;
//
+// Format: SW rx, offset(sp) MIPS16e
+// Purpose: Store Word rx (SP-Relative)
+// To store an SP-relative word to memory.
+//
+def SwRxSpImmX16: FEXT_RI16_SP_explicit_ins<0b11010, "sw", IIStore>, MayStore;
+
+//
+//
// Format: XOR rx, ry MIPS16e
// Purpose: Xor
// To do a bitwise logical XOR.
Modified: llvm/branches/R600/lib/Target/Mips/Mips16RegisterInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/Mips/Mips16RegisterInfo.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/Mips/Mips16RegisterInfo.cpp (original)
+++ llvm/branches/R600/lib/Target/Mips/Mips16RegisterInfo.cpp Tue Oct 2 09:15:33 2012
@@ -57,7 +57,6 @@
MachineInstr &MI = *II;
MachineFunction &MF = *MI.getParent()->getParent();
MachineFrameInfo *MFI = MF.getFrameInfo();
- MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
int MinCSFI = 0;
@@ -77,8 +76,7 @@
// getFrameRegister() returns.
unsigned FrameReg;
- if (MipsFI->isOutArgFI(FrameIndex) ||
- (FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI))
+ if (FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI)
FrameReg = Subtarget.isABI_N64() ? Mips::SP_64 : Mips::SP;
else
FrameReg = getFrameRegister(MF);
@@ -94,12 +92,8 @@
// incoming argument, callee-saved register location or local variable.
int64_t Offset;
- if (MipsFI->isOutArgFI(FrameIndex))
- Offset = SPOffset;
- else
- Offset = SPOffset + (int64_t)StackSize;
-
- Offset += MI.getOperand(OpNo + 1).getImm();
+ Offset = SPOffset + (int64_t)StackSize;
+ Offset += MI.getOperand(OpNo + 1).getImm();
DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n");
Modified: llvm/branches/R600/lib/Target/Mips/MipsAsmPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/Mips/MipsAsmPrinter.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/Mips/MipsAsmPrinter.cpp (original)
+++ llvm/branches/R600/lib/Target/Mips/MipsAsmPrinter.cpp Tue Oct 2 09:15:33 2012
@@ -50,6 +50,13 @@
return true;
}
+bool MipsAsmPrinter::lowerOperand(const MachineOperand &MO, MCOperand &MCOp) {
+ MCOp = MCInstLowering.LowerOperand(MO);
+ return MCOp.isValid();
+}
+
+#include "MipsGenMCPseudoLowering.inc"
+
void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) {
if (MI->isDebugValue()) {
SmallString<128> Str;
@@ -59,6 +66,10 @@
return;
}
+ // Do any auto-generated pseudo lowerings.
+ if (emitPseudoExpansionLowering(OutStreamer, MI))
+ return;
+
MachineBasicBlock::const_instr_iterator I = MI;
MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
Modified: llvm/branches/R600/lib/Target/Mips/MipsAsmPrinter.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/Mips/MipsAsmPrinter.h?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/Mips/MipsAsmPrinter.h (original)
+++ llvm/branches/R600/lib/Target/Mips/MipsAsmPrinter.h Tue Oct 2 09:15:33 2012
@@ -32,6 +32,14 @@
void EmitInstrWithMacroNoAT(const MachineInstr *MI);
+private:
+ // tblgen'erated function.
+ bool emitPseudoExpansionLowering(MCStreamer &OutStreamer,
+ const MachineInstr *MI);
+
+ // lowerOperand - Convert a MachineOperand into the equivalent MCOperand.
+ bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp);
+
public:
const MipsSubtarget *Subtarget;
Modified: llvm/branches/R600/lib/Target/Mips/MipsDSPInstrFormats.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/Mips/MipsDSPInstrFormats.td?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/Mips/MipsDSPInstrFormats.td (original)
+++ llvm/branches/R600/lib/Target/Mips/MipsDSPInstrFormats.td Tue Oct 2 09:15:33 2012
@@ -23,3 +23,287 @@
class DSPInst : MipsInst<(outs), (ins), "", [], NoItinerary, FrmOther> {
let Predicates = [HasDSP];
}
+
+class PseudoDSP<dag outs, dag ins, list<dag> pattern>:
+ MipsPseudo<outs, ins, "", pattern> {
+ let Predicates = [HasDSP];
+}
+
+// ADDU.QB sub-class format.
+class ADDU_QB_FMT<bits<5> op> : DSPInst {
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+
+ let Opcode = SPECIAL3_OPCODE.V;
+
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = rd;
+ let Inst{10-6} = op;
+ let Inst{5-0} = 0b010000;
+}
+
+class RADDU_W_QB_FMT<bits<5> op> : DSPInst {
+ bits<5> rd;
+ bits<5> rs;
+
+ let Opcode = SPECIAL3_OPCODE.V;
+
+ let Inst{25-21} = rs;
+ let Inst{20-16} = 0;
+ let Inst{15-11} = rd;
+ let Inst{10-6} = op;
+ let Inst{5-0} = 0b010000;
+}
+
+// CMPU.EQ.QB sub-class format.
+class CMP_EQ_QB_R2_FMT<bits<5> op> : DSPInst {
+ bits<5> rs;
+ bits<5> rt;
+
+ let Opcode = SPECIAL3_OPCODE.V;
+
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = 0;
+ let Inst{10-6} = op;
+ let Inst{5-0} = 0b010001;
+}
+
+class CMP_EQ_QB_R3_FMT<bits<5> op> : DSPInst {
+ bits<5> rs;
+ bits<5> rt;
+ bits<5> rd;
+
+ let Opcode = SPECIAL3_OPCODE.V;
+
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = rd;
+ let Inst{10-6} = op;
+ let Inst{5-0} = 0b010001;
+}
+
+class PRECR_SRA_PH_W_FMT<bits<5> op> : DSPInst {
+ bits<5> rs;
+ bits<5> rt;
+ bits<5> sa;
+
+ let Opcode = SPECIAL3_OPCODE.V;
+
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = sa;
+ let Inst{10-6} = op;
+ let Inst{5-0} = 0b010001;
+}
+
+// ABSQ_S.PH sub-class format.
+class ABSQ_S_PH_R2_FMT<bits<5> op> : DSPInst {
+ bits<5> rd;
+ bits<5> rt;
+
+ let Opcode = SPECIAL3_OPCODE.V;
+
+ let Inst{25-21} = 0;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = rd;
+ let Inst{10-6} = op;
+ let Inst{5-0} = 0b010010;
+}
+
+
+class REPL_FMT<bits<5> op> : DSPInst {
+ bits<5> rd;
+ bits<10> imm;
+
+ let Opcode = SPECIAL3_OPCODE.V;
+
+ let Inst{25-16} = imm;
+ let Inst{15-11} = rd;
+ let Inst{10-6} = op;
+ let Inst{5-0} = 0b010010;
+}
+
+// SHLL.QB sub-class format.
+class SHLL_QB_FMT<bits<5> op> : DSPInst {
+ bits<5> rd;
+ bits<5> rt;
+ bits<5> rs_sa;
+
+ let Opcode = SPECIAL3_OPCODE.V;
+
+ let Inst{25-21} = rs_sa;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = rd;
+ let Inst{10-6} = op;
+ let Inst{5-0} = 0b010011;
+}
+
+// LX sub-class format.
+class LX_FMT<bits<5> op> : DSPInst {
+ bits<5> rd;
+ bits<5> base;
+ bits<5> index;
+
+ let Opcode = SPECIAL3_OPCODE.V;
+
+ let Inst{25-21} = base;
+ let Inst{20-16} = index;
+ let Inst{15-11} = rd;
+ let Inst{10-6} = op;
+ let Inst{5-0} = 0b001010;
+}
+
+// ADDUH.QB sub-class format.
+class ADDUH_QB_FMT<bits<5> op> : DSPInst {
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+
+ let Opcode = SPECIAL3_OPCODE.V;
+
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = rd;
+ let Inst{10-6} = op;
+ let Inst{5-0} = 0b011000;
+}
+
+// APPEND sub-class format.
+class APPEND_FMT<bits<5> op> : DSPInst {
+ bits<5> rt;
+ bits<5> rs;
+ bits<5> sa;
+
+ let Opcode = SPECIAL3_OPCODE.V;
+
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = sa;
+ let Inst{10-6} = op;
+ let Inst{5-0} = 0b110001;
+}
+
+// DPA.W.PH sub-class format.
+class DPA_W_PH_FMT<bits<5> op> : DSPInst {
+ bits<2> ac;
+ bits<5> rs;
+ bits<5> rt;
+
+ let Opcode = SPECIAL3_OPCODE.V;
+
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-13} = 0;
+ let Inst{12-11} = ac;
+ let Inst{10-6} = op;
+ let Inst{5-0} = 0b110000;
+}
+
+// MULT sub-class format.
+class MULT_FMT<bits<6> opcode, bits<6> funct> : DSPInst {
+ bits<2> ac;
+ bits<5> rs;
+ bits<5> rt;
+
+ let Opcode = opcode;
+
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-13} = 0;
+ let Inst{12-11} = ac;
+ let Inst{10-6} = 0;
+ let Inst{5-0} = funct;
+}
+
+// EXTR.W sub-class format (type 1).
+class EXTR_W_TY1_FMT<bits<5> op> : DSPInst {
+ bits<5> rt;
+ bits<2> ac;
+ bits<5> shift_rs;
+
+ let Opcode = SPECIAL3_OPCODE.V;
+
+ let Inst{25-21} = shift_rs;
+ let Inst{20-16} = rt;
+ let Inst{15-13} = 0;
+ let Inst{12-11} = ac;
+ let Inst{10-6} = op;
+ let Inst{5-0} = 0b111000;
+}
+
+// SHILO sub-class format.
+class SHILO_R1_FMT<bits<5> op> : DSPInst {
+ bits<2> ac;
+ bits<6> shift;
+
+ let Opcode = SPECIAL3_OPCODE.V;
+
+ let Inst{25-20} = shift;
+ let Inst{19-13} = 0;
+ let Inst{12-11} = ac;
+ let Inst{10-6} = op;
+ let Inst{5-0} = 0b111000;
+}
+
+class SHILO_R2_FMT<bits<5> op> : DSPInst {
+ bits<2> ac;
+ bits<5> rs;
+
+ let Opcode = SPECIAL3_OPCODE.V;
+
+ let Inst{25-21} = rs;
+ let Inst{20-13} = 0;
+ let Inst{12-11} = ac;
+ let Inst{10-6} = op;
+ let Inst{5-0} = 0b111000;
+}
+
+class RDDSP_FMT<bits<5> op> : DSPInst {
+ bits<5> rd;
+ bits<10> mask;
+
+ let Opcode = SPECIAL3_OPCODE.V;
+
+ let Inst{25-16} = mask;
+ let Inst{15-11} = rd;
+ let Inst{10-6} = op;
+ let Inst{5-0} = 0b111000;
+}
+
+class WRDSP_FMT<bits<5> op> : DSPInst {
+ bits<5> rs;
+ bits<10> mask;
+
+ let Opcode = SPECIAL3_OPCODE.V;
+
+ let Inst{25-21} = rs;
+ let Inst{20-11} = mask;
+ let Inst{10-6} = op;
+ let Inst{5-0} = 0b111000;
+}
+
+class BPOSGE32_FMT<bits<5> op> : DSPInst {
+ bits<16> offset;
+
+ let Opcode = REGIMM_OPCODE.V;
+
+ let Inst{25-21} = 0;
+ let Inst{20-16} = op;
+ let Inst{15-0} = offset;
+}
+
+// INSV sub-class format.
+class INSV_FMT<bits<6> op> : DSPInst {
+ bits<5> rt;
+ bits<5> rs;
+
+ let Opcode = SPECIAL3_OPCODE.V;
+
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-6} = 0;
+ let Inst{5-0} = op;
+}
Modified: llvm/branches/R600/lib/Target/Mips/MipsDSPInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/Mips/MipsDSPInstrInfo.td?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/Mips/MipsDSPInstrInfo.td (original)
+++ llvm/branches/R600/lib/Target/Mips/MipsDSPInstrInfo.td Tue Oct 2 09:15:33 2012
@@ -18,3 +18,1302 @@
def immZExt8 : ImmLeaf<i32, [{return isUInt<8>(Imm);}]>;
def immZExt10 : ImmLeaf<i32, [{return isUInt<10>(Imm);}]>;
def immSExt6 : ImmLeaf<i32, [{return isInt<6>(Imm);}]>;
+
+// Mips-specific dsp nodes
+def SDT_MipsExtr : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>]>;
+def SDT_MipsShilo : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
+def SDT_MipsDPA : SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>]>;
+
+class MipsDSPBase<string Opc, SDTypeProfile Prof> :
+ SDNode<!strconcat("MipsISD::", Opc), Prof,
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>;
+
+class MipsDSPSideEffectBase<string Opc, SDTypeProfile Prof> :
+ SDNode<!strconcat("MipsISD::", Opc), Prof,
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPSideEffect]>;
+
+def MipsEXTP : MipsDSPSideEffectBase<"EXTP", SDT_MipsExtr>;
+def MipsEXTPDP : MipsDSPSideEffectBase<"EXTPDP", SDT_MipsExtr>;
+def MipsEXTR_S_H : MipsDSPSideEffectBase<"EXTR_S_H", SDT_MipsExtr>;
+def MipsEXTR_W : MipsDSPSideEffectBase<"EXTR_W", SDT_MipsExtr>;
+def MipsEXTR_R_W : MipsDSPSideEffectBase<"EXTR_R_W", SDT_MipsExtr>;
+def MipsEXTR_RS_W : MipsDSPSideEffectBase<"EXTR_RS_W", SDT_MipsExtr>;
+
+def MipsSHILO : MipsDSPBase<"SHILO", SDT_MipsShilo>;
+def MipsMTHLIP : MipsDSPBase<"MTHLIP", SDT_MipsShilo>;
+
+def MipsMULSAQ_S_W_PH : MipsDSPSideEffectBase<"MULSAQ_S_W_PH", SDT_MipsDPA>;
+def MipsMAQ_S_W_PHL : MipsDSPSideEffectBase<"MAQ_S_W_PHL", SDT_MipsDPA>;
+def MipsMAQ_S_W_PHR : MipsDSPSideEffectBase<"MAQ_S_W_PHR", SDT_MipsDPA>;
+def MipsMAQ_SA_W_PHL : MipsDSPSideEffectBase<"MAQ_SA_W_PHL", SDT_MipsDPA>;
+def MipsMAQ_SA_W_PHR : MipsDSPSideEffectBase<"MAQ_SA_W_PHR", SDT_MipsDPA>;
+
+def MipsDPAU_H_QBL : MipsDSPBase<"DPAU_H_QBL", SDT_MipsDPA>;
+def MipsDPAU_H_QBR : MipsDSPBase<"DPAU_H_QBR", SDT_MipsDPA>;
+def MipsDPSU_H_QBL : MipsDSPBase<"DPSU_H_QBL", SDT_MipsDPA>;
+def MipsDPSU_H_QBR : MipsDSPBase<"DPSU_H_QBR", SDT_MipsDPA>;
+def MipsDPAQ_S_W_PH : MipsDSPSideEffectBase<"DPAQ_S_W_PH", SDT_MipsDPA>;
+def MipsDPSQ_S_W_PH : MipsDSPSideEffectBase<"DPSQ_S_W_PH", SDT_MipsDPA>;
+def MipsDPAQ_SA_L_W : MipsDSPSideEffectBase<"DPAQ_SA_L_W", SDT_MipsDPA>;
+def MipsDPSQ_SA_L_W : MipsDSPSideEffectBase<"DPSQ_SA_L_W", SDT_MipsDPA>;
+
+def MipsDPA_W_PH : MipsDSPBase<"DPA_W_PH", SDT_MipsDPA>;
+def MipsDPS_W_PH : MipsDSPBase<"DPS_W_PH", SDT_MipsDPA>;
+def MipsDPAQX_S_W_PH : MipsDSPSideEffectBase<"DPAQX_S_W_PH", SDT_MipsDPA>;
+def MipsDPAQX_SA_W_PH : MipsDSPSideEffectBase<"DPAQX_SA_W_PH", SDT_MipsDPA>;
+def MipsDPAX_W_PH : MipsDSPBase<"DPAX_W_PH", SDT_MipsDPA>;
+def MipsDPSX_W_PH : MipsDSPBase<"DPSX_W_PH", SDT_MipsDPA>;
+def MipsDPSQX_S_W_PH : MipsDSPSideEffectBase<"DPSQX_S_W_PH", SDT_MipsDPA>;
+def MipsDPSQX_SA_W_PH : MipsDSPSideEffectBase<"DPSQX_SA_W_PH", SDT_MipsDPA>;
+def MipsMULSA_W_PH : MipsDSPBase<"MULSA_W_PH", SDT_MipsDPA>;
+
+def MipsMULT : MipsDSPBase<"MULT", SDT_MipsDPA>;
+def MipsMULTU : MipsDSPBase<"MULTU", SDT_MipsDPA>;
+def MipsMADD_DSP : MipsDSPBase<"MADD_DSP", SDT_MipsDPA>;
+def MipsMADDU_DSP : MipsDSPBase<"MADDU_DSP", SDT_MipsDPA>;
+def MipsMSUB_DSP : MipsDSPBase<"MSUB_DSP", SDT_MipsDPA>;
+def MipsMSUBU_DSP : MipsDSPBase<"MSUBU_DSP", SDT_MipsDPA>;
+
+// Flags.
+class IsCommutable {
+ bit isCommutable = 1;
+}
+
+class UseAC {
+ list<Register> Uses = [AC0];
+}
+
+class UseDSPCtrl {
+ list<Register> Uses = [DSPCtrl];
+}
+
+class ClearDefs {
+ list<Register> Defs = [];
+}
+
+// Instruction encoding.
+class ADDU_QB_ENC : ADDU_QB_FMT<0b00000>;
+class ADDU_S_QB_ENC : ADDU_QB_FMT<0b00100>;
+class SUBU_QB_ENC : ADDU_QB_FMT<0b00001>;
+class SUBU_S_QB_ENC : ADDU_QB_FMT<0b00101>;
+class ADDQ_PH_ENC : ADDU_QB_FMT<0b01010>;
+class ADDQ_S_PH_ENC : ADDU_QB_FMT<0b01110>;
+class SUBQ_PH_ENC : ADDU_QB_FMT<0b01011>;
+class SUBQ_S_PH_ENC : ADDU_QB_FMT<0b01111>;
+class ADDQ_S_W_ENC : ADDU_QB_FMT<0b10110>;
+class SUBQ_S_W_ENC : ADDU_QB_FMT<0b10111>;
+class ADDSC_ENC : ADDU_QB_FMT<0b10000>;
+class ADDWC_ENC : ADDU_QB_FMT<0b10001>;
+class MODSUB_ENC : ADDU_QB_FMT<0b10010>;
+class RADDU_W_QB_ENC : RADDU_W_QB_FMT<0b10100>;
+class ABSQ_S_PH_ENC : ABSQ_S_PH_R2_FMT<0b01001>;
+class ABSQ_S_W_ENC : ABSQ_S_PH_R2_FMT<0b10001>;
+class PRECRQ_QB_PH_ENC : CMP_EQ_QB_R3_FMT<0b01100>;
+class PRECRQ_PH_W_ENC : CMP_EQ_QB_R3_FMT<0b10100>;
+class PRECRQ_RS_PH_W_ENC : CMP_EQ_QB_R3_FMT<0b10101>;
+class PRECRQU_S_QB_PH_ENC : CMP_EQ_QB_R3_FMT<0b01111>;
+class PRECEQ_W_PHL_ENC : ABSQ_S_PH_R2_FMT<0b01100>;
+class PRECEQ_W_PHR_ENC : ABSQ_S_PH_R2_FMT<0b01101>;
+class PRECEQU_PH_QBL_ENC : ABSQ_S_PH_R2_FMT<0b00100>;
+class PRECEQU_PH_QBR_ENC : ABSQ_S_PH_R2_FMT<0b00101>;
+class PRECEQU_PH_QBLA_ENC : ABSQ_S_PH_R2_FMT<0b00110>;
+class PRECEQU_PH_QBRA_ENC : ABSQ_S_PH_R2_FMT<0b00111>;
+class PRECEU_PH_QBL_ENC : ABSQ_S_PH_R2_FMT<0b11100>;
+class PRECEU_PH_QBR_ENC : ABSQ_S_PH_R2_FMT<0b11101>;
+class PRECEU_PH_QBLA_ENC : ABSQ_S_PH_R2_FMT<0b11110>;
+class PRECEU_PH_QBRA_ENC : ABSQ_S_PH_R2_FMT<0b11111>;
+class SHLL_QB_ENC : SHLL_QB_FMT<0b00000>;
+class SHLLV_QB_ENC : SHLL_QB_FMT<0b00010>;
+class SHRL_QB_ENC : SHLL_QB_FMT<0b00001>;
+class SHRLV_QB_ENC : SHLL_QB_FMT<0b00011>;
+class SHLL_PH_ENC : SHLL_QB_FMT<0b01000>;
+class SHLLV_PH_ENC : SHLL_QB_FMT<0b01010>;
+class SHLL_S_PH_ENC : SHLL_QB_FMT<0b01100>;
+class SHLLV_S_PH_ENC : SHLL_QB_FMT<0b01110>;
+class SHRA_PH_ENC : SHLL_QB_FMT<0b01001>;
+class SHRAV_PH_ENC : SHLL_QB_FMT<0b01011>;
+class SHRA_R_PH_ENC : SHLL_QB_FMT<0b01101>;
+class SHRAV_R_PH_ENC : SHLL_QB_FMT<0b01111>;
+class SHLL_S_W_ENC : SHLL_QB_FMT<0b10100>;
+class SHLLV_S_W_ENC : SHLL_QB_FMT<0b10110>;
+class SHRA_R_W_ENC : SHLL_QB_FMT<0b10101>;
+class SHRAV_R_W_ENC : SHLL_QB_FMT<0b10111>;
+class MULEU_S_PH_QBL_ENC : ADDU_QB_FMT<0b00110>;
+class MULEU_S_PH_QBR_ENC : ADDU_QB_FMT<0b00111>;
+class MULEQ_S_W_PHL_ENC : ADDU_QB_FMT<0b11100>;
+class MULEQ_S_W_PHR_ENC : ADDU_QB_FMT<0b11101>;
+class MULQ_RS_PH_ENC : ADDU_QB_FMT<0b11111>;
+class MULSAQ_S_W_PH_ENC : DPA_W_PH_FMT<0b00110>;
+class MAQ_S_W_PHL_ENC : DPA_W_PH_FMT<0b10100>;
+class MAQ_S_W_PHR_ENC : DPA_W_PH_FMT<0b10110>;
+class MAQ_SA_W_PHL_ENC : DPA_W_PH_FMT<0b10000>;
+class MAQ_SA_W_PHR_ENC : DPA_W_PH_FMT<0b10010>;
+class DPAU_H_QBL_ENC : DPA_W_PH_FMT<0b00011>;
+class DPAU_H_QBR_ENC : DPA_W_PH_FMT<0b00111>;
+class DPSU_H_QBL_ENC : DPA_W_PH_FMT<0b01011>;
+class DPSU_H_QBR_ENC : DPA_W_PH_FMT<0b01111>;
+class DPAQ_S_W_PH_ENC : DPA_W_PH_FMT<0b00100>;
+class DPSQ_S_W_PH_ENC : DPA_W_PH_FMT<0b00101>;
+class DPAQ_SA_L_W_ENC : DPA_W_PH_FMT<0b01100>;
+class DPSQ_SA_L_W_ENC : DPA_W_PH_FMT<0b01101>;
+class MULT_DSP_ENC : MULT_FMT<0b000000, 0b011000>;
+class MULTU_DSP_ENC : MULT_FMT<0b000000, 0b011001>;
+class MADD_DSP_ENC : MULT_FMT<0b011100, 0b000000>;
+class MADDU_DSP_ENC : MULT_FMT<0b011100, 0b000001>;
+class MSUB_DSP_ENC : MULT_FMT<0b011100, 0b000100>;
+class MSUBU_DSP_ENC : MULT_FMT<0b011100, 0b000101>;
+class CMPU_EQ_QB_ENC : CMP_EQ_QB_R2_FMT<0b00000>;
+class CMPU_LT_QB_ENC : CMP_EQ_QB_R2_FMT<0b00001>;
+class CMPU_LE_QB_ENC : CMP_EQ_QB_R2_FMT<0b00010>;
+class CMPGU_EQ_QB_ENC : CMP_EQ_QB_R3_FMT<0b00100>;
+class CMPGU_LT_QB_ENC : CMP_EQ_QB_R3_FMT<0b00101>;
+class CMPGU_LE_QB_ENC : CMP_EQ_QB_R3_FMT<0b00110>;
+class CMP_EQ_PH_ENC : CMP_EQ_QB_R2_FMT<0b01000>;
+class CMP_LT_PH_ENC : CMP_EQ_QB_R2_FMT<0b01001>;
+class CMP_LE_PH_ENC : CMP_EQ_QB_R2_FMT<0b01010>;
+class BITREV_ENC : ABSQ_S_PH_R2_FMT<0b11011>;
+class PACKRL_PH_ENC : CMP_EQ_QB_R3_FMT<0b01110>;
+class REPL_QB_ENC : REPL_FMT<0b00010>;
+class REPL_PH_ENC : REPL_FMT<0b01010>;
+class REPLV_QB_ENC : ABSQ_S_PH_R2_FMT<0b00011>;
+class REPLV_PH_ENC : ABSQ_S_PH_R2_FMT<0b01011>;
+class PICK_QB_ENC : CMP_EQ_QB_R3_FMT<0b00011>;
+class PICK_PH_ENC : CMP_EQ_QB_R3_FMT<0b01011>;
+class LWX_ENC : LX_FMT<0b00000>;
+class LHX_ENC : LX_FMT<0b00100>;
+class LBUX_ENC : LX_FMT<0b00110>;
+class BPOSGE32_ENC : BPOSGE32_FMT<0b11100>;
+class INSV_ENC : INSV_FMT<0b001100>;
+
+class EXTP_ENC : EXTR_W_TY1_FMT<0b00010>;
+class EXTPV_ENC : EXTR_W_TY1_FMT<0b00011>;
+class EXTPDP_ENC : EXTR_W_TY1_FMT<0b01010>;
+class EXTPDPV_ENC : EXTR_W_TY1_FMT<0b01011>;
+class EXTR_W_ENC : EXTR_W_TY1_FMT<0b00000>;
+class EXTRV_W_ENC : EXTR_W_TY1_FMT<0b00001>;
+class EXTR_R_W_ENC : EXTR_W_TY1_FMT<0b00100>;
+class EXTRV_R_W_ENC : EXTR_W_TY1_FMT<0b00101>;
+class EXTR_RS_W_ENC : EXTR_W_TY1_FMT<0b00110>;
+class EXTRV_RS_W_ENC : EXTR_W_TY1_FMT<0b00111>;
+class EXTR_S_H_ENC : EXTR_W_TY1_FMT<0b01110>;
+class EXTRV_S_H_ENC : EXTR_W_TY1_FMT<0b01111>;
+class SHILO_ENC : SHILO_R1_FMT<0b11010>;
+class SHILOV_ENC : SHILO_R2_FMT<0b11011>;
+class MTHLIP_ENC : SHILO_R2_FMT<0b11111>;
+
+class RDDSP_ENC : RDDSP_FMT<0b10010>;
+class WRDSP_ENC : WRDSP_FMT<0b10011>;
+class ADDU_PH_ENC : ADDU_QB_FMT<0b01000>;
+class ADDU_S_PH_ENC : ADDU_QB_FMT<0b01100>;
+class SUBU_PH_ENC : ADDU_QB_FMT<0b01001>;
+class SUBU_S_PH_ENC : ADDU_QB_FMT<0b01101>;
+class CMPGDU_EQ_QB_ENC : CMP_EQ_QB_R3_FMT<0b11000>;
+class CMPGDU_LT_QB_ENC : CMP_EQ_QB_R3_FMT<0b11001>;
+class CMPGDU_LE_QB_ENC : CMP_EQ_QB_R3_FMT<0b11010>;
+class ABSQ_S_QB_ENC : ABSQ_S_PH_R2_FMT<0b00001>;
+class ADDUH_QB_ENC : ADDUH_QB_FMT<0b00000>;
+class ADDUH_R_QB_ENC : ADDUH_QB_FMT<0b00010>;
+class SUBUH_QB_ENC : ADDUH_QB_FMT<0b00001>;
+class SUBUH_R_QB_ENC : ADDUH_QB_FMT<0b00011>;
+class ADDQH_PH_ENC : ADDUH_QB_FMT<0b01000>;
+class ADDQH_R_PH_ENC : ADDUH_QB_FMT<0b01010>;
+class SUBQH_PH_ENC : ADDUH_QB_FMT<0b01001>;
+class SUBQH_R_PH_ENC : ADDUH_QB_FMT<0b01011>;
+class ADDQH_W_ENC : ADDUH_QB_FMT<0b10000>;
+class ADDQH_R_W_ENC : ADDUH_QB_FMT<0b10010>;
+class SUBQH_W_ENC : ADDUH_QB_FMT<0b10001>;
+class SUBQH_R_W_ENC : ADDUH_QB_FMT<0b10011>;
+class MUL_PH_ENC : ADDUH_QB_FMT<0b01100>;
+class MUL_S_PH_ENC : ADDUH_QB_FMT<0b01110>;
+class MULQ_S_W_ENC : ADDUH_QB_FMT<0b10110>;
+class MULQ_RS_W_ENC : ADDUH_QB_FMT<0b10111>;
+class MULQ_S_PH_ENC : ADDU_QB_FMT<0b11110>;
+class DPA_W_PH_ENC : DPA_W_PH_FMT<0b00000>;
+class DPS_W_PH_ENC : DPA_W_PH_FMT<0b00001>;
+class DPAQX_S_W_PH_ENC : DPA_W_PH_FMT<0b11000>;
+class DPAQX_SA_W_PH_ENC : DPA_W_PH_FMT<0b11010>;
+class DPAX_W_PH_ENC : DPA_W_PH_FMT<0b01000>;
+class DPSX_W_PH_ENC : DPA_W_PH_FMT<0b01001>;
+class DPSQX_S_W_PH_ENC : DPA_W_PH_FMT<0b11001>;
+class DPSQX_SA_W_PH_ENC : DPA_W_PH_FMT<0b11011>;
+class MULSA_W_PH_ENC : DPA_W_PH_FMT<0b00010>;
+class PRECR_QB_PH_ENC : CMP_EQ_QB_R3_FMT<0b01101>;
+class PRECR_SRA_PH_W_ENC : PRECR_SRA_PH_W_FMT<0b11110>;
+class PRECR_SRA_R_PH_W_ENC : PRECR_SRA_PH_W_FMT<0b11111>;
+class SHRA_QB_ENC : SHLL_QB_FMT<0b00100>;
+class SHRAV_QB_ENC : SHLL_QB_FMT<0b00110>;
+class SHRA_R_QB_ENC : SHLL_QB_FMT<0b00101>;
+class SHRAV_R_QB_ENC : SHLL_QB_FMT<0b00111>;
+class SHRL_PH_ENC : SHLL_QB_FMT<0b11001>;
+class SHRLV_PH_ENC : SHLL_QB_FMT<0b11011>;
+class APPEND_ENC : APPEND_FMT<0b00000>;
+class BALIGN_ENC : APPEND_FMT<0b10000>;
+class PREPEND_ENC : APPEND_FMT<0b00001>;
+
+// Instruction desc.
+class ADDU_QB_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ InstrItinClass itin, RegisterClass RCD,
+ RegisterClass RCS, RegisterClass RCT = RCS> {
+ dag OutOperandList = (outs RCD:$rd);
+ dag InOperandList = (ins RCS:$rs, RCT:$rt);
+ string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt");
+ list<dag> Pattern = [(set RCD:$rd, (OpNode RCS:$rs, RCT:$rt))];
+ InstrItinClass Itinerary = itin;
+ list<Register> Defs = [DSPCtrl];
+}
+
+class RADDU_W_QB_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ InstrItinClass itin, RegisterClass RCD,
+ RegisterClass RCS = RCD> {
+ dag OutOperandList = (outs RCD:$rd);
+ dag InOperandList = (ins RCS:$rs);
+ string AsmString = !strconcat(instr_asm, "\t$rd, $rs");
+ list<dag> Pattern = [(set RCD:$rd, (OpNode RCS:$rs))];
+ InstrItinClass Itinerary = itin;
+ list<Register> Defs = [DSPCtrl];
+}
+
+class CMP_EQ_QB_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ InstrItinClass itin, RegisterClass RCS,
+ RegisterClass RCT = RCS> {
+ dag OutOperandList = (outs);
+ dag InOperandList = (ins RCS:$rs, RCT:$rt);
+ string AsmString = !strconcat(instr_asm, "\t$rs, $rt");
+ list<dag> Pattern = [(OpNode RCS:$rs, RCT:$rt)];
+ InstrItinClass Itinerary = itin;
+ list<Register> Defs = [DSPCtrl];
+}
+
+class CMP_EQ_QB_R3_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ InstrItinClass itin, RegisterClass RCD,
+ RegisterClass RCS, RegisterClass RCT = RCS> {
+ dag OutOperandList = (outs RCD:$rd);
+ dag InOperandList = (ins RCS:$rs, RCT:$rt);
+ string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt");
+ list<dag> Pattern = [(set RCD:$rd, (OpNode RCS:$rs, RCT:$rt))];
+ InstrItinClass Itinerary = itin;
+ list<Register> Defs = [DSPCtrl];
+}
+
+class PRECR_SRA_PH_W_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ InstrItinClass itin, RegisterClass RCT,
+ RegisterClass RCS = RCT> {
+ dag OutOperandList = (outs RCT:$rt);
+ dag InOperandList = (ins RCS:$rs, shamt:$sa, RCS:$src);
+ string AsmString = !strconcat(instr_asm, "\t$rt, $rs, $sa");
+ list<dag> Pattern = [(set RCT:$rt, (OpNode RCS:$src, RCS:$rs, immZExt5:$sa))];
+ InstrItinClass Itinerary = itin;
+ list<Register> Defs = [DSPCtrl];
+ string Constraints = "$src = $rt";
+}
+
+class ABSQ_S_PH_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ InstrItinClass itin, RegisterClass RCD,
+ RegisterClass RCT = RCD> {
+ dag OutOperandList = (outs RCD:$rd);
+ dag InOperandList = (ins RCT:$rt);
+ string AsmString = !strconcat(instr_asm, "\t$rd, $rt");
+ list<dag> Pattern = [(set RCD:$rd, (OpNode RCT:$rt))];
+ InstrItinClass Itinerary = itin;
+ list<Register> Defs = [DSPCtrl];
+}
+
+class REPL_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ ImmLeaf immPat, InstrItinClass itin, RegisterClass RC> {
+ dag OutOperandList = (outs RC:$rd);
+ dag InOperandList = (ins uimm16:$imm);
+ string AsmString = !strconcat(instr_asm, "\t$rd, $imm");
+ list<dag> Pattern = [(set RC:$rd, (OpNode immPat:$imm))];
+ InstrItinClass Itinerary = itin;
+ list<Register> Defs = [DSPCtrl];
+}
+
+class SHLL_QB_R3_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ InstrItinClass itin, RegisterClass RC> {
+ dag OutOperandList = (outs RC:$rd);
+ dag InOperandList = (ins RC:$rt, CPURegs:$rs_sa);
+ string AsmString = !strconcat(instr_asm, "\t$rd, $rt, $rs_sa");
+ list<dag> Pattern = [(set RC:$rd, (OpNode RC:$rt, CPURegs:$rs_sa))];
+ InstrItinClass Itinerary = itin;
+ list<Register> Defs = [DSPCtrl];
+}
+
+class SHLL_QB_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ SDPatternOperator ImmPat, InstrItinClass itin,
+ RegisterClass RC> {
+ dag OutOperandList = (outs RC:$rd);
+ dag InOperandList = (ins RC:$rt, uimm16:$rs_sa);
+ string AsmString = !strconcat(instr_asm, "\t$rd, $rt, $rs_sa");
+ list<dag> Pattern = [(set RC:$rd, (OpNode RC:$rt, ImmPat:$rs_sa))];
+ InstrItinClass Itinerary = itin;
+ list<Register> Defs = [DSPCtrl];
+}
+
+class LX_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ InstrItinClass itin> {
+ dag OutOperandList = (outs CPURegs:$rd);
+ dag InOperandList = (ins CPURegs:$base, CPURegs:$index);
+ string AsmString = !strconcat(instr_asm, "\t$rd, ${index}(${base})");
+ list<dag> Pattern = [(set CPURegs:$rd,
+ (OpNode CPURegs:$base, CPURegs:$index))];
+ InstrItinClass Itinerary = itin;
+ list<Register> Defs = [DSPCtrl];
+ bit mayLoad = 1;
+}
+
+class ADDUH_QB_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ InstrItinClass itin, RegisterClass RCD,
+ RegisterClass RCS = RCD, RegisterClass RCT = RCD> {
+ dag OutOperandList = (outs RCD:$rd);
+ dag InOperandList = (ins RCS:$rs, RCT:$rt);
+ string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt");
+ list<dag> Pattern = [(set RCD:$rd, (OpNode RCS:$rs, RCT:$rt))];
+ InstrItinClass Itinerary = itin;
+ list<Register> Defs = [DSPCtrl];
+}
+
+class APPEND_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ SDPatternOperator ImmOp, InstrItinClass itin> {
+ dag OutOperandList = (outs CPURegs:$rt);
+ dag InOperandList = (ins CPURegs:$rs, shamt:$sa, CPURegs:$src);
+ string AsmString = !strconcat(instr_asm, "\t$rt, $rs, $sa");
+ list<dag> Pattern = [(set CPURegs:$rt,
+ (OpNode CPURegs:$src, CPURegs:$rs, ImmOp:$sa))];
+ InstrItinClass Itinerary = itin;
+ list<Register> Defs = [DSPCtrl];
+ string Constraints = "$src = $rt";
+}
+
+class EXTR_W_TY1_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ InstrItinClass itin> {
+ dag OutOperandList = (outs CPURegs:$rt);
+ dag InOperandList = (ins ACRegs:$ac, CPURegs:$shift_rs);
+ string AsmString = !strconcat(instr_asm, "\t$rt, $ac, $shift_rs");
+ InstrItinClass Itinerary = itin;
+ list<Register> Defs = [DSPCtrl];
+}
+
+class EXTR_W_TY1_R1_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ InstrItinClass itin> {
+ dag OutOperandList = (outs CPURegs:$rt);
+ dag InOperandList = (ins ACRegs:$ac, uimm16:$shift_rs);
+ string AsmString = !strconcat(instr_asm, "\t$rt, $ac, $shift_rs");
+ InstrItinClass Itinerary = itin;
+ list<Register> Defs = [DSPCtrl];
+}
+
+class SHILO_R1_PSEUDO_BASE<SDPatternOperator OpNode, InstrItinClass itin,
+ Instruction realinst> :
+ PseudoDSP<(outs), (ins simm16:$shift), [(OpNode immSExt6:$shift)]>,
+ PseudoInstExpansion<(realinst AC0, simm16:$shift)> {
+ list<Register> Defs = [DSPCtrl, AC0];
+ list<Register> Uses = [AC0];
+ InstrItinClass Itinerary = itin;
+}
+
+class SHILO_R1_DESC_BASE<string instr_asm> {
+ dag OutOperandList = (outs ACRegs:$ac);
+ dag InOperandList = (ins simm16:$shift);
+ string AsmString = !strconcat(instr_asm, "\t$ac, $shift");
+}
+
+class SHILO_R2_PSEUDO_BASE<SDPatternOperator OpNode, InstrItinClass itin,
+ Instruction realinst> :
+ PseudoDSP<(outs), (ins CPURegs:$rs), [(OpNode CPURegs:$rs)]>,
+ PseudoInstExpansion<(realinst AC0, CPURegs:$rs)> {
+ list<Register> Defs = [DSPCtrl, AC0];
+ list<Register> Uses = [AC0];
+ InstrItinClass Itinerary = itin;
+}
+
+class SHILO_R2_DESC_BASE<string instr_asm> {
+ dag OutOperandList = (outs ACRegs:$ac);
+ dag InOperandList = (ins CPURegs:$rs);
+ string AsmString = !strconcat(instr_asm, "\t$ac, $rs");
+}
+
+class MTHLIP_DESC_BASE<string instr_asm> {
+ dag OutOperandList = (outs ACRegs:$ac);
+ dag InOperandList = (ins CPURegs:$rs);
+ string AsmString = !strconcat(instr_asm, "\t$rs, $ac");
+}
+
+class RDDSP_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ InstrItinClass itin> {
+ dag OutOperandList = (outs CPURegs:$rd);
+ dag InOperandList = (ins uimm16:$mask);
+ string AsmString = !strconcat(instr_asm, "\t$rd, $mask");
+ list<dag> Pattern = [(set CPURegs:$rd, (OpNode immZExt10:$mask))];
+ InstrItinClass Itinerary = itin;
+ list<Register> Uses = [DSPCtrl];
+}
+
+class WRDSP_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ InstrItinClass itin> {
+ dag OutOperandList = (outs);
+ dag InOperandList = (ins CPURegs:$rs, uimm16:$mask);
+ string AsmString = !strconcat(instr_asm, "\t$rs, $mask");
+ list<dag> Pattern = [(OpNode CPURegs:$rs, immZExt10:$mask)];
+ InstrItinClass Itinerary = itin;
+ list<Register> Defs = [DSPCtrl];
+}
+
+class DPA_W_PH_PSEUDO_BASE<SDPatternOperator OpNode, InstrItinClass itin,
+ Instruction realinst> :
+ PseudoDSP<(outs), (ins CPURegs:$rs, CPURegs:$rt),
+ [(OpNode CPURegs:$rs, CPURegs:$rt)]>,
+ PseudoInstExpansion<(realinst AC0, CPURegs:$rs, CPURegs:$rt)> {
+ list<Register> Defs = [DSPCtrl, AC0];
+ list<Register> Uses = [AC0];
+ InstrItinClass Itinerary = itin;
+}
+
+class DPA_W_PH_DESC_BASE<string instr_asm> {
+ dag OutOperandList = (outs ACRegs:$ac);
+ dag InOperandList = (ins CPURegs:$rs, CPURegs:$rt);
+ string AsmString = !strconcat(instr_asm, "\t$ac, $rs, $rt");
+}
+
+class MULT_PSEUDO_BASE<SDPatternOperator OpNode, InstrItinClass itin,
+ Instruction realinst> :
+ PseudoDSP<(outs), (ins CPURegs:$rs, CPURegs:$rt),
+ [(OpNode CPURegs:$rs, CPURegs:$rt)]>,
+ PseudoInstExpansion<(realinst AC0, CPURegs:$rs, CPURegs:$rt)> {
+ list<Register> Defs = [DSPCtrl, AC0];
+ InstrItinClass Itinerary = itin;
+}
+
+class MULT_DESC_BASE<string instr_asm> {
+ dag OutOperandList = (outs ACRegs:$ac);
+ dag InOperandList = (ins CPURegs:$rs, CPURegs:$rt);
+ string AsmString = !strconcat(instr_asm, "\t$ac, $rs, $rt");
+}
+
+class BPOSGE32_PSEUDO_DESC_BASE<SDPatternOperator OpNode, InstrItinClass itin> :
+ MipsPseudo<(outs CPURegs:$dst), (ins), "", [(set CPURegs:$dst, (OpNode))]> {
+ list<Register> Uses = [DSPCtrl];
+ bit usesCustomInserter = 1;
+}
+
+class BPOSGE32_DESC_BASE<string instr_asm, InstrItinClass itin> {
+ dag OutOperandList = (outs);
+ dag InOperandList = (ins brtarget:$offset);
+ string AsmString = !strconcat(instr_asm, "\t$offset");
+ InstrItinClass Itinerary = itin;
+ list<Register> Uses = [DSPCtrl];
+ bit isBranch = 1;
+ bit isTerminator = 1;
+ bit hasDelaySlot = 1;
+}
+
+class INSV_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ InstrItinClass itin> {
+ dag OutOperandList = (outs CPURegs:$rt);
+ dag InOperandList = (ins CPURegs:$src, CPURegs:$rs);
+ string AsmString = !strconcat(instr_asm, "\t$rt, $rs");
+ list<dag> Pattern = [(set CPURegs:$rt, (OpNode CPURegs:$src, CPURegs:$rs))];
+ InstrItinClass Itinerary = itin;
+ list<Register> Uses = [DSPCtrl];
+ string Constraints = "$src = $rt";
+}
+
+//===----------------------------------------------------------------------===//
+// MIPS DSP Rev 1
+//===----------------------------------------------------------------------===//
+
+// Addition/subtraction
+class ADDU_QB_DESC : ADDU_QB_DESC_BASE<"addu.qb", int_mips_addu_qb, NoItinerary,
+ DSPRegs, DSPRegs>, IsCommutable;
+
+class ADDU_S_QB_DESC : ADDU_QB_DESC_BASE<"addu_s.qb", int_mips_addu_s_qb,
+ NoItinerary, DSPRegs, DSPRegs>,
+ IsCommutable;
+
+class SUBU_QB_DESC : ADDU_QB_DESC_BASE<"subu.qb", int_mips_subu_qb, NoItinerary,
+ DSPRegs, DSPRegs>;
+
+class SUBU_S_QB_DESC : ADDU_QB_DESC_BASE<"subu_s.qb", int_mips_subu_s_qb,
+ NoItinerary, DSPRegs, DSPRegs>;
+
+class ADDQ_PH_DESC : ADDU_QB_DESC_BASE<"addq.ph", int_mips_addq_ph, NoItinerary,
+ DSPRegs, DSPRegs>, IsCommutable;
+
+class ADDQ_S_PH_DESC : ADDU_QB_DESC_BASE<"addq_s.ph", int_mips_addq_s_ph,
+ NoItinerary, DSPRegs, DSPRegs>,
+ IsCommutable;
+
+class SUBQ_PH_DESC : ADDU_QB_DESC_BASE<"subq.ph", int_mips_subq_ph, NoItinerary,
+ DSPRegs, DSPRegs>;
+
+class SUBQ_S_PH_DESC : ADDU_QB_DESC_BASE<"subq_s.ph", int_mips_subq_s_ph,
+ NoItinerary, DSPRegs, DSPRegs>;
+
+class ADDQ_S_W_DESC : ADDU_QB_DESC_BASE<"addq_s.w", int_mips_addq_s_w,
+ NoItinerary, CPURegs, CPURegs>,
+ IsCommutable;
+
+class SUBQ_S_W_DESC : ADDU_QB_DESC_BASE<"subq_s.w", int_mips_subq_s_w,
+ NoItinerary, CPURegs, CPURegs>;
+
+class ADDSC_DESC : ADDU_QB_DESC_BASE<"addsc", int_mips_addsc, NoItinerary,
+ CPURegs, CPURegs>, IsCommutable;
+
+class ADDWC_DESC : ADDU_QB_DESC_BASE<"addwc", int_mips_addwc, NoItinerary,
+ CPURegs, CPURegs>,
+ IsCommutable, UseDSPCtrl;
+
+class MODSUB_DESC : ADDU_QB_DESC_BASE<"modsub", int_mips_modsub, NoItinerary,
+ CPURegs, CPURegs>, ClearDefs;
+
+class RADDU_W_QB_DESC : RADDU_W_QB_DESC_BASE<"raddu.w.qb", int_mips_raddu_w_qb,
+ NoItinerary, CPURegs, DSPRegs>,
+ ClearDefs;
+
+// Absolute value
+class ABSQ_S_PH_DESC : ABSQ_S_PH_R2_DESC_BASE<"absq_s.ph", int_mips_absq_s_ph,
+ NoItinerary, DSPRegs>;
+
+class ABSQ_S_W_DESC : ABSQ_S_PH_R2_DESC_BASE<"absq_s.w", int_mips_absq_s_w,
+ NoItinerary, CPURegs>;
+
+// Precision reduce/expand
+class PRECRQ_QB_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"precrq.qb.ph",
+ int_mips_precrq_qb_ph,
+ NoItinerary, DSPRegs, DSPRegs>,
+ ClearDefs;
+
+class PRECRQ_PH_W_DESC : CMP_EQ_QB_R3_DESC_BASE<"precrq.ph.w",
+ int_mips_precrq_ph_w,
+ NoItinerary, DSPRegs, CPURegs>,
+ ClearDefs;
+
+class PRECRQ_RS_PH_W_DESC : CMP_EQ_QB_R3_DESC_BASE<"precrq_rs.ph.w",
+ int_mips_precrq_rs_ph_w,
+ NoItinerary, DSPRegs,
+ CPURegs>;
+
+class PRECRQU_S_QB_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"precrqu_s.qb.ph",
+ int_mips_precrqu_s_qb_ph,
+ NoItinerary, DSPRegs,
+ DSPRegs>;
+
+class PRECEQ_W_PHL_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceq.w.phl",
+ int_mips_preceq_w_phl,
+ NoItinerary, CPURegs, DSPRegs>,
+ ClearDefs;
+
+class PRECEQ_W_PHR_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceq.w.phr",
+ int_mips_preceq_w_phr,
+ NoItinerary, CPURegs, DSPRegs>,
+ ClearDefs;
+
+class PRECEQU_PH_QBL_DESC : ABSQ_S_PH_R2_DESC_BASE<"precequ.ph.qbl",
+ int_mips_precequ_ph_qbl,
+ NoItinerary, DSPRegs>,
+ ClearDefs;
+
+class PRECEQU_PH_QBR_DESC : ABSQ_S_PH_R2_DESC_BASE<"precequ.ph.qbr",
+ int_mips_precequ_ph_qbr,
+ NoItinerary, DSPRegs>,
+ ClearDefs;
+
+class PRECEQU_PH_QBLA_DESC : ABSQ_S_PH_R2_DESC_BASE<"precequ.ph.qbla",
+ int_mips_precequ_ph_qbla,
+ NoItinerary, DSPRegs>,
+ ClearDefs;
+
+class PRECEQU_PH_QBRA_DESC : ABSQ_S_PH_R2_DESC_BASE<"precequ.ph.qbra",
+ int_mips_precequ_ph_qbra,
+ NoItinerary, DSPRegs>,
+ ClearDefs;
+
+class PRECEU_PH_QBL_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceu.ph.qbl",
+ int_mips_preceu_ph_qbl,
+ NoItinerary, DSPRegs>,
+ ClearDefs;
+
+class PRECEU_PH_QBR_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceu.ph.qbr",
+ int_mips_preceu_ph_qbr,
+ NoItinerary, DSPRegs>,
+ ClearDefs;
+
+class PRECEU_PH_QBLA_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceu.ph.qbla",
+ int_mips_preceu_ph_qbla,
+ NoItinerary, DSPRegs>,
+ ClearDefs;
+
+class PRECEU_PH_QBRA_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceu.ph.qbra",
+ int_mips_preceu_ph_qbra,
+ NoItinerary, DSPRegs>,
+ ClearDefs;
+
+// Shift
+class SHLL_QB_DESC : SHLL_QB_R2_DESC_BASE<"shll.qb", int_mips_shll_qb, immZExt3,
+ NoItinerary, DSPRegs>;
+
+class SHLLV_QB_DESC : SHLL_QB_R3_DESC_BASE<"shllv.qb", int_mips_shll_qb,
+ NoItinerary, DSPRegs>;
+
+class SHRL_QB_DESC : SHLL_QB_R2_DESC_BASE<"shrl.qb", int_mips_shrl_qb, immZExt3,
+ NoItinerary, DSPRegs>, ClearDefs;
+
+class SHRLV_QB_DESC : SHLL_QB_R3_DESC_BASE<"shrlv.qb", int_mips_shrl_qb,
+ NoItinerary, DSPRegs>, ClearDefs;
+
+class SHLL_PH_DESC : SHLL_QB_R2_DESC_BASE<"shll.ph", int_mips_shll_ph, immZExt4,
+ NoItinerary, DSPRegs>;
+
+class SHLLV_PH_DESC : SHLL_QB_R3_DESC_BASE<"shllv.ph", int_mips_shll_ph,
+ NoItinerary, DSPRegs>;
+
+class SHLL_S_PH_DESC : SHLL_QB_R2_DESC_BASE<"shll_s.ph", int_mips_shll_s_ph,
+ immZExt4, NoItinerary, DSPRegs>;
+
+class SHLLV_S_PH_DESC : SHLL_QB_R3_DESC_BASE<"shllv_s.ph", int_mips_shll_s_ph,
+ NoItinerary, DSPRegs>;
+
+class SHRA_PH_DESC : SHLL_QB_R2_DESC_BASE<"shra.ph", int_mips_shra_ph, immZExt4,
+ NoItinerary, DSPRegs>, ClearDefs;
+
+class SHRAV_PH_DESC : SHLL_QB_R3_DESC_BASE<"shrav.ph", int_mips_shra_ph,
+ NoItinerary, DSPRegs>, ClearDefs;
+
+class SHRA_R_PH_DESC : SHLL_QB_R2_DESC_BASE<"shra_r.ph", int_mips_shra_r_ph,
+ immZExt4, NoItinerary, DSPRegs>,
+ ClearDefs;
+
+class SHRAV_R_PH_DESC : SHLL_QB_R3_DESC_BASE<"shrav_r.ph", int_mips_shra_r_ph,
+ NoItinerary, DSPRegs>, ClearDefs;
+
+class SHLL_S_W_DESC : SHLL_QB_R2_DESC_BASE<"shll_s.w", int_mips_shll_s_w,
+ immZExt5, NoItinerary, CPURegs>;
+
+class SHLLV_S_W_DESC : SHLL_QB_R3_DESC_BASE<"shllv_s.w", int_mips_shll_s_w,
+ NoItinerary, CPURegs>;
+
+class SHRA_R_W_DESC : SHLL_QB_R2_DESC_BASE<"shra_r.w", int_mips_shra_r_w,
+ immZExt5, NoItinerary, CPURegs>,
+ ClearDefs;
+
+class SHRAV_R_W_DESC : SHLL_QB_R3_DESC_BASE<"shrav_r.w", int_mips_shra_r_w,
+ NoItinerary, CPURegs>;
+
+// Multiplication
+class MULEU_S_PH_QBL_DESC : ADDU_QB_DESC_BASE<"muleu_s.ph.qbl",
+ int_mips_muleu_s_ph_qbl,
+ NoItinerary, DSPRegs, DSPRegs>;
+
+class MULEU_S_PH_QBR_DESC : ADDU_QB_DESC_BASE<"muleu_s.ph.qbr",
+ int_mips_muleu_s_ph_qbr,
+ NoItinerary, DSPRegs, DSPRegs>;
+
+class MULEQ_S_W_PHL_DESC : ADDU_QB_DESC_BASE<"muleq_s.w.phl",
+ int_mips_muleq_s_w_phl,
+ NoItinerary, CPURegs, DSPRegs>,
+ IsCommutable;
+
+class MULEQ_S_W_PHR_DESC : ADDU_QB_DESC_BASE<"muleq_s.w.phr",
+ int_mips_muleq_s_w_phr,
+ NoItinerary, CPURegs, DSPRegs>,
+ IsCommutable;
+
+class MULQ_RS_PH_DESC : ADDU_QB_DESC_BASE<"mulq_rs.ph", int_mips_mulq_rs_ph,
+ NoItinerary, DSPRegs, DSPRegs>,
+ IsCommutable;
+
+class MULSAQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"mulsaq_s.w.ph">;
+
+class MAQ_S_W_PHL_DESC : DPA_W_PH_DESC_BASE<"maq_s.w.phl">;
+
+class MAQ_S_W_PHR_DESC : DPA_W_PH_DESC_BASE<"maq_s.w.phr">;
+
+class MAQ_SA_W_PHL_DESC : DPA_W_PH_DESC_BASE<"maq_sa.w.phl">;
+
+class MAQ_SA_W_PHR_DESC : DPA_W_PH_DESC_BASE<"maq_sa.w.phr">;
+
+// Dot product with accumulate/subtract
+class DPAU_H_QBL_DESC : DPA_W_PH_DESC_BASE<"dpau.h.qbl">;
+
+class DPAU_H_QBR_DESC : DPA_W_PH_DESC_BASE<"dpau.h.qbr">;
+
+class DPSU_H_QBL_DESC : DPA_W_PH_DESC_BASE<"dpsu.h.qbl">;
+
+class DPSU_H_QBR_DESC : DPA_W_PH_DESC_BASE<"dpsu.h.qbr">;
+
+class DPAQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpaq_s.w.ph">;
+
+class DPSQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsq_s.w.ph">;
+
+class DPAQ_SA_L_W_DESC : DPA_W_PH_DESC_BASE<"dpaq_sa.l.w">;
+
+class DPSQ_SA_L_W_DESC : DPA_W_PH_DESC_BASE<"dpsq_sa.l.w">;
+
+class MULT_DSP_DESC : MULT_DESC_BASE<"mult">;
+
+class MULTU_DSP_DESC : MULT_DESC_BASE<"multu">;
+
+class MADD_DSP_DESC : MULT_DESC_BASE<"madd">;
+
+class MADDU_DSP_DESC : MULT_DESC_BASE<"maddu">;
+
+class MSUB_DSP_DESC : MULT_DESC_BASE<"msub">;
+
+class MSUBU_DSP_DESC : MULT_DESC_BASE<"msubu">;
+
+// Comparison
+class CMPU_EQ_QB_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmpu.eq.qb",
+ int_mips_cmpu_eq_qb, NoItinerary,
+ DSPRegs>, IsCommutable;
+
+class CMPU_LT_QB_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmpu.lt.qb",
+ int_mips_cmpu_lt_qb, NoItinerary,
+ DSPRegs>, IsCommutable;
+
+class CMPU_LE_QB_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmpu.le.qb",
+ int_mips_cmpu_le_qb, NoItinerary,
+ DSPRegs>, IsCommutable;
+
+class CMPGU_EQ_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgu.eq.qb",
+ int_mips_cmpgu_eq_qb,
+ NoItinerary, CPURegs, DSPRegs>,
+ IsCommutable;
+
+class CMPGU_LT_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgu.lt.qb",
+ int_mips_cmpgu_lt_qb,
+ NoItinerary, CPURegs, DSPRegs>,
+ IsCommutable;
+
+class CMPGU_LE_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgu.le.qb",
+ int_mips_cmpgu_le_qb,
+ NoItinerary, CPURegs, DSPRegs>,
+ IsCommutable;
+
+class CMP_EQ_PH_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmp.eq.ph", int_mips_cmp_eq_ph,
+ NoItinerary, DSPRegs>,
+ IsCommutable;
+
+class CMP_LT_PH_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmp.lt.ph", int_mips_cmp_lt_ph,
+ NoItinerary, DSPRegs>,
+ IsCommutable;
+
+class CMP_LE_PH_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmp.le.ph", int_mips_cmp_le_ph,
+ NoItinerary, DSPRegs>,
+ IsCommutable;
+
+// Misc
+class BITREV_DESC : ABSQ_S_PH_R2_DESC_BASE<"bitrev", int_mips_bitrev,
+ NoItinerary, CPURegs>, ClearDefs;
+
+class PACKRL_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"packrl.ph", int_mips_packrl_ph,
+ NoItinerary, DSPRegs, DSPRegs>,
+ ClearDefs;
+
+class REPL_QB_DESC : REPL_DESC_BASE<"repl.qb", int_mips_repl_qb, immZExt8,
+ NoItinerary, DSPRegs>, ClearDefs;
+
+class REPL_PH_DESC : REPL_DESC_BASE<"repl.ph", int_mips_repl_ph, immZExt10,
+ NoItinerary, DSPRegs>, ClearDefs;
+
+class REPLV_QB_DESC : ABSQ_S_PH_R2_DESC_BASE<"replv.qb", int_mips_repl_qb,
+ NoItinerary, DSPRegs, CPURegs>,
+ ClearDefs;
+
+class REPLV_PH_DESC : ABSQ_S_PH_R2_DESC_BASE<"replv.ph", int_mips_repl_ph,
+ NoItinerary, DSPRegs, CPURegs>,
+ ClearDefs;
+
+class PICK_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"pick.qb", int_mips_pick_qb,
+ NoItinerary, DSPRegs, DSPRegs>,
+ ClearDefs, UseDSPCtrl;
+
+class PICK_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"pick.ph", int_mips_pick_ph,
+ NoItinerary, DSPRegs, DSPRegs>,
+ ClearDefs, UseDSPCtrl;
+
+class LWX_DESC : LX_DESC_BASE<"lwx", int_mips_lwx, NoItinerary>, ClearDefs;
+
+class LHX_DESC : LX_DESC_BASE<"lhx", int_mips_lhx, NoItinerary>, ClearDefs;
+
+class LBUX_DESC : LX_DESC_BASE<"lbux", int_mips_lbux, NoItinerary>, ClearDefs;
+
+class BPOSGE32_DESC : BPOSGE32_DESC_BASE<"bposge32", NoItinerary>;
+
+// Extr
+class EXTP_DESC : EXTR_W_TY1_R1_DESC_BASE<"extp", MipsEXTP, NoItinerary>;
+
+class EXTPV_DESC : EXTR_W_TY1_R2_DESC_BASE<"extpv", MipsEXTP, NoItinerary>;
+
+class EXTPDP_DESC : EXTR_W_TY1_R1_DESC_BASE<"extpdp", MipsEXTPDP, NoItinerary>;
+
+class EXTPDPV_DESC : EXTR_W_TY1_R2_DESC_BASE<"extpdpv", MipsEXTPDP,
+ NoItinerary>;
+
+class EXTR_W_DESC : EXTR_W_TY1_R1_DESC_BASE<"extr.w", MipsEXTR_W, NoItinerary>;
+
+class EXTRV_W_DESC : EXTR_W_TY1_R2_DESC_BASE<"extrv.w", MipsEXTR_W,
+ NoItinerary>;
+
+class EXTR_R_W_DESC : EXTR_W_TY1_R1_DESC_BASE<"extr_r.w", MipsEXTR_R_W,
+ NoItinerary>;
+
+class EXTRV_R_W_DESC : EXTR_W_TY1_R2_DESC_BASE<"extrv_r.w", MipsEXTR_R_W,
+ NoItinerary>;
+
+class EXTR_RS_W_DESC : EXTR_W_TY1_R1_DESC_BASE<"extr_rs.w", MipsEXTR_RS_W,
+ NoItinerary>;
+
+class EXTRV_RS_W_DESC : EXTR_W_TY1_R2_DESC_BASE<"extrv_rs.w", MipsEXTR_RS_W,
+ NoItinerary>;
+
+class EXTR_S_H_DESC : EXTR_W_TY1_R1_DESC_BASE<"extr_s.h", MipsEXTR_S_H,
+ NoItinerary>;
+
+class EXTRV_S_H_DESC : EXTR_W_TY1_R2_DESC_BASE<"extrv_s.h", MipsEXTR_S_H,
+ NoItinerary>;
+
+class SHILO_DESC : SHILO_R1_DESC_BASE<"shilo">;
+
+class SHILOV_DESC : SHILO_R2_DESC_BASE<"shilov">;
+
+class MTHLIP_DESC : MTHLIP_DESC_BASE<"mthlip">;
+
+class RDDSP_DESC : RDDSP_DESC_BASE<"rddsp", int_mips_rddsp, NoItinerary>;
+
+class WRDSP_DESC : WRDSP_DESC_BASE<"wrdsp", int_mips_wrdsp, NoItinerary>;
+
+class INSV_DESC : INSV_DESC_BASE<"insv", int_mips_insv, NoItinerary>;
+
+//===----------------------------------------------------------------------===//
+// MIPS DSP Rev 2
+// Addition/subtraction
+class ADDU_PH_DESC : ADDU_QB_DESC_BASE<"addu.ph", int_mips_addu_ph, NoItinerary,
+ DSPRegs, DSPRegs>, IsCommutable;
+
+class ADDU_S_PH_DESC : ADDU_QB_DESC_BASE<"addu_s.ph", int_mips_addu_s_ph,
+ NoItinerary, DSPRegs, DSPRegs>,
+ IsCommutable;
+
+class SUBU_PH_DESC : ADDU_QB_DESC_BASE<"subu.ph", int_mips_subu_ph, NoItinerary,
+ DSPRegs, DSPRegs>;
+
+class SUBU_S_PH_DESC : ADDU_QB_DESC_BASE<"subu_s.ph", int_mips_subu_s_ph,
+ NoItinerary, DSPRegs, DSPRegs>;
+
+class ADDUH_QB_DESC : ADDUH_QB_DESC_BASE<"adduh.qb", int_mips_adduh_qb,
+ NoItinerary, DSPRegs>,
+ ClearDefs, IsCommutable;
+
+class ADDUH_R_QB_DESC : ADDUH_QB_DESC_BASE<"adduh_r.qb", int_mips_adduh_r_qb,
+ NoItinerary, DSPRegs>,
+ ClearDefs, IsCommutable;
+
+class SUBUH_QB_DESC : ADDUH_QB_DESC_BASE<"subuh.qb", int_mips_subuh_qb,
+ NoItinerary, DSPRegs>, ClearDefs;
+
+class SUBUH_R_QB_DESC : ADDUH_QB_DESC_BASE<"subuh_r.qb", int_mips_subuh_r_qb,
+ NoItinerary, DSPRegs>, ClearDefs;
+
+class ADDQH_PH_DESC : ADDUH_QB_DESC_BASE<"addqh.ph", int_mips_addqh_ph,
+ NoItinerary, DSPRegs>,
+ ClearDefs, IsCommutable;
+
+class ADDQH_R_PH_DESC : ADDUH_QB_DESC_BASE<"addqh_r.ph", int_mips_addqh_r_ph,
+ NoItinerary, DSPRegs>,
+ ClearDefs, IsCommutable;
+
+class SUBQH_PH_DESC : ADDUH_QB_DESC_BASE<"subqh.ph", int_mips_subqh_ph,
+ NoItinerary, DSPRegs>, ClearDefs;
+
+class SUBQH_R_PH_DESC : ADDUH_QB_DESC_BASE<"subqh_r.ph", int_mips_subqh_r_ph,
+ NoItinerary, DSPRegs>, ClearDefs;
+
+class ADDQH_W_DESC : ADDUH_QB_DESC_BASE<"addqh.w", int_mips_addqh_w,
+ NoItinerary, CPURegs>,
+ ClearDefs, IsCommutable;
+
+class ADDQH_R_W_DESC : ADDUH_QB_DESC_BASE<"addqh_r.w", int_mips_addqh_r_w,
+ NoItinerary, CPURegs>,
+ ClearDefs, IsCommutable;
+
+class SUBQH_W_DESC : ADDUH_QB_DESC_BASE<"subqh.w", int_mips_subqh_w,
+ NoItinerary, CPURegs>, ClearDefs;
+
+class SUBQH_R_W_DESC : ADDUH_QB_DESC_BASE<"subqh_r.w", int_mips_subqh_r_w,
+ NoItinerary, CPURegs>, ClearDefs;
+
+// Comparison
+class CMPGDU_EQ_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgdu.eq.qb",
+ int_mips_cmpgdu_eq_qb,
+ NoItinerary, CPURegs, DSPRegs>,
+ IsCommutable;
+
+class CMPGDU_LT_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgdu.lt.qb",
+ int_mips_cmpgdu_lt_qb,
+ NoItinerary, CPURegs, DSPRegs>,
+ IsCommutable;
+
+class CMPGDU_LE_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgdu.le.qb",
+ int_mips_cmpgdu_le_qb,
+ NoItinerary, CPURegs, DSPRegs>,
+ IsCommutable;
+
+// Absolute
+class ABSQ_S_QB_DESC : ABSQ_S_PH_R2_DESC_BASE<"absq_s.qb", int_mips_absq_s_qb,
+ NoItinerary, DSPRegs>;
+
+// Multiplication
+class MUL_PH_DESC : ADDUH_QB_DESC_BASE<"mul.ph", int_mips_mul_ph, NoItinerary,
+ DSPRegs>, IsCommutable;
+
+class MUL_S_PH_DESC : ADDUH_QB_DESC_BASE<"mul_s.ph", int_mips_mul_s_ph,
+ NoItinerary, DSPRegs>, IsCommutable;
+
+class MULQ_S_W_DESC : ADDUH_QB_DESC_BASE<"mulq_s.w", int_mips_mulq_s_w,
+ NoItinerary, CPURegs>, IsCommutable;
+
+class MULQ_RS_W_DESC : ADDUH_QB_DESC_BASE<"mulq_rs.w", int_mips_mulq_rs_w,
+ NoItinerary, CPURegs>, IsCommutable;
+
+class MULQ_S_PH_DESC : ADDU_QB_DESC_BASE<"mulq_s.ph", int_mips_mulq_s_ph,
+ NoItinerary, DSPRegs, DSPRegs>,
+ IsCommutable;
+
+// Dot product with accumulate/subtract
+class DPA_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpa.w.ph">;
+
+class DPS_W_PH_DESC : DPA_W_PH_DESC_BASE<"dps.w.ph">;
+
+class DPAQX_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpaqx_s.w.ph">;
+
+class DPAQX_SA_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpaqx_sa.w.ph">;
+
+class DPAX_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpax.w.ph">;
+
+class DPSX_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsx.w.ph">;
+
+class DPSQX_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsqx_s.w.ph">;
+
+class DPSQX_SA_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsqx_sa.w.ph">;
+
+class MULSA_W_PH_DESC : DPA_W_PH_DESC_BASE<"mulsa.w.ph">;
+
+// Precision reduce/expand
+class PRECR_QB_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"precr.qb.ph",
+ int_mips_precr_qb_ph,
+ NoItinerary, DSPRegs, DSPRegs>;
+
+class PRECR_SRA_PH_W_DESC : PRECR_SRA_PH_W_DESC_BASE<"precr_sra.ph.w",
+ int_mips_precr_sra_ph_w,
+ NoItinerary, DSPRegs,
+ CPURegs>, ClearDefs;
+
+class PRECR_SRA_R_PH_W_DESC : PRECR_SRA_PH_W_DESC_BASE<"precr_sra_r.ph.w",
+ int_mips_precr_sra_r_ph_w,
+ NoItinerary, DSPRegs,
+ CPURegs>, ClearDefs;
+
+// Shift
+class SHRA_QB_DESC : SHLL_QB_R2_DESC_BASE<"shra.qb", int_mips_shra_qb, immZExt3,
+ NoItinerary, DSPRegs>, ClearDefs;
+
+class SHRAV_QB_DESC : SHLL_QB_R3_DESC_BASE<"shrav.qb", int_mips_shra_qb,
+ NoItinerary, DSPRegs>, ClearDefs;
+
+class SHRA_R_QB_DESC : SHLL_QB_R2_DESC_BASE<"shra_r.qb", int_mips_shra_r_qb,
+ immZExt3, NoItinerary, DSPRegs>,
+ ClearDefs;
+
+class SHRAV_R_QB_DESC : SHLL_QB_R3_DESC_BASE<"shrav_r.qb", int_mips_shra_r_qb,
+ NoItinerary, DSPRegs>, ClearDefs;
+
+class SHRL_PH_DESC : SHLL_QB_R2_DESC_BASE<"shrl.ph", int_mips_shrl_ph, immZExt4,
+ NoItinerary, DSPRegs>, ClearDefs;
+
+class SHRLV_PH_DESC : SHLL_QB_R3_DESC_BASE<"shrlv.ph", int_mips_shrl_ph,
+ NoItinerary, DSPRegs>, ClearDefs;
+
+// Misc
+class APPEND_DESC : APPEND_DESC_BASE<"append", int_mips_append, immZExt5,
+ NoItinerary>, ClearDefs;
+
+class BALIGN_DESC : APPEND_DESC_BASE<"balign", int_mips_balign, immZExt2,
+ NoItinerary>, ClearDefs;
+
+class PREPEND_DESC : APPEND_DESC_BASE<"prepend", int_mips_prepend, immZExt5,
+ NoItinerary>, ClearDefs;
+
+// Pseudos.
+def BPOSGE32_PSEUDO : BPOSGE32_PSEUDO_DESC_BASE<int_mips_bposge32, NoItinerary>;
+
+// Instruction defs.
+// MIPS DSP Rev 1
+def ADDU_QB : ADDU_QB_ENC, ADDU_QB_DESC;
+def ADDU_S_QB : ADDU_S_QB_ENC, ADDU_S_QB_DESC;
+def SUBU_QB : SUBU_QB_ENC, SUBU_QB_DESC;
+def SUBU_S_QB : SUBU_S_QB_ENC, SUBU_S_QB_DESC;
+def ADDQ_PH : ADDQ_PH_ENC, ADDQ_PH_DESC;
+def ADDQ_S_PH : ADDQ_S_PH_ENC, ADDQ_S_PH_DESC;
+def SUBQ_PH : SUBQ_PH_ENC, SUBQ_PH_DESC;
+def SUBQ_S_PH : SUBQ_S_PH_ENC, SUBQ_S_PH_DESC;
+def ADDQ_S_W : ADDQ_S_W_ENC, ADDQ_S_W_DESC;
+def SUBQ_S_W : SUBQ_S_W_ENC, SUBQ_S_W_DESC;
+def ADDSC : ADDSC_ENC, ADDSC_DESC;
+def ADDWC : ADDWC_ENC, ADDWC_DESC;
+def MODSUB : MODSUB_ENC, MODSUB_DESC;
+def RADDU_W_QB : RADDU_W_QB_ENC, RADDU_W_QB_DESC;
+def ABSQ_S_PH : ABSQ_S_PH_ENC, ABSQ_S_PH_DESC;
+def ABSQ_S_W : ABSQ_S_W_ENC, ABSQ_S_W_DESC;
+def PRECRQ_QB_PH : PRECRQ_QB_PH_ENC, PRECRQ_QB_PH_DESC;
+def PRECRQ_PH_W : PRECRQ_PH_W_ENC, PRECRQ_PH_W_DESC;
+def PRECRQ_RS_PH_W : PRECRQ_RS_PH_W_ENC, PRECRQ_RS_PH_W_DESC;
+def PRECRQU_S_QB_PH : PRECRQU_S_QB_PH_ENC, PRECRQU_S_QB_PH_DESC;
+def PRECEQ_W_PHL : PRECEQ_W_PHL_ENC, PRECEQ_W_PHL_DESC;
+def PRECEQ_W_PHR : PRECEQ_W_PHR_ENC, PRECEQ_W_PHR_DESC;
+def PRECEQU_PH_QBL : PRECEQU_PH_QBL_ENC, PRECEQU_PH_QBL_DESC;
+def PRECEQU_PH_QBR : PRECEQU_PH_QBR_ENC, PRECEQU_PH_QBR_DESC;
+def PRECEQU_PH_QBLA : PRECEQU_PH_QBLA_ENC, PRECEQU_PH_QBLA_DESC;
+def PRECEQU_PH_QBRA : PRECEQU_PH_QBRA_ENC, PRECEQU_PH_QBRA_DESC;
+def PRECEU_PH_QBL : PRECEU_PH_QBL_ENC, PRECEU_PH_QBL_DESC;
+def PRECEU_PH_QBR : PRECEU_PH_QBR_ENC, PRECEU_PH_QBR_DESC;
+def PRECEU_PH_QBLA : PRECEU_PH_QBLA_ENC, PRECEU_PH_QBLA_DESC;
+def PRECEU_PH_QBRA : PRECEU_PH_QBRA_ENC, PRECEU_PH_QBRA_DESC;
+def SHLL_QB : SHLL_QB_ENC, SHLL_QB_DESC;
+def SHLLV_QB : SHLLV_QB_ENC, SHLLV_QB_DESC;
+def SHRL_QB : SHRL_QB_ENC, SHRL_QB_DESC;
+def SHRLV_QB : SHRLV_QB_ENC, SHRLV_QB_DESC;
+def SHLL_PH : SHLL_PH_ENC, SHLL_PH_DESC;
+def SHLLV_PH : SHLLV_PH_ENC, SHLLV_PH_DESC;
+def SHLL_S_PH : SHLL_S_PH_ENC, SHLL_S_PH_DESC;
+def SHLLV_S_PH : SHLLV_S_PH_ENC, SHLLV_S_PH_DESC;
+def SHRA_PH : SHRA_PH_ENC, SHRA_PH_DESC;
+def SHRAV_PH : SHRAV_PH_ENC, SHRAV_PH_DESC;
+def SHRA_R_PH : SHRA_R_PH_ENC, SHRA_R_PH_DESC;
+def SHRAV_R_PH : SHRAV_R_PH_ENC, SHRAV_R_PH_DESC;
+def SHLL_S_W : SHLL_S_W_ENC, SHLL_S_W_DESC;
+def SHLLV_S_W : SHLLV_S_W_ENC, SHLLV_S_W_DESC;
+def SHRA_R_W : SHRA_R_W_ENC, SHRA_R_W_DESC;
+def SHRAV_R_W : SHRAV_R_W_ENC, SHRAV_R_W_DESC;
+def MULEU_S_PH_QBL : MULEU_S_PH_QBL_ENC, MULEU_S_PH_QBL_DESC;
+def MULEU_S_PH_QBR : MULEU_S_PH_QBR_ENC, MULEU_S_PH_QBR_DESC;
+def MULEQ_S_W_PHL : MULEQ_S_W_PHL_ENC, MULEQ_S_W_PHL_DESC;
+def MULEQ_S_W_PHR : MULEQ_S_W_PHR_ENC, MULEQ_S_W_PHR_DESC;
+def MULQ_RS_PH : MULQ_RS_PH_ENC, MULQ_RS_PH_DESC;
+def MULSAQ_S_W_PH : MULSAQ_S_W_PH_ENC, MULSAQ_S_W_PH_DESC;
+def MAQ_S_W_PHL : MAQ_S_W_PHL_ENC, MAQ_S_W_PHL_DESC;
+def MAQ_S_W_PHR : MAQ_S_W_PHR_ENC, MAQ_S_W_PHR_DESC;
+def MAQ_SA_W_PHL : MAQ_SA_W_PHL_ENC, MAQ_SA_W_PHL_DESC;
+def MAQ_SA_W_PHR : MAQ_SA_W_PHR_ENC, MAQ_SA_W_PHR_DESC;
+def DPAU_H_QBL : DPAU_H_QBL_ENC, DPAU_H_QBL_DESC;
+def DPAU_H_QBR : DPAU_H_QBR_ENC, DPAU_H_QBR_DESC;
+def DPSU_H_QBL : DPSU_H_QBL_ENC, DPSU_H_QBL_DESC;
+def DPSU_H_QBR : DPSU_H_QBR_ENC, DPSU_H_QBR_DESC;
+def DPAQ_S_W_PH : DPAQ_S_W_PH_ENC, DPAQ_S_W_PH_DESC;
+def DPSQ_S_W_PH : DPSQ_S_W_PH_ENC, DPSQ_S_W_PH_DESC;
+def DPAQ_SA_L_W : DPAQ_SA_L_W_ENC, DPAQ_SA_L_W_DESC;
+def DPSQ_SA_L_W : DPSQ_SA_L_W_ENC, DPSQ_SA_L_W_DESC;
+def MULT_DSP : MULT_DSP_ENC, MULT_DSP_DESC;
+def MULTU_DSP : MULTU_DSP_ENC, MULTU_DSP_DESC;
+def MADD_DSP : MADD_DSP_ENC, MADD_DSP_DESC;
+def MADDU_DSP : MADDU_DSP_ENC, MADDU_DSP_DESC;
+def MSUB_DSP : MSUB_DSP_ENC, MSUB_DSP_DESC;
+def MSUBU_DSP : MSUBU_DSP_ENC, MSUBU_DSP_DESC;
+def CMPU_EQ_QB : CMPU_EQ_QB_ENC, CMPU_EQ_QB_DESC;
+def CMPU_LT_QB : CMPU_LT_QB_ENC, CMPU_LT_QB_DESC;
+def CMPU_LE_QB : CMPU_LE_QB_ENC, CMPU_LE_QB_DESC;
+def CMPGU_EQ_QB : CMPGU_EQ_QB_ENC, CMPGU_EQ_QB_DESC;
+def CMPGU_LT_QB : CMPGU_LT_QB_ENC, CMPGU_LT_QB_DESC;
+def CMPGU_LE_QB : CMPGU_LE_QB_ENC, CMPGU_LE_QB_DESC;
+def CMP_EQ_PH : CMP_EQ_PH_ENC, CMP_EQ_PH_DESC;
+def CMP_LT_PH : CMP_LT_PH_ENC, CMP_LT_PH_DESC;
+def CMP_LE_PH : CMP_LE_PH_ENC, CMP_LE_PH_DESC;
+def BITREV : BITREV_ENC, BITREV_DESC;
+def PACKRL_PH : PACKRL_PH_ENC, PACKRL_PH_DESC;
+def REPL_QB : REPL_QB_ENC, REPL_QB_DESC;
+def REPL_PH : REPL_PH_ENC, REPL_PH_DESC;
+def REPLV_QB : REPLV_QB_ENC, REPLV_QB_DESC;
+def REPLV_PH : REPLV_PH_ENC, REPLV_PH_DESC;
+def PICK_QB : PICK_QB_ENC, PICK_QB_DESC;
+def PICK_PH : PICK_PH_ENC, PICK_PH_DESC;
+def LWX : LWX_ENC, LWX_DESC;
+def LHX : LHX_ENC, LHX_DESC;
+def LBUX : LBUX_ENC, LBUX_DESC;
+def BPOSGE32 : BPOSGE32_ENC, BPOSGE32_DESC;
+def INSV : INSV_ENC, INSV_DESC;
+def EXTP : EXTP_ENC, EXTP_DESC;
+def EXTPV : EXTPV_ENC, EXTPV_DESC;
+def EXTPDP : EXTPDP_ENC, EXTPDP_DESC;
+def EXTPDPV : EXTPDPV_ENC, EXTPDPV_DESC;
+def EXTR_W : EXTR_W_ENC, EXTR_W_DESC;
+def EXTRV_W : EXTRV_W_ENC, EXTRV_W_DESC;
+def EXTR_R_W : EXTR_R_W_ENC, EXTR_R_W_DESC;
+def EXTRV_R_W : EXTRV_R_W_ENC, EXTRV_R_W_DESC;
+def EXTR_RS_W : EXTR_RS_W_ENC, EXTR_RS_W_DESC;
+def EXTRV_RS_W : EXTRV_RS_W_ENC, EXTRV_RS_W_DESC;
+def EXTR_S_H : EXTR_S_H_ENC, EXTR_S_H_DESC;
+def EXTRV_S_H : EXTRV_S_H_ENC, EXTRV_S_H_DESC;
+def SHILO : SHILO_ENC, SHILO_DESC;
+def SHILOV : SHILOV_ENC, SHILOV_DESC;
+def MTHLIP : MTHLIP_ENC, MTHLIP_DESC;
+def RDDSP : RDDSP_ENC, RDDSP_DESC;
+def WRDSP : WRDSP_ENC, WRDSP_DESC;
+
+// MIPS DSP Rev 2
+let Predicates = [HasDSPR2] in {
+
+def ADDU_PH : ADDU_PH_ENC, ADDU_PH_DESC;
+def ADDU_S_PH : ADDU_S_PH_ENC, ADDU_S_PH_DESC;
+def SUBU_PH : SUBU_PH_ENC, SUBU_PH_DESC;
+def SUBU_S_PH : SUBU_S_PH_ENC, SUBU_S_PH_DESC;
+def CMPGDU_EQ_QB : CMPGDU_EQ_QB_ENC, CMPGDU_EQ_QB_DESC;
+def CMPGDU_LT_QB : CMPGDU_LT_QB_ENC, CMPGDU_LT_QB_DESC;
+def CMPGDU_LE_QB : CMPGDU_LE_QB_ENC, CMPGDU_LE_QB_DESC;
+def ABSQ_S_QB : ABSQ_S_QB_ENC, ABSQ_S_QB_DESC;
+def ADDUH_QB : ADDUH_QB_ENC, ADDUH_QB_DESC;
+def ADDUH_R_QB : ADDUH_R_QB_ENC, ADDUH_R_QB_DESC;
+def SUBUH_QB : SUBUH_QB_ENC, SUBUH_QB_DESC;
+def SUBUH_R_QB : SUBUH_R_QB_ENC, SUBUH_R_QB_DESC;
+def ADDQH_PH : ADDQH_PH_ENC, ADDQH_PH_DESC;
+def ADDQH_R_PH : ADDQH_R_PH_ENC, ADDQH_R_PH_DESC;
+def SUBQH_PH : SUBQH_PH_ENC, SUBQH_PH_DESC;
+def SUBQH_R_PH : SUBQH_R_PH_ENC, SUBQH_R_PH_DESC;
+def ADDQH_W : ADDQH_W_ENC, ADDQH_W_DESC;
+def ADDQH_R_W : ADDQH_R_W_ENC, ADDQH_R_W_DESC;
+def SUBQH_W : SUBQH_W_ENC, SUBQH_W_DESC;
+def SUBQH_R_W : SUBQH_R_W_ENC, SUBQH_R_W_DESC;
+def MUL_PH : MUL_PH_ENC, MUL_PH_DESC;
+def MUL_S_PH : MUL_S_PH_ENC, MUL_S_PH_DESC;
+def MULQ_S_W : MULQ_S_W_ENC, MULQ_S_W_DESC;
+def MULQ_RS_W : MULQ_RS_W_ENC, MULQ_RS_W_DESC;
+def MULQ_S_PH : MULQ_S_PH_ENC, MULQ_S_PH_DESC;
+def DPA_W_PH : DPA_W_PH_ENC, DPA_W_PH_DESC;
+def DPS_W_PH : DPS_W_PH_ENC, DPS_W_PH_DESC;
+def DPAQX_S_W_PH : DPAQX_S_W_PH_ENC, DPAQX_S_W_PH_DESC;
+def DPAQX_SA_W_PH : DPAQX_SA_W_PH_ENC, DPAQX_SA_W_PH_DESC;
+def DPAX_W_PH : DPAX_W_PH_ENC, DPAX_W_PH_DESC;
+def DPSX_W_PH : DPSX_W_PH_ENC, DPSX_W_PH_DESC;
+def DPSQX_S_W_PH : DPSQX_S_W_PH_ENC, DPSQX_S_W_PH_DESC;
+def DPSQX_SA_W_PH : DPSQX_SA_W_PH_ENC, DPSQX_SA_W_PH_DESC;
+def MULSA_W_PH : MULSA_W_PH_ENC, MULSA_W_PH_DESC;
+def PRECR_QB_PH : PRECR_QB_PH_ENC, PRECR_QB_PH_DESC;
+def PRECR_SRA_PH_W : PRECR_SRA_PH_W_ENC, PRECR_SRA_PH_W_DESC;
+def PRECR_SRA_R_PH_W : PRECR_SRA_R_PH_W_ENC, PRECR_SRA_R_PH_W_DESC;
+def SHRA_QB : SHRA_QB_ENC, SHRA_QB_DESC;
+def SHRAV_QB : SHRAV_QB_ENC, SHRAV_QB_DESC;
+def SHRA_R_QB : SHRA_R_QB_ENC, SHRA_R_QB_DESC;
+def SHRAV_R_QB : SHRAV_R_QB_ENC, SHRAV_R_QB_DESC;
+def SHRL_PH : SHRL_PH_ENC, SHRL_PH_DESC;
+def SHRLV_PH : SHRLV_PH_ENC, SHRLV_PH_DESC;
+def APPEND : APPEND_ENC, APPEND_DESC;
+def BALIGN : BALIGN_ENC, BALIGN_DESC;
+def PREPEND : PREPEND_ENC, PREPEND_DESC;
+
+}
+
+// Pseudos.
+def MULSAQ_S_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsMULSAQ_S_W_PH, NoItinerary,
+ MULSAQ_S_W_PH>;
+def MAQ_S_W_PHL_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsMAQ_S_W_PHL, NoItinerary,
+ MAQ_S_W_PHL>;
+def MAQ_S_W_PHR_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsMAQ_S_W_PHR, NoItinerary,
+ MAQ_S_W_PHR>;
+def MAQ_SA_W_PHL_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsMAQ_SA_W_PHL, NoItinerary,
+ MAQ_SA_W_PHL>;
+def MAQ_SA_W_PHR_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsMAQ_SA_W_PHR, NoItinerary,
+ MAQ_SA_W_PHR>;
+def DPAU_H_QBL_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPAU_H_QBL, NoItinerary,
+ DPAU_H_QBL>;
+def DPAU_H_QBR_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPAU_H_QBR, NoItinerary,
+ DPAU_H_QBR>;
+def DPSU_H_QBL_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPSU_H_QBL, NoItinerary,
+ DPSU_H_QBL>;
+def DPSU_H_QBR_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPSU_H_QBR, NoItinerary,
+ DPSU_H_QBR>;
+def DPAQ_S_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPAQ_S_W_PH, NoItinerary,
+ DPAQ_S_W_PH>;
+def DPSQ_S_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPSQ_S_W_PH, NoItinerary,
+ DPSQ_S_W_PH>;
+def DPAQ_SA_L_W_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPAQ_SA_L_W, NoItinerary,
+ DPAQ_SA_L_W>;
+def DPSQ_SA_L_W_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPSQ_SA_L_W, NoItinerary,
+ DPSQ_SA_L_W>;
+
+def MULT_DSP_PSEUDO : MULT_PSEUDO_BASE<MipsMULT, NoItinerary, MULT_DSP>,
+ IsCommutable;
+def MULTU_DSP_PSEUDO : MULT_PSEUDO_BASE<MipsMULTU, NoItinerary, MULTU_DSP>,
+ IsCommutable;
+def MADD_DSP_PSEUDO : MULT_PSEUDO_BASE<MipsMADD_DSP, NoItinerary, MADD_DSP>,
+ IsCommutable, UseAC;
+def MADDU_DSP_PSEUDO : MULT_PSEUDO_BASE<MipsMADDU_DSP, NoItinerary, MADDU_DSP>,
+ IsCommutable, UseAC;
+def MSUB_DSP_PSEUDO : MULT_PSEUDO_BASE<MipsMSUB_DSP, NoItinerary, MSUB_DSP>,
+ UseAC;
+def MSUBU_DSP_PSEUDO : MULT_PSEUDO_BASE<MipsMSUBU_DSP, NoItinerary, MSUBU_DSP>,
+ UseAC;
+
+def SHILO_PSEUDO : SHILO_R1_PSEUDO_BASE<MipsSHILO, NoItinerary, SHILO>;
+def SHILOV_PSEUDO : SHILO_R2_PSEUDO_BASE<MipsSHILO, NoItinerary, SHILOV>;
+def MTHLIP_PSEUDO : SHILO_R2_PSEUDO_BASE<MipsMTHLIP, NoItinerary, MTHLIP>;
+
+let Predicates = [HasDSPR2] in {
+
+def DPA_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPA_W_PH, NoItinerary, DPA_W_PH>;
+def DPS_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPS_W_PH, NoItinerary, DPS_W_PH>;
+def DPAQX_S_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPAQX_S_W_PH, NoItinerary,
+ DPAQX_S_W_PH>;
+def DPAQX_SA_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPAQX_SA_W_PH, NoItinerary,
+ DPAQX_SA_W_PH>;
+def DPAX_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPAX_W_PH, NoItinerary,
+ DPAX_W_PH>;
+def DPSX_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPSX_W_PH, NoItinerary,
+ DPSX_W_PH>;
+def DPSQX_S_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPSQX_S_W_PH, NoItinerary,
+ DPSQX_S_W_PH>;
+def DPSQX_SA_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPSQX_SA_W_PH, NoItinerary,
+ DPSQX_SA_W_PH>;
+def MULSA_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsMULSA_W_PH, NoItinerary,
+ MULSA_W_PH>;
+
+}
+
+// Patterns.
+class DSPPat<dag pattern, dag result, Predicate pred = HasDSP> :
+ Pat<pattern, result>, Requires<[pred]>;
+
+class BitconvertPat<ValueType DstVT, ValueType SrcVT, RegisterClass DstRC,
+ RegisterClass SrcRC> :
+ DSPPat<(DstVT (bitconvert (SrcVT SrcRC:$src))),
+ (COPY_TO_REGCLASS SrcRC:$src, DstRC)>;
+
+def : BitconvertPat<i32, v2i16, CPURegs, DSPRegs>;
+def : BitconvertPat<i32, v4i8, CPURegs, DSPRegs>;
+def : BitconvertPat<v2i16, i32, DSPRegs, CPURegs>;
+def : BitconvertPat<v4i8, i32, DSPRegs, CPURegs>;
+
+def : DSPPat<(v2i16 (load addr:$a)),
+ (v2i16 (COPY_TO_REGCLASS (LW addr:$a), DSPRegs))>;
+def : DSPPat<(v4i8 (load addr:$a)),
+ (v4i8 (COPY_TO_REGCLASS (LW addr:$a), DSPRegs))>;
+def : DSPPat<(store (v2i16 DSPRegs:$val), addr:$a),
+ (SW (COPY_TO_REGCLASS DSPRegs:$val, CPURegs), addr:$a)>;
+def : DSPPat<(store (v4i8 DSPRegs:$val), addr:$a),
+ (SW (COPY_TO_REGCLASS DSPRegs:$val, CPURegs), addr:$a)>;
+
+// Extr patterns.
+class EXTR_W_TY1_R2_Pat<SDPatternOperator OpNode, Instruction Instr> :
+ DSPPat<(i32 (OpNode CPURegs:$rs)), (Instr AC0, CPURegs:$rs)>;
+
+class EXTR_W_TY1_R1_Pat<SDPatternOperator OpNode, Instruction Instr> :
+ DSPPat<(i32 (OpNode immZExt5:$shift)), (Instr AC0, immZExt5:$shift)>;
+
+def : EXTR_W_TY1_R1_Pat<MipsEXTP, EXTP>;
+def : EXTR_W_TY1_R2_Pat<MipsEXTP, EXTPV>;
+def : EXTR_W_TY1_R1_Pat<MipsEXTPDP, EXTPDP>;
+def : EXTR_W_TY1_R2_Pat<MipsEXTPDP, EXTPDPV>;
+def : EXTR_W_TY1_R1_Pat<MipsEXTR_W, EXTR_W>;
+def : EXTR_W_TY1_R2_Pat<MipsEXTR_W, EXTRV_W>;
+def : EXTR_W_TY1_R1_Pat<MipsEXTR_R_W, EXTR_R_W>;
+def : EXTR_W_TY1_R2_Pat<MipsEXTR_R_W, EXTRV_R_W>;
+def : EXTR_W_TY1_R1_Pat<MipsEXTR_RS_W, EXTR_RS_W>;
+def : EXTR_W_TY1_R2_Pat<MipsEXTR_RS_W, EXTRV_RS_W>;
+def : EXTR_W_TY1_R1_Pat<MipsEXTR_S_H, EXTR_S_H>;
+def : EXTR_W_TY1_R2_Pat<MipsEXTR_S_H, EXTRV_S_H>;
Modified: llvm/branches/R600/lib/Target/Mips/MipsISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/Mips/MipsISelLowering.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/Mips/MipsISelLowering.cpp (original)
+++ llvm/branches/R600/lib/Target/Mips/MipsISelLowering.cpp Tue Oct 2 09:15:33 2012
@@ -868,6 +868,8 @@
case ISD::SRL_PARTS: return LowerShiftRightParts(Op, DAG, false);
case ISD::LOAD: return LowerLOAD(Op, DAG);
case ISD::STORE: return LowerSTORE(Op, DAG);
+ case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
+ case ISD::INTRINSIC_W_CHAIN: return LowerINTRINSIC_W_CHAIN(Op, DAG);
}
return SDValue();
}
@@ -976,6 +978,70 @@
return BB;
}
*/
+
+MachineBasicBlock *
+MipsTargetLowering::EmitBPOSGE32(MachineInstr *MI, MachineBasicBlock *BB) const{
+ // $bb:
+ // bposge32_pseudo $vr0
+ // =>
+ // $bb:
+ // bposge32 $tbb
+ // $fbb:
+ // li $vr2, 0
+ // b $sink
+ // $tbb:
+ // li $vr1, 1
+ // $sink:
+ // $vr0 = phi($vr2, $fbb, $vr1, $tbb)
+
+ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetRegisterClass *RC = &Mips::CPURegsRegClass;
+ DebugLoc DL = MI->getDebugLoc();
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = llvm::next(MachineFunction::iterator(BB));
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, FBB);
+ F->insert(It, TBB);
+ F->insert(It, Sink);
+
+ // Transfer the remainder of BB and its successor edges to Sink.
+ Sink->splice(Sink->begin(), BB, llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ Sink->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Add successors.
+ BB->addSuccessor(FBB);
+ BB->addSuccessor(TBB);
+ FBB->addSuccessor(Sink);
+ TBB->addSuccessor(Sink);
+
+ // Insert the real bposge32 instruction to $BB.
+ BuildMI(BB, DL, TII->get(Mips::BPOSGE32)).addMBB(TBB);
+
+ // Fill $FBB.
+ unsigned VR2 = RegInfo.createVirtualRegister(RC);
+ BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), VR2)
+ .addReg(Mips::ZERO).addImm(0);
+ BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink);
+
+ // Fill $TBB.
+ unsigned VR1 = RegInfo.createVirtualRegister(RC);
+ BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), VR1)
+ .addReg(Mips::ZERO).addImm(1);
+
+ // Insert phi function to $Sink.
+ BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI),
+ MI->getOperand(0).getReg())
+ .addReg(VR2).addMBB(FBB).addReg(VR1).addMBB(TBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return Sink;
+}
+
MachineBasicBlock *
MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const {
@@ -1084,6 +1150,8 @@
case Mips::ATOMIC_CMP_SWAP_I64:
case Mips::ATOMIC_CMP_SWAP_I64_P8:
return EmitAtomicCmpSwap(MI, BB, 8);
+ case Mips::BPOSGE32_PSEUDO:
+ return EmitBPOSGE32(MI, BB);
}
}
@@ -2282,6 +2350,151 @@
return CreateStoreLR(MipsISD::SDR, DAG, SD, SDL, IsLittle ? 0 : 7);
}
+// This function expands mips intrinsic nodes which have 64-bit input operands
+// or output values.
+//
+// out64 = intrinsic-node in64
+// =>
+// lo = copy (extract-element (in64, 0))
+// hi = copy (extract-element (in64, 1))
+// mips-specific-node
+// v0 = copy lo
+// v1 = copy hi
+// out64 = merge-values (v0, v1)
+//
+static SDValue LowerDSPIntr(SDValue Op, SelectionDAG &DAG,
+ unsigned Opc, bool HasI64In, bool HasI64Out) {
+ DebugLoc DL = Op.getDebugLoc();
+ bool HasChainIn = Op->getOperand(0).getValueType() == MVT::Other;
+ SDValue Chain = HasChainIn ? Op->getOperand(0) : DAG.getEntryNode();
+ SmallVector<SDValue, 3> Ops;
+
+ if (HasI64In) {
+ SDValue InLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32,
+ Op->getOperand(1 + HasChainIn),
+ DAG.getConstant(0, MVT::i32));
+ SDValue InHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32,
+ Op->getOperand(1 + HasChainIn),
+ DAG.getConstant(1, MVT::i32));
+
+ Chain = DAG.getCopyToReg(Chain, DL, Mips::LO, InLo, SDValue());
+ Chain = DAG.getCopyToReg(Chain, DL, Mips::HI, InHi, Chain.getValue(1));
+
+ Ops.push_back(Chain);
+ Ops.append(Op->op_begin() + HasChainIn + 2, Op->op_end());
+ Ops.push_back(Chain.getValue(1));
+ } else {
+ Ops.push_back(Chain);
+ Ops.append(Op->op_begin() + HasChainIn + 1, Op->op_end());
+ }
+
+ if (!HasI64Out)
+ return DAG.getNode(Opc, DL, Op->value_begin(), Op->getNumValues(),
+ Ops.begin(), Ops.size());
+
+ SDValue Intr = DAG.getNode(Opc, DL, DAG.getVTList(MVT::Other, MVT::Glue),
+ Ops.begin(), Ops.size());
+ SDValue OutLo = DAG.getCopyFromReg(Intr.getValue(0), DL, Mips::LO, MVT::i32,
+ Intr.getValue(1));
+ SDValue OutHi = DAG.getCopyFromReg(OutLo.getValue(1), DL, Mips::HI, MVT::i32,
+ OutLo.getValue(2));
+ SDValue Out = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, OutLo, OutHi);
+
+ if (!HasChainIn)
+ return Out;
+
+ SDValue Vals[] = { Out, OutHi.getValue(1) };
+ return DAG.getMergeValues(Vals, 2, DL);
+}
+
+SDValue MipsTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
+ SelectionDAG &DAG) const {
+ switch (cast<ConstantSDNode>(Op->getOperand(0))->getZExtValue()) {
+ default:
+ return SDValue();
+ case Intrinsic::mips_shilo:
+ return LowerDSPIntr(Op, DAG, MipsISD::SHILO, true, true);
+ case Intrinsic::mips_dpau_h_qbl:
+ return LowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBL, true, true);
+ case Intrinsic::mips_dpau_h_qbr:
+ return LowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBR, true, true);
+ case Intrinsic::mips_dpsu_h_qbl:
+ return LowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBL, true, true);
+ case Intrinsic::mips_dpsu_h_qbr:
+ return LowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBR, true, true);
+ case Intrinsic::mips_dpa_w_ph:
+ return LowerDSPIntr(Op, DAG, MipsISD::DPA_W_PH, true, true);
+ case Intrinsic::mips_dps_w_ph:
+ return LowerDSPIntr(Op, DAG, MipsISD::DPS_W_PH, true, true);
+ case Intrinsic::mips_dpax_w_ph:
+ return LowerDSPIntr(Op, DAG, MipsISD::DPAX_W_PH, true, true);
+ case Intrinsic::mips_dpsx_w_ph:
+ return LowerDSPIntr(Op, DAG, MipsISD::DPSX_W_PH, true, true);
+ case Intrinsic::mips_mulsa_w_ph:
+ return LowerDSPIntr(Op, DAG, MipsISD::MULSA_W_PH, true, true);
+ case Intrinsic::mips_mult:
+ return LowerDSPIntr(Op, DAG, MipsISD::MULT, false, true);
+ case Intrinsic::mips_multu:
+ return LowerDSPIntr(Op, DAG, MipsISD::MULTU, false, true);
+ case Intrinsic::mips_madd:
+ return LowerDSPIntr(Op, DAG, MipsISD::MADD_DSP, true, true);
+ case Intrinsic::mips_maddu:
+ return LowerDSPIntr(Op, DAG, MipsISD::MADDU_DSP, true, true);
+ case Intrinsic::mips_msub:
+ return LowerDSPIntr(Op, DAG, MipsISD::MSUB_DSP, true, true);
+ case Intrinsic::mips_msubu:
+ return LowerDSPIntr(Op, DAG, MipsISD::MSUBU_DSP, true, true);
+ }
+}
+
+SDValue MipsTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
+ SelectionDAG &DAG) const {
+ switch (cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue()) {
+ default:
+ return SDValue();
+ case Intrinsic::mips_extp:
+ return LowerDSPIntr(Op, DAG, MipsISD::EXTP, true, false);
+ case Intrinsic::mips_extpdp:
+ return LowerDSPIntr(Op, DAG, MipsISD::EXTPDP, true, false);
+ case Intrinsic::mips_extr_w:
+ return LowerDSPIntr(Op, DAG, MipsISD::EXTR_W, true, false);
+ case Intrinsic::mips_extr_r_w:
+ return LowerDSPIntr(Op, DAG, MipsISD::EXTR_R_W, true, false);
+ case Intrinsic::mips_extr_rs_w:
+ return LowerDSPIntr(Op, DAG, MipsISD::EXTR_RS_W, true, false);
+ case Intrinsic::mips_extr_s_h:
+ return LowerDSPIntr(Op, DAG, MipsISD::EXTR_S_H, true, false);
+ case Intrinsic::mips_mthlip:
+ return LowerDSPIntr(Op, DAG, MipsISD::MTHLIP, true, true);
+ case Intrinsic::mips_mulsaq_s_w_ph:
+ return LowerDSPIntr(Op, DAG, MipsISD::MULSAQ_S_W_PH, true, true);
+ case Intrinsic::mips_maq_s_w_phl:
+ return LowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHL, true, true);
+ case Intrinsic::mips_maq_s_w_phr:
+ return LowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHR, true, true);
+ case Intrinsic::mips_maq_sa_w_phl:
+ return LowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHL, true, true);
+ case Intrinsic::mips_maq_sa_w_phr:
+ return LowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHR, true, true);
+ case Intrinsic::mips_dpaq_s_w_ph:
+ return LowerDSPIntr(Op, DAG, MipsISD::DPAQ_S_W_PH, true, true);
+ case Intrinsic::mips_dpsq_s_w_ph:
+ return LowerDSPIntr(Op, DAG, MipsISD::DPSQ_S_W_PH, true, true);
+ case Intrinsic::mips_dpaq_sa_l_w:
+ return LowerDSPIntr(Op, DAG, MipsISD::DPAQ_SA_L_W, true, true);
+ case Intrinsic::mips_dpsq_sa_l_w:
+ return LowerDSPIntr(Op, DAG, MipsISD::DPSQ_SA_L_W, true, true);
+ case Intrinsic::mips_dpaqx_s_w_ph:
+ return LowerDSPIntr(Op, DAG, MipsISD::DPAQX_S_W_PH, true, true);
+ case Intrinsic::mips_dpaqx_sa_w_ph:
+ return LowerDSPIntr(Op, DAG, MipsISD::DPAQX_SA_W_PH, true, true);
+ case Intrinsic::mips_dpsqx_s_w_ph:
+ return LowerDSPIntr(Op, DAG, MipsISD::DPSQX_S_W_PH, true, true);
+ case Intrinsic::mips_dpsqx_sa_w_ph:
+ return LowerDSPIntr(Op, DAG, MipsISD::DPSQX_SA_W_PH, true, true);
+ }
+}
+
//===----------------------------------------------------------------------===//
// Calling Convention Implementation
//===----------------------------------------------------------------------===//
Modified: llvm/branches/R600/lib/Target/Mips/MipsISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/Mips/MipsISelLowering.h?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/Mips/MipsISelLowering.h (original)
+++ llvm/branches/R600/lib/Target/Mips/MipsISelLowering.h Tue Oct 2 09:15:33 2012
@@ -202,6 +202,8 @@
bool IsSRA) const;
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
virtual SDValue
LowerFormalArguments(SDValue Chain,
@@ -260,6 +262,8 @@
virtual unsigned getJumpTableEncoding() const;
+ MachineBasicBlock *EmitBPOSGE32(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
unsigned Size, unsigned BinOpcode, bool Nand = false) const;
MachineBasicBlock *EmitAtomicBinaryPartword(MachineInstr *MI,
Modified: llvm/branches/R600/lib/Target/Mips/MipsInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/Mips/MipsInstrInfo.td?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/Mips/MipsInstrInfo.td (original)
+++ llvm/branches/R600/lib/Target/Mips/MipsInstrInfo.td Tue Oct 2 09:15:33 2012
@@ -1233,3 +1233,8 @@
include "Mips16InstrFormats.td"
include "Mips16InstrInfo.td"
+
+// DSP
+include "MipsDSPInstrFormats.td"
+include "MipsDSPInstrInfo.td"
+
Modified: llvm/branches/R600/lib/Target/Mips/MipsMCInstLower.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/Mips/MipsMCInstLower.h?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/Mips/MipsMCInstLower.h (original)
+++ llvm/branches/R600/lib/Target/Mips/MipsMCInstLower.h Tue Oct 2 09:15:33 2012
@@ -33,11 +33,11 @@
MipsMCInstLower(MipsAsmPrinter &asmprinter);
void Initialize(Mangler *mang, MCContext *C);
void Lower(const MachineInstr *MI, MCInst &OutMI) const;
+ MCOperand LowerOperand(const MachineOperand& MO, unsigned offset = 0) const;
private:
MCOperand LowerSymbolOperand(const MachineOperand &MO,
MachineOperandType MOTy, unsigned Offset) const;
- MCOperand LowerOperand(const MachineOperand& MO, unsigned offset = 0) const;
};
}
Modified: llvm/branches/R600/lib/Target/Mips/MipsMachineFunction.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/Mips/MipsMachineFunction.h?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/Mips/MipsMachineFunction.h (original)
+++ llvm/branches/R600/lib/Target/Mips/MipsMachineFunction.h Tue Oct 2 09:15:33 2012
@@ -45,9 +45,7 @@
// Range of frame object indices.
// InArgFIRange: Range of indices of all frame objects created during call to
// LowerFormalArguments.
- // OutArgFIRange: Range of indices of all frame objects created during call to
- // LowerCall except for the frame object for restoring $gp.
- std::pair<int, int> InArgFIRange, OutArgFIRange;
+ std::pair<int, int> InArgFIRange;
unsigned MaxCallFrameSize;
bool EmitNOAT;
@@ -56,7 +54,7 @@
MipsFunctionInfo(MachineFunction& MF)
: MF(MF), SRetReturnReg(0), GlobalBaseReg(0),
VarArgsFrameIndex(0), InArgFIRange(std::make_pair(-1, 0)),
- OutArgFIRange(std::make_pair(-1, 0)), MaxCallFrameSize(0), EmitNOAT(false)
+ MaxCallFrameSize(0), EmitNOAT(false)
{}
bool isInArgFI(int FI) const {
@@ -64,16 +62,6 @@
}
void setLastInArgFI(int FI) { InArgFIRange.second = FI; }
- bool isOutArgFI(int FI) const {
- return FI <= OutArgFIRange.first && FI >= OutArgFIRange.second;
- }
- void extendOutArgFIRange(int FirstFI, int LastFI) {
- if (!OutArgFIRange.second)
- // this must be the first time this function was called.
- OutArgFIRange.first = FirstFI;
- OutArgFIRange.second = LastFI;
- }
-
unsigned getSRetReturnReg() const { return SRetReturnReg; }
void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
Modified: llvm/branches/R600/lib/Target/Mips/MipsRegisterInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/Mips/MipsRegisterInfo.td?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/Mips/MipsRegisterInfo.td (original)
+++ llvm/branches/R600/lib/Target/Mips/MipsRegisterInfo.td Tue Oct 2 09:15:33 2012
@@ -14,6 +14,8 @@
def sub_fpeven : SubRegIndex;
def sub_fpodd : SubRegIndex;
def sub_32 : SubRegIndex;
+def sub_lo : SubRegIndex;
+def sub_hi : SubRegIndex;
}
// We have banks of 32 registers each.
@@ -247,33 +249,11 @@
def HWR29_64 : Register<"29">;
// Accum registers
- def LO0 : Register<"ac0"> {
- let Aliases = [LO];
- }
- def HI0 : Register<"hi0"> {
- let Aliases = [HI];
- }
- def LO1 : Register<"ac1">;
- def HI1 : Register<"hi1">;
- def LO2 : Register<"ac2">;
- def HI2 : Register<"hi2">;
- def LO3 : Register<"ac3">;
- def HI3 : Register<"hi3">;
-
- let SubRegIndices = [sub_32] in {
- def LO0_64 : RegisterWithSubRegs<"ac0", [LO0]> {
- let Aliases = [LO64];
- }
- def HI0_64 : RegisterWithSubRegs<"hi0", [HI0]> {
- let Aliases = [HI64];
- }
- def LO1_64 : RegisterWithSubRegs<"ac1", [LO1]>;
- def HI1_64 : RegisterWithSubRegs<"hi1", [HI1]>;
- def LO2_64 : RegisterWithSubRegs<"ac2", [LO2]>;
- def HI2_64 : RegisterWithSubRegs<"hi2", [HI2]>;
- def LO3_64 : RegisterWithSubRegs<"ac3", [LO3]>;
- def HI3_64 : RegisterWithSubRegs<"hi3", [HI3]>;
- }
+ let SubRegIndices = [sub_lo, sub_hi] in
+ def AC0 : RegisterWithSubRegs<"ac0", [LO, HI]>;
+ def AC1 : Register<"ac1">;
+ def AC2 : Register<"ac2">;
+ def AC3 : Register<"ac3">;
def DSPCtrl : Register<"dspctrl">;
}
@@ -322,6 +302,7 @@
def CPURAReg : RegisterClass<"Mips", [i32], 32, (add RA)>;
+def CPUSPReg : RegisterClass<"Mips", [i32], 32, (add SP)>;
// 64bit fp:
// * FGR64 - 32 64-bit registers
@@ -357,9 +338,5 @@
def HWRegs : RegisterClass<"Mips", [i32], 32, (add HWR29)>;
def HWRegs64 : RegisterClass<"Mips", [i64], 32, (add HWR29_64)>;
-// Accum Registers
-def HIRegs : RegisterClass<"Mips", [i32], 32, (sequence "HI%u", 0, 3)>;
-def LORegs : RegisterClass<"Mips", [i32], 32, (sequence "LO%u", 0, 3)>;
-
-def HI64Regs : RegisterClass<"Mips", [i64], 64, (sequence "HI%u_64", 0, 3)>;
-def LO64Regs : RegisterClass<"Mips", [i64], 64, (sequence "LO%u_64", 0, 3)>;
+// Accumulator Registers
+def ACRegs : RegisterClass<"Mips", [i64], 64, (sequence "AC%u", 0, 3)>;
Modified: llvm/branches/R600/lib/Target/Mips/MipsSERegisterInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/Mips/MipsSERegisterInfo.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/Mips/MipsSERegisterInfo.cpp (original)
+++ llvm/branches/R600/lib/Target/Mips/MipsSERegisterInfo.cpp Tue Oct 2 09:15:33 2012
@@ -91,8 +91,7 @@
// getFrameRegister() returns.
unsigned FrameReg;
- if (MipsFI->isOutArgFI(FrameIndex) ||
- (FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI))
+ if (FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI)
FrameReg = Subtarget.isABI_N64() ? Mips::SP_64 : Mips::SP;
else
FrameReg = getFrameRegister(MF);
@@ -106,12 +105,8 @@
// incoming argument, callee-saved register location or local variable.
int64_t Offset;
- if (MipsFI->isOutArgFI(FrameIndex))
- Offset = SPOffset;
- else
- Offset = SPOffset + (int64_t)StackSize;
-
- Offset += MI.getOperand(OpNo + 1).getImm();
+ Offset = SPOffset + (int64_t)StackSize;
+ Offset += MI.getOperand(OpNo + 1).getImm();
DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n");
Modified: llvm/branches/R600/lib/Target/Mips/MipsSubtarget.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/Mips/MipsSubtarget.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/Mips/MipsSubtarget.cpp (original)
+++ llvm/branches/R600/lib/Target/Mips/MipsSubtarget.cpp Tue Oct 2 09:15:33 2012
@@ -31,7 +31,8 @@
MipsArchVersion(Mips32), MipsABI(UnknownABI), IsLittle(little),
IsSingleFloat(false), IsFP64bit(false), IsGP64bit(false), HasVFPU(false),
IsLinux(true), HasSEInReg(false), HasCondMov(false), HasMulDivAdd(false),
- HasMinMax(false), HasSwap(false), HasBitCount(false), InMips16Mode(false)
+ HasMinMax(false), HasSwap(false), HasBitCount(false), InMips16Mode(false),
+ HasDSP(false), HasDSPR2(false), IsAndroid(false)
{
std::string CPUName = CPU;
if (CPUName.empty())
Modified: llvm/branches/R600/lib/Target/PowerPC/PPCFrameLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/PowerPC/PPCFrameLowering.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/PowerPC/PPCFrameLowering.cpp (original)
+++ llvm/branches/R600/lib/Target/PowerPC/PPCFrameLowering.cpp Tue Oct 2 09:15:33 2012
@@ -193,7 +193,7 @@
// to adjust the stack pointer (we fit in the Red Zone). For 64-bit
// SVR4, we also require a stack frame if we need to spill the CR,
// since this spill area is addressed relative to the stack pointer.
- bool DisableRedZone = MF.getFunction()->hasFnAttr(Attribute::NoRedZone);
+ bool DisableRedZone = MF.getFunction()->getFnAttributes().hasNoRedZoneAttr();
// FIXME SVR4 The 32-bit SVR4 ABI has no red zone. However, it can
// still generate stackless code if all local vars are reg-allocated.
// Try: (FrameSize <= 224
@@ -255,7 +255,7 @@
// Naked functions have no stack frame pushed, so we don't have a frame
// pointer.
- if (MF.getFunction()->hasFnAttr(Attribute::Naked))
+ if (MF.getFunction()->getFnAttributes().hasNakedAttr())
return false;
return MF.getTarget().Options.DisableFramePointerElim(MF) ||
Modified: llvm/branches/R600/lib/Target/PowerPC/PPCISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/PowerPC/PPCISelLowering.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/PowerPC/PPCISelLowering.cpp (original)
+++ llvm/branches/R600/lib/Target/PowerPC/PPCISelLowering.cpp Tue Oct 2 09:15:33 2012
@@ -6002,7 +6002,7 @@
bool is31 = (getTargetMachine().Options.DisableFramePointerElim(MF) ||
MFI->hasVarSizedObjects()) &&
MFI->getStackSize() &&
- !MF.getFunction()->hasFnAttr(Attribute::Naked);
+ !MF.getFunction()->getFnAttributes().hasNakedAttr();
unsigned FrameReg = isPPC64 ? (is31 ? PPC::X31 : PPC::X1) :
(is31 ? PPC::R31 : PPC::R1);
SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
Modified: llvm/branches/R600/lib/Target/PowerPC/PPCRegisterInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/PowerPC/PPCRegisterInfo.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/PowerPC/PPCRegisterInfo.cpp (original)
+++ llvm/branches/R600/lib/Target/PowerPC/PPCRegisterInfo.cpp Tue Oct 2 09:15:33 2012
@@ -596,7 +596,7 @@
// to Offset to get the correct offset.
// Naked functions have stack size 0, although getStackSize may not reflect that
// because we didn't call all the pieces that compute it for naked functions.
- if (!MF.getFunction()->hasFnAttr(Attribute::Naked))
+ if (!MF.getFunction()->getFnAttributes().hasNakedAttr())
Offset += MFI->getStackSize();
// If we can, encode the offset directly into the instruction. If this is a
Modified: llvm/branches/R600/lib/Target/TargetData.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/TargetData.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/TargetData.cpp (original)
+++ llvm/branches/R600/lib/Target/TargetData.cpp Tue Oct 2 09:15:33 2012
@@ -314,6 +314,8 @@
TargetData::setAlignment(AlignTypeEnum align_type, unsigned abi_align,
unsigned pref_align, uint32_t bit_width) {
assert(abi_align <= pref_align && "Preferred alignment worse than ABI!");
+ assert(pref_align < (1 << 16) && "Alignment doesn't fit in bitfield");
+ assert(bit_width < (1 << 24) && "Bit width doesn't fit in bitfield");
for (unsigned i = 0, e = Alignments.size(); i != e; ++i) {
if (Alignments[i].AlignType == align_type &&
Alignments[i].TypeBitWidth == bit_width) {
Modified: llvm/branches/R600/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp (original)
+++ llvm/branches/R600/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp Tue Oct 2 09:15:33 2012
@@ -15,6 +15,7 @@
#define DEBUG_TYPE "asm-printer"
#include "X86ATTInstPrinter.h"
#include "X86InstComments.h"
+#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86MCTargetDesc.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCAsmInfo.h"
@@ -38,6 +39,12 @@
void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
StringRef Annot) {
+ const MCInstrDesc &Desc = MII.get(MI->getOpcode());
+ uint64_t TSFlags = Desc.TSFlags;
+
+ if (TSFlags & X86II::LOCK)
+ OS << "\tlock\n";
+
// Try to print any aliases first.
if (!printAliasInstr(MI, OS))
printInstruction(MI, OS);
Modified: llvm/branches/R600/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp (original)
+++ llvm/branches/R600/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp Tue Oct 2 09:15:33 2012
@@ -15,6 +15,7 @@
#define DEBUG_TYPE "asm-printer"
#include "X86IntelInstPrinter.h"
#include "X86InstComments.h"
+#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86MCTargetDesc.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCExpr.h"
@@ -32,6 +33,12 @@
void X86IntelInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
StringRef Annot) {
+ const MCInstrDesc &Desc = MII.get(MI->getOpcode());
+ uint64_t TSFlags = Desc.TSFlags;
+
+ if (TSFlags & X86II::LOCK)
+ OS << "\tlock\n";
+
printInstruction(MI, OS);
// Next always print the annotation.
Modified: llvm/branches/R600/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp (original)
+++ llvm/branches/R600/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp Tue Oct 2 09:15:33 2012
@@ -243,9 +243,14 @@
case X86::R15: case X86::R15D: case X86::R15W: case X86::R15B:
return N86::EDI;
- case X86::ST0: case X86::ST1: case X86::ST2: case X86::ST3:
- case X86::ST4: case X86::ST5: case X86::ST6: case X86::ST7:
- return RegNo-X86::ST0;
+ case X86::ST0: return 0;
+ case X86::ST1: return 1;
+ case X86::ST2: return 2;
+ case X86::ST3: return 3;
+ case X86::ST4: return 4;
+ case X86::ST5: return 5;
+ case X86::ST6: return 6;
+ case X86::ST7: return 7;
case X86::XMM0: case X86::XMM8:
case X86::YMM0: case X86::YMM8: case X86::MM0:
Modified: llvm/branches/R600/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp (original)
+++ llvm/branches/R600/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp Tue Oct 2 09:15:33 2012
@@ -11,11 +11,13 @@
#include "MCTargetDesc/X86MCTargetDesc.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCMachObjectWriter.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCValue.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
#include "llvm/Object/MachOFormat.h"
using namespace llvm;
@@ -23,7 +25,7 @@
namespace {
class X86MachObjectWriter : public MCMachObjectTargetWriter {
- void RecordScatteredRelocation(MachObjectWriter *Writer,
+ bool RecordScatteredRelocation(MachObjectWriter *Writer,
const MCAssembler &Asm,
const MCAsmLayout &Layout,
const MCFragment *Fragment,
@@ -335,7 +337,7 @@
Writer->addRelocation(Fragment->getParent(), MRE);
}
-void X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer,
+bool X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer,
const MCAssembler &Asm,
const MCAsmLayout &Layout,
const MCFragment *Fragment,
@@ -381,6 +383,19 @@
// Relocations are written out in reverse order, so the PAIR comes first.
if (Type == macho::RIT_Difference ||
Type == macho::RIT_Generic_LocalDifference) {
+ // If the offset is too large to fit in a scattered relocation,
+ // we're hosed. It's an unfortunate limitation of the MachO format.
+ if (FixupOffset > 0xffffff) {
+ char Buffer[32];
+ format("0x%x", FixupOffset).print(Buffer, sizeof(Buffer));
+ Asm.getContext().FatalError(Fixup.getLoc(),
+ Twine("Section too large, can't encode "
+ "r_address (") + Buffer +
+ ") into 24 bits of scattered "
+ "relocation entry.");
+ llvm_unreachable("fatal error returned?!");
+ }
+
macho::RelocationEntry MRE;
MRE.Word0 = ((0 << 0) |
(macho::RIT_Pair << 24) |
@@ -389,6 +404,16 @@
macho::RF_Scattered);
MRE.Word1 = Value2;
Writer->addRelocation(Fragment->getParent(), MRE);
+ } else {
+ // If the offset is more than 24-bits, it won't fit in a scattered
+ // relocation offset field, so we fall back to using a non-scattered
+ // relocation. This is a bit risky, as if the offset reaches out of
+ // the block and the linker is doing scattered loading on this
+ // symbol, things can go badly.
+ //
+ // Required for 'as' compatibility.
+ if (FixupOffset > 0xffffff)
+ return false;
}
macho::RelocationEntry MRE;
@@ -399,6 +424,7 @@
macho::RF_Scattered);
MRE.Word1 = Value;
Writer->addRelocation(Fragment->getParent(), MRE);
+ return true;
}
void X86MachObjectWriter::RecordTLVPRelocation(MachObjectWriter *Writer,
@@ -469,9 +495,11 @@
// If this is a difference or a defined symbol plus an offset, then we need a
// scattered relocation entry. Differences always require scattered
// relocations.
- if (Target.getSymB())
- return RecordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup,
- Target, Log2Size, FixedValue);
+ if (Target.getSymB()) {
+ RecordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup,
+ Target, Log2Size, FixedValue);
+ return;
+ }
// Get the symbol data, if any.
MCSymbolData *SD = 0;
@@ -483,9 +511,13 @@
uint32_t Offset = Target.getConstant();
if (IsPCRel)
Offset += 1 << Log2Size;
- if (Offset && SD && !Writer->doesSymbolRequireExternRelocation(SD))
- return RecordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup,
- Target, Log2Size, FixedValue);
+ // Try to record the scattered relocation if needed. Fall back to non
+ // scattered if necessary (see comments in RecordScatteredRelocation()
+ // for details).
+ if (Offset && SD && !Writer->doesSymbolRequireExternRelocation(SD) &&
+ RecordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup,
+ Target, Log2Size, FixedValue))
+ return;
// See <reloc.h>.
uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
Modified: llvm/branches/R600/lib/Target/X86/X86AsmPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/X86/X86AsmPrinter.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/X86/X86AsmPrinter.cpp (original)
+++ llvm/branches/R600/lib/Target/X86/X86AsmPrinter.cpp Tue Oct 2 09:15:33 2012
@@ -243,7 +243,7 @@
if (AsmVariant == 0) O << '%';
unsigned Reg = MO.getReg();
if (Modifier && strncmp(Modifier, "subreg", strlen("subreg")) == 0) {
- EVT VT = (strcmp(Modifier+6,"64") == 0) ?
+ MVT::SimpleValueType VT = (strcmp(Modifier+6,"64") == 0) ?
MVT::i64 : ((strcmp(Modifier+6, "32") == 0) ? MVT::i32 :
((strcmp(Modifier+6,"16") == 0) ? MVT::i16 : MVT::i8));
Reg = getX86SubSuperRegister(Reg, VT);
Modified: llvm/branches/R600/lib/Target/X86/X86FrameLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/X86/X86FrameLowering.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/X86/X86FrameLowering.cpp (original)
+++ llvm/branches/R600/lib/Target/X86/X86FrameLowering.cpp Tue Oct 2 09:15:33 2012
@@ -674,7 +674,7 @@
// function, and use up to 128 bytes of stack space, don't have a frame
// pointer, calls, or dynamic alloca then we do not need to adjust the
// stack pointer (we fit in the Red Zone).
- if (Is64Bit && !Fn->hasFnAttr(Attribute::NoRedZone) &&
+ if (Is64Bit && !Fn->getFnAttributes().hasNoRedZoneAttr() &&
!RegInfo->needsStackRealignment(MF) &&
!MFI->hasVarSizedObjects() && // No dynamic alloca.
!MFI->adjustsStack() && // No calls.
Modified: llvm/branches/R600/lib/Target/X86/X86ISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/X86/X86ISelDAGToDAG.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/X86/X86ISelDAGToDAG.cpp (original)
+++ llvm/branches/R600/lib/Target/X86/X86ISelDAGToDAG.cpp Tue Oct 2 09:15:33 2012
@@ -428,7 +428,7 @@
void X86DAGToDAGISel::PreprocessISelDAG() {
// OptForSize is used in pattern predicates that isel is matching.
- OptForSize = MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize);
+ OptForSize = MF->getFunction()->getFnAttributes().hasOptimizeForSizeAttr();
for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
E = CurDAG->allnodes_end(); I != E; ) {
@@ -2184,13 +2184,16 @@
SDValue N1 = Node->getOperand(1);
bool isSigned = Opcode == ISD::SMUL_LOHI;
+ bool hasBMI2 = Subtarget->hasBMI2();
if (!isSigned) {
switch (NVT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Unsupported VT!");
case MVT::i8: Opc = X86::MUL8r; MOpc = X86::MUL8m; break;
case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break;
- case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break;
- case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break;
+ case MVT::i32: Opc = hasBMI2 ? X86::MULX32rr : X86::MUL32r;
+ MOpc = hasBMI2 ? X86::MULX32rm : X86::MUL32m; break;
+ case MVT::i64: Opc = hasBMI2 ? X86::MULX64rr : X86::MUL64r;
+ MOpc = hasBMI2 ? X86::MULX64rm : X86::MUL64m; break;
}
} else {
switch (NVT.getSimpleVT().SimpleTy) {
@@ -2202,13 +2205,31 @@
}
}
- unsigned LoReg, HiReg;
- switch (NVT.getSimpleVT().SimpleTy) {
- default: llvm_unreachable("Unsupported VT!");
- case MVT::i8: LoReg = X86::AL; HiReg = X86::AH; break;
- case MVT::i16: LoReg = X86::AX; HiReg = X86::DX; break;
- case MVT::i32: LoReg = X86::EAX; HiReg = X86::EDX; break;
- case MVT::i64: LoReg = X86::RAX; HiReg = X86::RDX; break;
+ unsigned SrcReg, LoReg, HiReg;
+ switch (Opc) {
+ default: llvm_unreachable("Unknown MUL opcode!");
+ case X86::IMUL8r:
+ case X86::MUL8r:
+ SrcReg = LoReg = X86::AL; HiReg = X86::AH;
+ break;
+ case X86::IMUL16r:
+ case X86::MUL16r:
+ SrcReg = LoReg = X86::AX; HiReg = X86::DX;
+ break;
+ case X86::IMUL32r:
+ case X86::MUL32r:
+ SrcReg = LoReg = X86::EAX; HiReg = X86::EDX;
+ break;
+ case X86::IMUL64r:
+ case X86::MUL64r:
+ SrcReg = LoReg = X86::RAX; HiReg = X86::RDX;
+ break;
+ case X86::MULX32rr:
+ SrcReg = X86::EDX; LoReg = HiReg = 0;
+ break;
+ case X86::MULX64rr:
+ SrcReg = X86::RDX; LoReg = HiReg = 0;
+ break;
}
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
@@ -2220,22 +2241,47 @@
std::swap(N0, N1);
}
- SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
+ SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, SrcReg,
N0, SDValue()).getValue(1);
+ SDValue ResHi, ResLo;
if (foldedLoad) {
+ SDValue Chain;
SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
InFlag };
- SDNode *CNode =
- CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops,
- array_lengthof(Ops));
- InFlag = SDValue(CNode, 1);
+ if (MOpc == X86::MULX32rm || MOpc == X86::MULX64rm) {
+ SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Other, MVT::Glue);
+ SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops,
+ array_lengthof(Ops));
+ ResHi = SDValue(CNode, 0);
+ ResLo = SDValue(CNode, 1);
+ Chain = SDValue(CNode, 2);
+ InFlag = SDValue(CNode, 3);
+ } else {
+ SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
+ SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops,
+ array_lengthof(Ops));
+ Chain = SDValue(CNode, 0);
+ InFlag = SDValue(CNode, 1);
+ }
// Update the chain.
- ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
+ ReplaceUses(N1.getValue(1), Chain);
} else {
- SDNode *CNode = CurDAG->getMachineNode(Opc, dl, MVT::Glue, N1, InFlag);
- InFlag = SDValue(CNode, 0);
+ SDValue Ops[] = { N1, InFlag };
+ if (Opc == X86::MULX32rr || Opc == X86::MULX64rr) {
+ SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Glue);
+ SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops,
+ array_lengthof(Ops));
+ ResHi = SDValue(CNode, 0);
+ ResLo = SDValue(CNode, 1);
+ InFlag = SDValue(CNode, 2);
+ } else {
+ SDVTList VTs = CurDAG->getVTList(MVT::Glue);
+ SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops,
+ array_lengthof(Ops));
+ InFlag = SDValue(CNode, 0);
+ }
}
// Prevent use of AH in a REX instruction by referencing AX instead.
@@ -2260,19 +2306,25 @@
}
// Copy the low half of the result, if it is needed.
if (!SDValue(Node, 0).use_empty()) {
- SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- LoReg, NVT, InFlag);
- InFlag = Result.getValue(2);
- ReplaceUses(SDValue(Node, 0), Result);
- DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
+ if (ResLo.getNode() == 0) {
+ assert(LoReg && "Register for low half is not defined!");
+ ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg, NVT,
+ InFlag);
+ InFlag = ResLo.getValue(2);
+ }
+ ReplaceUses(SDValue(Node, 0), ResLo);
+ DEBUG(dbgs() << "=> "; ResLo.getNode()->dump(CurDAG); dbgs() << '\n');
}
// Copy the high half of the result, if it is needed.
if (!SDValue(Node, 1).use_empty()) {
- SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- HiReg, NVT, InFlag);
- InFlag = Result.getValue(2);
- ReplaceUses(SDValue(Node, 1), Result);
- DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
+ if (ResHi.getNode() == 0) {
+ assert(HiReg && "Register for high half is not defined!");
+ ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg, NVT,
+ InFlag);
+ InFlag = ResHi.getValue(2);
+ }
+ ReplaceUses(SDValue(Node, 1), ResHi);
+ DEBUG(dbgs() << "=> "; ResHi.getNode()->dump(CurDAG); dbgs() << '\n');
}
return NULL;
@@ -2473,7 +2525,13 @@
MVT::i8, Reg);
// Emit a testb.
- return CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32, Subreg, Imm);
+ SDNode *NewNode = CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32,
+ Subreg, Imm);
+ // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has
+ // one, do not call ReplaceAllUsesWith.
+ ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
+ SDValue(NewNode, 0));
+ return NULL;
}
// For example, "testl %eax, $2048" to "testb %ah, $8".
@@ -2504,8 +2562,13 @@
// Emit a testb. The EXTRACT_SUBREG becomes a COPY that can only
// target GR8_NOREX registers, so make sure the register class is
// forced.
- return CurDAG->getMachineNode(X86::TEST8ri_NOREX, dl, MVT::i32,
- Subreg, ShiftedImm);
+ SDNode *NewNode = CurDAG->getMachineNode(X86::TEST8ri_NOREX, dl,
+ MVT::i32, Subreg, ShiftedImm);
+ // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has
+ // one, do not call ReplaceAllUsesWith.
+ ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
+ SDValue(NewNode, 0));
+ return NULL;
}
// For example, "testl %eax, $32776" to "testw %ax, $32776".
@@ -2521,7 +2584,13 @@
MVT::i16, Reg);
// Emit a testw.
- return CurDAG->getMachineNode(X86::TEST16ri, dl, MVT::i32, Subreg, Imm);
+ SDNode *NewNode = CurDAG->getMachineNode(X86::TEST16ri, dl, MVT::i32,
+ Subreg, Imm);
+ // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has
+ // one, do not call ReplaceAllUsesWith.
+ ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
+ SDValue(NewNode, 0));
+ return NULL;
}
// For example, "testq %rax, $268468232" to "testl %eax, $268468232".
@@ -2537,7 +2606,13 @@
MVT::i32, Reg);
// Emit a testl.
- return CurDAG->getMachineNode(X86::TEST32ri, dl, MVT::i32, Subreg, Imm);
+ SDNode *NewNode = CurDAG->getMachineNode(X86::TEST32ri, dl, MVT::i32,
+ Subreg, Imm);
+ // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has
+ // one, do not call ReplaceAllUsesWith.
+ ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
+ SDValue(NewNode, 0));
+ return NULL;
}
}
break;
Modified: llvm/branches/R600/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/X86/X86ISelLowering.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/branches/R600/lib/Target/X86/X86ISelLowering.cpp Tue Oct 2 09:15:33 2012
@@ -1342,7 +1342,7 @@
// cases like PR2962. This should be removed when PR2962 is fixed.
const Function *F = MF.getFunction();
if (IsZeroVal &&
- !F->hasFnAttr(Attribute::NoImplicitFloat)) {
+ !F->getFnAttributes().hasNoImplicitFloatAttr()) {
if (Size >= 16 &&
(Subtarget->isUnalignedMemAccessFast() ||
((DstAlign == 0 || DstAlign >= 16) &&
@@ -2010,7 +2010,7 @@
unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs,
TotalNumIntRegs);
- bool NoImplicitFloatOps = Fn->hasFnAttr(Attribute::NoImplicitFloat);
+ bool NoImplicitFloatOps = Fn->getFnAttributes().hasNoImplicitFloatAttr();
assert(!(NumXMMRegs && !Subtarget->hasSSE1()) &&
"SSE register cannot be used when SSE is disabled!");
assert(!(NumXMMRegs && MF.getTarget().Options.UseSoftFloat &&
@@ -2486,7 +2486,7 @@
OpFlags = X86II::MO_DARWIN_STUB;
} else if (Subtarget->isPICStyleRIPRel() &&
isa<Function>(GV) &&
- cast<Function>(GV)->hasFnAttr(Attribute::NonLazyBind)) {
+ cast<Function>(GV)->getFnAttributes().hasNonLazyBindAttr()) {
// If the function is marked as non-lazy, generate an indirect call
// which loads from the GOT directly. This avoids runtime overhead
// at the cost of eager binding (and one extra byte of encoding).
@@ -6629,7 +6629,7 @@
bool HasAVX = Subtarget->hasAVX();
bool HasAVX2 = Subtarget->hasAVX2();
MachineFunction &MF = DAG.getMachineFunction();
- bool OptForSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);
+ bool OptForSize = MF.getFunction()->getFnAttributes().hasOptimizeForSizeAttr();
assert(VT.getSizeInBits() != 64 && "Can't lower MMX shuffles");
@@ -9669,7 +9669,7 @@
// Sanity Check: Make sure using fp_offset makes sense.
assert(!getTargetMachine().Options.UseSoftFloat &&
!(DAG.getMachineFunction()
- .getFunction()->hasFnAttr(Attribute::NoImplicitFloat)) &&
+ .getFunction()->getFnAttributes().hasNoImplicitFloatAttr()) &&
Subtarget->hasSSE1());
}
@@ -12110,7 +12110,7 @@
SrcReg = MI->getOperand(CurOp++).getReg();
const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
- EVT VT = *RC->vt_begin();
+ MVT::SimpleValueType VT = *RC->vt_begin();
unsigned AccPhyReg = getX86SubSuperRegister(X86::EAX, VT);
unsigned LCMPXCHGOpc = getCmpXChgOpcode(VT);
@@ -15438,7 +15438,7 @@
return SDValue();
const Function *F = DAG.getMachineFunction().getFunction();
- bool NoImplicitFloatOps = F->hasFnAttr(Attribute::NoImplicitFloat);
+ bool NoImplicitFloatOps = F->getFnAttributes().hasNoImplicitFloatAttr();
bool F64IsLegal = !DAG.getTarget().Options.UseSoftFloat && !NoImplicitFloatOps
&& Subtarget->hasSSE2();
if ((VT.isVector() ||
Modified: llvm/branches/R600/lib/Target/X86/X86InstrCompiler.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/X86/X86InstrCompiler.td?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/X86/X86InstrCompiler.td (original)
+++ llvm/branches/R600/lib/Target/X86/X86InstrCompiler.td Tue Oct 2 09:15:33 2012
@@ -561,7 +561,6 @@
// TODO: Get this to fold the constant into the instruction.
let isCodeGenOnly = 1, Defs = [EFLAGS] in
def OR32mrLocked : I<0x09, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$zero),
- "lock\n\t"
"or{l}\t{$zero, $dst|$dst, $zero}",
[], IIC_ALU_MEM>, Requires<[In32BitMode]>, LOCK;
@@ -581,72 +580,72 @@
def #NAME#8mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
RegOpc{3}, RegOpc{2}, RegOpc{1}, 0 },
MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
- !strconcat("lock\n\t", mnemonic, "{b}\t",
+ !strconcat(mnemonic, "{b}\t",
"{$src2, $dst|$dst, $src2}"),
[], IIC_ALU_NONMEM>, LOCK;
def #NAME#16mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
- !strconcat("lock\n\t", mnemonic, "{w}\t",
+ !strconcat(mnemonic, "{w}\t",
"{$src2, $dst|$dst, $src2}"),
[], IIC_ALU_NONMEM>, OpSize, LOCK;
def #NAME#32mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
- !strconcat("lock\n\t", mnemonic, "{l}\t",
+ !strconcat(mnemonic, "{l}\t",
"{$src2, $dst|$dst, $src2}"),
[], IIC_ALU_NONMEM>, LOCK;
def #NAME#64mr : RI<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
- !strconcat("lock\n\t", mnemonic, "{q}\t",
+ !strconcat(mnemonic, "{q}\t",
"{$src2, $dst|$dst, $src2}"),
[], IIC_ALU_NONMEM>, LOCK;
def #NAME#8mi : Ii8<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 0 },
ImmMod, (outs), (ins i8mem :$dst, i8imm :$src2),
- !strconcat("lock\n\t", mnemonic, "{b}\t",
+ !strconcat(mnemonic, "{b}\t",
"{$src2, $dst|$dst, $src2}"),
[], IIC_ALU_MEM>, LOCK;
def #NAME#16mi : Ii16<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
ImmMod, (outs), (ins i16mem :$dst, i16imm :$src2),
- !strconcat("lock\n\t", mnemonic, "{w}\t",
+ !strconcat(mnemonic, "{w}\t",
"{$src2, $dst|$dst, $src2}"),
[], IIC_ALU_MEM>, OpSize, LOCK;
def #NAME#32mi : Ii32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
ImmMod, (outs), (ins i32mem :$dst, i32imm :$src2),
- !strconcat("lock\n\t", mnemonic, "{l}\t",
+ !strconcat(mnemonic, "{l}\t",
"{$src2, $dst|$dst, $src2}"),
[], IIC_ALU_MEM>, LOCK;
def #NAME#64mi32 : RIi32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
ImmMod, (outs), (ins i64mem :$dst, i64i32imm :$src2),
- !strconcat("lock\n\t", mnemonic, "{q}\t",
+ !strconcat(mnemonic, "{q}\t",
"{$src2, $dst|$dst, $src2}"),
[], IIC_ALU_MEM>, LOCK;
def #NAME#16mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
ImmMod, (outs), (ins i16mem :$dst, i16i8imm :$src2),
- !strconcat("lock\n\t", mnemonic, "{w}\t",
+ !strconcat(mnemonic, "{w}\t",
"{$src2, $dst|$dst, $src2}"),
[], IIC_ALU_MEM>, OpSize, LOCK;
def #NAME#32mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
ImmMod, (outs), (ins i32mem :$dst, i32i8imm :$src2),
- !strconcat("lock\n\t", mnemonic, "{l}\t",
+ !strconcat(mnemonic, "{l}\t",
"{$src2, $dst|$dst, $src2}"),
[], IIC_ALU_MEM>, LOCK;
def #NAME#64mi8 : RIi8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
ImmMod, (outs), (ins i64mem :$dst, i64i8imm :$src2),
- !strconcat("lock\n\t", mnemonic, "{q}\t",
+ !strconcat(mnemonic, "{q}\t",
"{$src2, $dst|$dst, $src2}"),
[], IIC_ALU_MEM>, LOCK;
@@ -666,16 +665,16 @@
let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1 in {
def #NAME#8m : I<Opc8, Form, (outs), (ins i8mem :$dst),
- !strconcat("lock\n\t", mnemonic, "{b}\t$dst"),
+ !strconcat(mnemonic, "{b}\t$dst"),
[], IIC_UNARY_MEM>, LOCK;
def #NAME#16m : I<Opc, Form, (outs), (ins i16mem:$dst),
- !strconcat("lock\n\t", mnemonic, "{w}\t$dst"),
+ !strconcat(mnemonic, "{w}\t$dst"),
[], IIC_UNARY_MEM>, OpSize, LOCK;
def #NAME#32m : I<Opc, Form, (outs), (ins i32mem:$dst),
- !strconcat("lock\n\t", mnemonic, "{l}\t$dst"),
+ !strconcat(mnemonic, "{l}\t$dst"),
[], IIC_UNARY_MEM>, LOCK;
def #NAME#64m : RI<Opc, Form, (outs), (ins i64mem:$dst),
- !strconcat("lock\n\t", mnemonic, "{q}\t$dst"),
+ !strconcat(mnemonic, "{q}\t$dst"),
[], IIC_UNARY_MEM>, LOCK;
}
}
@@ -689,7 +688,7 @@
InstrItinClass itin> {
let isCodeGenOnly = 1 in {
def #NAME# : I<Opc, Form, (outs), (ins x86memop:$ptr),
- !strconcat("lock\n\t", mnemonic, "\t$ptr"),
+ !strconcat(mnemonic, "\t$ptr"),
[(frag addr:$ptr)], itin>, TB, LOCK;
}
}
@@ -700,23 +699,19 @@
let isCodeGenOnly = 1 in {
let Defs = [AL, EFLAGS], Uses = [AL] in
def #NAME#8 : I<Opc8, Form, (outs), (ins i8mem:$ptr, GR8:$swap),
- !strconcat("lock\n\t", mnemonic,
- "{b}\t{$swap, $ptr|$ptr, $swap}"),
+ !strconcat(mnemonic, "{b}\t{$swap, $ptr|$ptr, $swap}"),
[(frag addr:$ptr, GR8:$swap, 1)], itin8>, TB, LOCK;
let Defs = [AX, EFLAGS], Uses = [AX] in
def #NAME#16 : I<Opc, Form, (outs), (ins i16mem:$ptr, GR16:$swap),
- !strconcat("lock\n\t", mnemonic,
- "{w}\t{$swap, $ptr|$ptr, $swap}"),
+ !strconcat(mnemonic, "{w}\t{$swap, $ptr|$ptr, $swap}"),
[(frag addr:$ptr, GR16:$swap, 2)], itin>, TB, OpSize, LOCK;
let Defs = [EAX, EFLAGS], Uses = [EAX] in
def #NAME#32 : I<Opc, Form, (outs), (ins i32mem:$ptr, GR32:$swap),
- !strconcat("lock\n\t", mnemonic,
- "{l}\t{$swap, $ptr|$ptr, $swap}"),
+ !strconcat(mnemonic, "{l}\t{$swap, $ptr|$ptr, $swap}"),
[(frag addr:$ptr, GR32:$swap, 4)], itin>, TB, LOCK;
let Defs = [RAX, EFLAGS], Uses = [RAX] in
def #NAME#64 : RI<Opc, Form, (outs), (ins i64mem:$ptr, GR64:$swap),
- !strconcat("lock\n\t", mnemonic,
- "{q}\t{$swap, $ptr|$ptr, $swap}"),
+ !strconcat(mnemonic, "{q}\t{$swap, $ptr|$ptr, $swap}"),
[(frag addr:$ptr, GR64:$swap, 8)], itin>, TB, LOCK;
}
}
@@ -744,31 +739,27 @@
let Constraints = "$val = $dst", Defs = [EFLAGS], isCodeGenOnly = 1 in {
def #NAME#8 : I<opc8, MRMSrcMem, (outs GR8:$dst),
(ins GR8:$val, i8mem:$ptr),
- !strconcat("lock\n\t", mnemonic,
- "{b}\t{$val, $ptr|$ptr, $val}"),
+ !strconcat(mnemonic, "{b}\t{$val, $ptr|$ptr, $val}"),
[(set GR8:$dst,
(!cast<PatFrag>(frag # "_8") addr:$ptr, GR8:$val))],
itin8>;
def #NAME#16 : I<opc, MRMSrcMem, (outs GR16:$dst),
(ins GR16:$val, i16mem:$ptr),
- !strconcat("lock\n\t", mnemonic,
- "{w}\t{$val, $ptr|$ptr, $val}"),
+ !strconcat(mnemonic, "{w}\t{$val, $ptr|$ptr, $val}"),
[(set
GR16:$dst,
(!cast<PatFrag>(frag # "_16") addr:$ptr, GR16:$val))],
itin>, OpSize;
def #NAME#32 : I<opc, MRMSrcMem, (outs GR32:$dst),
(ins GR32:$val, i32mem:$ptr),
- !strconcat("lock\n\t", mnemonic,
- "{l}\t{$val, $ptr|$ptr, $val}"),
+ !strconcat(mnemonic, "{l}\t{$val, $ptr|$ptr, $val}"),
[(set
GR32:$dst,
(!cast<PatFrag>(frag # "_32") addr:$ptr, GR32:$val))],
itin>;
def #NAME#64 : RI<opc, MRMSrcMem, (outs GR64:$dst),
(ins GR64:$val, i64mem:$ptr),
- !strconcat("lock\n\t", mnemonic,
- "{q}\t{$val, $ptr|$ptr, $val}"),
+ !strconcat(mnemonic, "{q}\t{$val, $ptr|$ptr, $val}"),
[(set
GR64:$dst,
(!cast<PatFrag>(frag # "_64") addr:$ptr, GR64:$val))],
Modified: llvm/branches/R600/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/X86/X86InstrInfo.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/branches/R600/lib/Target/X86/X86InstrInfo.cpp Tue Oct 2 09:15:33 2012
@@ -561,6 +561,16 @@
{ X86::VSQRTPSYr_Int, X86::VSQRTPSYm_Int, TB_ALIGN_32 },
{ X86::VBROADCASTSSYrr, X86::VBROADCASTSSYrm, TB_NO_REVERSE },
{ X86::VBROADCASTSDYrr, X86::VBROADCASTSDYrm, TB_NO_REVERSE },
+
+ // BMI/BMI2 foldable instructions
+ { X86::RORX32ri, X86::RORX32mi, 0 },
+ { X86::RORX64ri, X86::RORX64mi, 0 },
+ { X86::SARX32rr, X86::SARX32rm, 0 },
+ { X86::SARX64rr, X86::SARX64rm, 0 },
+ { X86::SHRX32rr, X86::SHRX32rm, 0 },
+ { X86::SHRX64rr, X86::SHRX64rm, 0 },
+ { X86::SHLX32rr, X86::SHLX32rm, 0 },
+ { X86::SHLX64rr, X86::SHLX64rm, 0 },
};
for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) {
@@ -1140,6 +1150,10 @@
{ X86::VFMSUBADDPD4rr, X86::VFMSUBADDPD4mr, TB_ALIGN_16 },
{ X86::VFMSUBADDPS4rrY, X86::VFMSUBADDPS4mrY, TB_ALIGN_32 },
{ X86::VFMSUBADDPD4rrY, X86::VFMSUBADDPD4mrY, TB_ALIGN_32 },
+
+ // BMI/BMI2 foldable instructions
+ { X86::MULX32rr, X86::MULX32rm, 0 },
+ { X86::MULX64rr, X86::MULX64rm, 0 },
};
for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) {
@@ -3806,7 +3820,7 @@
// Unless optimizing for size, don't fold to avoid partial
// register update stalls
- if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize) &&
+ if (!MF.getFunction()->getFnAttributes().hasOptimizeForSizeAttr() &&
hasPartialRegUpdate(MI->getOpcode()))
return 0;
@@ -3847,7 +3861,7 @@
// Unless optimizing for size, don't fold to avoid partial
// register update stalls
- if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize) &&
+ if (!MF.getFunction()->getFnAttributes().hasOptimizeForSizeAttr() &&
hasPartialRegUpdate(MI->getOpcode()))
return 0;
Modified: llvm/branches/R600/lib/Target/X86/X86InstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/X86/X86InstrInfo.td?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/X86/X86InstrInfo.td (original)
+++ llvm/branches/R600/lib/Target/X86/X86InstrInfo.td Tue Oct 2 09:15:33 2012
@@ -552,17 +552,17 @@
def Has3DNow : Predicate<"Subtarget->has3DNow()">;
def Has3DNowA : Predicate<"Subtarget->has3DNowA()">;
def HasSSE1 : Predicate<"Subtarget->hasSSE1()">;
-def UseSSE1 : Predicate<"Subtarget->hasSSE1() && Subtarget->hasNoAVX()">;
+def UseSSE1 : Predicate<"Subtarget->hasSSE1() && !Subtarget->hasAVX()">;
def HasSSE2 : Predicate<"Subtarget->hasSSE2()">;
-def UseSSE2 : Predicate<"Subtarget->hasSSE2() && Subtarget->hasNoAVX()">;
+def UseSSE2 : Predicate<"Subtarget->hasSSE2() && !Subtarget->hasAVX()">;
def HasSSE3 : Predicate<"Subtarget->hasSSE3()">;
-def UseSSE3 : Predicate<"Subtarget->hasSSE3() && Subtarget->hasNoAVX()">;
+def UseSSE3 : Predicate<"Subtarget->hasSSE3() && !Subtarget->hasAVX()">;
def HasSSSE3 : Predicate<"Subtarget->hasSSSE3()">;
-def UseSSSE3 : Predicate<"Subtarget->hasSSSE3() && Subtarget->hasNoAVX()">;
+def UseSSSE3 : Predicate<"Subtarget->hasSSSE3() && !Subtarget->hasAVX()">;
def HasSSE41 : Predicate<"Subtarget->hasSSE41()">;
-def UseSSE41 : Predicate<"Subtarget->hasSSE41() && Subtarget->hasNoAVX()">;
+def UseSSE41 : Predicate<"Subtarget->hasSSE41() && !Subtarget->hasAVX()">;
def HasSSE42 : Predicate<"Subtarget->hasSSE42()">;
-def UseSSE42 : Predicate<"Subtarget->hasSSE42() && Subtarget->hasNoAVX()">;
+def UseSSE42 : Predicate<"Subtarget->hasSSE42() && !Subtarget->hasAVX()">;
def HasSSE4A : Predicate<"Subtarget->hasSSE4A()">;
def HasAVX : Predicate<"Subtarget->hasAVX()">;
def HasAVX2 : Predicate<"Subtarget->hasAVX2()">;
Modified: llvm/branches/R600/lib/Target/X86/X86InstrShiftRotate.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/X86/X86InstrShiftRotate.td?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/X86/X86InstrShiftRotate.td (original)
+++ llvm/branches/R600/lib/Target/X86/X86InstrShiftRotate.td Tue Oct 2 09:15:33 2012
@@ -839,6 +839,16 @@
} // Defs = [EFLAGS]
+def ROT32L2R_imm8 : SDNodeXForm<imm, [{
+ // Convert a ROTL shamt to a ROTR shamt on 32-bit integer.
+ return getI8Imm(32 - N->getZExtValue());
+}]>;
+
+def ROT64L2R_imm8 : SDNodeXForm<imm, [{
+ // Convert a ROTL shamt to a ROTR shamt on 64-bit integer.
+ return getI8Imm(64 - N->getZExtValue());
+}]>;
+
multiclass bmi_rotate<string asm, RegisterClass RC, X86MemOperand x86memop> {
let neverHasSideEffects = 1 in {
def ri : Ii8<0xF0, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, i8imm:$src2),
@@ -873,4 +883,72 @@
defm SHRX64 : bmi_shift<"shrx{q}", GR64, i64mem>, T8XD, VEX_W;
defm SHLX32 : bmi_shift<"shlx{l}", GR32, i32mem>, T8, OpSize;
defm SHLX64 : bmi_shift<"shlx{q}", GR64, i64mem>, T8, OpSize, VEX_W;
+
+ // Prefer RORX which is non-destructive and doesn't update EFLAGS.
+ let AddedComplexity = 10 in {
+ def : Pat<(rotl GR32:$src, (i8 imm:$shamt)),
+ (RORX32ri GR32:$src, (ROT32L2R_imm8 imm:$shamt))>;
+ def : Pat<(rotl GR64:$src, (i8 imm:$shamt)),
+ (RORX64ri GR64:$src, (ROT64L2R_imm8 imm:$shamt))>;
+ }
+
+ def : Pat<(rotl (loadi32 addr:$src), (i8 imm:$shamt)),
+ (RORX32mi addr:$src, (ROT32L2R_imm8 imm:$shamt))>;
+ def : Pat<(rotl (loadi64 addr:$src), (i8 imm:$shamt)),
+ (RORX64mi addr:$src, (ROT64L2R_imm8 imm:$shamt))>;
+
+ // Prefer SARX/SHRX/SHLX over SAR/SHR/SHL with variable shift BUT not
+ // immedidate shift, i.e. the following code is considered better
+ //
+ // mov %edi, %esi
+ // shl $imm, %esi
+ // ... %edi, ...
+ //
+ // than
+ //
+ // movb $imm, %sil
+ // shlx %sil, %edi, %esi
+ // ... %edi, ...
+ //
+ let AddedComplexity = 1 in {
+ def : Pat<(sra GR32:$src1, GR8:$src2),
+ (SARX32rr GR32:$src1,
+ (INSERT_SUBREG
+ (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ def : Pat<(sra GR64:$src1, GR8:$src2),
+ (SARX64rr GR64:$src1,
+ (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+
+ def : Pat<(srl GR32:$src1, GR8:$src2),
+ (SHRX32rr GR32:$src1,
+ (INSERT_SUBREG
+ (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ def : Pat<(srl GR64:$src1, GR8:$src2),
+ (SHRX64rr GR64:$src1,
+ (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+
+ def : Pat<(shl GR32:$src1, GR8:$src2),
+ (SHLX32rr GR32:$src1,
+ (INSERT_SUBREG
+ (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ def : Pat<(shl GR64:$src1, GR8:$src2),
+ (SHLX64rr GR64:$src1,
+ (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ }
+
+ // Patterns on SARXrm/SHRXrm/SHLXrm are explicitly omitted to favor
+ //
+ // mov (%ecx), %esi
+ // shl $imm, $esi
+ //
+ // over
+ //
+ // movb $imm %al
+ // shlx %al, (%ecx), %esi
+ //
+ // As SARXrr/SHRXrr/SHLXrr is favored on variable shift, the peephole
+ // optimization will fold them into SARXrm/SHRXrm/SHLXrm if possible.
}
Modified: llvm/branches/R600/lib/Target/X86/X86RegisterInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/X86/X86RegisterInfo.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/X86/X86RegisterInfo.cpp (original)
+++ llvm/branches/R600/lib/Target/X86/X86RegisterInfo.cpp Tue Oct 2 09:15:33 2012
@@ -399,7 +399,7 @@
const Function *F = MF.getFunction();
unsigned StackAlign = TM.getFrameLowering()->getStackAlignment();
bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) ||
- F->hasFnAttr(Attribute::StackAlignment));
+ F->getFnAttributes().hasStackAlignmentAttr());
// If we've requested that we force align the stack do so now.
if (ForceStackAlign)
@@ -590,9 +590,10 @@
}
namespace llvm {
-unsigned getX86SubSuperRegister(unsigned Reg, EVT VT, bool High) {
- switch (VT.getSimpleVT().SimpleTy) {
- default: return Reg;
+unsigned getX86SubSuperRegister(unsigned Reg, MVT::SimpleValueType VT,
+ bool High) {
+ switch (VT) {
+ default: llvm_unreachable("Unexpected VT");
case MVT::i8:
if (High) {
switch (Reg) {
@@ -608,7 +609,7 @@
}
} else {
switch (Reg) {
- default: return 0;
+ default: llvm_unreachable("Unexpected register");
case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
return X86::AL;
case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
@@ -645,7 +646,7 @@
}
case MVT::i16:
switch (Reg) {
- default: return Reg;
+ default: llvm_unreachable("Unexpected register");
case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
return X86::AX;
case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
@@ -681,7 +682,7 @@
}
case MVT::i32:
switch (Reg) {
- default: return Reg;
+ default: llvm_unreachable("Unexpected register");
case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
return X86::EAX;
case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
@@ -733,7 +734,7 @@
}
}
switch (Reg) {
- default: return Reg;
+ default: llvm_unreachable("Unexpected register");
case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
return X86::RAX;
case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
Modified: llvm/branches/R600/lib/Target/X86/X86RegisterInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/X86/X86RegisterInfo.h?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/X86/X86RegisterInfo.h (original)
+++ llvm/branches/R600/lib/Target/X86/X86RegisterInfo.h Tue Oct 2 09:15:33 2012
@@ -141,8 +141,8 @@
// getX86SubSuperRegister - X86 utility function. It returns the sub or super
// register of a specific X86 register.
-// e.g. getX86SubSuperRegister(X86::EAX, EVT::i16) return X86:AX
-unsigned getX86SubSuperRegister(unsigned, EVT, bool High=false);
+// e.g. getX86SubSuperRegister(X86::EAX, MVT::i16) return X86:AX
+unsigned getX86SubSuperRegister(unsigned, MVT::SimpleValueType, bool High=false);
} // End llvm namespace
Modified: llvm/branches/R600/lib/Target/X86/X86Subtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/X86/X86Subtarget.h?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/X86/X86Subtarget.h (original)
+++ llvm/branches/R600/lib/Target/X86/X86Subtarget.h Tue Oct 2 09:15:33 2012
@@ -202,7 +202,6 @@
bool hasSSE42() const { return X86SSELevel >= SSE42; }
bool hasAVX() const { return X86SSELevel >= AVX; }
bool hasAVX2() const { return X86SSELevel >= AVX2; }
- bool hasNoAVX() const { return X86SSELevel < AVX; }
bool hasSSE4A() const { return HasSSE4A; }
bool has3DNow() const { return X863DNowLevel >= ThreeDNow; }
bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; }
Modified: llvm/branches/R600/lib/Transforms/IPO/ArgumentPromotion.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Transforms/IPO/ArgumentPromotion.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Transforms/IPO/ArgumentPromotion.cpp (original)
+++ llvm/branches/R600/lib/Transforms/IPO/ArgumentPromotion.cpp Tue Oct 2 09:15:33 2012
@@ -592,14 +592,6 @@
Type *RetTy = FTy->getReturnType();
- // Work around LLVM bug PR56: the CWriter cannot emit varargs functions which
- // have zero fixed arguments.
- bool ExtraArgHack = false;
- if (Params.empty() && FTy->isVarArg()) {
- ExtraArgHack = true;
- Params.push_back(Type::getInt32Ty(F->getContext()));
- }
-
// Construct the new function type using the new arguments.
FunctionType *NFTy = FunctionType::get(RetTy, Params, FTy->isVarArg());
@@ -711,9 +703,6 @@
}
}
- if (ExtraArgHack)
- Args.push_back(Constant::getNullValue(Type::getInt32Ty(F->getContext())));
-
// Push any varargs arguments on the list.
for (; AI != CS.arg_end(); ++AI, ++ArgIndex) {
Args.push_back(*AI);
@@ -870,16 +859,9 @@
}
// Increment I2 past all of the arguments added for this promoted pointer.
- for (unsigned i = 0, e = ArgIndices.size(); i != e; ++i)
- ++I2;
+ std::advance(I2, ArgIndices.size());
}
- // Notify the alias analysis implementation that we inserted a new argument.
- if (ExtraArgHack)
- AA.copyValue(Constant::getNullValue(Type::getInt32Ty(F->getContext())),
- NF->arg_begin());
-
-
// Tell the alias analysis that the old function is about to disappear.
AA.replaceWithNewValue(F, NF);
Modified: llvm/branches/R600/lib/Transforms/IPO/DeadArgumentElimination.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Transforms/IPO/DeadArgumentElimination.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Transforms/IPO/DeadArgumentElimination.cpp (original)
+++ llvm/branches/R600/lib/Transforms/IPO/DeadArgumentElimination.cpp Tue Oct 2 09:15:33 2012
@@ -717,9 +717,9 @@
// here. Currently, this should not be possible, but special handling might be
// required when new return value attributes are added.
if (NRetTy->isVoidTy())
- RAttrs &= ~Attribute::typeIncompatible(NRetTy);
+ RAttrs &= ~Attributes::typeIncompatible(NRetTy);
else
- assert((RAttrs & Attribute::typeIncompatible(NRetTy)) == 0
+ assert((RAttrs & Attributes::typeIncompatible(NRetTy)) == 0
&& "Return attributes no longer compatible?");
if (RAttrs)
@@ -786,7 +786,7 @@
Attributes RAttrs = CallPAL.getRetAttributes();
Attributes FnAttrs = CallPAL.getFnAttributes();
// Adjust in case the function was changed to return void.
- RAttrs &= ~Attribute::typeIncompatible(NF->getReturnType());
+ RAttrs &= ~Attributes::typeIncompatible(NF->getReturnType());
if (RAttrs)
AttributesVec.push_back(AttributeWithIndex::get(0, RAttrs));
Modified: llvm/branches/R600/lib/Transforms/IPO/GlobalOpt.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Transforms/IPO/GlobalOpt.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Transforms/IPO/GlobalOpt.cpp (original)
+++ llvm/branches/R600/lib/Transforms/IPO/GlobalOpt.cpp Tue Oct 2 09:15:33 2012
@@ -962,7 +962,9 @@
// If we get here we could have other crazy uses that are transitively
// loaded.
assert((isa<PHINode>(GlobalUser) || isa<SelectInst>(GlobalUser) ||
- isa<ConstantExpr>(GlobalUser) || isa<CmpInst>(GlobalUser)) &&
+ isa<ConstantExpr>(GlobalUser) || isa<CmpInst>(GlobalUser) ||
+ isa<BitCastInst>(GlobalUser) ||
+ isa<GetElementPtrInst>(GlobalUser)) &&
"Only expect load and stores!");
}
}
Modified: llvm/branches/R600/lib/Transforms/IPO/InlineAlways.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Transforms/IPO/InlineAlways.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Transforms/IPO/InlineAlways.cpp (original)
+++ llvm/branches/R600/lib/Transforms/IPO/InlineAlways.cpp Tue Oct 2 09:15:33 2012
@@ -65,7 +65,7 @@
/// \brief Minimal filter to detect invalid constructs for inlining.
static bool isInlineViable(Function &F) {
- bool ReturnsTwice = F.hasFnAttr(Attribute::ReturnsTwice);
+ bool ReturnsTwice = F.getFnAttributes().hasReturnsTwiceAttr();
for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
// Disallow inlining of functions which contain an indirect branch.
if (isa<IndirectBrInst>(BI->getTerminator()))
@@ -114,7 +114,7 @@
if (Callee->isDeclaration()) return InlineCost::getNever();
// Return never for anything not marked as always inline.
- if (!Callee->hasFnAttr(Attribute::AlwaysInline))
+ if (!Callee->getFnAttributes().hasAlwaysInlineAttr())
return InlineCost::getNever();
// Do some minimal analysis to preclude non-viable functions.
Modified: llvm/branches/R600/lib/Transforms/IPO/Inliner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Transforms/IPO/Inliner.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Transforms/IPO/Inliner.cpp (original)
+++ llvm/branches/R600/lib/Transforms/IPO/Inliner.cpp Tue Oct 2 09:15:33 2012
@@ -93,10 +93,10 @@
// If the inlined function had a higher stack protection level than the
// calling function, then bump up the caller's stack protection level.
- if (Callee->hasFnAttr(Attribute::StackProtectReq))
+ if (Callee->getFnAttributes().hasStackProtectReqAttr())
Caller->addFnAttr(Attribute::StackProtectReq);
- else if (Callee->hasFnAttr(Attribute::StackProtect) &&
- !Caller->hasFnAttr(Attribute::StackProtectReq))
+ else if (Callee->getFnAttributes().hasStackProtectAttr() &&
+ !Caller->getFnAttributes().hasStackProtectReqAttr())
Caller->addFnAttr(Attribute::StackProtect);
// Look at all of the allocas that we inlined through this call site. If we
@@ -209,7 +209,7 @@
// would decrease the threshold.
Function *Caller = CS.getCaller();
bool OptSize = Caller && !Caller->isDeclaration() &&
- Caller->hasFnAttr(Attribute::OptimizeForSize);
+ Caller->getFnAttributes().hasOptimizeForSizeAttr();
if (!(InlineLimit.getNumOccurrences() > 0) && OptSize &&
OptSizeThreshold < thres)
thres = OptSizeThreshold;
@@ -217,7 +217,7 @@
// Listen to the inlinehint attribute when it would increase the threshold.
Function *Callee = CS.getCalledFunction();
bool InlineHint = Callee && !Callee->isDeclaration() &&
- Callee->hasFnAttr(Attribute::InlineHint);
+ Callee->getFnAttributes().hasInlineHintAttr();
if (InlineHint && HintThreshold > thres)
thres = HintThreshold;
@@ -533,7 +533,7 @@
// Handle the case when this function is called and we only want to care
// about always-inline functions. This is a bit of a hack to share code
// between here and the InlineAlways pass.
- if (AlwaysInlineOnly && !F->hasFnAttr(Attribute::AlwaysInline))
+ if (AlwaysInlineOnly && !F->getFnAttributes().hasAlwaysInlineAttr())
continue;
// If the only remaining users of the function are dead constants, remove
Modified: llvm/branches/R600/lib/Transforms/IPO/PassManagerBuilder.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Transforms/IPO/PassManagerBuilder.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Transforms/IPO/PassManagerBuilder.cpp (original)
+++ llvm/branches/R600/lib/Transforms/IPO/PassManagerBuilder.cpp Tue Oct 2 09:15:33 2012
@@ -41,7 +41,7 @@
cl::desc("Run GVN instead of Early CSE after vectorization passes"));
static cl::opt<bool> UseNewSROA("use-new-sroa",
- cl::init(true), cl::Hidden,
+ cl::init(false), cl::Hidden,
cl::desc("Enable the new, experimental SROA pass"));
PassManagerBuilder::PassManagerBuilder() {
@@ -211,13 +211,12 @@
// FIXME: We shouldn't bother with this anymore.
MPM.add(createStripDeadPrototypesPass()); // Get rid of dead prototypes
- // GlobalOpt already deletes dead functions and globals, at -O3 try a
+ // GlobalOpt already deletes dead functions and globals, at -O2 try a
// late pass of GlobalDCE. It is capable of deleting dead cycles.
- if (OptLevel > 2)
+ if (OptLevel > 1) {
MPM.add(createGlobalDCEPass()); // Remove dead fns and globals.
-
- if (OptLevel > 1)
MPM.add(createConstantMergePass()); // Merge dup global constants
+ }
}
addExtensionsToPM(EP_OptimizerLast, MPM);
}
Modified: llvm/branches/R600/lib/Transforms/InstCombine/InstCombineCalls.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Transforms/InstCombine/InstCombineCalls.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Transforms/InstCombine/InstCombineCalls.cpp (original)
+++ llvm/branches/R600/lib/Transforms/InstCombine/InstCombineCalls.cpp Tue Oct 2 09:15:33 2012
@@ -1037,7 +1037,7 @@
if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
Attributes RAttrs = CallerPAL.getRetAttributes();
- if (RAttrs & Attribute::typeIncompatible(NewRetTy))
+ if (RAttrs & Attributes::typeIncompatible(NewRetTy))
return false; // Attribute not compatible with transformed value.
}
@@ -1067,7 +1067,7 @@
return false; // Cannot transform this parameter value.
Attributes Attrs = CallerPAL.getParamAttributes(i + 1);
- if (Attrs & Attribute::typeIncompatible(ParamTy))
+ if (Attrs & Attributes::typeIncompatible(ParamTy))
return false; // Attribute not compatible with transformed value.
// If the parameter is passed as a byval argument, then we have to have a
@@ -1141,7 +1141,7 @@
// If the return value is not being used, the type may not be compatible
// with the existing attributes. Wipe out any problematic attributes.
- RAttrs &= ~Attribute::typeIncompatible(NewRetTy);
+ RAttrs &= ~Attributes::typeIncompatible(NewRetTy);
// Add the new return attributes.
if (RAttrs)
Modified: llvm/branches/R600/lib/Transforms/InstCombine/InstCombineSelect.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Transforms/InstCombine/InstCombineSelect.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Transforms/InstCombine/InstCombineSelect.cpp (original)
+++ llvm/branches/R600/lib/Transforms/InstCombine/InstCombineSelect.cpp Tue Oct 2 09:15:33 2012
@@ -903,7 +903,7 @@
return &SI;
}
- if (VectorType* VecTy = dyn_cast<VectorType>(SI.getType())) {
+ if (VectorType *VecTy = dyn_cast<VectorType>(SI.getType())) {
unsigned VWidth = VecTy->getNumElements();
APInt UndefElts(VWidth, 0);
APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
@@ -912,6 +912,28 @@
return ReplaceInstUsesWith(SI, V);
return &SI;
}
+
+ if (ConstantVector *CV = dyn_cast<ConstantVector>(CondVal)) {
+ // Form a shufflevector instruction.
+ SmallVector<Constant *, 8> Mask(VWidth);
+ Type *Int32Ty = Type::getInt32Ty(CV->getContext());
+ for (unsigned i = 0; i != VWidth; ++i) {
+ Constant *Elem = cast<Constant>(CV->getOperand(i));
+ if (ConstantInt *E = dyn_cast<ConstantInt>(Elem))
+ Mask[i] = ConstantInt::get(Int32Ty, i + (E->isZero() ? VWidth : 0));
+ else if (isa<UndefValue>(Elem))
+ Mask[i] = UndefValue::get(Int32Ty);
+ else
+ return 0;
+ }
+ Constant *MaskVal = ConstantVector::get(Mask);
+ Value *V = Builder->CreateShuffleVector(TrueVal, FalseVal, MaskVal);
+ return ReplaceInstUsesWith(SI, V);
+ }
+
+ if (isa<ConstantAggregateZero>(CondVal)) {
+ return ReplaceInstUsesWith(SI, FalseVal);
+ }
}
return 0;
Modified: llvm/branches/R600/lib/Transforms/Instrumentation/AddressSanitizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Transforms/Instrumentation/AddressSanitizer.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Transforms/Instrumentation/AddressSanitizer.cpp (original)
+++ llvm/branches/R600/lib/Transforms/Instrumentation/AddressSanitizer.cpp Tue Oct 2 09:15:33 2012
@@ -854,7 +854,7 @@
// If needed, insert __asan_init before checking for AddressSafety attr.
maybeInsertAsanInitAtFunctionEntry(F);
- if (!F.hasFnAttr(Attribute::AddressSafety)) return false;
+ if (!F.getFnAttributes().hasAddressSafetyAttr()) return false;
if (!ClDebugFunc.empty() && ClDebugFunc != F.getName())
return false;
Modified: llvm/branches/R600/lib/Transforms/Scalar/CodeGenPrepare.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Transforms/Scalar/CodeGenPrepare.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Transforms/Scalar/CodeGenPrepare.cpp (original)
+++ llvm/branches/R600/lib/Transforms/Scalar/CodeGenPrepare.cpp Tue Oct 2 09:15:33 2012
@@ -149,7 +149,7 @@
TLInfo = &getAnalysis<TargetLibraryInfo>();
DT = getAnalysisIfAvailable<DominatorTree>();
PFI = getAnalysisIfAvailable<ProfileInfo>();
- OptSize = F.hasFnAttr(Attribute::OptimizeForSize);
+ OptSize = F.getFnAttributes().hasOptimizeForSizeAttr();
/// This optimization identifies DIV instructions that can be
/// profitably bypassed and carried out with a shorter, faster divide.
@@ -226,7 +226,8 @@
// edge, just collapse it.
BasicBlock *SinglePred = BB->getSinglePredecessor();
- if (!SinglePred || SinglePred == BB) continue;
+ // Don't merge if BB's address is taken.
+ if (!SinglePred || SinglePred == BB || BB->hasAddressTaken()) continue;
BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator());
if (Term && !Term->isConditional()) {
@@ -788,7 +789,7 @@
}
// If we eliminated all predecessors of the block, delete the block now.
- if (Changed && pred_begin(BB) == pred_end(BB))
+ if (Changed && !BB->hasAddressTaken() && pred_begin(BB) == pred_end(BB))
BB->eraseFromParent();
return Changed;
Modified: llvm/branches/R600/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp (original)
+++ llvm/branches/R600/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp Tue Oct 2 09:15:33 2012
@@ -235,6 +235,11 @@
// This case never fires - remove it.
CI.getCaseSuccessor()->removePredecessor(BB);
SI->removeCase(CI); // Does not invalidate the iterator.
+
+ // The condition can be modified by removePredecessor's PHI simplification
+ // logic.
+ Cond = SI->getCondition();
+
++NumDeadCases;
Changed = true;
} else if (State == LazyValueInfo::True) {
Modified: llvm/branches/R600/lib/Transforms/Scalar/LoopUnrollPass.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Transforms/Scalar/LoopUnrollPass.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Transforms/Scalar/LoopUnrollPass.cpp (original)
+++ llvm/branches/R600/lib/Transforms/Scalar/LoopUnrollPass.cpp Tue Oct 2 09:15:33 2012
@@ -145,7 +145,7 @@
// not user specified.
unsigned Threshold = CurrentThreshold;
if (!UserThreshold &&
- Header->getParent()->hasFnAttr(Attribute::OptimizeForSize))
+ Header->getParent()->getFnAttributes().hasOptimizeForSizeAttr())
Threshold = OptSizeUnrollThreshold;
// Find trip count and trip multiple if count is not available
Modified: llvm/branches/R600/lib/Transforms/Scalar/LoopUnswitch.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Transforms/Scalar/LoopUnswitch.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Transforms/Scalar/LoopUnswitch.cpp (original)
+++ llvm/branches/R600/lib/Transforms/Scalar/LoopUnswitch.cpp Tue Oct 2 09:15:33 2012
@@ -638,7 +638,7 @@
// Check to see if it would be profitable to unswitch current loop.
// Do not do non-trivial unswitch while optimizing for size.
- if (OptimizeForSize || F->hasFnAttr(Attribute::OptimizeForSize))
+ if (OptimizeForSize || F->getFnAttributes().hasOptimizeForSizeAttr())
return false;
UnswitchNontrivialCondition(LoopCond, Val, currentLoop);
Modified: llvm/branches/R600/lib/Transforms/Scalar/SROA.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Transforms/Scalar/SROA.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Transforms/Scalar/SROA.cpp (original)
+++ llvm/branches/R600/lib/Transforms/Scalar/SROA.cpp Tue Oct 2 09:15:33 2012
@@ -30,7 +30,6 @@
#include "llvm/DebugInfo.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
-#include "llvm/GlobalVariable.h"
#include "llvm/IRBuilder.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
@@ -42,18 +41,15 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Support/CallSite.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/InstVisitor.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -155,16 +151,12 @@
/// memory use itself with a particular partition. As a consequence there is
/// intentionally overlap between various uses of the same partition.
struct PartitionUse : public ByteRange {
- /// \brief The user of this range of the alloca.
- AssertingVH<Instruction> User;
+ /// \brief The use in question. Provides access to both user and used value.
+ Use* U;
- /// \brief The particular pointer value derived from this alloca in use.
- AssertingVH<Instruction> Ptr;
-
- PartitionUse() : ByteRange(), User(), Ptr() {}
- PartitionUse(uint64_t BeginOffset, uint64_t EndOffset,
- Instruction *User, Instruction *Ptr)
- : ByteRange(BeginOffset, EndOffset), User(User), Ptr(Ptr) {}
+ PartitionUse() : ByteRange(), U() {}
+ PartitionUse(uint64_t BeginOffset, uint64_t EndOffset, Use *U)
+ : ByteRange(BeginOffset, EndOffset), U(U) {}
};
/// \brief Construct a partitioning of a particular alloca.
@@ -202,11 +194,11 @@
use_iterator use_begin(const_iterator I) { return Uses[I - begin()].begin(); }
use_iterator use_end(unsigned Idx) { return Uses[Idx].end(); }
use_iterator use_end(const_iterator I) { return Uses[I - begin()].end(); }
- void use_push_back(unsigned Idx, const PartitionUse &U) {
- Uses[Idx].push_back(U);
+ void use_push_back(unsigned Idx, const PartitionUse &PU) {
+ Uses[Idx].push_back(PU);
}
- void use_push_back(const_iterator I, const PartitionUse &U) {
- Uses[I - begin()].push_back(U);
+ void use_push_back(const_iterator I, const PartitionUse &PU) {
+ Uses[I - begin()].push_back(PU);
}
void use_erase(unsigned Idx, use_iterator UI) { Uses[Idx].erase(UI); }
void use_erase(const_iterator I, use_iterator UI) {
@@ -267,10 +259,9 @@
/// When manipulating PHI nodes or selects, they can use more than one
/// partition of an alloca. We store a special mapping to allow finding the
/// partition referenced by each of these operands, if any.
- iterator findPartitionForPHIOrSelectOperand(Instruction &I, Value *Op) {
- SmallDenseMap<std::pair<Instruction *, Value *>,
- std::pair<unsigned, unsigned> >::const_iterator MapIt
- = PHIOrSelectOpMap.find(std::make_pair(&I, Op));
+ iterator findPartitionForPHIOrSelectOperand(Use *U) {
+ SmallDenseMap<Use *, std::pair<unsigned, unsigned> >::const_iterator MapIt
+ = PHIOrSelectOpMap.find(U);
if (MapIt == PHIOrSelectOpMap.end())
return end();
@@ -282,11 +273,9 @@
///
/// Similar to mapping these operands back to the partitions, this maps
/// directly to the use structure of that partition.
- use_iterator findPartitionUseForPHIOrSelectOperand(Instruction &I,
- Value *Op) {
- SmallDenseMap<std::pair<Instruction *, Value *>,
- std::pair<unsigned, unsigned> >::const_iterator MapIt
- = PHIOrSelectOpMap.find(std::make_pair(&I, Op));
+ use_iterator findPartitionUseForPHIOrSelectOperand(Use *U) {
+ SmallDenseMap<Use *, std::pair<unsigned, unsigned> >::const_iterator MapIt
+ = PHIOrSelectOpMap.find(U);
assert(MapIt != PHIOrSelectOpMap.end());
return Uses[MapIt->second.first].begin() + MapIt->second.second;
}
@@ -373,8 +362,7 @@
SmallDenseMap<Instruction *, std::pair<uint64_t, bool> > PHIOrSelectSizes;
/// \brief Auxiliary information for particular PHI or select operands.
- SmallDenseMap<std::pair<Instruction *, Value *>,
- std::pair<unsigned, unsigned>, 4> PHIOrSelectOpMap;
+ SmallDenseMap<Use *, std::pair<unsigned, unsigned>, 4> PHIOrSelectOpMap;
/// \brief A utility routine called from the constructor.
///
@@ -401,6 +389,8 @@
const uint64_t AllocSize;
AllocaPartitioning &P;
+ SmallPtrSet<Use *, 8> VisitedUses;
+
struct OffsetUse {
Use *U;
int64_t Offset;
@@ -412,14 +402,12 @@
int64_t Offset;
void enqueueUsers(Instruction &I, int64_t UserOffset) {
- SmallPtrSet<User *, 8> UserSet;
for (Value::use_iterator UI = I.use_begin(), UE = I.use_end();
UI != UE; ++UI) {
- if (!UserSet.insert(*UI))
- continue;
-
- OffsetUse OU = { &UI.getUse(), UserOffset };
- Queue.push_back(OU);
+ if (VisitedUses.insert(&UI.getUse())) {
+ OffsetUse OU = { &UI.getUse(), UserOffset };
+ Queue.push_back(OU);
+ }
}
}
@@ -662,11 +650,14 @@
bool Inserted = false;
llvm::tie(PMI, Inserted)
= MemTransferPartitionMap.insert(std::make_pair(&II, NewIdx));
- if (!Inserted && Offsets.IsSplittable) {
+ if (Offsets.IsSplittable &&
+ (!Inserted || II.getRawSource() == II.getRawDest())) {
// We've found a memory transfer intrinsic which refers to the alloca as
- // both a source and dest. We refuse to split these to simplify splitting
- // logic. If possible, SROA will still split them into separate allocas
- // and then re-analyze.
+ // both a source and dest. This is detected either by direct equality of
+ // the operand values, or when we visit the intrinsic twice due to two
+ // different chains of values leading to it. We refuse to split these to
+ // simplify splitting logic. If possible, SROA will still split them into
+ // separate allocas and then re-analyze.
Offsets.IsSplittable = false;
P.Partitions[PMI->second].IsSplittable = false;
P.Partitions[NewIdx].IsSplittable = false;
@@ -855,12 +846,11 @@
B = llvm::prior(B);
for (iterator I = B, E = P.end(); I != E && I->BeginOffset < EndOffset;
++I) {
- PartitionUse NewUse(std::max(I->BeginOffset, BeginOffset),
- std::min(I->EndOffset, EndOffset),
- &User, cast<Instruction>(*U));
- P.use_push_back(I, NewUse);
+ PartitionUse NewPU(std::max(I->BeginOffset, BeginOffset),
+ std::min(I->EndOffset, EndOffset), U);
+ P.use_push_back(I, NewPU);
if (isa<PHINode>(U->getUser()) || isa<SelectInst>(U->getUser()))
- P.PHIOrSelectOpMap[std::make_pair(&User, U->get())]
+ P.PHIOrSelectOpMap[U]
= std::make_pair(I - P.begin(), P.Uses[I - P.begin()].size() - 1);
}
}
@@ -1112,22 +1102,16 @@
Type *AllocaPartitioning::getCommonType(iterator I) const {
Type *Ty = 0;
for (const_use_iterator UI = use_begin(I), UE = use_end(I); UI != UE; ++UI) {
- if (isa<IntrinsicInst>(*UI->User))
+ if (isa<IntrinsicInst>(*UI->U->getUser()))
continue;
if (UI->BeginOffset != I->BeginOffset || UI->EndOffset != I->EndOffset)
continue;
Type *UserTy = 0;
- if (LoadInst *LI = dyn_cast<LoadInst>(&*UI->User)) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(UI->U->getUser())) {
UserTy = LI->getType();
- } else if (StoreInst *SI = dyn_cast<StoreInst>(&*UI->User)) {
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(UI->U->getUser())) {
UserTy = SI->getValueOperand()->getType();
- } else if (SelectInst *SI = dyn_cast<SelectInst>(&*UI->User)) {
- if (PointerType *PtrTy = dyn_cast<PointerType>(SI->getType()))
- UserTy = PtrTy->getElementType();
- } else if (PHINode *PN = dyn_cast<PHINode>(&*UI->User)) {
- if (PointerType *PtrTy = dyn_cast<PointerType>(PN->getType()))
- UserTy = PtrTy->getElementType();
}
if (Ty && Ty != UserTy)
@@ -1154,8 +1138,8 @@
for (const_use_iterator UI = use_begin(I), UE = use_end(I);
UI != UE; ++UI) {
OS << Indent << " [" << UI->BeginOffset << "," << UI->EndOffset << ") "
- << "used by: " << *UI->User << "\n";
- if (MemTransferInst *II = dyn_cast<MemTransferInst>(&*UI->User)) {
+ << "used by: " << *UI->U->getUser() << "\n";
+ if (MemTransferInst *II = dyn_cast<MemTransferInst>(UI->U->getUser())) {
const MemTransferOffsets &MTO = MemTransferInstData.lookup(II);
bool IsDest;
if (!MTO.IsSplittable)
@@ -1335,6 +1319,7 @@
static char ID;
private:
+ friend class PHIOrSelectSpeculator;
friend class AllocaPartitionRewriter;
friend class AllocaPartitionVectorRewriter;
@@ -1707,19 +1692,20 @@
(EndOffset - BeginOffset) != VecSize)
return false;
- if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(&*I->User)) {
+ if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I->U->getUser())) {
if (MI->isVolatile())
return false;
- if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(&*I->User)) {
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(I->U->getUser())) {
const AllocaPartitioning::MemTransferOffsets &MTO
= P.getMemTransferOffsets(*MTI);
if (!MTO.IsSplittable)
return false;
}
- } else if (I->Ptr->getType()->getPointerElementType()->isStructTy()) {
+ } else if (I->U->get()->getType()->getPointerElementType()->isStructTy()) {
// Disable vector promotion when there are loads or stores of an FCA.
return false;
- } else if (!isa<LoadInst>(*I->User) && !isa<StoreInst>(*I->User)) {
+ } else if (!isa<LoadInst>(I->U->getUser()) &&
+ !isa<StoreInst>(I->U->getUser())) {
return false;
}
}
@@ -1748,20 +1734,20 @@
// splittable later) and lose the ability to promote each element access.
bool WholeAllocaOp = false;
for (; I != E; ++I) {
- if (LoadInst *LI = dyn_cast<LoadInst>(&*I->User)) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(I->U->getUser())) {
if (LI->isVolatile() || !LI->getType()->isIntegerTy())
return false;
if (LI->getType() == Ty)
WholeAllocaOp = true;
- } else if (StoreInst *SI = dyn_cast<StoreInst>(&*I->User)) {
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(I->U->getUser())) {
if (SI->isVolatile() || !SI->getValueOperand()->getType()->isIntegerTy())
return false;
if (SI->getValueOperand()->getType() == Ty)
WholeAllocaOp = true;
- } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(&*I->User)) {
+ } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I->U->getUser())) {
if (MI->isVolatile())
return false;
- if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(&*I->User)) {
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(I->U->getUser())) {
const AllocaPartitioning::MemTransferOffsets &MTO
= P.getMemTransferOffsets(*MTI);
if (!MTO.IsSplittable)
@@ -1775,6 +1761,278 @@
}
namespace {
+/// \brief Visitor to speculate PHIs and Selects where possible.
+class PHIOrSelectSpeculator : public InstVisitor<PHIOrSelectSpeculator> {
+ // Befriend the base class so it can delegate to private visit methods.
+ friend class llvm::InstVisitor<PHIOrSelectSpeculator>;
+
+ const TargetData &TD;
+ AllocaPartitioning &P;
+ SROA &Pass;
+
+public:
+ PHIOrSelectSpeculator(const TargetData &TD, AllocaPartitioning &P, SROA &Pass)
+ : TD(TD), P(P), Pass(Pass) {}
+
+ /// \brief Visit the users of the alloca partition and rewrite them.
+ void visitUsers(AllocaPartitioning::const_use_iterator I,
+ AllocaPartitioning::const_use_iterator E) {
+ for (; I != E; ++I)
+ visit(cast<Instruction>(I->U->getUser()));
+ }
+
+private:
+ // By default, skip this instruction.
+ void visitInstruction(Instruction &I) {}
+
+ /// PHI instructions that use an alloca and are subsequently loaded can be
+ /// rewritten to load both input pointers in the pred blocks and then PHI the
+ /// results, allowing the load of the alloca to be promoted.
+ /// From this:
+ /// %P2 = phi [i32* %Alloca, i32* %Other]
+ /// %V = load i32* %P2
+ /// to:
+ /// %V1 = load i32* %Alloca -> will be mem2reg'd
+ /// ...
+ /// %V2 = load i32* %Other
+ /// ...
+ /// %V = phi [i32 %V1, i32 %V2]
+ ///
+ /// We can do this to a select if its only uses are loads and if the operands
+ /// to the select can be loaded unconditionally.
+ ///
+ /// FIXME: This should be hoisted into a generic utility, likely in
+ /// Transforms/Util/Local.h
+ bool isSafePHIToSpeculate(PHINode &PN, SmallVectorImpl<LoadInst *> &Loads) {
+ // For now, we can only do this promotion if the load is in the same block
+ // as the PHI, and if there are no stores between the phi and load.
+ // TODO: Allow recursive phi users.
+ // TODO: Allow stores.
+ BasicBlock *BB = PN.getParent();
+ unsigned MaxAlign = 0;
+ for (Value::use_iterator UI = PN.use_begin(), UE = PN.use_end();
+ UI != UE; ++UI) {
+ LoadInst *LI = dyn_cast<LoadInst>(*UI);
+ if (LI == 0 || !LI->isSimple()) return false;
+
+ // For now we only allow loads in the same block as the PHI. This is
+ // a common case that happens when instcombine merges two loads through
+ // a PHI.
+ if (LI->getParent() != BB) return false;
+
+ // Ensure that there are no instructions between the PHI and the load that
+ // could store.
+ for (BasicBlock::iterator BBI = &PN; &*BBI != LI; ++BBI)
+ if (BBI->mayWriteToMemory())
+ return false;
+
+ MaxAlign = std::max(MaxAlign, LI->getAlignment());
+ Loads.push_back(LI);
+ }
+
+ // We can only transform this if it is safe to push the loads into the
+ // predecessor blocks. The only thing to watch out for is that we can't put
+ // a possibly trapping load in the predecessor if it is a critical edge.
+ for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num;
+ ++Idx) {
+ TerminatorInst *TI = PN.getIncomingBlock(Idx)->getTerminator();
+ Value *InVal = PN.getIncomingValue(Idx);
+
+ // If the value is produced by the terminator of the predecessor (an
+ // invoke) or it has side-effects, there is no valid place to put a load
+ // in the predecessor.
+ if (TI == InVal || TI->mayHaveSideEffects())
+ return false;
+
+ // If the predecessor has a single successor, then the edge isn't
+ // critical.
+ if (TI->getNumSuccessors() == 1)
+ continue;
+
+ // If this pointer is always safe to load, or if we can prove that there
+ // is already a load in the block, then we can move the load to the pred
+ // block.
+ if (InVal->isDereferenceablePointer() ||
+ isSafeToLoadUnconditionally(InVal, TI, MaxAlign, &TD))
+ continue;
+
+ return false;
+ }
+
+ return true;
+ }
+
+ void visitPHINode(PHINode &PN) {
+ DEBUG(dbgs() << " original: " << PN << "\n");
+
+ SmallVector<LoadInst *, 4> Loads;
+ if (!isSafePHIToSpeculate(PN, Loads))
+ return;
+
+ assert(!Loads.empty());
+
+ Type *LoadTy = cast<PointerType>(PN.getType())->getElementType();
+ IRBuilder<> PHIBuilder(&PN);
+ PHINode *NewPN = PHIBuilder.CreatePHI(LoadTy, PN.getNumIncomingValues(),
+ PN.getName() + ".sroa.speculated");
+
+ // Get the TBAA tag and alignment to use from one of the loads. It doesn't
+ // matter which one we get and if any differ, it doesn't matter.
+ LoadInst *SomeLoad = cast<LoadInst>(Loads.back());
+ MDNode *TBAATag = SomeLoad->getMetadata(LLVMContext::MD_tbaa);
+ unsigned Align = SomeLoad->getAlignment();
+
+ // Rewrite all loads of the PN to use the new PHI.
+ do {
+ LoadInst *LI = Loads.pop_back_val();
+ LI->replaceAllUsesWith(NewPN);
+ Pass.DeadInsts.push_back(LI);
+ } while (!Loads.empty());
+
+ // Inject loads into all of the pred blocks.
+ for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; ++Idx) {
+ BasicBlock *Pred = PN.getIncomingBlock(Idx);
+ TerminatorInst *TI = Pred->getTerminator();
+ Use *InUse = &PN.getOperandUse(PN.getOperandNumForIncomingValue(Idx));
+ Value *InVal = PN.getIncomingValue(Idx);
+ IRBuilder<> PredBuilder(TI);
+
+ LoadInst *Load
+ = PredBuilder.CreateLoad(InVal, (PN.getName() + ".sroa.speculate.load." +
+ Pred->getName()));
+ ++NumLoadsSpeculated;
+ Load->setAlignment(Align);
+ if (TBAATag)
+ Load->setMetadata(LLVMContext::MD_tbaa, TBAATag);
+ NewPN->addIncoming(Load, Pred);
+
+ Instruction *Ptr = dyn_cast<Instruction>(InVal);
+ if (!Ptr)
+ // No uses to rewrite.
+ continue;
+
+ // Try to lookup and rewrite any partition uses corresponding to this phi
+ // input.
+ AllocaPartitioning::iterator PI
+ = P.findPartitionForPHIOrSelectOperand(InUse);
+ if (PI == P.end())
+ continue;
+
+ // Replace the Use in the PartitionUse for this operand with the Use
+ // inside the load.
+ AllocaPartitioning::use_iterator UI
+ = P.findPartitionUseForPHIOrSelectOperand(InUse);
+ assert(isa<PHINode>(*UI->U->getUser()));
+ UI->U = &Load->getOperandUse(Load->getPointerOperandIndex());
+ }
+ DEBUG(dbgs() << " speculated to: " << *NewPN << "\n");
+ }
+
+ /// Select instructions that use an alloca and are subsequently loaded can be
+ /// rewritten to load both input pointers and then select between the result,
+ /// allowing the load of the alloca to be promoted.
+ /// From this:
+ /// %P2 = select i1 %cond, i32* %Alloca, i32* %Other
+ /// %V = load i32* %P2
+ /// to:
+ /// %V1 = load i32* %Alloca -> will be mem2reg'd
+ /// %V2 = load i32* %Other
+ /// %V = select i1 %cond, i32 %V1, i32 %V2
+ ///
+ /// We can do this to a select if its only uses are loads and if the operand
+ /// to the select can be loaded unconditionally.
+ bool isSafeSelectToSpeculate(SelectInst &SI,
+ SmallVectorImpl<LoadInst *> &Loads) {
+ Value *TValue = SI.getTrueValue();
+ Value *FValue = SI.getFalseValue();
+ bool TDerefable = TValue->isDereferenceablePointer();
+ bool FDerefable = FValue->isDereferenceablePointer();
+
+ for (Value::use_iterator UI = SI.use_begin(), UE = SI.use_end();
+ UI != UE; ++UI) {
+ LoadInst *LI = dyn_cast<LoadInst>(*UI);
+ if (LI == 0 || !LI->isSimple()) return false;
+
+ // Both operands to the select need to be dereferencable, either
+ // absolutely (e.g. allocas) or at this point because we can see other
+ // accesses to it.
+ if (!TDerefable && !isSafeToLoadUnconditionally(TValue, LI,
+ LI->getAlignment(), &TD))
+ return false;
+ if (!FDerefable && !isSafeToLoadUnconditionally(FValue, LI,
+ LI->getAlignment(), &TD))
+ return false;
+ Loads.push_back(LI);
+ }
+
+ return true;
+ }
+
+ void visitSelectInst(SelectInst &SI) {
+ DEBUG(dbgs() << " original: " << SI << "\n");
+ IRBuilder<> IRB(&SI);
+
+ // If the select isn't safe to speculate, just use simple logic to emit it.
+ SmallVector<LoadInst *, 4> Loads;
+ if (!isSafeSelectToSpeculate(SI, Loads))
+ return;
+
+ Use *Ops[2] = { &SI.getOperandUse(1), &SI.getOperandUse(2) };
+ AllocaPartitioning::iterator PIs[2];
+ AllocaPartitioning::PartitionUse PUs[2];
+ for (unsigned i = 0, e = 2; i != e; ++i) {
+ PIs[i] = P.findPartitionForPHIOrSelectOperand(Ops[i]);
+ if (PIs[i] != P.end()) {
+ // If the pointer is within the partitioning, remove the select from
+ // its uses. We'll add in the new loads below.
+ AllocaPartitioning::use_iterator UI
+ = P.findPartitionUseForPHIOrSelectOperand(Ops[i]);
+ PUs[i] = *UI;
+ P.use_erase(PIs[i], UI);
+ }
+ }
+
+ Value *TV = SI.getTrueValue();
+ Value *FV = SI.getFalseValue();
+ // Replace the loads of the select with a select of two loads.
+ while (!Loads.empty()) {
+ LoadInst *LI = Loads.pop_back_val();
+
+ IRB.SetInsertPoint(LI);
+ LoadInst *TL =
+ IRB.CreateLoad(TV, LI->getName() + ".sroa.speculate.load.true");
+ LoadInst *FL =
+ IRB.CreateLoad(FV, LI->getName() + ".sroa.speculate.load.false");
+ NumLoadsSpeculated += 2;
+
+ // Transfer alignment and TBAA info if present.
+ TL->setAlignment(LI->getAlignment());
+ FL->setAlignment(LI->getAlignment());
+ if (MDNode *Tag = LI->getMetadata(LLVMContext::MD_tbaa)) {
+ TL->setMetadata(LLVMContext::MD_tbaa, Tag);
+ FL->setMetadata(LLVMContext::MD_tbaa, Tag);
+ }
+
+ Value *V = IRB.CreateSelect(SI.getCondition(), TL, FL,
+ LI->getName() + ".sroa.speculated");
+
+ LoadInst *Loads[2] = { TL, FL };
+ for (unsigned i = 0, e = 2; i != e; ++i) {
+ if (PIs[i] != P.end()) {
+ Use *LoadUse = &Loads[i]->getOperandUse(0);
+ assert(PUs[i].U->get() == LoadUse->get());
+ PUs[i].U = LoadUse;
+ P.use_push_back(PIs[i], PUs[i]);
+ }
+ }
+
+ DEBUG(dbgs() << " speculated to: " << *V << "\n");
+ LI->replaceAllUsesWith(V);
+ Pass.DeadInsts.push_back(LI);
+ }
+ }
+};
+
/// \brief Visitor to rewrite instructions using a partition of an alloca to
/// use a new alloca.
///
@@ -1813,6 +2071,7 @@
// The offset of the partition user currently being rewritten.
uint64_t BeginOffset, EndOffset;
+ Use *OldUse;
Instruction *OldPtr;
// The name prefix to use when rewriting instructions for this alloca.
@@ -1851,9 +2110,10 @@
for (; I != E; ++I) {
BeginOffset = I->BeginOffset;
EndOffset = I->EndOffset;
- OldPtr = I->Ptr;
+ OldUse = I->U;
+ OldPtr = cast<Instruction>(I->U->get());
NamePrefix = (Twine(NewAI.getName()) + "." + Twine(BeginOffset)).str();
- CanSROA &= visit(I->User);
+ CanSROA &= visit(cast<Instruction>(I->U->getUser()));
}
if (VecTy) {
assert(CanSROA);
@@ -1881,6 +2141,20 @@
return getAdjustedPtr(IRB, TD, &NewAI, Offset, PointerTy, getName(""));
}
+ unsigned getAdjustedAlign(uint64_t Offset) {
+ unsigned NewAIAlign = NewAI.getAlignment();
+ if (!NewAIAlign)
+ NewAIAlign = TD.getABITypeAlignment(NewAI.getAllocatedType());
+ return MinAlign(NewAIAlign, Offset);
+ }
+ unsigned getAdjustedAlign() {
+ return getAdjustedAlign(BeginOffset - NewAllocaBeginOffset);
+ }
+
+ bool isTypeAlignSufficient(Type *Ty) {
+ return TD.getABITypeAlignment(Ty) >= getAdjustedAlign();
+ }
+
ConstantInt *getIndex(IRBuilder<> &IRB, uint64_t Offset) {
assert(VecTy && "Can only call getIndex when rewriting a vector");
uint64_t RelOffset = Offset - NewAllocaBeginOffset;
@@ -1893,7 +2167,8 @@
Value *extractInteger(IRBuilder<> &IRB, IntegerType *TargetTy,
uint64_t Offset) {
assert(IntPromotionTy && "Alloca is not an integer we can extract from");
- Value *V = IRB.CreateLoad(&NewAI, getName(".load"));
+ Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+ getName(".load"));
assert(Offset >= NewAllocaBeginOffset && "Out of bounds offset");
uint64_t RelOffset = Offset - NewAllocaBeginOffset;
if (RelOffset)
@@ -1909,7 +2184,7 @@
StoreInst *insertInteger(IRBuilder<> &IRB, Value *V, uint64_t Offset) {
IntegerType *Ty = cast<IntegerType>(V->getType());
if (Ty == IntPromotionTy)
- return IRB.CreateStore(V, &NewAI);
+ return IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
assert(Ty->getBitWidth() < IntPromotionTy->getBitWidth() &&
"Cannot insert a larger integer!");
@@ -1921,10 +2196,12 @@
APInt Mask = ~Ty->getMask().zext(IntPromotionTy->getBitWidth())
.shl(RelOffset*8);
- Value *Old = IRB.CreateAnd(IRB.CreateLoad(&NewAI, getName(".oldload")),
+ Value *Old = IRB.CreateAnd(IRB.CreateAlignedLoad(&NewAI,
+ NewAI.getAlignment(),
+ getName(".oldload")),
Mask, getName(".mask"));
- return IRB.CreateStore(IRB.CreateOr(Old, V, getName(".insert")),
- &NewAI);
+ return IRB.CreateAlignedStore(IRB.CreateOr(Old, V, getName(".insert")),
+ &NewAI, NewAI.getAlignment());
}
void deleteIfTriviallyDead(Value *V) {
@@ -1946,12 +2223,12 @@
Value *Result;
if (LI.getType() == VecTy->getElementType() ||
BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset) {
- Result
- = IRB.CreateExtractElement(IRB.CreateLoad(&NewAI, getName(".load")),
- getIndex(IRB, BeginOffset),
- getName(".extract"));
+ Result = IRB.CreateExtractElement(
+ IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), getName(".load")),
+ getIndex(IRB, BeginOffset), getName(".extract"));
} else {
- Result = IRB.CreateLoad(&NewAI, getName(".load"));
+ Result = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+ getName(".load"));
}
if (Result->getType() != LI.getType())
Result = getValueCast(IRB, Result, LI.getType());
@@ -1986,6 +2263,8 @@
Value *NewPtr = getAdjustedAllocaPtr(IRB,
LI.getPointerOperand()->getType());
LI.setOperand(0, NewPtr);
+ if (LI.getAlignment() || !isTypeAlignSufficient(LI.getType()))
+ LI.setAlignment(getAdjustedAlign());
DEBUG(dbgs() << " to: " << LI << "\n");
deleteIfTriviallyDead(OldOp);
@@ -1999,13 +2278,14 @@
BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset) {
if (V->getType() != ElementTy)
V = getValueCast(IRB, V, ElementTy);
- V = IRB.CreateInsertElement(IRB.CreateLoad(&NewAI, getName(".load")), V,
- getIndex(IRB, BeginOffset),
+ LoadInst *LI = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+ getName(".load"));
+ V = IRB.CreateInsertElement(LI, V, getIndex(IRB, BeginOffset),
getName(".insert"));
} else if (V->getType() != VecTy) {
V = getValueCast(IRB, V, VecTy);
}
- StoreInst *Store = IRB.CreateStore(V, &NewAI);
+ StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
Pass.DeadInsts.push_back(&SI);
(void)Store;
@@ -2036,6 +2316,12 @@
Value *NewPtr = getAdjustedAllocaPtr(IRB,
SI.getPointerOperand()->getType());
SI.setOperand(1, NewPtr);
+ if (SI.getAlignment() ||
+ !isTypeAlignSufficient(SI.getValueOperand()->getType()))
+ SI.setAlignment(getAdjustedAlign());
+ if (SI.getAlignment())
+ SI.setAlignment(MinAlign(NewAI.getAlignment(),
+ BeginOffset - NewAllocaBeginOffset));
DEBUG(dbgs() << " to: " << SI << "\n");
deleteIfTriviallyDead(OldOp);
@@ -2051,6 +2337,9 @@
// pointer to the new alloca.
if (!isa<Constant>(II.getLength())) {
II.setDest(getAdjustedAllocaPtr(IRB, II.getRawDest()->getType()));
+ Type *CstTy = II.getAlignmentCst()->getType();
+ II.setAlignment(ConstantInt::get(CstTy, getAdjustedAlign()));
+
deleteIfTriviallyDead(OldPtr);
return false;
}
@@ -2070,11 +2359,10 @@
!TD.isLegalInteger(TD.getTypeSizeInBits(ScalarTy)))) {
Type *SizeTy = II.getLength()->getType();
Constant *Size = ConstantInt::get(SizeTy, EndOffset - BeginOffset);
-
CallInst *New
= IRB.CreateMemSet(getAdjustedAllocaPtr(IRB,
II.getRawDest()->getType()),
- II.getValue(), Size, II.getAlignment(),
+ II.getValue(), Size, getAdjustedAlign(),
II.isVolatile());
(void)New;
DEBUG(dbgs() << " to: " << *New << "\n");
@@ -2112,11 +2400,13 @@
// If this is an element-wide memset of a vectorizable alloca, insert it.
if (VecTy && (BeginOffset > NewAllocaBeginOffset ||
EndOffset < NewAllocaEndOffset)) {
- StoreInst *Store = IRB.CreateStore(
- IRB.CreateInsertElement(IRB.CreateLoad(&NewAI, getName(".load")), V,
- getIndex(IRB, BeginOffset),
+ StoreInst *Store = IRB.CreateAlignedStore(
+ IRB.CreateInsertElement(IRB.CreateAlignedLoad(&NewAI,
+ NewAI.getAlignment(),
+ getName(".load")),
+ V, getIndex(IRB, BeginOffset),
getName(".insert")),
- &NewAI);
+ &NewAI, NewAI.getAlignment());
(void)Store;
DEBUG(dbgs() << " to: " << *Store << "\n");
return true;
@@ -2134,7 +2424,8 @@
assert(V->getType() == VecTy);
}
- Value *New = IRB.CreateStore(V, &NewAI, II.isVolatile());
+ Value *New = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(),
+ II.isVolatile());
(void)New;
DEBUG(dbgs() << " to: " << *New << "\n");
return !II.isVolatile();
@@ -2153,6 +2444,16 @@
const AllocaPartitioning::MemTransferOffsets &MTO
= P.getMemTransferOffsets(II);
+ // Compute the relative offset within the transfer.
+ unsigned IntPtrWidth = TD.getPointerSizeInBits();
+ APInt RelOffset(IntPtrWidth, BeginOffset - (IsDest ? MTO.DestBegin
+ : MTO.SourceBegin));
+
+ unsigned Align = II.getAlignment();
+ if (Align > 1)
+ Align = MinAlign(RelOffset.zextOrTrunc(64).getZExtValue(),
+ MinAlign(II.getAlignment(), getAdjustedAlign()));
+
// For unsplit intrinsics, we simply modify the source and destination
// pointers in place. This isn't just an optimization, it is a matter of
// correctness. With unsplit intrinsics we may be dealing with transfers
@@ -2167,6 +2468,9 @@
else
II.setSource(getAdjustedAllocaPtr(IRB, II.getRawSource()->getType()));
+ Type *CstTy = II.getAlignmentCst()->getType();
+ II.setAlignment(ConstantInt::get(CstTy, Align));
+
DEBUG(dbgs() << " to: " << II << "\n");
deleteIfTriviallyDead(OldOp);
return false;
@@ -2177,11 +2481,6 @@
// memmove with memcpy, and we don't need to worry about all manner of
// downsides to splitting and transforming the operations.
- // Compute the relative offset within the transfer.
- unsigned IntPtrWidth = TD.getPointerSizeInBits();
- APInt RelOffset(IntPtrWidth, BeginOffset - (IsDest ? MTO.DestBegin
- : MTO.SourceBegin));
-
// If this doesn't map cleanly onto the alloca type, and that type isn't
// a single value type, just emit a memcpy.
bool EmitMemCpy
@@ -2239,8 +2538,7 @@
CallInst *New = IRB.CreateMemCpy(IsDest ? OurPtr : OtherPtr,
IsDest ? OtherPtr : OurPtr,
- Size, II.getAlignment(),
- II.isVolatile());
+ Size, Align, II.isVolatile());
(void)New;
DEBUG(dbgs() << " to: " << *New << "\n");
return false;
@@ -2254,22 +2552,25 @@
Value *Src;
if (IsVectorElement && !IsDest) {
// We have to extract rather than load.
- Src = IRB.CreateExtractElement(IRB.CreateLoad(SrcPtr,
- getName(".copyload")),
- getIndex(IRB, BeginOffset),
- getName(".copyextract"));
+ Src = IRB.CreateExtractElement(
+ IRB.CreateAlignedLoad(SrcPtr, Align, getName(".copyload")),
+ getIndex(IRB, BeginOffset),
+ getName(".copyextract"));
} else {
- Src = IRB.CreateLoad(SrcPtr, II.isVolatile(), getName(".copyload"));
+ Src = IRB.CreateAlignedLoad(SrcPtr, Align, II.isVolatile(),
+ getName(".copyload"));
}
if (IsVectorElement && IsDest) {
// We have to insert into a loaded copy before storing.
- Src = IRB.CreateInsertElement(IRB.CreateLoad(&NewAI, getName(".load")),
- Src, getIndex(IRB, BeginOffset),
- getName(".insert"));
+ Src = IRB.CreateInsertElement(
+ IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), getName(".load")),
+ Src, getIndex(IRB, BeginOffset),
+ getName(".insert"));
}
- Value *Store = IRB.CreateStore(Src, DstPtr, II.isVolatile());
+ StoreInst *Store = cast<StoreInst>(
+ IRB.CreateAlignedStore(Src, DstPtr, Align, II.isVolatile()));
(void)Store;
DEBUG(dbgs() << " to: " << *Store << "\n");
return !II.isVolatile();
@@ -2300,215 +2601,25 @@
return true;
}
- /// PHI instructions that use an alloca and are subsequently loaded can be
- /// rewritten to load both input pointers in the pred blocks and then PHI the
- /// results, allowing the load of the alloca to be promoted.
- /// From this:
- /// %P2 = phi [i32* %Alloca, i32* %Other]
- /// %V = load i32* %P2
- /// to:
- /// %V1 = load i32* %Alloca -> will be mem2reg'd
- /// ...
- /// %V2 = load i32* %Other
- /// ...
- /// %V = phi [i32 %V1, i32 %V2]
- ///
- /// We can do this to a select if its only uses are loads and if the operand
- /// to the select can be loaded unconditionally.
- ///
- /// FIXME: This should be hoisted into a generic utility, likely in
- /// Transforms/Util/Local.h
- bool isSafePHIToSpeculate(PHINode &PN, SmallVectorImpl<LoadInst *> &Loads) {
- // For now, we can only do this promotion if the load is in the same block
- // as the PHI, and if there are no stores between the phi and load.
- // TODO: Allow recursive phi users.
- // TODO: Allow stores.
- BasicBlock *BB = PN.getParent();
- unsigned MaxAlign = 0;
- for (Value::use_iterator UI = PN.use_begin(), UE = PN.use_end();
- UI != UE; ++UI) {
- LoadInst *LI = dyn_cast<LoadInst>(*UI);
- if (LI == 0 || !LI->isSimple()) return false;
-
- // For now we only allow loads in the same block as the PHI. This is
- // a common case that happens when instcombine merges two loads through
- // a PHI.
- if (LI->getParent() != BB) return false;
-
- // Ensure that there are no instructions between the PHI and the load that
- // could store.
- for (BasicBlock::iterator BBI = &PN; &*BBI != LI; ++BBI)
- if (BBI->mayWriteToMemory())
- return false;
-
- MaxAlign = std::max(MaxAlign, LI->getAlignment());
- Loads.push_back(LI);
- }
-
- // We can only transform this if it is safe to push the loads into the
- // predecessor blocks. The only thing to watch out for is that we can't put
- // a possibly trapping load in the predecessor if it is a critical edge.
- for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num;
- ++Idx) {
- TerminatorInst *TI = PN.getIncomingBlock(Idx)->getTerminator();
- Value *InVal = PN.getIncomingValue(Idx);
-
- // If the value is produced by the terminator of the predecessor (an
- // invoke) or it has side-effects, there is no valid place to put a load
- // in the predecessor.
- if (TI == InVal || TI->mayHaveSideEffects())
- return false;
-
- // If the predecessor has a single successor, then the edge isn't
- // critical.
- if (TI->getNumSuccessors() == 1)
- continue;
-
- // If this pointer is always safe to load, or if we can prove that there
- // is already a load in the block, then we can move the load to the pred
- // block.
- if (InVal->isDereferenceablePointer() ||
- isSafeToLoadUnconditionally(InVal, TI, MaxAlign, &TD))
- continue;
-
- return false;
- }
-
- return true;
- }
-
bool visitPHINode(PHINode &PN) {
DEBUG(dbgs() << " original: " << PN << "\n");
+
// We would like to compute a new pointer in only one place, but have it be
// as local as possible to the PHI. To do that, we re-use the location of
// the old pointer, which necessarily must be in the right position to
// dominate the PHI.
IRBuilder<> PtrBuilder(cast<Instruction>(OldPtr));
- SmallVector<LoadInst *, 4> Loads;
- if (!isSafePHIToSpeculate(PN, Loads)) {
- Value *NewPtr = getAdjustedAllocaPtr(PtrBuilder, OldPtr->getType());
- // Replace the operands which were using the old pointer.
- User::op_iterator OI = PN.op_begin(), OE = PN.op_end();
- for (; OI != OE; ++OI)
- if (*OI == OldPtr)
- *OI = NewPtr;
-
- DEBUG(dbgs() << " to: " << PN << "\n");
- deleteIfTriviallyDead(OldPtr);
- return false;
- }
- assert(!Loads.empty());
-
- Type *LoadTy = cast<PointerType>(PN.getType())->getElementType();
- IRBuilder<> PHIBuilder(&PN);
- PHINode *NewPN = PHIBuilder.CreatePHI(LoadTy, PN.getNumIncomingValues());
- NewPN->takeName(&PN);
-
- // Get the TBAA tag and alignment to use from one of the loads. It doesn't
- // matter which one we get and if any differ, it doesn't matter.
- LoadInst *SomeLoad = cast<LoadInst>(Loads.back());
- MDNode *TBAATag = SomeLoad->getMetadata(LLVMContext::MD_tbaa);
- unsigned Align = SomeLoad->getAlignment();
Value *NewPtr = getAdjustedAllocaPtr(PtrBuilder, OldPtr->getType());
+ // Replace the operands which were using the old pointer.
+ User::op_iterator OI = PN.op_begin(), OE = PN.op_end();
+ for (; OI != OE; ++OI)
+ if (*OI == OldPtr)
+ *OI = NewPtr;
- // Rewrite all loads of the PN to use the new PHI.
- do {
- LoadInst *LI = Loads.pop_back_val();
- LI->replaceAllUsesWith(NewPN);
- Pass.DeadInsts.push_back(LI);
- } while (!Loads.empty());
-
- // Inject loads into all of the pred blocks.
- for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; ++Idx) {
- BasicBlock *Pred = PN.getIncomingBlock(Idx);
- TerminatorInst *TI = Pred->getTerminator();
- Value *InVal = PN.getIncomingValue(Idx);
- IRBuilder<> PredBuilder(TI);
-
- // Map the value to the new alloca pointer if this was the old alloca
- // pointer.
- bool ThisOperand = InVal == OldPtr;
- if (ThisOperand)
- InVal = NewPtr;
-
- LoadInst *Load
- = PredBuilder.CreateLoad(InVal, getName(".sroa.speculate." +
- Pred->getName()));
- ++NumLoadsSpeculated;
- Load->setAlignment(Align);
- if (TBAATag)
- Load->setMetadata(LLVMContext::MD_tbaa, TBAATag);
- NewPN->addIncoming(Load, Pred);
-
- if (ThisOperand)
- continue;
- Instruction *OtherPtr = dyn_cast<Instruction>(InVal);
- if (!OtherPtr)
- // No uses to rewrite.
- continue;
-
- // Try to lookup and rewrite any partition uses corresponding to this phi
- // input.
- AllocaPartitioning::iterator PI
- = P.findPartitionForPHIOrSelectOperand(PN, OtherPtr);
- if (PI != P.end()) {
- // If the other pointer is within the partitioning, replace the PHI in
- // its uses with the load we just speculated, or add another load for
- // it to rewrite if we've already replaced the PHI.
- AllocaPartitioning::use_iterator UI
- = P.findPartitionUseForPHIOrSelectOperand(PN, OtherPtr);
- if (isa<PHINode>(*UI->User))
- UI->User = Load;
- else {
- AllocaPartitioning::PartitionUse OtherUse = *UI;
- OtherUse.User = Load;
- P.use_push_back(PI, OtherUse);
- }
- }
- }
- DEBUG(dbgs() << " speculated to: " << *NewPN << "\n");
- return NewPtr == &NewAI;
- }
-
- /// Select instructions that use an alloca and are subsequently loaded can be
- /// rewritten to load both input pointers and then select between the result,
- /// allowing the load of the alloca to be promoted.
- /// From this:
- /// %P2 = select i1 %cond, i32* %Alloca, i32* %Other
- /// %V = load i32* %P2
- /// to:
- /// %V1 = load i32* %Alloca -> will be mem2reg'd
- /// %V2 = load i32* %Other
- /// %V = select i1 %cond, i32 %V1, i32 %V2
- ///
- /// We can do this to a select if its only uses are loads and if the operand
- /// to the select can be loaded unconditionally.
- bool isSafeSelectToSpeculate(SelectInst &SI,
- SmallVectorImpl<LoadInst *> &Loads) {
- Value *TValue = SI.getTrueValue();
- Value *FValue = SI.getFalseValue();
- bool TDerefable = TValue->isDereferenceablePointer();
- bool FDerefable = FValue->isDereferenceablePointer();
-
- for (Value::use_iterator UI = SI.use_begin(), UE = SI.use_end();
- UI != UE; ++UI) {
- LoadInst *LI = dyn_cast<LoadInst>(*UI);
- if (LI == 0 || !LI->isSimple()) return false;
-
- // Both operands to the select need to be dereferencable, either
- // absolutely (e.g. allocas) or at this point because we can see other
- // accesses to it.
- if (!TDerefable && !isSafeToLoadUnconditionally(TValue, LI,
- LI->getAlignment(), &TD))
- return false;
- if (!FDerefable && !isSafeToLoadUnconditionally(FValue, LI,
- LI->getAlignment(), &TD))
- return false;
- Loads.push_back(LI);
- }
-
- return true;
+ DEBUG(dbgs() << " to: " << PN << "\n");
+ deleteIfTriviallyDead(OldPtr);
+ return false;
}
bool visitSelectInst(SelectInst &SI) {
@@ -2521,66 +2632,12 @@
assert(SI.getFalseValue() != OldPtr && "Pointer is both operands!");
else
assert(SI.getFalseValue() == OldPtr && "Pointer isn't an operand!");
- Value *NewPtr = getAdjustedAllocaPtr(IRB, OldPtr->getType());
-
- // If the select isn't safe to speculate, just use simple logic to emit it.
- SmallVector<LoadInst *, 4> Loads;
- if (!isSafeSelectToSpeculate(SI, Loads)) {
- SI.setOperand(IsTrueVal ? 1 : 2, NewPtr);
- DEBUG(dbgs() << " to: " << SI << "\n");
- deleteIfTriviallyDead(OldPtr);
- return false;
- }
-
- Value *OtherPtr = IsTrueVal ? SI.getFalseValue() : SI.getTrueValue();
- AllocaPartitioning::iterator PI
- = P.findPartitionForPHIOrSelectOperand(SI, OtherPtr);
- AllocaPartitioning::PartitionUse OtherUse;
- if (PI != P.end()) {
- // If the other pointer is within the partitioning, remove the select
- // from its uses. We'll add in the new loads below.
- AllocaPartitioning::use_iterator UI
- = P.findPartitionUseForPHIOrSelectOperand(SI, OtherPtr);
- OtherUse = *UI;
- P.use_erase(PI, UI);
- }
-
- Value *TV = IsTrueVal ? NewPtr : SI.getTrueValue();
- Value *FV = IsTrueVal ? SI.getFalseValue() : NewPtr;
- // Replace the loads of the select with a select of two loads.
- while (!Loads.empty()) {
- LoadInst *LI = Loads.pop_back_val();
-
- IRB.SetInsertPoint(LI);
- LoadInst *TL =
- IRB.CreateLoad(TV, getName("." + LI->getName() + ".true"));
- LoadInst *FL =
- IRB.CreateLoad(FV, getName("." + LI->getName() + ".false"));
- NumLoadsSpeculated += 2;
- if (PI != P.end()) {
- LoadInst *OtherLoad = IsTrueVal ? FL : TL;
- assert(OtherUse.Ptr == OtherLoad->getOperand(0));
- OtherUse.User = OtherLoad;
- P.use_push_back(PI, OtherUse);
- }
-
- // Transfer alignment and TBAA info if present.
- TL->setAlignment(LI->getAlignment());
- FL->setAlignment(LI->getAlignment());
- if (MDNode *Tag = LI->getMetadata(LLVMContext::MD_tbaa)) {
- TL->setMetadata(LLVMContext::MD_tbaa, Tag);
- FL->setMetadata(LLVMContext::MD_tbaa, Tag);
- }
-
- Value *V = IRB.CreateSelect(SI.getCondition(), TL, FL);
- V->takeName(LI);
- DEBUG(dbgs() << " speculated to: " << *V << "\n");
- LI->replaceAllUsesWith(V);
- Pass.DeadInsts.push_back(LI);
- }
+ Value *NewPtr = getAdjustedAllocaPtr(IRB, OldPtr->getType());
+ SI.setOperand(IsTrueVal ? 1 : 2, NewPtr);
+ DEBUG(dbgs() << " to: " << SI << "\n");
deleteIfTriviallyDead(OldPtr);
- return NewPtr == &NewAI;
+ return false;
}
};
@@ -2928,6 +2985,14 @@
if (P.use_begin(PI) == P.use_end(PI))
return false; // No live uses left of this partition.
+ DEBUG(dbgs() << "Speculating PHIs and selects in partition "
+ << "[" << PI->BeginOffset << "," << PI->EndOffset << ")\n");
+
+ PHIOrSelectSpeculator Speculator(*TD, P, *this);
+ DEBUG(dbgs() << " speculating ");
+ DEBUG(P.print(dbgs(), PI, ""));
+ Speculator.visitUsers(P.use_begin(PI), P.use_end(PI));
+
// Try to compute a friendly type for this partition of the alloca. This
// won't always succeed, in which case we fall back to a legal integer type
// or an i8 array of an appropriate size.
@@ -2959,9 +3024,19 @@
assert(PI == P.begin() && "Begin offset is zero on later partition");
NewAI = &AI;
} else {
- // FIXME: The alignment here is overly conservative -- we could in many
- // cases get away with much weaker alignment constraints.
- NewAI = new AllocaInst(AllocaTy, 0, AI.getAlignment(),
+ unsigned Alignment = AI.getAlignment();
+ if (!Alignment) {
+ // The minimum alignment which users can rely on when the explicit
+ // alignment is omitted or zero is that required by the ABI for this
+ // type.
+ Alignment = TD->getABITypeAlignment(AI.getAllocatedType());
+ }
+ Alignment = MinAlign(Alignment, PI->BeginOffset);
+ // If we will get at least this much alignment from the type alone, leave
+ // the alloca's alignment unconstrained.
+ if (Alignment <= TD->getABITypeAlignment(AllocaTy))
+ Alignment = 0;
+ NewAI = new AllocaInst(AllocaTy, 0, Alignment,
AI.getName() + ".sroa." + Twine(PI - P.begin()),
&AI);
++NumNewAllocas;
Modified: llvm/branches/R600/lib/Transforms/Utils/IntegerDivision.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Transforms/Utils/IntegerDivision.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Transforms/Utils/IntegerDivision.cpp (original)
+++ llvm/branches/R600/lib/Transforms/Utils/IntegerDivision.cpp Tue Oct 2 09:15:33 2012
@@ -23,11 +23,69 @@
using namespace llvm;
+/// Generate code to compute the remainder of two signed integers. Returns the
+/// remainder, which will have the sign of the dividend. Builder's insert point
+/// should be pointing where the caller wants code generated, e.g. at the srem
+/// instruction. This will generate a urem in the process, and Builder's insert
+/// point will be pointing at the uren (if present, i.e. not folded), ready to
+/// be expanded if the user wishes
+static Value *generateSignedRemainderCode(Value *Dividend, Value *Divisor,
+ IRBuilder<> &Builder) {
+ ConstantInt *ThirtyOne = Builder.getInt32(31);
+
+ // ; %dividend_sgn = ashr i32 %dividend, 31
+ // ; %divisor_sgn = ashr i32 %divisor, 31
+ // ; %dvd_xor = xor i32 %dividend, %dividend_sgn
+ // ; %dvs_xor = xor i32 %divisor, %divisor_sgn
+ // ; %u_dividend = sub i32 %dvd_xor, %dividend_sgn
+ // ; %u_divisor = sub i32 %dvs_xor, %divisor_sgn
+ // ; %urem = urem i32 %dividend, %divisor
+ // ; %xored = xor i32 %urem, %dividend_sgn
+ // ; %srem = sub i32 %xored, %dividend_sgn
+ Value *DividendSign = Builder.CreateAShr(Dividend, ThirtyOne);
+ Value *DivisorSign = Builder.CreateAShr(Divisor, ThirtyOne);
+ Value *DvdXor = Builder.CreateXor(Dividend, DividendSign);
+ Value *DvsXor = Builder.CreateXor(Divisor, DivisorSign);
+ Value *UDividend = Builder.CreateSub(DvdXor, DividendSign);
+ Value *UDivisor = Builder.CreateSub(DvsXor, DivisorSign);
+ Value *URem = Builder.CreateURem(UDividend, UDivisor);
+ Value *Xored = Builder.CreateXor(URem, DividendSign);
+ Value *SRem = Builder.CreateSub(Xored, DividendSign);
+
+ if (Instruction *URemInst = dyn_cast<Instruction>(URem))
+ Builder.SetInsertPoint(URemInst);
+
+ return SRem;
+}
+
+
+/// Generate code to compute the remainder of two unsigned integers. Returns the
+/// remainder. Builder's insert point should be pointing where the caller wants
+/// code generated, e.g. at the urem instruction. This will generate a udiv in
+/// the process, and Builder's insert point will be pointing at the udiv (if
+/// present, i.e. not folded), ready to be expanded if the user wishes
+static Value *generatedUnsignedRemainderCode(Value *Dividend, Value *Divisor,
+ IRBuilder<> &Builder) {
+ // Remainder = Dividend - Quotient*Divisor
+
+ // ; %quotient = udiv i32 %dividend, %divisor
+ // ; %product = mul i32 %divisor, %quotient
+ // ; %remainder = sub i32 %dividend, %product
+ Value *Quotient = Builder.CreateUDiv(Dividend, Divisor);
+ Value *Product = Builder.CreateMul(Divisor, Quotient);
+ Value *Remainder = Builder.CreateSub(Dividend, Product);
+
+ if (Instruction *UDiv = dyn_cast<Instruction>(Quotient))
+ Builder.SetInsertPoint(UDiv);
+
+ return Remainder;
+}
+
/// Generate code to divide two signed integers. Returns the quotient, rounded
-/// towards 0. Builder's insert point should be pointing at the sdiv
-/// instruction. This will generate a udiv in the process, and Builder's insert
-/// point will be pointing at the udiv (if present, i.e. not folded), ready to
-/// be expanded if the user wishes.
+/// towards 0. Builder's insert point should be pointing where the caller wants
+/// code generated, e.g. at the sdiv instruction. This will generate a udiv in
+/// the process, and Builder's insert point will be pointing at the udiv (if
+/// present, i.e. not folded), ready to be expanded if the user wishes.
static Value *generateSignedDivisionCode(Value *Dividend, Value *Divisor,
IRBuilder<> &Builder) {
// Implementation taken from compiler-rt's __divsi3
@@ -62,8 +120,8 @@
}
/// Generates code to divide two unsigned scalar 32-bit integers. Returns the
-/// quotient, rounded towards 0. Builder's insert point should be pointing at
-/// the udiv instruction.
+/// quotient, rounded towards 0. Builder's insert point should be pointing where
+/// the caller wants code generated, e.g. at the udiv instruction.
static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor,
IRBuilder<> &Builder) {
// The basic algorithm can be found in the compiler-rt project's
@@ -265,6 +323,56 @@
return Q_5;
}
+/// Generate code to calculate the remainder of two integers, replacing Rem with
+/// the generated code. This currently generates code using the udiv expansion,
+/// but future work includes generating more specialized code, e.g. when more
+/// information about the operands are known. Currently only implements 32bit
+/// scalar division (due to udiv's limitation), but future work is removing this
+/// limitation.
+///
+/// @brief Replace Rem with generated code.
+bool llvm::expandRemainder(BinaryOperator *Rem) {
+ assert((Rem->getOpcode() == Instruction::SRem ||
+ Rem->getOpcode() == Instruction::URem) &&
+ "Trying to expand remainder from a non-remainder function");
+
+ IRBuilder<> Builder(Rem);
+
+ // First prepare the sign if it's a signed remainder
+ if (Rem->getOpcode() == Instruction::SRem) {
+ Value *Remainder = generateSignedRemainderCode(Rem->getOperand(0),
+ Rem->getOperand(1), Builder);
+
+ Rem->replaceAllUsesWith(Remainder);
+ Rem->dropAllReferences();
+ Rem->eraseFromParent();
+
+ // If we didn't actually generate a udiv instruction, we're done
+ BinaryOperator *BO = dyn_cast<BinaryOperator>(Builder.GetInsertPoint());
+ if (!BO || BO->getOpcode() != Instruction::URem)
+ return true;
+
+ Rem = BO;
+ }
+
+ Value *Remainder = generatedUnsignedRemainderCode(Rem->getOperand(0),
+ Rem->getOperand(1),
+ Builder);
+
+ Rem->replaceAllUsesWith(Remainder);
+ Rem->dropAllReferences();
+ Rem->eraseFromParent();
+
+ // Expand the udiv
+ if (BinaryOperator *UDiv = dyn_cast<BinaryOperator>(Builder.GetInsertPoint())) {
+ assert(UDiv->getOpcode() == Instruction::UDiv && "Non-udiv in expansion?");
+ expandDivision(UDiv);
+ }
+
+ return true;
+}
+
+
/// Generate code to divide two integers, replacing Div with the generated
/// code. This currently generates code similarly to compiler-rt's
/// implementations, but future work includes generating more specialized code
@@ -287,7 +395,7 @@
if (Div->getOpcode() == Instruction::SDiv) {
// Lower the code to unsigned division, and reset Div to point to the udiv.
Value *Quotient = generateSignedDivisionCode(Div->getOperand(0),
- Div->getOperand(1), Builder);
+ Div->getOperand(1), Builder);
Div->replaceAllUsesWith(Quotient);
Div->dropAllReferences();
Div->eraseFromParent();
Modified: llvm/branches/R600/lib/Transforms/Utils/SimplifyCFG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Transforms/Utils/SimplifyCFG.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Transforms/Utils/SimplifyCFG.cpp (original)
+++ llvm/branches/R600/lib/Transforms/Utils/SimplifyCFG.cpp Tue Oct 2 09:15:33 2012
@@ -58,9 +58,10 @@
SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true),
cl::desc("Sink common instructions down to the end block"));
-STATISTIC(NumSpeculations, "Number of speculative executed instructions");
+STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
STATISTIC(NumLookupTables, "Number of switch instructions turned into lookup tables");
STATISTIC(NumSinkCommons, "Number of common instructions sunk down to the end block");
+STATISTIC(NumSpeculations, "Number of speculative executed instructions");
namespace {
/// ValueEqualityComparisonCase - Represents a case of a switch.
@@ -1172,10 +1173,14 @@
// Check that BBEnd has two predecessors and the other predecessor ends with
// an unconditional branch.
- SmallVector<BasicBlock*, 16> Preds(pred_begin(BBEnd), pred_end(BBEnd));
- if (Preds.size() != 2)
+ pred_iterator PI = pred_begin(BBEnd), PE = pred_end(BBEnd);
+ BasicBlock *Pred0 = *PI++;
+ if (PI == PE) // Only one predecessor.
+ return false;
+ BasicBlock *Pred1 = *PI++;
+ if (PI != PE) // More than two predecessors.
return false;
- BasicBlock *BB2 = (Preds[0] == BB1) ? Preds[1] : Preds[0];
+ BasicBlock *BB2 = (Pred0 == BB1) ? Pred1 : Pred0;
BranchInst *BI2 = dyn_cast<BranchInst>(BB2->getTerminator());
if (!BI2 || !BI2->isUnconditional())
return false;
@@ -3240,83 +3245,230 @@
return true;
}
-/// BuildLookupTable - Build a lookup table with the contents of Results, using
-/// DefaultResult to fill the holes in the table. If the table ends up
-/// containing the same result in each element, set *SingleResult to that value
-/// and return NULL.
-static GlobalVariable *BuildLookupTable(Module &M,
- uint64_t TableSize,
- ConstantInt *Offset,
- const SmallVector<std::pair<ConstantInt*, Constant*>, 4>& Results,
- Constant *DefaultResult,
- Constant **SingleResult) {
- assert(Results.size() && "Need values to build lookup table");
- assert(TableSize >= Results.size() && "Table needs to hold all values");
+namespace {
+ /// SwitchLookupTable - This class represents a lookup table that can be used
+ /// to replace a switch.
+ class SwitchLookupTable {
+ public:
+ /// SwitchLookupTable - Create a lookup table to use as a switch replacement
+ /// with the contents of Values, using DefaultValue to fill any holes in the
+ /// table.
+ SwitchLookupTable(Module &M,
+ uint64_t TableSize,
+ ConstantInt *Offset,
+ const SmallVector<std::pair<ConstantInt*, Constant*>, 4>& Values,
+ Constant *DefaultValue,
+ const TargetData *TD);
+
+ /// BuildLookup - Build instructions with Builder to retrieve the value at
+ /// the position given by Index in the lookup table.
+ Value *BuildLookup(Value *Index, IRBuilder<> &Builder);
+
+ /// WouldFitInRegister - Return true if a table with TableSize elements of
+ /// type ElementType would fit in a target-legal register.
+ static bool WouldFitInRegister(const TargetData *TD,
+ uint64_t TableSize,
+ const Type *ElementType);
+
+ private:
+ // Depending on the contents of the table, it can be represented in
+ // different ways.
+ enum {
+ // For tables where each element contains the same value, we just have to
+ // store that single value and return it for each lookup.
+ SingleValueKind,
+
+ // For small tables with integer elements, we can pack them into a bitmap
+ // that fits into a target-legal register. Values are retrieved by
+ // shift and mask operations.
+ BitMapKind,
+
+ // The table is stored as an array of values. Values are retrieved by load
+ // instructions from the table.
+ ArrayKind
+ } Kind;
+
+ // For SingleValueKind, this is the single value.
+ Constant *SingleValue;
+
+ // For BitMapKind, this is the bitmap.
+ ConstantInt *BitMap;
+ IntegerType *BitMapElementTy;
+
+ // For ArrayKind, this is the array.
+ GlobalVariable *Array;
+ };
+}
+
+SwitchLookupTable::SwitchLookupTable(Module &M,
+ uint64_t TableSize,
+ ConstantInt *Offset,
+ const SmallVector<std::pair<ConstantInt*, Constant*>, 4>& Values,
+ Constant *DefaultValue,
+ const TargetData *TD) {
+ assert(Values.size() && "Can't build lookup table without values!");
+ assert(TableSize >= Values.size() && "Can't fit values in table!");
// If all values in the table are equal, this is that value.
- Constant *SameResult = Results.begin()->second;
+ SingleValue = Values.begin()->second;
// Build up the table contents.
- std::vector<Constant*> TableContents(TableSize);
- for (size_t I = 0, E = Results.size(); I != E; ++I) {
- ConstantInt *CaseVal = Results[I].first;
- Constant *CaseRes = Results[I].second;
+ SmallVector<Constant*, 64> TableContents(TableSize);
+ for (size_t I = 0, E = Values.size(); I != E; ++I) {
+ ConstantInt *CaseVal = Values[I].first;
+ Constant *CaseRes = Values[I].second;
+ assert(CaseRes->getType() == DefaultValue->getType());
- uint64_t Idx = (CaseVal->getValue() - Offset->getValue()).getLimitedValue();
+ uint64_t Idx = (CaseVal->getValue() - Offset->getValue())
+ .getLimitedValue();
TableContents[Idx] = CaseRes;
- if (CaseRes != SameResult)
- SameResult = NULL;
+ if (CaseRes != SingleValue)
+ SingleValue = NULL;
}
// Fill in any holes in the table with the default result.
- if (Results.size() < TableSize) {
- for (unsigned i = 0; i < TableSize; ++i) {
- if (!TableContents[i])
- TableContents[i] = DefaultResult;
+ if (Values.size() < TableSize) {
+ for (uint64_t I = 0; I < TableSize; ++I) {
+ if (!TableContents[I])
+ TableContents[I] = DefaultValue;
}
- if (DefaultResult != SameResult)
- SameResult = NULL;
+ if (DefaultValue != SingleValue)
+ SingleValue = NULL;
}
- // Same result was used in the entire table; just return that.
- if (SameResult) {
- *SingleResult = SameResult;
- return NULL;
+ // If each element in the table contains the same value, we only need to store
+ // that single value.
+ if (SingleValue) {
+ Kind = SingleValueKind;
+ return;
}
- ArrayType *ArrayTy = ArrayType::get(DefaultResult->getType(), TableSize);
+ // If the type is integer and the table fits in a register, build a bitmap.
+ if (WouldFitInRegister(TD, TableSize, DefaultValue->getType())) {
+ IntegerType *IT = cast<IntegerType>(DefaultValue->getType());
+ APInt TableInt(TableSize * IT->getBitWidth(), 0);
+ for (uint64_t I = TableSize; I > 0; --I) {
+ TableInt <<= IT->getBitWidth();
+ // Insert values into the bitmap. Undef values are set to zero.
+ if (!isa<UndefValue>(TableContents[I - 1])) {
+ ConstantInt *Val = cast<ConstantInt>(TableContents[I - 1]);
+ TableInt |= Val->getValue().zext(TableInt.getBitWidth());
+ }
+ }
+ BitMap = ConstantInt::get(M.getContext(), TableInt);
+ BitMapElementTy = IT;
+ Kind = BitMapKind;
+ ++NumBitMaps;
+ return;
+ }
+
+ // Store the table in an array.
+ ArrayType *ArrayTy = ArrayType::get(DefaultValue->getType(), TableSize);
Constant *Initializer = ConstantArray::get(ArrayTy, TableContents);
- GlobalVariable *GV = new GlobalVariable(M, ArrayTy, /*constant=*/ true,
- GlobalVariable::PrivateLinkage,
- Initializer,
- "switch.table");
- GV->setUnnamedAddr(true);
- return GV;
+ Array = new GlobalVariable(M, ArrayTy, /*constant=*/ true,
+ GlobalVariable::PrivateLinkage,
+ Initializer,
+ "switch.table");
+ Array->setUnnamedAddr(true);
+ Kind = ArrayKind;
+}
+
+Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) {
+ switch (Kind) {
+ case SingleValueKind:
+ return SingleValue;
+ case BitMapKind: {
+ // Type of the bitmap (e.g. i59).
+ IntegerType *MapTy = BitMap->getType();
+
+ // Cast Index to the same type as the bitmap.
+ // Note: The Index is <= the number of elements in the table, so
+ // truncating it to the width of the bitmask is safe.
+ Value *ShiftAmt = Builder.CreateZExtOrTrunc(Index, MapTy, "switch.cast");
+
+ // Multiply the shift amount by the element width.
+ ShiftAmt = Builder.CreateMul(ShiftAmt,
+ ConstantInt::get(MapTy, BitMapElementTy->getBitWidth()),
+ "switch.shiftamt");
+
+ // Shift down.
+ Value *DownShifted = Builder.CreateLShr(BitMap, ShiftAmt,
+ "switch.downshift");
+ // Mask off.
+ return Builder.CreateTrunc(DownShifted, BitMapElementTy,
+ "switch.masked");
+ }
+ case ArrayKind: {
+ Value *GEPIndices[] = { Builder.getInt32(0), Index };
+ Value *GEP = Builder.CreateInBoundsGEP(Array, GEPIndices,
+ "switch.gep");
+ return Builder.CreateLoad(GEP, "switch.load");
+ }
+ }
+ llvm_unreachable("Unknown lookup table kind!");
+}
+
+bool SwitchLookupTable::WouldFitInRegister(const TargetData *TD,
+ uint64_t TableSize,
+ const Type *ElementType) {
+ if (!TD)
+ return false;
+ const IntegerType *IT = dyn_cast<IntegerType>(ElementType);
+ if (!IT)
+ return false;
+ // FIXME: If the type is wider than it needs to be, e.g. i8 but all values
+ // are <= 15, we could try to narrow the type.
+
+ // Avoid overflow, fitsInLegalInteger uses unsigned int for the width.
+ if (TableSize >= UINT_MAX/IT->getBitWidth())
+ return false;
+ return TD->fitsInLegalInteger(TableSize * IT->getBitWidth());
+}
+
+/// ShouldBuildLookupTable - Determine whether a lookup table should be built
+/// for this switch, based on the number of caes, size of the table and the
+/// types of the results.
+static bool ShouldBuildLookupTable(SwitchInst *SI,
+ uint64_t TableSize,
+ const TargetData *TD,
+ const SmallDenseMap<PHINode*, Type*>& ResultTypes) {
+ // The table density should be at least 40%. This is the same criterion as for
+ // jump tables, see SelectionDAGBuilder::handleJTSwitchCase.
+ // FIXME: Find the best cut-off.
+ if (SI->getNumCases() > TableSize || TableSize >= UINT64_MAX / 10)
+ return false; // TableSize overflowed, or mul below might overflow.
+ if (SI->getNumCases() * 10 >= TableSize * 4)
+ return true;
+
+ // If each table would fit in a register, we should build it anyway.
+ for (SmallDenseMap<PHINode*, Type*>::const_iterator I = ResultTypes.begin(),
+ E = ResultTypes.end(); I != E; ++I) {
+ if (!SwitchLookupTable::WouldFitInRegister(TD, TableSize, I->second))
+ return false;
+ }
+ return true;
}
/// SwitchToLookupTable - If the switch is only used to initialize one or more
/// phi nodes in a common successor block with different constant values,
/// replace the switch with lookup tables.
static bool SwitchToLookupTable(SwitchInst *SI,
- IRBuilder<> &Builder) {
+ IRBuilder<> &Builder,
+ const TargetData* TD) {
assert(SI->getNumCases() > 1 && "Degenerate switch?");
// FIXME: Handle unreachable cases.
// FIXME: If the switch is too sparse for a lookup table, perhaps we could
// split off a dense part and build a lookup table for that.
- // FIXME: If the results are all integers and the lookup table would fit in a
- // target-legal register, we should store them as a bitmap and use shift/mask
- // to look up the result.
-
// FIXME: This creates arrays of GEPs to constant strings, which means each
// GEP needs a runtime relocation in PIC code. We should just build one big
// string and lookup indices into that.
- // Ignore the switch if the number of cases are too small.
+ // Ignore the switch if the number of cases is too small.
// This is similar to the check when building jump tables in
// SelectionDAGBuilder::handleJTSwitchCase.
// FIXME: Determine the best cut-off.
@@ -3370,33 +3522,12 @@
}
APInt RangeSpread = MaxCaseVal->getValue() - MinCaseVal->getValue();
- // The table density should be at lest 40%. This is the same criterion as for
- // jump tables, see SelectionDAGBuilder::handleJTSwitchCase.
- // FIXME: Find the best cut-off.
- // Be careful to avoid overlow in the density computation.
- if (RangeSpread.zextOrSelf(64).ugt(UINT64_MAX / 4 - 1))
- return false;
uint64_t TableSize = RangeSpread.getLimitedValue() + 1;
- if (SI->getNumCases() * 10 < TableSize * 4)
+ if (!ShouldBuildLookupTable(SI, TableSize, TD, ResultTypes))
return false;
- // Build the lookup tables.
- SmallDenseMap<PHINode*, GlobalVariable*> LookupTables;
- SmallDenseMap<PHINode*, Constant*> SingleResults;
-
- Module &Mod = *CommonDest->getParent()->getParent();
- for (SmallVector<PHINode*, 4>::iterator I = PHIs.begin(), E = PHIs.end();
- I != E; ++I) {
- PHINode *PHI = *I;
-
- Constant *SingleResult = NULL;
- LookupTables[PHI] = BuildLookupTable(Mod, TableSize, MinCaseVal,
- ResultLists[PHI], DefaultResults[PHI],
- &SingleResult);
- SingleResults[PHI] = SingleResult;
- }
-
// Create the BB that does the lookups.
+ Module &Mod = *CommonDest->getParent()->getParent();
BasicBlock *LookupBB = BasicBlock::Create(Mod.getContext(),
"switch.lookup",
CommonDest->getParent(),
@@ -3414,19 +3545,13 @@
// Populate the BB that does the lookups.
Builder.SetInsertPoint(LookupBB);
bool ReturnedEarly = false;
- for (SmallVector<PHINode*, 4>::iterator I = PHIs.begin(), E = PHIs.end();
- I != E; ++I) {
- PHINode *PHI = *I;
- // There was a single result for this phi; just use that.
- if (Constant *SingleResult = SingleResults[PHI]) {
- PHI->addIncoming(SingleResult, LookupBB);
- continue;
- }
+ for (size_t I = 0, E = PHIs.size(); I != E; ++I) {
+ PHINode *PHI = PHIs[I];
+
+ SwitchLookupTable Table(Mod, TableSize, MinCaseVal, ResultLists[PHI],
+ DefaultResults[PHI], TD);
- Value *GEPIndices[] = { Builder.getInt32(0), TableIndex };
- Value *GEP = Builder.CreateInBoundsGEP(LookupTables[PHI], GEPIndices,
- "switch.gep");
- Value *Result = Builder.CreateLoad(GEP, "switch.load");
+ Value *Result = Table.BuildLookup(TableIndex, Builder);
// If the result is used to return immediately from the function, we want to
// do that right here.
@@ -3494,7 +3619,7 @@
if (ForwardSwitchConditionToPHI(SI))
return SimplifyCFG(BB) | true;
- if (SwitchToLookupTable(SI, Builder))
+ if (SwitchToLookupTable(SI, Builder, TD))
return SimplifyCFG(BB) | true;
return false;
Modified: llvm/branches/R600/lib/Transforms/Utils/ValueMapper.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Transforms/Utils/ValueMapper.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/Transforms/Utils/ValueMapper.cpp (original)
+++ llvm/branches/R600/lib/Transforms/Utils/ValueMapper.cpp Tue Oct 2 09:15:33 2012
@@ -21,7 +21,7 @@
using namespace llvm;
// Out of line method to get vtable etc for class.
-void ValueMapTypeRemapper::Anchor() {}
+void ValueMapTypeRemapper::anchor() {}
Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
ValueMapTypeRemapper *TypeMapper) {
Modified: llvm/branches/R600/lib/VMCore/Attributes.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/VMCore/Attributes.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/VMCore/Attributes.cpp (original)
+++ llvm/branches/R600/lib/VMCore/Attributes.cpp Tue Oct 2 09:15:33 2012
@@ -12,6 +12,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/Attributes.h"
+#include "AttributesImpl.h"
+#include "LLVMContextImpl.h"
#include "llvm/Type.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/FoldingSet.h"
@@ -94,21 +96,52 @@
return Result;
}
-Attributes Attribute::typeIncompatible(Type *Ty) {
- Attributes Incompatible = None;
+Attributes Attributes::typeIncompatible(Type *Ty) {
+ Attributes Incompatible = Attribute::None;
if (!Ty->isIntegerTy())
// Attributes that only apply to integers.
- Incompatible |= SExt | ZExt;
+ Incompatible |= Attribute::SExt | Attribute::ZExt;
if (!Ty->isPointerTy())
// Attributes that only apply to pointers.
- Incompatible |= ByVal | Nest | NoAlias | StructRet | NoCapture;
+ Incompatible |= Attribute::ByVal | Attribute::Nest | Attribute::NoAlias |
+ Attribute::StructRet | Attribute::NoCapture;
return Incompatible;
}
//===----------------------------------------------------------------------===//
+// AttributeImpl Definition
+//===----------------------------------------------------------------------===//
+
+Attributes::Attributes(AttributesImpl *A) : Bits(0) {}
+
+Attributes Attributes::get(LLVMContext &Context, Attributes::Builder &B) {
+ // If there are no attributes, return an empty Attributes class.
+ if (B.Bits == 0)
+ return Attributes();
+
+ // Otherwise, build a key to look up the existing attributes.
+ LLVMContextImpl *pImpl = Context.pImpl;
+ FoldingSetNodeID ID;
+ ID.AddInteger(B.Bits);
+
+ void *InsertPoint;
+ AttributesImpl *PA = pImpl->AttrsSet.FindNodeOrInsertPos(ID, InsertPoint);
+
+ if (!PA) {
+ // If we didn't find any existing attributes of the same shape then create a
+ // new one and insert it.
+ PA = new AttributesImpl(B.Bits);
+ pImpl->AttrsSet.InsertNode(PA, InsertPoint);
+ }
+
+ // Return the AttributesList that we found or created.
+ return Attributes(PA);
+}
+
+//===----------------------------------------------------------------------===//
// AttributeListImpl Definition
//===----------------------------------------------------------------------===//
Added: llvm/branches/R600/lib/VMCore/AttributesImpl.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/VMCore/AttributesImpl.h?rev=165011&view=auto
==============================================================================
--- llvm/branches/R600/lib/VMCore/AttributesImpl.h (added)
+++ llvm/branches/R600/lib/VMCore/AttributesImpl.h Tue Oct 2 09:15:33 2012
@@ -0,0 +1,40 @@
+//===-- AttributesImpl.h - Attributes Internals -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines various helper methods and classes used by LLVMContextImpl
+// for creating and managing attributes.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ATTRIBUTESIMPL_H
+#define LLVM_ATTRIBUTESIMPL_H
+
+#include "llvm/ADT/FoldingSet.h"
+
+namespace llvm {
+
+class AttributesImpl : public FoldingSetNode {
+ uint64_t Bits; // FIXME: We will be expanding this.
+
+ void operator=(const AttributesImpl &) LLVM_DELETED_FUNCTION;
+ AttributesImpl(const AttributesImpl &) LLVM_DELETED_FUNCTION;
+public:
+ AttributesImpl(uint64_t bits) : Bits(bits) {}
+
+ void Profile(FoldingSetNodeID &ID) const {
+ Profile(ID, Bits);
+ }
+ static void Profile(FoldingSetNodeID &ID, uint64_t Bits) {
+ ID.AddInteger(Bits);
+ }
+};
+
+} // end llvm namespace
+
+#endif
Modified: llvm/branches/R600/lib/VMCore/Function.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/VMCore/Function.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/VMCore/Function.cpp (original)
+++ llvm/branches/R600/lib/VMCore/Function.cpp Tue Oct 2 09:15:33 2012
@@ -78,7 +78,7 @@
/// in its containing function.
bool Argument::hasByValAttr() const {
if (!getType()->isPointerTy()) return false;
- return getParent()->paramHasAttr(getArgNo()+1, Attribute::ByVal);
+ return getParent()->getParamAttributes(getArgNo()+1).hasByValAttr();
}
unsigned Argument::getParamAlignment() const {
@@ -91,21 +91,21 @@
/// it in its containing function.
bool Argument::hasNestAttr() const {
if (!getType()->isPointerTy()) return false;
- return getParent()->paramHasAttr(getArgNo()+1, Attribute::Nest);
+ return getParent()->getParamAttributes(getArgNo()+1).hasNestAttr();
}
/// hasNoAliasAttr - Return true if this argument has the noalias attribute on
/// it in its containing function.
bool Argument::hasNoAliasAttr() const {
if (!getType()->isPointerTy()) return false;
- return getParent()->paramHasAttr(getArgNo()+1, Attribute::NoAlias);
+ return getParent()->getParamAttributes(getArgNo()+1).hasNoAliasAttr();
}
/// hasNoCaptureAttr - Return true if this argument has the nocapture attribute
/// on it in its containing function.
bool Argument::hasNoCaptureAttr() const {
if (!getType()->isPointerTy()) return false;
- return getParent()->paramHasAttr(getArgNo()+1, Attribute::NoCapture);
+ return getParent()->getParamAttributes(getArgNo()+1).hasNoCaptureAttr();
}
/// hasSRetAttr - Return true if this argument has the sret attribute on
@@ -114,7 +114,7 @@
if (!getType()->isPointerTy()) return false;
if (this != getParent()->arg_begin())
return false; // StructRet param must be first param
- return getParent()->paramHasAttr(1, Attribute::StructRet);
+ return getParent()->getParamAttributes(1).hasStructRetAttr();
}
/// addAttr - Add a Attribute to an argument
Modified: llvm/branches/R600/lib/VMCore/IRBuilder.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/VMCore/IRBuilder.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/VMCore/IRBuilder.cpp (original)
+++ llvm/branches/R600/lib/VMCore/IRBuilder.cpp Tue Oct 2 09:15:33 2012
@@ -80,7 +80,7 @@
CallInst *IRBuilderBase::
CreateMemCpy(Value *Dst, Value *Src, Value *Size, unsigned Align,
- bool isVolatile, MDNode *TBAATag) {
+ bool isVolatile, MDNode *TBAATag, MDNode *TBAAStructTag) {
Dst = getCastedInt8PtrValue(Dst);
Src = getCastedInt8PtrValue(Src);
@@ -94,6 +94,10 @@
// Set the TBAA info if present.
if (TBAATag)
CI->setMetadata(LLVMContext::MD_tbaa, TBAATag);
+
+ // Set the TBAA Struct info if present.
+ if (TBAAStructTag)
+ CI->setMetadata(LLVMContext::MD_tbaa_struct, TBAAStructTag);
return CI;
}
Modified: llvm/branches/R600/lib/VMCore/LLVMContextImpl.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/VMCore/LLVMContextImpl.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/VMCore/LLVMContextImpl.cpp (original)
+++ llvm/branches/R600/lib/VMCore/LLVMContextImpl.cpp Tue Oct 2 09:15:33 2012
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "LLVMContextImpl.h"
+#include "llvm/Attributes.h"
#include "llvm/Module.h"
#include "llvm/ADT/STLExtras.h"
#include <algorithm>
@@ -93,6 +94,11 @@
E = CDSConstants.end(); I != E; ++I)
delete I->second;
CDSConstants.clear();
+
+ // Destroy attributes.
+ for (FoldingSetIterator<AttributesImpl> I = AttrsSet.begin(),
+ E = AttrsSet.end(); I != E; ++I)
+ delete &*I;
// Destroy MDNodes. ~MDNode can move and remove nodes between the MDNodeSet
// and the NonUniquedMDNodes sets, so copy the values out first.
@@ -107,6 +113,7 @@
(*I)->destroy();
assert(MDNodeSet.empty() && NonUniquedMDNodes.empty() &&
"Destroying all MDNodes didn't empty the Context's sets.");
+
// Destroy MDStrings.
DeleteContainerSeconds(MDStringCache);
}
Modified: llvm/branches/R600/lib/VMCore/LLVMContextImpl.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/VMCore/LLVMContextImpl.h?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/VMCore/LLVMContextImpl.h (original)
+++ llvm/branches/R600/lib/VMCore/LLVMContextImpl.h Tue Oct 2 09:15:33 2012
@@ -16,6 +16,7 @@
#define LLVM_LLVMCONTEXT_IMPL_H
#include "llvm/LLVMContext.h"
+#include "AttributesImpl.h"
#include "ConstantsContext.h"
#include "LeaksContext.h"
#include "llvm/Constants.h"
@@ -253,10 +254,13 @@
typedef DenseMap<DenseMapAPFloatKeyInfo::KeyTy, ConstantFP*,
DenseMapAPFloatKeyInfo> FPMapTy;
FPMapTy FPConstants;
+
+ FoldingSet<AttributesImpl> AttrsSet;
StringMap<Value*> MDStringCache;
-
+
FoldingSet<MDNode> MDNodeSet;
+
// MDNodes may be uniqued or not uniqued. When they're not uniqued, they
// aren't in the MDNodeSet, but they're still shared between objects, so no
// one object can destroy them. This set allows us to at least destroy them
Modified: llvm/branches/R600/lib/VMCore/ValueTypes.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/VMCore/ValueTypes.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/VMCore/ValueTypes.cpp (original)
+++ llvm/branches/R600/lib/VMCore/ValueTypes.cpp Tue Oct 2 09:15:33 2012
@@ -56,31 +56,31 @@
}
bool EVT::isExtended16BitVector() const {
- return isExtendedVector() && getSizeInBits() == 16;
+ return isExtendedVector() && getExtendedSizeInBits() == 16;
}
bool EVT::isExtended32BitVector() const {
- return isExtendedVector() && getSizeInBits() == 32;
+ return isExtendedVector() && getExtendedSizeInBits() == 32;
}
bool EVT::isExtended64BitVector() const {
- return isExtendedVector() && getSizeInBits() == 64;
+ return isExtendedVector() && getExtendedSizeInBits() == 64;
}
bool EVT::isExtended128BitVector() const {
- return isExtendedVector() && getSizeInBits() == 128;
+ return isExtendedVector() && getExtendedSizeInBits() == 128;
}
bool EVT::isExtended256BitVector() const {
- return isExtendedVector() && getSizeInBits() == 256;
+ return isExtendedVector() && getExtendedSizeInBits() == 256;
}
bool EVT::isExtended512BitVector() const {
- return isExtendedVector() && getSizeInBits() == 512;
+ return isExtendedVector() && getExtendedSizeInBits() == 512;
}
bool EVT::isExtended1024BitVector() const {
- return isExtendedVector() && getSizeInBits() == 1024;
+ return isExtendedVector() && getExtendedSizeInBits() == 1024;
}
EVT EVT::getExtendedVectorElementType() const {
Modified: llvm/branches/R600/lib/VMCore/Verifier.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/VMCore/Verifier.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/lib/VMCore/Verifier.cpp (original)
+++ llvm/branches/R600/lib/VMCore/Verifier.cpp Tue Oct 2 09:15:33 2012
@@ -546,7 +546,7 @@
MutI.getAsString() + " are incompatible!", V);
}
- Attributes TypeI = Attrs & Attribute::typeIncompatible(Ty);
+ Attributes TypeI = Attrs & Attributes::typeIncompatible(Ty);
Assert1(!TypeI, "Wrong type for attribute " +
TypeI.getAsString(), V);
Added: llvm/branches/R600/test/Analysis/CallGraph/do-nothing-intrinsic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Analysis/CallGraph/do-nothing-intrinsic.ll?rev=165011&view=auto
==============================================================================
--- llvm/branches/R600/test/Analysis/CallGraph/do-nothing-intrinsic.ll (added)
+++ llvm/branches/R600/test/Analysis/CallGraph/do-nothing-intrinsic.ll Tue Oct 2 09:15:33 2012
@@ -0,0 +1,13 @@
+; RUN: opt < %s -basiccg
+; PR13903
+
+define void @main() {
+ invoke void @llvm.donothing()
+ to label %ret unwind label %unw
+unw:
+ %tmp = landingpad i8 personality i8 0 cleanup
+ br label %ret
+ret:
+ ret void
+}
+declare void @llvm.donothing() nounwind readnone
Modified: llvm/branches/R600/test/CodeGen/ARM/2010-12-07-PEIBug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/ARM/2010-12-07-PEIBug.ll?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/ARM/2010-12-07-PEIBug.ll (original)
+++ llvm/branches/R600/test/CodeGen/ARM/2010-12-07-PEIBug.ll Tue Oct 2 09:15:33 2012
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a9 | FileCheck %s
; rdar://8728956
define hidden void @foo() nounwind ssp {
Added: llvm/branches/R600/test/CodeGen/ARM/2012-05-04-vmov.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/ARM/2012-05-04-vmov.ll?rev=165011&view=auto
==============================================================================
--- llvm/branches/R600/test/CodeGen/ARM/2012-05-04-vmov.ll (added)
+++ llvm/branches/R600/test/CodeGen/ARM/2012-05-04-vmov.ll Tue Oct 2 09:15:33 2012
@@ -0,0 +1,11 @@
+; RUN: llc -O1 -march=arm -mcpu=cortex-a9 < %s | FileCheck -check-prefix=A9-CHECK %s
+; RUN: llc -O1 -march=arm -mcpu=swift < %s | FileCheck -check-prefix=SWIFT-CHECK %s
+; Check that swift doesn't use vmov.32. <rdar://problem/10453003>.
+
+define <2 x i32> @testuvec(<2 x i32> %A, <2 x i32> %B) nounwind {
+entry:
+ %div = udiv <2 x i32> %A, %B
+ ret <2 x i32> %div
+; A9-CHECK: vmov.32
+; SWIFT-CHECK-NOT: vmov.32
+}
Added: llvm/branches/R600/test/CodeGen/ARM/2012-05-10-PreferVMOVtoVDUP32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/ARM/2012-05-10-PreferVMOVtoVDUP32.ll?rev=165011&view=auto
==============================================================================
--- llvm/branches/R600/test/CodeGen/ARM/2012-05-10-PreferVMOVtoVDUP32.ll (added)
+++ llvm/branches/R600/test/CodeGen/ARM/2012-05-10-PreferVMOVtoVDUP32.ll Tue Oct 2 09:15:33 2012
@@ -0,0 +1,14 @@
+; RUN: llc -march=arm -mcpu=swift < %s | FileCheck %s
+; <rdar://problem/10451892>
+
+define void @f(i32 %x, i32* %p) nounwind ssp {
+entry:
+; CHECK-NOT: vdup.32
+ %vecinit.i = insertelement <2 x i32> undef, i32 %x, i32 0
+ %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %x, i32 1
+ %0 = bitcast i32* %p to i8*
+ tail call void @llvm.arm.neon.vst1.v2i32(i8* %0, <2 x i32> %vecinit1.i, i32 4)
+ ret void
+}
+
+declare void @llvm.arm.neon.vst1.v2i32(i8*, <2 x i32>, i32) nounwind
Added: llvm/branches/R600/test/CodeGen/ARM/2012-09-25-InlineAsmScalarToVectorConv.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/ARM/2012-09-25-InlineAsmScalarToVectorConv.ll?rev=165011&view=auto
==============================================================================
--- llvm/branches/R600/test/CodeGen/ARM/2012-09-25-InlineAsmScalarToVectorConv.ll (added)
+++ llvm/branches/R600/test/CodeGen/ARM/2012-09-25-InlineAsmScalarToVectorConv.ll Tue Oct 2 09:15:33 2012
@@ -0,0 +1,11 @@
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 2>&1 | FileCheck %s
+
+; Check for error message:
+; CHECK: non-trivial scalar-to-vector conversion, possible invalid constraint for vector type
+
+define void @f() nounwind ssp {
+ %1 = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } asm "vldm $4, { ${0:q}, ${1:q}, ${2:q}, ${3:q} }", "=r,=r,=r,=r,r"(i64* undef) nounwind, !srcloc !0
+ ret void
+}
+
+!0 = metadata !{i32 318437}
Added: llvm/branches/R600/test/CodeGen/ARM/2012-09-25-InlineAsmScalarToVectorConv2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/ARM/2012-09-25-InlineAsmScalarToVectorConv2.ll?rev=165011&view=auto
==============================================================================
--- llvm/branches/R600/test/CodeGen/ARM/2012-09-25-InlineAsmScalarToVectorConv2.ll (added)
+++ llvm/branches/R600/test/CodeGen/ARM/2012-09-25-InlineAsmScalarToVectorConv2.ll Tue Oct 2 09:15:33 2012
@@ -0,0 +1,11 @@
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 2>&1 | FileCheck %s
+
+; Check for error message:
+; CHECK: scalar-to-vector conversion failed, possible invalid constraint for vector type
+
+define hidden void @f(i32* %corr, i32 %order) nounwind ssp {
+ tail call void asm sideeffect "vst1.s32 { ${1:q}, ${2:q} }, [$0]", "r,{q0},{q1}"(i32* %corr, <2 x i64>* undef, <2 x i64>* undef) nounwind, !srcloc !0
+ ret void
+}
+
+!0 = metadata !{i32 257}
Added: llvm/branches/R600/test/CodeGen/ARM/atomicrmw_minmax.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/ARM/atomicrmw_minmax.ll?rev=165011&view=auto
==============================================================================
--- llvm/branches/R600/test/CodeGen/ARM/atomicrmw_minmax.ll (added)
+++ llvm/branches/R600/test/CodeGen/ARM/atomicrmw_minmax.ll Tue Oct 2 09:15:33 2012
@@ -0,0 +1,21 @@
+; RUN: llc -march=arm -mcpu=cortex-a9 < %s | FileCheck %s
+
+; CHECK: max:
+define i32 @max(i8 %ctx, i32* %ptr, i32 %val)
+{
+; CHECK: ldrex
+; CHECK: cmp [[old:r[0-9]*]], [[val:r[0-9]*]]
+; CHECK: movhi {{r[0-9]*}}, [[old]]
+ %old = atomicrmw umax i32* %ptr, i32 %val monotonic
+ ret i32 %old
+}
+
+; CHECK: min:
+define i32 @min(i8 %ctx, i32* %ptr, i32 %val)
+{
+; CHECK: ldrex
+; CHECK: cmp [[old:r[0-9]*]], [[val:r[0-9]*]]
+; CHECK: movlo {{r[0-9]*}}, [[old]]
+ %old = atomicrmw umin i32* %ptr, i32 %val monotonic
+ ret i32 %old
+}
Modified: llvm/branches/R600/test/CodeGen/ARM/avoid-cpsr-rmw.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/ARM/avoid-cpsr-rmw.ll?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/ARM/avoid-cpsr-rmw.ll (original)
+++ llvm/branches/R600/test/CodeGen/ARM/avoid-cpsr-rmw.ll Tue Oct 2 09:15:33 2012
@@ -1,4 +1,5 @@
; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a9 | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=swift | FileCheck %s
; Avoid some 's' 16-bit instruction which partially update CPSR (and add false
; dependency) when it isn't dependent on last CPSR defining instruction.
; rdar://8928208
Added: llvm/branches/R600/test/CodeGen/ARM/call-noret.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/ARM/call-noret.ll?rev=165011&view=auto
==============================================================================
--- llvm/branches/R600/test/CodeGen/ARM/call-noret.ll (added)
+++ llvm/branches/R600/test/CodeGen/ARM/call-noret.ll Tue Oct 2 09:15:33 2012
@@ -0,0 +1,39 @@
+; RUN: llc < %s -mtriple=armv7-apple-ios -mcpu=cortex-a8 | FileCheck %s -check-prefix=ARM
+; RUN: llc < %s -mtriple=armv7-apple-ios -mcpu=swift | FileCheck %s -check-prefix=SWIFT
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 | FileCheck %s -check-prefix=T2
+; rdar://8979299
+
+define void @t1() noreturn nounwind ssp {
+entry:
+; ARM: t1:
+; ARM: mov lr, pc
+; ARM: b _bar
+
+; SWIFT: t1:
+; SWIFT: mov lr, pc
+; SWIFT: b _bar
+
+; T2: t1:
+; T2: blx _bar
+ tail call void @bar() noreturn nounwind
+ unreachable
+}
+
+define void @t2() noreturn nounwind ssp {
+entry:
+; ARM: t2:
+; ARM: mov lr, pc
+; ARM: b _t1
+
+; SWIFT: t2:
+; SWIFT: mov lr, pc
+; SWIFT: b _t1
+
+; T2: t2:
+; T2: mov lr, pc
+; T2: b.w _t1
+ tail call void @t1() noreturn nounwind
+ unreachable
+}
+
+declare void @bar() noreturn
Modified: llvm/branches/R600/test/CodeGen/ARM/div.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/ARM/div.ll?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/ARM/div.ll (original)
+++ llvm/branches/R600/test/CodeGen/ARM/div.ll Tue Oct 2 09:15:33 2012
@@ -1,9 +1,13 @@
-; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s -check-prefix=CHECK-ARM
+; RUN: llc < %s -mtriple=arm-apple-ios -mcpu=cortex-a8 | FileCheck %s -check-prefix=CHECK-ARM
+; RUN: llc < %s -mtriple=arm-apple-ios -mcpu=swift | FileCheck %s -check-prefix=CHECK-SWIFT
define i32 @f1(i32 %a, i32 %b) {
entry:
; CHECK-ARM: f1
; CHECK-ARM: __divsi3
+
+; CHECK-SWIFT: f1
+; CHECK-SWIFT: sdiv
%tmp1 = sdiv i32 %a, %b ; <i32> [#uses=1]
ret i32 %tmp1
}
@@ -12,6 +16,9 @@
entry:
; CHECK-ARM: f2
; CHECK-ARM: __udivsi3
+
+; CHECK-SWIFT: f2
+; CHECK-SWIFT: udiv
%tmp1 = udiv i32 %a, %b ; <i32> [#uses=1]
ret i32 %tmp1
}
@@ -20,6 +27,10 @@
entry:
; CHECK-ARM: f3
; CHECK-ARM: __modsi3
+
+; CHECK-SWIFT: f3
+; CHECK-SWIFT: sdiv
+; CHECK-SWIFT: mls
%tmp1 = srem i32 %a, %b ; <i32> [#uses=1]
ret i32 %tmp1
}
@@ -28,6 +39,10 @@
entry:
; CHECK-ARM: f4
; CHECK-ARM: __umodsi3
+
+; CHECK-SWIFT: f4
+; CHECK-SWIFT: udiv
+; CHECK-SWIFT: mls
%tmp1 = urem i32 %a, %b ; <i32> [#uses=1]
ret i32 %tmp1
}
Modified: llvm/branches/R600/test/CodeGen/ARM/domain-conv-vmovs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/ARM/domain-conv-vmovs.ll?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/ARM/domain-conv-vmovs.ll (original)
+++ llvm/branches/R600/test/CodeGen/ARM/domain-conv-vmovs.ll Tue Oct 2 09:15:33 2012
@@ -79,8 +79,8 @@
; internal fault).
call void @bar()
; CHECL: bl bar
-; CHECK: vext.32
-; CHECK: vext.32
+; CHECK: vext.32
+; CHECK: vext.32
ret float %val
}
Modified: llvm/branches/R600/test/CodeGen/ARM/fabss.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/ARM/fabss.ll?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/ARM/fabss.ll (original)
+++ llvm/branches/R600/test/CodeGen/ARM/fabss.ll Tue Oct 2 09:15:33 2012
@@ -14,12 +14,12 @@
declare float @fabsf(float)
; VFP2: test:
-; VFP2: vabs.f32 s1, s1
+; VFP2: vabs.f32 s2, s2
; NFP1: test:
; NFP1: vabs.f32 d1, d1
; NFP0: test:
-; NFP0: vabs.f32 s1, s1
+; NFP0: vabs.f32 s2, s2
; CORTEXA8: test:
; CORTEXA8: vadd.f32 [[D1:d[0-9]+]]
Modified: llvm/branches/R600/test/CodeGen/ARM/fadds.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/ARM/fadds.ll?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/ARM/fadds.ll (original)
+++ llvm/branches/R600/test/CodeGen/ARM/fadds.ll Tue Oct 2 09:15:33 2012
@@ -10,14 +10,14 @@
}
; VFP2: test:
-; VFP2: vadd.f32 s0, s1, s0
+; VFP2: vadd.f32 s
; NFP1: test:
-; NFP1: vadd.f32 d0, d1, d0
+; NFP1: vadd.f32 d
; NFP0: test:
-; NFP0: vadd.f32 s0, s1, s0
+; NFP0: vadd.f32 s
; CORTEXA8: test:
-; CORTEXA8: vadd.f32 d0, d1, d0
+; CORTEXA8: vadd.f32 d
; CORTEXA9: test:
; CORTEXA9: vadd.f32 s{{.}}, s{{.}}, s{{.}}
Modified: llvm/branches/R600/test/CodeGen/ARM/fast-isel-pic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/ARM/fast-isel-pic.ll?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/ARM/fast-isel-pic.ll (original)
+++ llvm/branches/R600/test/CodeGen/ARM/fast-isel-pic.ll Tue Oct 2 09:15:33 2012
@@ -1,6 +1,8 @@
; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=pic -mtriple=arm-apple-ios | FileCheck %s --check-prefix=ARM
; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARMv7
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=pic -mtriple=thumbv7-none-linux-gnueabi | FileCheck %s --check-prefix=THUMB-ELF
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=pic -mtriple=armv7-none-linux-gnueabi | FileCheck %s --check-prefix=ARMv7-ELF
@g = global i32 0, align 4
@@ -10,6 +12,10 @@
; THUMB: movw [[reg0:r[0-9]+]],
; THUMB: movt [[reg0]],
; THUMB: add [[reg0]], pc
+; THUMB-ELF: LoadGV
+; THUMB-ELF: ldr.n r[[reg0:[0-9]+]],
+; THUMB-ELF: ldr.n r[[reg1:[0-9]+]],
+; THUMB-ELF: ldr r[[reg0]], [r[[reg1]], r[[reg0]]]
; ARM: LoadGV
; ARM: ldr [[reg1:r[0-9]+]],
; ARM: add [[reg1]], pc, [[reg1]]
@@ -17,6 +23,10 @@
; ARMv7: movw [[reg2:r[0-9]+]],
; ARMv7: movt [[reg2]],
; ARMv7: add [[reg2]], pc, [[reg2]]
+; ARMv7-ELF: LoadGV
+; ARMv7-ELF: ldr r[[reg2:[0-9]+]],
+; ARMv7-ELF: ldr r[[reg3:[0-9]+]],
+; ARMv7-ELF: ldr r[[reg2]], [r[[reg3]], r[[reg2]]]
%tmp = load i32* @g
ret i32 %tmp
}
@@ -30,6 +40,10 @@
; THUMB: movt r[[reg3]],
; THUMB: add r[[reg3]], pc
; THUMB: ldr r[[reg3]], [r[[reg3]]]
+; THUMB-ELF: LoadIndirectSymbol
+; THUMB-ELF: ldr.n r[[reg3:[0-9]+]],
+; THUMB-ELF: ldr.n r[[reg4:[0-9]+]],
+; THUMB-ELF: ldr r[[reg3]], [r[[reg4]], r[[reg3]]]
; ARM: LoadIndirectSymbol
; ARM: ldr [[reg4:r[0-9]+]],
; ARM: ldr [[reg4]], [pc, [[reg4]]]
@@ -38,6 +52,10 @@
; ARMv7: movt r[[reg5]],
; ARMv7: add r[[reg5]], pc, r[[reg5]]
; ARMv7: ldr r[[reg5]], [r[[reg5]]]
+; ARMv7-ELF: LoadIndirectSymbol
+; ARMv7-ELF: ldr r[[reg5:[0-9]+]],
+; ARMv7-ELF: ldr r[[reg6:[0-9]+]],
+; ARMv7-ELF: ldr r[[reg5]], [r[[reg6]], r[[reg5]]]
%tmp = load i32* @i
ret i32 %tmp
}
Modified: llvm/branches/R600/test/CodeGen/ARM/fdivs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/ARM/fdivs.ll?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/ARM/fdivs.ll (original)
+++ llvm/branches/R600/test/CodeGen/ARM/fdivs.ll Tue Oct 2 09:15:33 2012
@@ -10,14 +10,14 @@
}
; VFP2: test:
-; VFP2: vdiv.f32 s0, s1, s0
+; VFP2: vdiv.f32 s0, s2, s0
; NFP1: test:
-; NFP1: vdiv.f32 s0, s1, s0
+; NFP1: vdiv.f32 s0, s2, s0
; NFP0: test:
-; NFP0: vdiv.f32 s0, s1, s0
+; NFP0: vdiv.f32 s0, s2, s0
; CORTEXA8: test:
-; CORTEXA8: vdiv.f32 s0, s1, s0
+; CORTEXA8: vdiv.f32 s0, s2, s0
; CORTEXA9: test:
; CORTEXA9: vdiv.f32 s{{.}}, s{{.}}, s{{.}}
Modified: llvm/branches/R600/test/CodeGen/ARM/fmuls.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/ARM/fmuls.ll?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/ARM/fmuls.ll (original)
+++ llvm/branches/R600/test/CodeGen/ARM/fmuls.ll Tue Oct 2 09:15:33 2012
@@ -10,15 +10,15 @@
}
; VFP2: test:
-; VFP2: vmul.f32 s0, s1, s0
+; VFP2: vmul.f32 s
; NFP1: test:
-; NFP1: vmul.f32 d0, d1, d0
+; NFP1: vmul.f32 d
; NFP0: test:
-; NFP0: vmul.f32 s0, s1, s0
+; NFP0: vmul.f32 s
; CORTEXA8: test:
-; CORTEXA8: vmul.f32 d0, d1, d0
+; CORTEXA8: vmul.f32 d
; CORTEXA9: test:
; CORTEXA9: vmul.f32 s{{.}}, s{{.}}, s{{.}}
Modified: llvm/branches/R600/test/CodeGen/ARM/fp_convert.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/ARM/fp_convert.ll?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/ARM/fp_convert.ll (original)
+++ llvm/branches/R600/test/CodeGen/ARM/fp_convert.ll Tue Oct 2 09:15:33 2012
@@ -31,7 +31,7 @@
; VFP2: test3:
; VFP2: vcvt.f32.u32 s{{.}}, s{{.}}
; NEON: test3:
-; NEON: vcvt.f32.u32 d0, d0
+; NEON: vcvt.f32.u32 d
entry:
%0 = add i32 %a, %b
%1 = uitofp i32 %0 to float
@@ -42,7 +42,7 @@
; VFP2: test4:
; VFP2: vcvt.f32.s32 s{{.}}, s{{.}}
; NEON: test4:
-; NEON: vcvt.f32.s32 d0, d0
+; NEON: vcvt.f32.s32 d
entry:
%0 = add i32 %a, %b
%1 = sitofp i32 %0 to float
Modified: llvm/branches/R600/test/CodeGen/ARM/fsubs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/ARM/fsubs.ll?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/ARM/fsubs.ll (original)
+++ llvm/branches/R600/test/CodeGen/ARM/fsubs.ll Tue Oct 2 09:15:33 2012
@@ -8,6 +8,6 @@
ret float %0
}
-; VFP2: vsub.f32 s0, s1, s0
-; NFP1: vsub.f32 d0, d1, d0
-; NFP0: vsub.f32 s0, s1, s0
+; VFP2: vsub.f32 s
+; NFP1: vsub.f32 d
+; NFP0: vsub.f32 s
Modified: llvm/branches/R600/test/CodeGen/ARM/ifcvt1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/ARM/ifcvt1.ll?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/ARM/ifcvt1.ll (original)
+++ llvm/branches/R600/test/CodeGen/ARM/ifcvt1.ll Tue Oct 2 09:15:33 2012
@@ -1,17 +1,21 @@
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8
+; RUN: llc < %s -march=arm -mcpu=swift | FileCheck %s -check-prefix=SWIFT
define i32 @t1(i32 %a, i32 %b) {
-; CHECK: t1:
+; A8: t1:
+; SWIFT: t1:
%tmp2 = icmp eq i32 %a, 0
br i1 %tmp2, label %cond_false, label %cond_true
cond_true:
-; CHECK: subeq r0, r1, #1
+; A8: subeq r0, r1, #1
+; SWIFT: sub r0, r1, #1
%tmp5 = add i32 %b, 1
ret i32 %tmp5
cond_false:
-; CHECK: addne r0, r1, #1
+; A8: addne r0, r1, #1
+; SWIFT: addne r0, r1, #1
%tmp7 = add i32 %b, -1
ret i32 %tmp7
}
Added: llvm/branches/R600/test/CodeGen/ARM/ifcvt12.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/ARM/ifcvt12.ll?rev=165011&view=auto
==============================================================================
--- llvm/branches/R600/test/CodeGen/ARM/ifcvt12.ll (added)
+++ llvm/branches/R600/test/CodeGen/ARM/ifcvt12.ll Tue Oct 2 09:15:33 2012
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mtriple=arm-apple-darwin -mcpu=cortex-a8 | FileCheck %s
+define i32 @f1(i32 %a, i32 %b, i32 %c) {
+; CHECK: f1:
+; CHECK: mlsne r0, r0, r1, r2
+ %tmp1 = icmp eq i32 %a, 0
+ br i1 %tmp1, label %cond_false, label %cond_true
+
+cond_true:
+ %tmp2 = mul i32 %a, %b
+ %tmp3 = sub i32 %c, %tmp2
+ ret i32 %tmp3
+
+cond_false:
+ ret i32 %a
+}
Modified: llvm/branches/R600/test/CodeGen/ARM/ifcvt5.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/ARM/ifcvt5.ll?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/ARM/ifcvt5.ll (original)
+++ llvm/branches/R600/test/CodeGen/ARM/ifcvt5.ll Tue Oct 2 09:15:33 2012
@@ -1,4 +1,6 @@
-; RUN: llc < %s -mtriple=armv7-apple-ios | FileCheck %s
+; RUN: llc < %s -mtriple=armv7-apple-ios -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8
+; RUN: llc < %s -mtriple=armv7-apple-ios -mcpu=swift | FileCheck %s -check-prefix=SWIFT
+; rdar://8402126
@x = external global i32* ; <i32**> [#uses=1]
@@ -10,8 +12,12 @@
}
define i32 @t1(i32 %a, i32 %b) {
-; CHECK: t1:
-; CHECK: poplt {r7, pc}
+; A8: t1:
+; A8: poplt {r7, pc}
+
+; SWIFT: t1:
+; SWIFT: pop {r7, pc}
+; SWIFT: pop {r7, pc}
entry:
%tmp1 = icmp sgt i32 %a, 10 ; <i1> [#uses=1]
br i1 %tmp1, label %cond_true, label %UnifiedReturnBlock
Modified: llvm/branches/R600/test/CodeGen/ARM/ldr_post.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/ARM/ldr_post.ll?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/ARM/ldr_post.ll (original)
+++ llvm/branches/R600/test/CodeGen/ARM/ldr_post.ll Tue Oct 2 09:15:33 2012
@@ -1,4 +1,5 @@
; RUN: llc < %s -march=arm | FileCheck %s
+; RUN: llc < %s -march=arm -mcpu=swift | FileCheck %s
; CHECK: test1:
; CHECK: ldr {{.*, \[.*]}}, -r2
Modified: llvm/branches/R600/test/CodeGen/ARM/ldr_pre.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/ARM/ldr_pre.ll?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/ARM/ldr_pre.ll (original)
+++ llvm/branches/R600/test/CodeGen/ARM/ldr_pre.ll Tue Oct 2 09:15:33 2012
@@ -1,4 +1,5 @@
; RUN: llc < %s -march=arm | FileCheck %s
+; RUN: llc < %s -march=arm -mcpu=swift | FileCheck %s
; CHECK: test1:
; CHECK: ldr {{.*!}}
Modified: llvm/branches/R600/test/CodeGen/ARM/mls.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/ARM/mls.ll?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/ARM/mls.ll (original)
+++ llvm/branches/R600/test/CodeGen/ARM/mls.ll Tue Oct 2 09:15:33 2012
@@ -1,4 +1,5 @@
; RUN: llc < %s -march=arm -mattr=+v6t2 | FileCheck %s
+; RUN: llc < %s -march=arm -mattr=+v6t2 -arm-use-mulops=false | FileCheck %s -check-prefix=NO_MULOPS
define i32 @f1(i32 %a, i32 %b, i32 %c) {
%tmp1 = mul i32 %a, %b
@@ -13,4 +14,15 @@
ret i32 %tmp2
}
+; CHECK: f1:
; CHECK: mls r0, r0, r1, r2
+; NO_MULOPS: f1:
+; NO_MULOPS: mul r0, r0, r1
+; NO_MULOPS-NEXT: sub r0, r2, r0
+
+; CHECK: f2:
+; CHECK: mul r0, r0, r1
+; CHECK-NEXT: sub r0, r0, r2
+; NO_MULOPS: f2:
+; NO_MULOPS: mul r0, r0, r1
+; NO_MULOPS-NEXT: sub r0, r0, r2
Added: llvm/branches/R600/test/CodeGen/ARM/neon-fma.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/ARM/neon-fma.ll?rev=165011&view=auto
==============================================================================
--- llvm/branches/R600/test/CodeGen/ARM/neon-fma.ll (added)
+++ llvm/branches/R600/test/CodeGen/ARM/neon-fma.ll Tue Oct 2 09:15:33 2012
@@ -0,0 +1,22 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -mcpu=swift | FileCheck %s
+
+; CHECK: test_v2f32
+; CHECK: vfma.f32 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+
+define <2 x float> @test_v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind readnone ssp {
+entry:
+ %call = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind readnone
+ ret <2 x float> %call
+}
+
+; CHECK: test_v4f32
+; CHECK: vfma.f32 {{q[0-9]+}}, {{q[0-9]+}}, {{q[0-9]+}}
+
+define <4 x float> @test_v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) nounwind readnone ssp {
+entry:
+ %call = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) nounwind readnone
+ ret <4 x float> %call
+}
+
+declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) nounwind readnone
+declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
Modified: llvm/branches/R600/test/CodeGen/ARM/neon_ld2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/ARM/neon_ld2.ll?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/ARM/neon_ld2.ll (original)
+++ llvm/branches/R600/test/CodeGen/ARM/neon_ld2.ll Tue Oct 2 09:15:33 2012
@@ -1,10 +1,16 @@
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc < %s -march=arm -mcpu=swift | FileCheck %s --check-prefix=SWIFT
; CHECK: t1
; CHECK: vld1.64
; CHECK: vld1.64
; CHECK: vadd.i64 q
; CHECK: vst1.64
+; SWIFT: t1
+; SWIFT: vld1.64 {{.d[0-9]+, d[0-9]+}, \[r[0-9]+, :128\]}}
+; SWIFT: vld1.64 {{.d[0-9]+, d[0-9]+}, \[r[0-9]+, :128\]}}
+; SWIFT: vadd.i64 q
+; SWIFT: vst1.64 {{.d[0-9]+, d[0-9]+}, \[r[0-9]+, :128\]}}
define void @t1(<4 x i32>* %r, <2 x i64>* %a, <2 x i64>* %b) nounwind {
entry:
%0 = load <2 x i64>* %a, align 16 ; <<2 x i64>> [#uses=1]
@@ -21,6 +27,12 @@
; CHECK: vsub.i64 q
; CHECK: vmov r0, r1, d
; CHECK: vmov r2, r3, d
+; SWIFT: t2
+; SWIFT: vld1.64 {{.d[0-9]+, d[0-9]+}, \[r[0-9]+, :128\]}}
+; SWIFT: vld1.64 {{.d[0-9]+, d[0-9]+}, \[r[0-9]+, :128\]}}
+; SWIFT: vsub.i64 q
+; SWIFT: vmov r0, r1, d
+; SWIFT: vmov r2, r3, d
define <4 x i32> @t2(<2 x i64>* %a, <2 x i64>* %b) nounwind readonly {
entry:
%0 = load <2 x i64>* %a, align 16 ; <<2 x i64>> [#uses=1]
@@ -30,3 +42,18 @@
ret <4 x i32> %3
}
+; Limited alignment.
+; SWIFT: t3
+; SWIFT: vld1.64 {{.d[0-9]+, d[0-9]+}, \[r[0-9]+}}
+; SWIFT: vld1.64 {{.d[0-9]+, d[0-9]+}, \[r[0-9]+}}
+; SWIFT: vadd.i64 q
+; SWIFT: vst1.64 {{.d[0-9]+, d[0-9]+}, \[r[0-9]+}}
+define void @t3(<4 x i32>* %r, <2 x i64>* %a, <2 x i64>* %b) nounwind {
+entry:
+ %0 = load <2 x i64>* %a, align 8
+ %1 = load <2 x i64>* %b, align 8
+ %2 = add <2 x i64> %0, %1
+ %3 = bitcast <2 x i64> %2 to <4 x i32>
+ store <4 x i32> %3, <4 x i32>* %r, align 8
+ ret void
+}
Modified: llvm/branches/R600/test/CodeGen/ARM/opt-shuff-tstore.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/ARM/opt-shuff-tstore.ll?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/ARM/opt-shuff-tstore.ll (original)
+++ llvm/branches/R600/test/CodeGen/ARM/opt-shuff-tstore.ll Tue Oct 2 09:15:33 2012
@@ -2,7 +2,7 @@
; CHECK: func_4_8
; CHECK: vst1.32
-; CHECK-NEXT: bx lr
+; CHECK: bx lr
define void @func_4_8(<4 x i8> %param, <4 x i8>* %p) {
%r = add <4 x i8> %param, <i8 1, i8 2, i8 3, i8 4>
store <4 x i8> %r, <4 x i8>* %p
@@ -11,7 +11,7 @@
; CHECK: func_2_16
; CHECK: vst1.32
-; CHECK-NEXT: bx lr
+; CHECK: bx lr
define void @func_2_16(<2 x i16> %param, <2 x i16>* %p) {
%r = add <2 x i16> %param, <i16 1, i16 2>
store <2 x i16> %r, <2 x i16>* %p
Modified: llvm/branches/R600/test/CodeGen/ARM/reg_sequence.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/ARM/reg_sequence.ll?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/ARM/reg_sequence.ll (original)
+++ llvm/branches/R600/test/CodeGen/ARM/reg_sequence.ll Tue Oct 2 09:15:33 2012
@@ -124,7 +124,6 @@
return2:
; CHECK: %return2
; CHECK: vadd.i32
-; CHECK: vorr {{q[0-9]+}}, {{q[0-9]+}}
; CHECK-NOT: vmov
; CHECK: vst2.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}
%tmp100 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 0 ; <<4 x i32>> [#uses=1]
Modified: llvm/branches/R600/test/CodeGen/ARM/subreg-remat.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/ARM/subreg-remat.ll?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/ARM/subreg-remat.ll (original)
+++ llvm/branches/R600/test/CodeGen/ARM/subreg-remat.ll Tue Oct 2 09:15:33 2012
@@ -4,14 +4,14 @@
;
; The vector %v2 is built like this:
;
-; %vreg6:ssub_1<def> = VMOVSR %vreg0<kill>, pred:14, pred:%noreg, %vreg6<imp-def>; DPR_VFP2:%vreg6 GPR:%vreg0
+; %vreg6:ssub_1<def> = ...
; %vreg6:ssub_0<def> = VLDRS <cp#0>, 0, pred:14, pred:%noreg; mem:LD4[ConstantPool] DPR_VFP2:%vreg6
;
; When %vreg6 spills, the VLDRS constant pool load cannot be rematerialized
; since it implicitly reads the ssub_1 sub-register.
;
; CHECK: f1
-; CHECK: vmov s1, r0
+; CHECK: vmov d0, r0, r0
; CHECK: vldr s0, LCPI
; The vector must be spilled:
; CHECK: vstr d0,
Added: llvm/branches/R600/test/CodeGen/Mips/dsp-r1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/Mips/dsp-r1.ll?rev=165011&view=auto
==============================================================================
--- llvm/branches/R600/test/CodeGen/Mips/dsp-r1.ll (added)
+++ llvm/branches/R600/test/CodeGen/Mips/dsp-r1.ll Tue Oct 2 09:15:33 2012
@@ -0,0 +1,1241 @@
+; RUN: llc -march=mipsel -mattr=+dsp < %s | FileCheck %s
+
+define i32 @test__builtin_mips_extr_w1(i32 %i0, i32, i64 %a0) nounwind {
+entry:
+; CHECK: extr.w
+
+ %1 = tail call i32 @llvm.mips.extr.w(i64 %a0, i32 15)
+ ret i32 %1
+}
+
+declare i32 @llvm.mips.extr.w(i64, i32) nounwind
+
+define i32 @test__builtin_mips_extr_w2(i32 %i0, i32, i64 %a0, i32 %a1) nounwind {
+entry:
+; CHECK: extrv.w
+
+ %1 = tail call i32 @llvm.mips.extr.w(i64 %a0, i32 %a1)
+ ret i32 %1
+}
+
+define i32 @test__builtin_mips_extr_r_w1(i32 %i0, i32, i64 %a0) nounwind {
+entry:
+; CHECK: extr_r.w
+
+ %1 = tail call i32 @llvm.mips.extr.r.w(i64 %a0, i32 15)
+ ret i32 %1
+}
+
+declare i32 @llvm.mips.extr.r.w(i64, i32) nounwind
+
+define i32 @test__builtin_mips_extr_s_h1(i32 %i0, i32, i64 %a0, i32 %a1) nounwind {
+entry:
+; CHECK: extrv_s.h
+
+ %1 = tail call i32 @llvm.mips.extr.s.h(i64 %a0, i32 %a1)
+ ret i32 %1
+}
+
+declare i32 @llvm.mips.extr.s.h(i64, i32) nounwind
+
+define i32 @test__builtin_mips_extr_rs_w1(i32 %i0, i32, i64 %a0) nounwind {
+entry:
+; CHECK: extr_rs.w
+
+ %1 = tail call i32 @llvm.mips.extr.rs.w(i64 %a0, i32 15)
+ ret i32 %1
+}
+
+declare i32 @llvm.mips.extr.rs.w(i64, i32) nounwind
+
+define i32 @test__builtin_mips_extr_rs_w2(i32 %i0, i32, i64 %a0, i32 %a1) nounwind {
+entry:
+; CHECK: extrv_rs.w
+
+ %1 = tail call i32 @llvm.mips.extr.rs.w(i64 %a0, i32 %a1)
+ ret i32 %1
+}
+
+define i32 @test__builtin_mips_extr_s_h2(i32 %i0, i32, i64 %a0) nounwind {
+entry:
+; CHECK: extr_s.h
+
+ %1 = tail call i32 @llvm.mips.extr.s.h(i64 %a0, i32 15)
+ ret i32 %1
+}
+
+define i32 @test__builtin_mips_extr_r_w2(i32 %i0, i32, i64 %a0, i32 %a1) nounwind {
+entry:
+; CHECK: extrv_r.w
+
+ %1 = tail call i32 @llvm.mips.extr.r.w(i64 %a0, i32 %a1)
+ ret i32 %1
+}
+
+define i32 @test__builtin_mips_extp1(i32 %i0, i32, i64 %a0) nounwind {
+entry:
+; CHECK: extp ${{[0-9]+}}
+
+ %1 = tail call i32 @llvm.mips.extp(i64 %a0, i32 15)
+ ret i32 %1
+}
+
+declare i32 @llvm.mips.extp(i64, i32) nounwind
+
+define i32 @test__builtin_mips_extp2(i32 %i0, i32, i64 %a0, i32 %a1) nounwind {
+entry:
+; CHECK: extpv
+
+ %1 = tail call i32 @llvm.mips.extp(i64 %a0, i32 %a1)
+ ret i32 %1
+}
+
+define i32 @test__builtin_mips_extpdp1(i32 %i0, i32, i64 %a0) nounwind {
+entry:
+; CHECK: extpdp ${{[0-9]+}}
+
+ %1 = tail call i32 @llvm.mips.extpdp(i64 %a0, i32 15)
+ ret i32 %1
+}
+
+declare i32 @llvm.mips.extpdp(i64, i32) nounwind
+
+define i32 @test__builtin_mips_extpdp2(i32 %i0, i32, i64 %a0, i32 %a1) nounwind {
+entry:
+; CHECK: extpdpv
+
+ %1 = tail call i32 @llvm.mips.extpdp(i64 %a0, i32 %a1)
+ ret i32 %1
+}
+
+define i64 @test__builtin_mips_dpau_h_qbl1(i32 %i0, i32, i64 %a0, i32 %a1.coerce, i32 %a2.coerce) nounwind readnone {
+entry:
+; CHECK: dpau.h.qbl
+
+ %1 = bitcast i32 %a1.coerce to <4 x i8>
+ %2 = bitcast i32 %a2.coerce to <4 x i8>
+ %3 = tail call i64 @llvm.mips.dpau.h.qbl(i64 %a0, <4 x i8> %1, <4 x i8> %2)
+ ret i64 %3
+}
+
+declare i64 @llvm.mips.dpau.h.qbl(i64, <4 x i8>, <4 x i8>) nounwind readnone
+
+define i64 @test__builtin_mips_dpau_h_qbr1(i32 %i0, i32, i64 %a0, i32 %a1.coerce, i32 %a2.coerce) nounwind readnone {
+entry:
+; CHECK: dpau.h.qbr
+
+ %1 = bitcast i32 %a1.coerce to <4 x i8>
+ %2 = bitcast i32 %a2.coerce to <4 x i8>
+ %3 = tail call i64 @llvm.mips.dpau.h.qbr(i64 %a0, <4 x i8> %1, <4 x i8> %2)
+ ret i64 %3
+}
+
+declare i64 @llvm.mips.dpau.h.qbr(i64, <4 x i8>, <4 x i8>) nounwind readnone
+
+define i64 @test__builtin_mips_dpsu_h_qbl1(i32 %i0, i32, i64 %a0, i32 %a1.coerce, i32 %a2.coerce) nounwind readnone {
+entry:
+; CHECK: dpsu.h.qbl
+
+ %1 = bitcast i32 %a1.coerce to <4 x i8>
+ %2 = bitcast i32 %a2.coerce to <4 x i8>
+ %3 = tail call i64 @llvm.mips.dpsu.h.qbl(i64 %a0, <4 x i8> %1, <4 x i8> %2)
+ ret i64 %3
+}
+
+declare i64 @llvm.mips.dpsu.h.qbl(i64, <4 x i8>, <4 x i8>) nounwind readnone
+
+define i64 @test__builtin_mips_dpsu_h_qbr1(i32 %i0, i32, i64 %a0, i32 %a1.coerce, i32 %a2.coerce) nounwind readnone {
+entry:
+; CHECK: dpsu.h.qbr
+
+ %1 = bitcast i32 %a1.coerce to <4 x i8>
+ %2 = bitcast i32 %a2.coerce to <4 x i8>
+ %3 = tail call i64 @llvm.mips.dpsu.h.qbr(i64 %a0, <4 x i8> %1, <4 x i8> %2)
+ ret i64 %3
+}
+
+declare i64 @llvm.mips.dpsu.h.qbr(i64, <4 x i8>, <4 x i8>) nounwind readnone
+
+define i64 @test__builtin_mips_dpaq_s_w_ph1(i32 %i0, i32, i64 %a0, i32 %a1.coerce, i32 %a2.coerce) nounwind {
+entry:
+; CHECK: dpaq_s.w.ph
+
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = bitcast i32 %a2.coerce to <2 x i16>
+ %3 = tail call i64 @llvm.mips.dpaq.s.w.ph(i64 %a0, <2 x i16> %1, <2 x i16> %2)
+ ret i64 %3
+}
+
+declare i64 @llvm.mips.dpaq.s.w.ph(i64, <2 x i16>, <2 x i16>) nounwind
+
+define i64 @test__builtin_mips_dpaq_sa_l_w1(i32 %i0, i32, i64 %a0, i32 %a1, i32 %a2) nounwind {
+entry:
+; CHECK: dpaq_sa.l.w
+
+ %1 = tail call i64 @llvm.mips.dpaq.sa.l.w(i64 %a0, i32 %a1, i32 %a2)
+ ret i64 %1
+}
+
+declare i64 @llvm.mips.dpaq.sa.l.w(i64, i32, i32) nounwind
+
+define i64 @test__builtin_mips_dpsq_s_w_ph1(i32 %i0, i32, i64 %a0, i32 %a1.coerce, i32 %a2.coerce) nounwind {
+entry:
+; CHECK: dpsq_s.w.ph
+
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = bitcast i32 %a2.coerce to <2 x i16>
+ %3 = tail call i64 @llvm.mips.dpsq.s.w.ph(i64 %a0, <2 x i16> %1, <2 x i16> %2)
+ ret i64 %3
+}
+
+declare i64 @llvm.mips.dpsq.s.w.ph(i64, <2 x i16>, <2 x i16>) nounwind
+
+define i64 @test__builtin_mips_dpsq_sa_l_w1(i32 %i0, i32, i64 %a0, i32 %a1, i32 %a2) nounwind {
+entry:
+; CHECK: dpsq_sa.l.w
+
+ %1 = tail call i64 @llvm.mips.dpsq.sa.l.w(i64 %a0, i32 %a1, i32 %a2)
+ ret i64 %1
+}
+
+declare i64 @llvm.mips.dpsq.sa.l.w(i64, i32, i32) nounwind
+
+define i64 @test__builtin_mips_mulsaq_s_w_ph1(i32 %i0, i32, i64 %a0, i32 %a1.coerce, i32 %a2.coerce) nounwind {
+entry:
+; CHECK: mulsaq_s.w.ph
+
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = bitcast i32 %a2.coerce to <2 x i16>
+ %3 = tail call i64 @llvm.mips.mulsaq.s.w.ph(i64 %a0, <2 x i16> %1, <2 x i16> %2)
+ ret i64 %3
+}
+
+declare i64 @llvm.mips.mulsaq.s.w.ph(i64, <2 x i16>, <2 x i16>) nounwind
+
+define i64 @test__builtin_mips_maq_s_w_phl1(i32 %i0, i32, i64 %a0, i32 %a1.coerce, i32 %a2.coerce) nounwind {
+entry:
+; CHECK: maq_s.w.phl
+
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = bitcast i32 %a2.coerce to <2 x i16>
+ %3 = tail call i64 @llvm.mips.maq.s.w.phl(i64 %a0, <2 x i16> %1, <2 x i16> %2)
+ ret i64 %3
+}
+
+declare i64 @llvm.mips.maq.s.w.phl(i64, <2 x i16>, <2 x i16>) nounwind
+
+define i64 @test__builtin_mips_maq_s_w_phr1(i32 %i0, i32, i64 %a0, i32 %a1.coerce, i32 %a2.coerce) nounwind {
+entry:
+; CHECK: maq_s.w.phr
+
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = bitcast i32 %a2.coerce to <2 x i16>
+ %3 = tail call i64 @llvm.mips.maq.s.w.phr(i64 %a0, <2 x i16> %1, <2 x i16> %2)
+ ret i64 %3
+}
+
+declare i64 @llvm.mips.maq.s.w.phr(i64, <2 x i16>, <2 x i16>) nounwind
+
+define i64 @test__builtin_mips_maq_sa_w_phl1(i32 %i0, i32, i64 %a0, i32 %a1.coerce, i32 %a2.coerce) nounwind {
+entry:
+; CHECK: maq_sa.w.phl
+
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = bitcast i32 %a2.coerce to <2 x i16>
+ %3 = tail call i64 @llvm.mips.maq.sa.w.phl(i64 %a0, <2 x i16> %1, <2 x i16> %2)
+ ret i64 %3
+}
+
+declare i64 @llvm.mips.maq.sa.w.phl(i64, <2 x i16>, <2 x i16>) nounwind
+
+define i64 @test__builtin_mips_maq_sa_w_phr1(i32 %i0, i32, i64 %a0, i32 %a1.coerce, i32 %a2.coerce) nounwind {
+entry:
+; CHECK: maq_sa.w.phr
+
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = bitcast i32 %a2.coerce to <2 x i16>
+ %3 = tail call i64 @llvm.mips.maq.sa.w.phr(i64 %a0, <2 x i16> %1, <2 x i16> %2)
+ ret i64 %3
+}
+
+declare i64 @llvm.mips.maq.sa.w.phr(i64, <2 x i16>, <2 x i16>) nounwind
+
+define i64 @test__builtin_mips_shilo1(i32 %i0, i32, i64 %a0) nounwind readnone {
+entry:
+; CHECK: shilo $ac{{[0-9]}}
+
+ %1 = tail call i64 @llvm.mips.shilo(i64 %a0, i32 0)
+ ret i64 %1
+}
+
+declare i64 @llvm.mips.shilo(i64, i32) nounwind readnone
+
+define i64 @test__builtin_mips_shilo2(i32 %i0, i32, i64 %a0, i32 %a1) nounwind readnone {
+entry:
+; CHECK: shilov
+
+ %1 = tail call i64 @llvm.mips.shilo(i64 %a0, i32 %a1)
+ ret i64 %1
+}
+
+define i64 @test__builtin_mips_mthlip1(i32 %i0, i32, i64 %a0, i32 %a1) nounwind {
+entry:
+; CHECK: mthlip ${{[0-9]+}}
+
+ %1 = tail call i64 @llvm.mips.mthlip(i64 %a0, i32 %a1)
+ ret i64 %1
+}
+
+declare i64 @llvm.mips.mthlip(i64, i32) nounwind
+
+define i32 @test__builtin_mips_bposge321(i32 %i0) nounwind readonly {
+entry:
+; CHECK: bposge32 $BB{{[0-9]+}}
+
+ %0 = tail call i32 @llvm.mips.bposge32()
+ ret i32 %0
+}
+
+declare i32 @llvm.mips.bposge32() nounwind readonly
+
+define i64 @test__builtin_mips_madd1(i32 %i0, i32, i64 %a0, i32 %a1, i32 %a2) nounwind readnone {
+entry:
+; CHECK: madd $ac{{[0-9]}}
+
+ %1 = tail call i64 @llvm.mips.madd(i64 %a0, i32 %a1, i32 %a2)
+ ret i64 %1
+}
+
+declare i64 @llvm.mips.madd(i64, i32, i32) nounwind readnone
+
+define i64 @test__builtin_mips_maddu1(i32 %i0, i32, i64 %a0, i32 %a1, i32 %a2) nounwind readnone {
+entry:
+; CHECK: maddu $ac{{[0-9]}}
+
+ %1 = tail call i64 @llvm.mips.maddu(i64 %a0, i32 %a1, i32 %a2)
+ ret i64 %1
+}
+
+declare i64 @llvm.mips.maddu(i64, i32, i32) nounwind readnone
+
+define i64 @test__builtin_mips_msub1(i32 %i0, i32, i64 %a0, i32 %a1, i32 %a2) nounwind readnone {
+entry:
+; CHECK: msub $ac{{[0-9]}}
+
+ %1 = tail call i64 @llvm.mips.msub(i64 %a0, i32 %a1, i32 %a2)
+ ret i64 %1
+}
+
+declare i64 @llvm.mips.msub(i64, i32, i32) nounwind readnone
+
+define i64 @test__builtin_mips_msubu1(i32 %i0, i32, i64 %a0, i32 %a1, i32 %a2) nounwind readnone {
+entry:
+; CHECK: msubu $ac{{[0-9]}}
+
+ %1 = tail call i64 @llvm.mips.msubu(i64 %a0, i32 %a1, i32 %a2)
+ ret i64 %1
+}
+
+declare i64 @llvm.mips.msubu(i64, i32, i32) nounwind readnone
+
+define i64 @test__builtin_mips_mult1(i32 %i0, i32 %a0, i32 %a1) nounwind readnone {
+entry:
+; CHECK: mult $ac{{[0-9]}}
+
+ %0 = tail call i64 @llvm.mips.mult(i32 %a0, i32 %a1)
+ ret i64 %0
+}
+
+declare i64 @llvm.mips.mult(i32, i32) nounwind readnone
+
+define i64 @test__builtin_mips_multu1(i32 %i0, i32 %a0, i32 %a1) nounwind readnone {
+entry:
+; CHECK: multu $ac{{[0-9]}}
+
+ %0 = tail call i64 @llvm.mips.multu(i32 %a0, i32 %a1)
+ ret i64 %0
+}
+
+declare i64 @llvm.mips.multu(i32, i32) nounwind readnone
+
+define { i32 } @test__builtin_mips_addq_ph1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind {
+entry:
+; CHECK: addq.ph
+
+ %0 = bitcast i32 %a0.coerce to <2 x i16>
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = tail call <2 x i16> @llvm.mips.addq.ph(<2 x i16> %0, <2 x i16> %1)
+ %3 = bitcast <2 x i16> %2 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <2 x i16> @llvm.mips.addq.ph(<2 x i16>, <2 x i16>) nounwind
+
+define { i32 } @test__builtin_mips_addq_s_ph1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind {
+entry:
+; CHECK: addq_s.ph
+
+ %0 = bitcast i32 %a0.coerce to <2 x i16>
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = tail call <2 x i16> @llvm.mips.addq.s.ph(<2 x i16> %0, <2 x i16> %1)
+ %3 = bitcast <2 x i16> %2 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <2 x i16> @llvm.mips.addq.s.ph(<2 x i16>, <2 x i16>) nounwind
+
+define i32 @test__builtin_mips_addq_s_w1(i32 %i0, i32 %a0, i32 %a1) nounwind {
+entry:
+; CHECK: addq_s.w
+
+ %0 = tail call i32 @llvm.mips.addq.s.w(i32 %a0, i32 %a1)
+ ret i32 %0
+}
+
+declare i32 @llvm.mips.addq.s.w(i32, i32) nounwind
+
+define { i32 } @test__builtin_mips_addu_qb1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind {
+entry:
+; CHECK: addu.qb
+
+ %0 = bitcast i32 %a0.coerce to <4 x i8>
+ %1 = bitcast i32 %a1.coerce to <4 x i8>
+ %2 = tail call <4 x i8> @llvm.mips.addu.qb(<4 x i8> %0, <4 x i8> %1)
+ %3 = bitcast <4 x i8> %2 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <4 x i8> @llvm.mips.addu.qb(<4 x i8>, <4 x i8>) nounwind
+
+define { i32 } @test__builtin_mips_addu_s_qb1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind {
+entry:
+; CHECK: addu_s.qb
+
+ %0 = bitcast i32 %a0.coerce to <4 x i8>
+ %1 = bitcast i32 %a1.coerce to <4 x i8>
+ %2 = tail call <4 x i8> @llvm.mips.addu.s.qb(<4 x i8> %0, <4 x i8> %1)
+ %3 = bitcast <4 x i8> %2 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <4 x i8> @llvm.mips.addu.s.qb(<4 x i8>, <4 x i8>) nounwind
+
+define { i32 } @test__builtin_mips_subq_ph1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind {
+entry:
+; CHECK: subq.ph
+
+ %0 = bitcast i32 %a0.coerce to <2 x i16>
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = tail call <2 x i16> @llvm.mips.subq.ph(<2 x i16> %0, <2 x i16> %1)
+ %3 = bitcast <2 x i16> %2 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <2 x i16> @llvm.mips.subq.ph(<2 x i16>, <2 x i16>) nounwind
+
+define { i32 } @test__builtin_mips_subq_s_ph1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind {
+entry:
+; CHECK: subq_s.ph
+
+ %0 = bitcast i32 %a0.coerce to <2 x i16>
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = tail call <2 x i16> @llvm.mips.subq.s.ph(<2 x i16> %0, <2 x i16> %1)
+ %3 = bitcast <2 x i16> %2 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <2 x i16> @llvm.mips.subq.s.ph(<2 x i16>, <2 x i16>) nounwind
+
+define i32 @test__builtin_mips_subq_s_w1(i32 %i0, i32 %a0, i32 %a1) nounwind {
+entry:
+; CHECK: subq_s.w
+
+ %0 = tail call i32 @llvm.mips.subq.s.w(i32 %a0, i32 %a1)
+ ret i32 %0
+}
+
+declare i32 @llvm.mips.subq.s.w(i32, i32) nounwind
+
+define { i32 } @test__builtin_mips_subu_qb1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind {
+entry:
+; CHECK: subu.qb
+
+ %0 = bitcast i32 %a0.coerce to <4 x i8>
+ %1 = bitcast i32 %a1.coerce to <4 x i8>
+ %2 = tail call <4 x i8> @llvm.mips.subu.qb(<4 x i8> %0, <4 x i8> %1)
+ %3 = bitcast <4 x i8> %2 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <4 x i8> @llvm.mips.subu.qb(<4 x i8>, <4 x i8>) nounwind
+
+define { i32 } @test__builtin_mips_subu_s_qb1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind {
+entry:
+; CHECK: subu_s.qb
+
+ %0 = bitcast i32 %a0.coerce to <4 x i8>
+ %1 = bitcast i32 %a1.coerce to <4 x i8>
+ %2 = tail call <4 x i8> @llvm.mips.subu.s.qb(<4 x i8> %0, <4 x i8> %1)
+ %3 = bitcast <4 x i8> %2 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <4 x i8> @llvm.mips.subu.s.qb(<4 x i8>, <4 x i8>) nounwind
+
+define i32 @test__builtin_mips_addsc1(i32 %i0, i32 %a0, i32 %a1) nounwind {
+entry:
+; CHECK: addsc ${{[0-9]+}}
+
+ %0 = tail call i32 @llvm.mips.addsc(i32 %a0, i32 %a1)
+ ret i32 %0
+}
+
+declare i32 @llvm.mips.addsc(i32, i32) nounwind
+
+define i32 @test__builtin_mips_addwc1(i32 %i0, i32 %a0, i32 %a1) nounwind {
+entry:
+; CHECK: addwc ${{[0-9]+}}
+
+ %0 = tail call i32 @llvm.mips.addwc(i32 %a0, i32 %a1)
+ ret i32 %0
+}
+
+declare i32 @llvm.mips.addwc(i32, i32) nounwind
+
+define i32 @test__builtin_mips_modsub1(i32 %i0, i32 %a0, i32 %a1) nounwind readnone {
+entry:
+; CHECK: modsub ${{[0-9]+}}
+
+ %0 = tail call i32 @llvm.mips.modsub(i32 %a0, i32 %a1)
+ ret i32 %0
+}
+
+declare i32 @llvm.mips.modsub(i32, i32) nounwind readnone
+
+define i32 @test__builtin_mips_raddu_w_qb1(i32 %i0, i32 %a0.coerce) nounwind readnone {
+entry:
+; CHECK: raddu.w.qb
+
+ %0 = bitcast i32 %a0.coerce to <4 x i8>
+ %1 = tail call i32 @llvm.mips.raddu.w.qb(<4 x i8> %0)
+ ret i32 %1
+}
+
+declare i32 @llvm.mips.raddu.w.qb(<4 x i8>) nounwind readnone
+
+define { i32 } @test__builtin_mips_muleu_s_ph_qbl1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind {
+entry:
+; CHECK: muleu_s.ph.qbl
+
+ %0 = bitcast i32 %a0.coerce to <4 x i8>
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = tail call <2 x i16> @llvm.mips.muleu.s.ph.qbl(<4 x i8> %0, <2 x i16> %1)
+ %3 = bitcast <2 x i16> %2 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <2 x i16> @llvm.mips.muleu.s.ph.qbl(<4 x i8>, <2 x i16>) nounwind
+
+define { i32 } @test__builtin_mips_muleu_s_ph_qbr1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind {
+entry:
+; CHECK: muleu_s.ph.qbr
+
+ %0 = bitcast i32 %a0.coerce to <4 x i8>
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = tail call <2 x i16> @llvm.mips.muleu.s.ph.qbr(<4 x i8> %0, <2 x i16> %1)
+ %3 = bitcast <2 x i16> %2 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <2 x i16> @llvm.mips.muleu.s.ph.qbr(<4 x i8>, <2 x i16>) nounwind
+
+define { i32 } @test__builtin_mips_mulq_rs_ph1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind {
+entry:
+; CHECK: mulq_rs.ph
+
+ %0 = bitcast i32 %a0.coerce to <2 x i16>
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = tail call <2 x i16> @llvm.mips.mulq.rs.ph(<2 x i16> %0, <2 x i16> %1)
+ %3 = bitcast <2 x i16> %2 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <2 x i16> @llvm.mips.mulq.rs.ph(<2 x i16>, <2 x i16>) nounwind
+
+define i32 @test__builtin_mips_muleq_s_w_phl1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind {
+entry:
+; CHECK: muleq_s.w.phl
+
+ %0 = bitcast i32 %a0.coerce to <2 x i16>
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = tail call i32 @llvm.mips.muleq.s.w.phl(<2 x i16> %0, <2 x i16> %1)
+ ret i32 %2
+}
+
+declare i32 @llvm.mips.muleq.s.w.phl(<2 x i16>, <2 x i16>) nounwind
+
+define i32 @test__builtin_mips_muleq_s_w_phr1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind {
+entry:
+; CHECK: muleq_s.w.phr
+
+ %0 = bitcast i32 %a0.coerce to <2 x i16>
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = tail call i32 @llvm.mips.muleq.s.w.phr(<2 x i16> %0, <2 x i16> %1)
+ ret i32 %2
+}
+
+declare i32 @llvm.mips.muleq.s.w.phr(<2 x i16>, <2 x i16>) nounwind
+
+define { i32 } @test__builtin_mips_precrq_qb_ph1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind readnone {
+entry:
+; CHECK: precrq.qb.ph
+
+ %0 = bitcast i32 %a0.coerce to <2 x i16>
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = tail call <4 x i8> @llvm.mips.precrq.qb.ph(<2 x i16> %0, <2 x i16> %1)
+ %3 = bitcast <4 x i8> %2 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <4 x i8> @llvm.mips.precrq.qb.ph(<2 x i16>, <2 x i16>) nounwind readnone
+
+define { i32 } @test__builtin_mips_precrq_ph_w1(i32 %i0, i32 %a0, i32 %a1) nounwind readnone {
+entry:
+; CHECK: precrq.ph.w
+
+ %0 = tail call <2 x i16> @llvm.mips.precrq.ph.w(i32 %a0, i32 %a1)
+ %1 = bitcast <2 x i16> %0 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %1, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <2 x i16> @llvm.mips.precrq.ph.w(i32, i32) nounwind readnone
+
+define { i32 } @test__builtin_mips_precrq_rs_ph_w1(i32 %i0, i32 %a0, i32 %a1) nounwind {
+entry:
+; CHECK: precrq_rs.ph.w
+
+ %0 = tail call <2 x i16> @llvm.mips.precrq.rs.ph.w(i32 %a0, i32 %a1)
+ %1 = bitcast <2 x i16> %0 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %1, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <2 x i16> @llvm.mips.precrq.rs.ph.w(i32, i32) nounwind
+
+define { i32 } @test__builtin_mips_precrqu_s_qb_ph1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind {
+entry:
+; CHECK: precrqu_s.qb.ph
+
+ %0 = bitcast i32 %a0.coerce to <2 x i16>
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = tail call <4 x i8> @llvm.mips.precrqu.s.qb.ph(<2 x i16> %0, <2 x i16> %1)
+ %3 = bitcast <4 x i8> %2 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <4 x i8> @llvm.mips.precrqu.s.qb.ph(<2 x i16>, <2 x i16>) nounwind
+
+
+define i32 @test__builtin_mips_cmpu_eq_qb1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind {
+entry:
+; CHECK: cmpu.eq.qb
+
+ %0 = bitcast i32 %a0.coerce to <4 x i8>
+ %1 = bitcast i32 %a1.coerce to <4 x i8>
+ tail call void @llvm.mips.cmpu.eq.qb(<4 x i8> %0, <4 x i8> %1)
+ %2 = tail call i32 @llvm.mips.rddsp(i32 31)
+ ret i32 %2
+}
+
+declare void @llvm.mips.cmpu.eq.qb(<4 x i8>, <4 x i8>) nounwind
+
+declare i32 @llvm.mips.rddsp(i32) nounwind readonly
+
+define i32 @test__builtin_mips_cmpu_lt_qb1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind {
+entry:
+; CHECK: cmpu.lt.qb
+
+ %0 = bitcast i32 %a0.coerce to <4 x i8>
+ %1 = bitcast i32 %a1.coerce to <4 x i8>
+ tail call void @llvm.mips.cmpu.lt.qb(<4 x i8> %0, <4 x i8> %1)
+ %2 = tail call i32 @llvm.mips.rddsp(i32 31)
+ ret i32 %2
+}
+
+declare void @llvm.mips.cmpu.lt.qb(<4 x i8>, <4 x i8>) nounwind
+
+define i32 @test__builtin_mips_cmpu_le_qb1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind {
+entry:
+; CHECK: cmpu.le.qb
+
+ %0 = bitcast i32 %a0.coerce to <4 x i8>
+ %1 = bitcast i32 %a1.coerce to <4 x i8>
+ tail call void @llvm.mips.cmpu.le.qb(<4 x i8> %0, <4 x i8> %1)
+ %2 = tail call i32 @llvm.mips.rddsp(i32 31)
+ ret i32 %2
+}
+
+declare void @llvm.mips.cmpu.le.qb(<4 x i8>, <4 x i8>) nounwind
+
+define i32 @test__builtin_mips_cmpgu_eq_qb1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind {
+entry:
+; CHECK: cmpgu.eq.qb
+
+ %0 = bitcast i32 %a0.coerce to <4 x i8>
+ %1 = bitcast i32 %a1.coerce to <4 x i8>
+ %2 = tail call i32 @llvm.mips.cmpgu.eq.qb(<4 x i8> %0, <4 x i8> %1)
+ ret i32 %2
+}
+
+declare i32 @llvm.mips.cmpgu.eq.qb(<4 x i8>, <4 x i8>) nounwind
+
+define i32 @test__builtin_mips_cmpgu_lt_qb1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind {
+entry:
+; CHECK: cmpgu.lt.qb
+
+ %0 = bitcast i32 %a0.coerce to <4 x i8>
+ %1 = bitcast i32 %a1.coerce to <4 x i8>
+ %2 = tail call i32 @llvm.mips.cmpgu.lt.qb(<4 x i8> %0, <4 x i8> %1)
+ ret i32 %2
+}
+
+declare i32 @llvm.mips.cmpgu.lt.qb(<4 x i8>, <4 x i8>) nounwind
+
+define i32 @test__builtin_mips_cmpgu_le_qb1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind {
+entry:
+; CHECK: cmpgu.le.qb
+
+ %0 = bitcast i32 %a0.coerce to <4 x i8>
+ %1 = bitcast i32 %a1.coerce to <4 x i8>
+ %2 = tail call i32 @llvm.mips.cmpgu.le.qb(<4 x i8> %0, <4 x i8> %1)
+ ret i32 %2
+}
+
+declare i32 @llvm.mips.cmpgu.le.qb(<4 x i8>, <4 x i8>) nounwind
+
+define i32 @test__builtin_mips_cmp_eq_ph1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind {
+entry:
+; CHECK: cmp.eq.ph
+
+ %0 = bitcast i32 %a0.coerce to <2 x i16>
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ tail call void @llvm.mips.cmp.eq.ph(<2 x i16> %0, <2 x i16> %1)
+ %2 = tail call i32 @llvm.mips.rddsp(i32 31)
+ ret i32 %2
+}
+
+declare void @llvm.mips.cmp.eq.ph(<2 x i16>, <2 x i16>) nounwind
+
+define i32 @test__builtin_mips_cmp_lt_ph1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind {
+entry:
+; CHECK: cmp.lt.ph
+
+ %0 = bitcast i32 %a0.coerce to <2 x i16>
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ tail call void @llvm.mips.cmp.lt.ph(<2 x i16> %0, <2 x i16> %1)
+ %2 = tail call i32 @llvm.mips.rddsp(i32 31)
+ ret i32 %2
+}
+
+declare void @llvm.mips.cmp.lt.ph(<2 x i16>, <2 x i16>) nounwind
+
+define i32 @test__builtin_mips_cmp_le_ph1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind {
+entry:
+; CHECK: cmp.le.ph
+
+ %0 = bitcast i32 %a0.coerce to <2 x i16>
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ tail call void @llvm.mips.cmp.le.ph(<2 x i16> %0, <2 x i16> %1)
+ %2 = tail call i32 @llvm.mips.rddsp(i32 31)
+ ret i32 %2
+}
+
+declare void @llvm.mips.cmp.le.ph(<2 x i16>, <2 x i16>) nounwind
+
+define { i32 } @test__builtin_mips_pick_qb1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind readonly {
+entry:
+; CHECK: pick.qb
+
+ %0 = bitcast i32 %a0.coerce to <4 x i8>
+ %1 = bitcast i32 %a1.coerce to <4 x i8>
+ %2 = tail call <4 x i8> @llvm.mips.pick.qb(<4 x i8> %0, <4 x i8> %1)
+ %3 = bitcast <4 x i8> %2 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <4 x i8> @llvm.mips.pick.qb(<4 x i8>, <4 x i8>) nounwind readonly
+
+define { i32 } @test__builtin_mips_pick_ph1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind readonly {
+entry:
+; CHECK: pick.ph
+
+ %0 = bitcast i32 %a0.coerce to <2 x i16>
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = tail call <2 x i16> @llvm.mips.pick.ph(<2 x i16> %0, <2 x i16> %1)
+ %3 = bitcast <2 x i16> %2 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <2 x i16> @llvm.mips.pick.ph(<2 x i16>, <2 x i16>) nounwind readonly
+
+define { i32 } @test__builtin_mips_packrl_ph1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind readnone {
+entry:
+; CHECK: packrl.ph
+
+ %0 = bitcast i32 %a0.coerce to <2 x i16>
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = tail call <2 x i16> @llvm.mips.packrl.ph(<2 x i16> %0, <2 x i16> %1)
+ %3 = bitcast <2 x i16> %2 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <2 x i16> @llvm.mips.packrl.ph(<2 x i16>, <2 x i16>) nounwind readnone
+
+define i32 @test__builtin_mips_rddsp1(i32 %i0) nounwind readonly {
+entry:
+; CHECK: rddsp ${{[0-9]+}}
+
+ %0 = tail call i32 @llvm.mips.rddsp(i32 31)
+ ret i32 %0
+}
+
+define { i32 } @test__builtin_mips_shll_qb1(i32 %i0, i32 %a0.coerce) nounwind {
+entry:
+; CHECK: shll.qb
+
+ %0 = bitcast i32 %a0.coerce to <4 x i8>
+ %1 = tail call <4 x i8> @llvm.mips.shll.qb(<4 x i8> %0, i32 3)
+ %2 = bitcast <4 x i8> %1 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <4 x i8> @llvm.mips.shll.qb(<4 x i8>, i32) nounwind
+
+define { i32 } @test__builtin_mips_shll_qb2(i32 %i0, i32 %a0.coerce, i32 %a1) nounwind {
+entry:
+; CHECK: shllv.qb
+
+ %0 = bitcast i32 %a0.coerce to <4 x i8>
+ %1 = tail call <4 x i8> @llvm.mips.shll.qb(<4 x i8> %0, i32 %a1)
+ %2 = bitcast <4 x i8> %1 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+ ret { i32 } %.fca.0.insert
+}
+
+define { i32 } @test__builtin_mips_shll_ph1(i32 %i0, i32 %a0.coerce) nounwind {
+entry:
+; CHECK: shll.ph
+
+ %0 = bitcast i32 %a0.coerce to <2 x i16>
+ %1 = tail call <2 x i16> @llvm.mips.shll.ph(<2 x i16> %0, i32 7)
+ %2 = bitcast <2 x i16> %1 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <2 x i16> @llvm.mips.shll.ph(<2 x i16>, i32) nounwind
+
+define { i32 } @test__builtin_mips_shll_ph2(i32 %i0, i32 %a0.coerce, i32 %a1) nounwind {
+entry:
+; CHECK: shllv.ph
+
+ %0 = bitcast i32 %a0.coerce to <2 x i16>
+ %1 = tail call <2 x i16> @llvm.mips.shll.ph(<2 x i16> %0, i32 %a1)
+ %2 = bitcast <2 x i16> %1 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+ ret { i32 } %.fca.0.insert
+}
+
+define { i32 } @test__builtin_mips_shll_s_ph1(i32 %i0, i32 %a0.coerce) nounwind {
+entry:
+; CHECK: shll_s.ph
+
+ %0 = bitcast i32 %a0.coerce to <2 x i16>
+ %1 = tail call <2 x i16> @llvm.mips.shll.s.ph(<2 x i16> %0, i32 7)
+ %2 = bitcast <2 x i16> %1 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <2 x i16> @llvm.mips.shll.s.ph(<2 x i16>, i32) nounwind
+
+define { i32 } @test__builtin_mips_shll_s_ph2(i32 %i0, i32 %a0.coerce, i32 %a1) nounwind {
+entry:
+; CHECK: shllv_s.ph
+
+ %0 = bitcast i32 %a0.coerce to <2 x i16>
+ %1 = tail call <2 x i16> @llvm.mips.shll.s.ph(<2 x i16> %0, i32 %a1)
+ %2 = bitcast <2 x i16> %1 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+ ret { i32 } %.fca.0.insert
+}
+
+define i32 @test__builtin_mips_shll_s_w1(i32 %i0, i32 %a0) nounwind {
+entry:
+; CHECK: shll_s.w
+
+ %0 = tail call i32 @llvm.mips.shll.s.w(i32 %a0, i32 15)
+ ret i32 %0
+}
+
+declare i32 @llvm.mips.shll.s.w(i32, i32) nounwind
+
+define i32 @test__builtin_mips_shll_s_w2(i32 %i0, i32 %a0, i32 %a1) nounwind {
+entry:
+; CHECK: shllv_s.w
+
+ %0 = tail call i32 @llvm.mips.shll.s.w(i32 %a0, i32 %a1)
+ ret i32 %0
+}
+
+define { i32 } @test__builtin_mips_shrl_qb1(i32 %i0, i32 %a0.coerce) nounwind readnone {
+entry:
+; CHECK: shrl.qb
+
+ %0 = bitcast i32 %a0.coerce to <4 x i8>
+ %1 = tail call <4 x i8> @llvm.mips.shrl.qb(<4 x i8> %0, i32 3)
+ %2 = bitcast <4 x i8> %1 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <4 x i8> @llvm.mips.shrl.qb(<4 x i8>, i32) nounwind readnone
+
+define { i32 } @test__builtin_mips_shrl_qb2(i32 %i0, i32 %a0.coerce, i32 %a1) nounwind readnone {
+entry:
+; CHECK: shrlv.qb
+
+ %0 = bitcast i32 %a0.coerce to <4 x i8>
+ %1 = tail call <4 x i8> @llvm.mips.shrl.qb(<4 x i8> %0, i32 %a1)
+ %2 = bitcast <4 x i8> %1 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+ ret { i32 } %.fca.0.insert
+}
+
+define { i32 } @test__builtin_mips_shra_ph1(i32 %i0, i32 %a0.coerce) nounwind readnone {
+entry:
+; CHECK: shra.ph
+
+ %0 = bitcast i32 %a0.coerce to <2 x i16>
+ %1 = tail call <2 x i16> @llvm.mips.shra.ph(<2 x i16> %0, i32 7)
+ %2 = bitcast <2 x i16> %1 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <2 x i16> @llvm.mips.shra.ph(<2 x i16>, i32) nounwind readnone
+
+define { i32 } @test__builtin_mips_shra_ph2(i32 %i0, i32 %a0.coerce, i32 %a1) nounwind readnone {
+entry:
+; CHECK: shrav.ph
+
+ %0 = bitcast i32 %a0.coerce to <2 x i16>
+ %1 = tail call <2 x i16> @llvm.mips.shra.ph(<2 x i16> %0, i32 %a1)
+ %2 = bitcast <2 x i16> %1 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+ ret { i32 } %.fca.0.insert
+}
+
+define { i32 } @test__builtin_mips_shra_r_ph1(i32 %i0, i32 %a0.coerce) nounwind readnone {
+entry:
+; CHECK: shra_r.ph
+
+ %0 = bitcast i32 %a0.coerce to <2 x i16>
+ %1 = tail call <2 x i16> @llvm.mips.shra.r.ph(<2 x i16> %0, i32 7)
+ %2 = bitcast <2 x i16> %1 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <2 x i16> @llvm.mips.shra.r.ph(<2 x i16>, i32) nounwind readnone
+
+define { i32 } @test__builtin_mips_shra_r_ph2(i32 %i0, i32 %a0.coerce, i32 %a1) nounwind readnone {
+entry:
+; CHECK: shrav_r.ph
+
+ %0 = bitcast i32 %a0.coerce to <2 x i16>
+ %1 = tail call <2 x i16> @llvm.mips.shra.r.ph(<2 x i16> %0, i32 %a1)
+ %2 = bitcast <2 x i16> %1 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+ ret { i32 } %.fca.0.insert
+}
+
+define i32 @test__builtin_mips_shra_r_w1(i32 %i0, i32 %a0) nounwind readnone {
+entry:
+; CHECK: shra_r.w
+
+ %0 = tail call i32 @llvm.mips.shra.r.w(i32 %a0, i32 15)
+ ret i32 %0
+}
+
+declare i32 @llvm.mips.shra.r.w(i32, i32) nounwind readnone
+
+define i32 @test__builtin_mips_shra_r_w2(i32 %i0, i32 %a0, i32 %a1) nounwind readnone {
+entry:
+; CHECK: shrav_r.w
+
+ %0 = tail call i32 @llvm.mips.shra.r.w(i32 %a0, i32 %a1)
+ ret i32 %0
+}
+
+define { i32 } @test__builtin_mips_absq_s_ph1(i32 %i0, i32 %a0.coerce) nounwind {
+entry:
+; CHECK: absq_s.ph
+
+ %0 = bitcast i32 %a0.coerce to <2 x i16>
+ %1 = tail call <2 x i16> @llvm.mips.absq.s.ph(<2 x i16> %0)
+ %2 = bitcast <2 x i16> %1 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <2 x i16> @llvm.mips.absq.s.ph(<2 x i16>) nounwind
+
+define i32 @test__builtin_mips_absq_s_w1(i32 %i0, i32 %a0) nounwind {
+entry:
+; CHECK: absq_s.w
+
+ %0 = tail call i32 @llvm.mips.absq.s.w(i32 %a0)
+ ret i32 %0
+}
+
+declare i32 @llvm.mips.absq.s.w(i32) nounwind
+
+define i32 @test__builtin_mips_preceq_w_phl1(i32 %i0, i32 %a0.coerce) nounwind readnone {
+entry:
+; CHECK: preceq.w.phl
+
+ %0 = bitcast i32 %a0.coerce to <2 x i16>
+ %1 = tail call i32 @llvm.mips.preceq.w.phl(<2 x i16> %0)
+ ret i32 %1
+}
+
+declare i32 @llvm.mips.preceq.w.phl(<2 x i16>) nounwind readnone
+
+define i32 @test__builtin_mips_preceq_w_phr1(i32 %i0, i32 %a0.coerce) nounwind readnone {
+entry:
+; CHECK: preceq.w.phr
+
+ %0 = bitcast i32 %a0.coerce to <2 x i16>
+ %1 = tail call i32 @llvm.mips.preceq.w.phr(<2 x i16> %0)
+ ret i32 %1
+}
+
+declare i32 @llvm.mips.preceq.w.phr(<2 x i16>) nounwind readnone
+
+define { i32 } @test__builtin_mips_precequ_ph_qbl1(i32 %i0, i32 %a0.coerce) nounwind readnone {
+entry:
+; CHECK: precequ.ph.qbl
+
+ %0 = bitcast i32 %a0.coerce to <4 x i8>
+ %1 = tail call <2 x i16> @llvm.mips.precequ.ph.qbl(<4 x i8> %0)
+ %2 = bitcast <2 x i16> %1 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <2 x i16> @llvm.mips.precequ.ph.qbl(<4 x i8>) nounwind readnone
+
+define { i32 } @test__builtin_mips_precequ_ph_qbr1(i32 %i0, i32 %a0.coerce) nounwind readnone {
+entry:
+; CHECK: precequ.ph.qbr
+
+ %0 = bitcast i32 %a0.coerce to <4 x i8>
+ %1 = tail call <2 x i16> @llvm.mips.precequ.ph.qbr(<4 x i8> %0)
+ %2 = bitcast <2 x i16> %1 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <2 x i16> @llvm.mips.precequ.ph.qbr(<4 x i8>) nounwind readnone
+
+define { i32 } @test__builtin_mips_precequ_ph_qbla1(i32 %i0, i32 %a0.coerce) nounwind readnone {
+entry:
+; CHECK: precequ.ph.qbla
+
+ %0 = bitcast i32 %a0.coerce to <4 x i8>
+ %1 = tail call <2 x i16> @llvm.mips.precequ.ph.qbla(<4 x i8> %0)
+ %2 = bitcast <2 x i16> %1 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <2 x i16> @llvm.mips.precequ.ph.qbla(<4 x i8>) nounwind readnone
+
+define { i32 } @test__builtin_mips_precequ_ph_qbra1(i32 %i0, i32 %a0.coerce) nounwind readnone {
+entry:
+; CHECK: precequ.ph.qbra
+
+ %0 = bitcast i32 %a0.coerce to <4 x i8>
+ %1 = tail call <2 x i16> @llvm.mips.precequ.ph.qbra(<4 x i8> %0)
+ %2 = bitcast <2 x i16> %1 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <2 x i16> @llvm.mips.precequ.ph.qbra(<4 x i8>) nounwind readnone
+
+define { i32 } @test__builtin_mips_preceu_ph_qbl1(i32 %i0, i32 %a0.coerce) nounwind readnone {
+entry:
+; CHECK: preceu.ph.qbl
+
+ %0 = bitcast i32 %a0.coerce to <4 x i8>
+ %1 = tail call <2 x i16> @llvm.mips.preceu.ph.qbl(<4 x i8> %0)
+ %2 = bitcast <2 x i16> %1 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <2 x i16> @llvm.mips.preceu.ph.qbl(<4 x i8>) nounwind readnone
+
+define { i32 } @test__builtin_mips_preceu_ph_qbr1(i32 %i0, i32 %a0.coerce) nounwind readnone {
+entry:
+; CHECK: preceu.ph.qbr
+
+ %0 = bitcast i32 %a0.coerce to <4 x i8>
+ %1 = tail call <2 x i16> @llvm.mips.preceu.ph.qbr(<4 x i8> %0)
+ %2 = bitcast <2 x i16> %1 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <2 x i16> @llvm.mips.preceu.ph.qbr(<4 x i8>) nounwind readnone
+
+define { i32 } @test__builtin_mips_preceu_ph_qbla1(i32 %i0, i32 %a0.coerce) nounwind readnone {
+entry:
+; CHECK: preceu.ph.qbla
+
+ %0 = bitcast i32 %a0.coerce to <4 x i8>
+ %1 = tail call <2 x i16> @llvm.mips.preceu.ph.qbla(<4 x i8> %0)
+ %2 = bitcast <2 x i16> %1 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <2 x i16> @llvm.mips.preceu.ph.qbla(<4 x i8>) nounwind readnone
+
+define { i32 } @test__builtin_mips_preceu_ph_qbra1(i32 %i0, i32 %a0.coerce) nounwind readnone {
+entry:
+; CHECK: preceu.ph.qbra
+
+ %0 = bitcast i32 %a0.coerce to <4 x i8>
+ %1 = tail call <2 x i16> @llvm.mips.preceu.ph.qbra(<4 x i8> %0)
+ %2 = bitcast <2 x i16> %1 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <2 x i16> @llvm.mips.preceu.ph.qbra(<4 x i8>) nounwind readnone
+
+define { i32 } @test__builtin_mips_repl_qb1(i32 %i0) nounwind readnone {
+entry:
+; CHECK: repl.qb
+
+ %0 = tail call <4 x i8> @llvm.mips.repl.qb(i32 127)
+ %1 = bitcast <4 x i8> %0 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %1, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <4 x i8> @llvm.mips.repl.qb(i32) nounwind readnone
+
+define { i32 } @test__builtin_mips_repl_qb2(i32 %i0, i32 %a0) nounwind readnone {
+entry:
+; CHECK: replv.qb
+
+ %0 = tail call <4 x i8> @llvm.mips.repl.qb(i32 %a0)
+ %1 = bitcast <4 x i8> %0 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %1, 0
+ ret { i32 } %.fca.0.insert
+}
+
+define { i32 } @test__builtin_mips_repl_ph1(i32 %i0) nounwind readnone {
+entry:
+; CHECK: repl.ph
+
+ %0 = tail call <2 x i16> @llvm.mips.repl.ph(i32 0)
+ %1 = bitcast <2 x i16> %0 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %1, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <2 x i16> @llvm.mips.repl.ph(i32) nounwind readnone
+
+define { i32 } @test__builtin_mips_repl_ph2(i32 %i0, i32 %a0) nounwind readnone {
+entry:
+; CHECK: replv.ph
+
+ %0 = tail call <2 x i16> @llvm.mips.repl.ph(i32 %a0)
+ %1 = bitcast <2 x i16> %0 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %1, 0
+ ret { i32 } %.fca.0.insert
+}
+
+define i32 @test__builtin_mips_bitrev1(i32 %i0, i32 %a0) nounwind readnone {
+entry:
+; CHECK: bitrev ${{[0-9]+}}
+
+ %0 = tail call i32 @llvm.mips.bitrev(i32 %a0)
+ ret i32 %0
+}
+
+declare i32 @llvm.mips.bitrev(i32) nounwind readnone
+
+define i32 @test__builtin_mips_lbux1(i32 %i0, i8* %a0, i32 %a1) nounwind readonly {
+entry:
+; CHECK: lbux ${{[0-9]+}}
+
+ %0 = tail call i32 @llvm.mips.lbux(i8* %a0, i32 %a1)
+ ret i32 %0
+}
+
+declare i32 @llvm.mips.lbux(i8*, i32) nounwind readonly
+
+define i32 @test__builtin_mips_lhx1(i32 %i0, i8* %a0, i32 %a1) nounwind readonly {
+entry:
+; CHECK: lhx ${{[0-9]+}}
+
+ %0 = tail call i32 @llvm.mips.lhx(i8* %a0, i32 %a1)
+ ret i32 %0
+}
+
+declare i32 @llvm.mips.lhx(i8*, i32) nounwind readonly
+
+define i32 @test__builtin_mips_lwx1(i32 %i0, i8* %a0, i32 %a1) nounwind readonly {
+entry:
+; CHECK: lwx ${{[0-9]+}}
+
+ %0 = tail call i32 @llvm.mips.lwx(i8* %a0, i32 %a1)
+ ret i32 %0
+}
+
+declare i32 @llvm.mips.lwx(i8*, i32) nounwind readonly
+
+define i32 @test__builtin_mips_wrdsp1(i32 %i0, i32 %a0) nounwind {
+entry:
+; CHECK: wrdsp ${{[0-9]+}}
+
+ tail call void @llvm.mips.wrdsp(i32 %a0, i32 31)
+ %0 = tail call i32 @llvm.mips.rddsp(i32 31)
+ ret i32 %0
+}
+
+declare void @llvm.mips.wrdsp(i32, i32) nounwind
Added: llvm/branches/R600/test/CodeGen/Mips/dsp-r2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/Mips/dsp-r2.ll?rev=165011&view=auto
==============================================================================
--- llvm/branches/R600/test/CodeGen/Mips/dsp-r2.ll (added)
+++ llvm/branches/R600/test/CodeGen/Mips/dsp-r2.ll Tue Oct 2 09:15:33 2012
@@ -0,0 +1,568 @@
+; RUN: llc -march=mipsel -mattr=+dspr2 < %s | FileCheck %s
+
+define i64 @test__builtin_mips_dpa_w_ph1(i32 %i0, i32, i64 %a0, i32 %a1.coerce, i32 %a2.coerce) nounwind readnone {
+entry:
+; CHECK: dpa.w.ph
+
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = bitcast i32 %a2.coerce to <2 x i16>
+ %3 = tail call i64 @llvm.mips.dpa.w.ph(i64 %a0, <2 x i16> %1, <2 x i16> %2)
+ ret i64 %3
+}
+
+declare i64 @llvm.mips.dpa.w.ph(i64, <2 x i16>, <2 x i16>) nounwind readnone
+
+define i64 @test__builtin_mips_dps_w_ph1(i32 %i0, i32, i64 %a0, i32 %a1.coerce, i32 %a2.coerce) nounwind readnone {
+entry:
+; CHECK: dps.w.ph
+
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = bitcast i32 %a2.coerce to <2 x i16>
+ %3 = tail call i64 @llvm.mips.dps.w.ph(i64 %a0, <2 x i16> %1, <2 x i16> %2)
+ ret i64 %3
+}
+
+declare i64 @llvm.mips.dps.w.ph(i64, <2 x i16>, <2 x i16>) nounwind readnone
+
+define i64 @test__builtin_mips_mulsa_w_ph1(i32 %i0, i32, i64 %a0, i32 %a1.coerce, i32 %a2.coerce) nounwind readnone {
+entry:
+; CHECK: mulsa.w.ph
+
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = bitcast i32 %a2.coerce to <2 x i16>
+ %3 = tail call i64 @llvm.mips.mulsa.w.ph(i64 %a0, <2 x i16> %1, <2 x i16> %2)
+ ret i64 %3
+}
+
+declare i64 @llvm.mips.mulsa.w.ph(i64, <2 x i16>, <2 x i16>) nounwind readnone
+
+define i64 @test__builtin_mips_dpax_w_ph1(i32 %i0, i32, i64 %a0, i32 %a1.coerce, i32 %a2.coerce) nounwind readnone {
+entry:
+; CHECK: dpax.w.ph
+
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = bitcast i32 %a2.coerce to <2 x i16>
+ %3 = tail call i64 @llvm.mips.dpax.w.ph(i64 %a0, <2 x i16> %1, <2 x i16> %2)
+ ret i64 %3
+}
+
+declare i64 @llvm.mips.dpax.w.ph(i64, <2 x i16>, <2 x i16>) nounwind readnone
+
+define i64 @test__builtin_mips_dpsx_w_ph1(i32 %i0, i32, i64 %a0, i32 %a1.coerce, i32 %a2.coerce) nounwind readnone {
+entry:
+; CHECK: dpsx.w.ph
+
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = bitcast i32 %a2.coerce to <2 x i16>
+ %3 = tail call i64 @llvm.mips.dpsx.w.ph(i64 %a0, <2 x i16> %1, <2 x i16> %2)
+ ret i64 %3
+}
+
+declare i64 @llvm.mips.dpsx.w.ph(i64, <2 x i16>, <2 x i16>) nounwind readnone
+
+define i64 @test__builtin_mips_dpaqx_s_w_ph1(i32 %i0, i32, i64 %a0, i32 %a1.coerce, i32 %a2.coerce) nounwind {
+entry:
+; CHECK: dpaqx_s.w.ph
+
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = bitcast i32 %a2.coerce to <2 x i16>
+ %3 = tail call i64 @llvm.mips.dpaqx.s.w.ph(i64 %a0, <2 x i16> %1, <2 x i16> %2)
+ ret i64 %3
+}
+
+declare i64 @llvm.mips.dpaqx.s.w.ph(i64, <2 x i16>, <2 x i16>) nounwind
+
+define i64 @test__builtin_mips_dpaqx_sa_w_ph1(i32 %i0, i32, i64 %a0, i32 %a1.coerce, i32 %a2.coerce) nounwind {
+entry:
+; CHECK: dpaqx_sa.w.ph
+
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = bitcast i32 %a2.coerce to <2 x i16>
+ %3 = tail call i64 @llvm.mips.dpaqx.sa.w.ph(i64 %a0, <2 x i16> %1, <2 x i16> %2)
+ ret i64 %3
+}
+
+declare i64 @llvm.mips.dpaqx.sa.w.ph(i64, <2 x i16>, <2 x i16>) nounwind
+
+define i64 @test__builtin_mips_dpsqx_s_w_ph1(i32 %i0, i32, i64 %a0, i32 %a1.coerce, i32 %a2.coerce) nounwind {
+entry:
+; CHECK: dpsqx_s.w.ph
+
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = bitcast i32 %a2.coerce to <2 x i16>
+ %3 = tail call i64 @llvm.mips.dpsqx.s.w.ph(i64 %a0, <2 x i16> %1, <2 x i16> %2)
+ ret i64 %3
+}
+
+declare i64 @llvm.mips.dpsqx.s.w.ph(i64, <2 x i16>, <2 x i16>) nounwind
+
+define i64 @test__builtin_mips_dpsqx_sa_w_ph1(i32 %i0, i32, i64 %a0, i32 %a1.coerce, i32 %a2.coerce) nounwind {
+entry:
+; CHECK: dpsqx_sa.w.ph
+
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = bitcast i32 %a2.coerce to <2 x i16>
+ %3 = tail call i64 @llvm.mips.dpsqx.sa.w.ph(i64 %a0, <2 x i16> %1, <2 x i16> %2)
+ ret i64 %3
+}
+
+declare i64 @llvm.mips.dpsqx.sa.w.ph(i64, <2 x i16>, <2 x i16>) nounwind
+
+define { i32 } @test__builtin_mips_addu_ph1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind {
+entry:
+; CHECK: addu.ph
+
+ %0 = bitcast i32 %a0.coerce to <2 x i16>
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = tail call <2 x i16> @llvm.mips.addu.ph(<2 x i16> %0, <2 x i16> %1)
+ %3 = bitcast <2 x i16> %2 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <2 x i16> @llvm.mips.addu.ph(<2 x i16>, <2 x i16>) nounwind
+
+define { i32 } @test__builtin_mips_addu_s_ph1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind {
+entry:
+; CHECK: addu_s.ph
+
+ %0 = bitcast i32 %a0.coerce to <2 x i16>
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = tail call <2 x i16> @llvm.mips.addu.s.ph(<2 x i16> %0, <2 x i16> %1)
+ %3 = bitcast <2 x i16> %2 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <2 x i16> @llvm.mips.addu.s.ph(<2 x i16>, <2 x i16>) nounwind
+
+define { i32 } @test__builtin_mips_mulq_s_ph1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind {
+entry:
+; CHECK: mulq_s.ph
+
+ %0 = bitcast i32 %a0.coerce to <2 x i16>
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = tail call <2 x i16> @llvm.mips.mulq.s.ph(<2 x i16> %0, <2 x i16> %1)
+ %3 = bitcast <2 x i16> %2 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <2 x i16> @llvm.mips.mulq.s.ph(<2 x i16>, <2 x i16>) nounwind
+
+define { i32 } @test__builtin_mips_subu_ph1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind {
+entry:
+; CHECK: subu.ph
+
+ %0 = bitcast i32 %a0.coerce to <2 x i16>
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = tail call <2 x i16> @llvm.mips.subu.ph(<2 x i16> %0, <2 x i16> %1)
+ %3 = bitcast <2 x i16> %2 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <2 x i16> @llvm.mips.subu.ph(<2 x i16>, <2 x i16>) nounwind
+
+define { i32 } @test__builtin_mips_subu_s_ph1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind {
+entry:
+; CHECK: subu_s.ph
+
+ %0 = bitcast i32 %a0.coerce to <2 x i16>
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = tail call <2 x i16> @llvm.mips.subu.s.ph(<2 x i16> %0, <2 x i16> %1)
+ %3 = bitcast <2 x i16> %2 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <2 x i16> @llvm.mips.subu.s.ph(<2 x i16>, <2 x i16>) nounwind
+
+define i32 @test__builtin_mips_cmpgdu_eq_qb1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind {
+entry:
+; CHECK: cmpgdu.eq.qb
+
+ %0 = bitcast i32 %a0.coerce to <4 x i8>
+ %1 = bitcast i32 %a1.coerce to <4 x i8>
+ %2 = tail call i32 @llvm.mips.cmpgdu.eq.qb(<4 x i8> %0, <4 x i8> %1)
+ ret i32 %2
+}
+
+declare i32 @llvm.mips.cmpgdu.eq.qb(<4 x i8>, <4 x i8>) nounwind
+
+define i32 @test__builtin_mips_cmpgdu_lt_qb1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind {
+entry:
+; CHECK: cmpgdu.lt.qb
+
+ %0 = bitcast i32 %a0.coerce to <4 x i8>
+ %1 = bitcast i32 %a1.coerce to <4 x i8>
+ %2 = tail call i32 @llvm.mips.cmpgdu.lt.qb(<4 x i8> %0, <4 x i8> %1)
+ ret i32 %2
+}
+
+declare i32 @llvm.mips.cmpgdu.lt.qb(<4 x i8>, <4 x i8>) nounwind
+
+define i32 @test__builtin_mips_cmpgdu_le_qb1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind {
+entry:
+; CHECK: cmpgdu.le.qb
+
+ %0 = bitcast i32 %a0.coerce to <4 x i8>
+ %1 = bitcast i32 %a1.coerce to <4 x i8>
+ %2 = tail call i32 @llvm.mips.cmpgdu.le.qb(<4 x i8> %0, <4 x i8> %1)
+ ret i32 %2
+}
+
+declare i32 @llvm.mips.cmpgdu.le.qb(<4 x i8>, <4 x i8>) nounwind
+
+define { i32 } @test__builtin_mips_precr_qb_ph1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind {
+entry:
+; CHECK: precr.qb.ph
+
+ %0 = bitcast i32 %a0.coerce to <2 x i16>
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = tail call <4 x i8> @llvm.mips.precr.qb.ph(<2 x i16> %0, <2 x i16> %1)
+ %3 = bitcast <4 x i8> %2 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <4 x i8> @llvm.mips.precr.qb.ph(<2 x i16>, <2 x i16>) nounwind
+
+define { i32 } @test__builtin_mips_precr_sra_ph_w1(i32 %i0, i32 %a0, i32 %a1) nounwind readnone {
+entry:
+; CHECK: precr_sra.ph.w
+
+ %0 = tail call <2 x i16> @llvm.mips.precr.sra.ph.w(i32 %a0, i32 %a1, i32 15)
+ %1 = bitcast <2 x i16> %0 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %1, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <2 x i16> @llvm.mips.precr.sra.ph.w(i32, i32, i32) nounwind readnone
+
+define { i32 } @test__builtin_mips_precr_sra_r_ph_w1(i32 %i0, i32 %a0, i32 %a1) nounwind readnone {
+entry:
+; CHECK: precr_sra_r.ph.w
+
+ %0 = tail call <2 x i16> @llvm.mips.precr.sra.r.ph.w(i32 %a0, i32 %a1, i32 15)
+ %1 = bitcast <2 x i16> %0 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %1, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <2 x i16> @llvm.mips.precr.sra.r.ph.w(i32, i32, i32) nounwind readnone
+
+define { i32 } @test__builtin_mips_shra_qb1(i32 %i0, i32 %a0.coerce) nounwind readnone {
+entry:
+; CHECK: shra.qb
+
+ %0 = bitcast i32 %a0.coerce to <4 x i8>
+ %1 = tail call <4 x i8> @llvm.mips.shra.qb(<4 x i8> %0, i32 3)
+ %2 = bitcast <4 x i8> %1 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <4 x i8> @llvm.mips.shra.qb(<4 x i8>, i32) nounwind readnone
+
+define { i32 } @test__builtin_mips_shra_r_qb1(i32 %i0, i32 %a0.coerce) nounwind readnone {
+entry:
+; CHECK: shra_r.qb
+
+ %0 = bitcast i32 %a0.coerce to <4 x i8>
+ %1 = tail call <4 x i8> @llvm.mips.shra.r.qb(<4 x i8> %0, i32 3)
+ %2 = bitcast <4 x i8> %1 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <4 x i8> @llvm.mips.shra.r.qb(<4 x i8>, i32) nounwind readnone
+
+define { i32 } @test__builtin_mips_shra_qb2(i32 %i0, i32 %a0.coerce, i32 %a1) nounwind readnone {
+entry:
+; CHECK: shrav.qb
+
+ %0 = bitcast i32 %a0.coerce to <4 x i8>
+ %1 = tail call <4 x i8> @llvm.mips.shra.qb(<4 x i8> %0, i32 %a1)
+ %2 = bitcast <4 x i8> %1 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+ ret { i32 } %.fca.0.insert
+}
+
+define { i32 } @test__builtin_mips_shra_r_qb2(i32 %i0, i32 %a0.coerce, i32 %a1) nounwind readnone {
+entry:
+; CHECK: shrav_r.qb
+
+ %0 = bitcast i32 %a0.coerce to <4 x i8>
+ %1 = tail call <4 x i8> @llvm.mips.shra.r.qb(<4 x i8> %0, i32 %a1)
+ %2 = bitcast <4 x i8> %1 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+ ret { i32 } %.fca.0.insert
+}
+
+define { i32 } @test__builtin_mips_shrl_ph1(i32 %i0, i32 %a0.coerce) nounwind readnone {
+entry:
+; CHECK: shrl.ph
+
+ %0 = bitcast i32 %a0.coerce to <2 x i16>
+ %1 = tail call <2 x i16> @llvm.mips.shrl.ph(<2 x i16> %0, i32 7)
+ %2 = bitcast <2 x i16> %1 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <2 x i16> @llvm.mips.shrl.ph(<2 x i16>, i32) nounwind readnone
+
+define { i32 } @test__builtin_mips_shrl_ph2(i32 %i0, i32 %a0.coerce, i32 %a1) nounwind readnone {
+entry:
+; CHECK: shrlv.ph
+
+ %0 = bitcast i32 %a0.coerce to <2 x i16>
+ %1 = tail call <2 x i16> @llvm.mips.shrl.ph(<2 x i16> %0, i32 %a1)
+ %2 = bitcast <2 x i16> %1 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+ ret { i32 } %.fca.0.insert
+}
+
+define { i32 } @test__builtin_mips_absq_s_qb1(i32 %i0, i32 %a0.coerce) nounwind {
+entry:
+; CHECK: absq_s.qb
+
+ %0 = bitcast i32 %a0.coerce to <4 x i8>
+ %1 = tail call <4 x i8> @llvm.mips.absq.s.qb(<4 x i8> %0)
+ %2 = bitcast <4 x i8> %1 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <4 x i8> @llvm.mips.absq.s.qb(<4 x i8>) nounwind
+
+define { i32 } @test__builtin_mips_mul_ph1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind {
+entry:
+; CHECK: mul.ph
+
+ %0 = bitcast i32 %a0.coerce to <2 x i16>
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = tail call <2 x i16> @llvm.mips.mul.ph(<2 x i16> %0, <2 x i16> %1)
+ %3 = bitcast <2 x i16> %2 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <2 x i16> @llvm.mips.mul.ph(<2 x i16>, <2 x i16>) nounwind
+
+define { i32 } @test__builtin_mips_mul_s_ph1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind {
+entry:
+; CHECK: mul_s.ph
+
+ %0 = bitcast i32 %a0.coerce to <2 x i16>
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = tail call <2 x i16> @llvm.mips.mul.s.ph(<2 x i16> %0, <2 x i16> %1)
+ %3 = bitcast <2 x i16> %2 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <2 x i16> @llvm.mips.mul.s.ph(<2 x i16>, <2 x i16>) nounwind
+
+define i32 @test__builtin_mips_mulq_rs_w1(i32 %i0, i32 %a0, i32 %a1) nounwind {
+entry:
+; CHECK: mulq_rs.w
+
+ %0 = tail call i32 @llvm.mips.mulq.rs.w(i32 %a0, i32 %a1)
+ ret i32 %0
+}
+
+declare i32 @llvm.mips.mulq.rs.w(i32, i32) nounwind
+
+define i32 @test__builtin_mips_mulq_s_w1(i32 %i0, i32 %a0, i32 %a1) nounwind {
+entry:
+; CHECK: mulq_s.w
+
+ %0 = tail call i32 @llvm.mips.mulq.s.w(i32 %a0, i32 %a1)
+ ret i32 %0
+}
+
+declare i32 @llvm.mips.mulq.s.w(i32, i32) nounwind
+
+define { i32 } @test__builtin_mips_adduh_qb1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind readnone {
+entry:
+; CHECK: adduh.qb
+
+ %0 = bitcast i32 %a0.coerce to <4 x i8>
+ %1 = bitcast i32 %a1.coerce to <4 x i8>
+ %2 = tail call <4 x i8> @llvm.mips.adduh.qb(<4 x i8> %0, <4 x i8> %1)
+ %3 = bitcast <4 x i8> %2 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <4 x i8> @llvm.mips.adduh.qb(<4 x i8>, <4 x i8>) nounwind readnone
+
+define { i32 } @test__builtin_mips_adduh_r_qb1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind readnone {
+entry:
+; CHECK: adduh_r.qb
+
+ %0 = bitcast i32 %a0.coerce to <4 x i8>
+ %1 = bitcast i32 %a1.coerce to <4 x i8>
+ %2 = tail call <4 x i8> @llvm.mips.adduh.r.qb(<4 x i8> %0, <4 x i8> %1)
+ %3 = bitcast <4 x i8> %2 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <4 x i8> @llvm.mips.adduh.r.qb(<4 x i8>, <4 x i8>) nounwind readnone
+
+define { i32 } @test__builtin_mips_subuh_qb1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind readnone {
+entry:
+; CHECK: subuh.qb
+
+ %0 = bitcast i32 %a0.coerce to <4 x i8>
+ %1 = bitcast i32 %a1.coerce to <4 x i8>
+ %2 = tail call <4 x i8> @llvm.mips.subuh.qb(<4 x i8> %0, <4 x i8> %1)
+ %3 = bitcast <4 x i8> %2 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <4 x i8> @llvm.mips.subuh.qb(<4 x i8>, <4 x i8>) nounwind readnone
+
+define { i32 } @test__builtin_mips_subuh_r_qb1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind readnone {
+entry:
+; CHECK: subuh_r.qb
+
+ %0 = bitcast i32 %a0.coerce to <4 x i8>
+ %1 = bitcast i32 %a1.coerce to <4 x i8>
+ %2 = tail call <4 x i8> @llvm.mips.subuh.r.qb(<4 x i8> %0, <4 x i8> %1)
+ %3 = bitcast <4 x i8> %2 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <4 x i8> @llvm.mips.subuh.r.qb(<4 x i8>, <4 x i8>) nounwind readnone
+
+define { i32 } @test__builtin_mips_addqh_ph1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind readnone {
+entry:
+; CHECK: addqh.ph
+
+ %0 = bitcast i32 %a0.coerce to <2 x i16>
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = tail call <2 x i16> @llvm.mips.addqh.ph(<2 x i16> %0, <2 x i16> %1)
+ %3 = bitcast <2 x i16> %2 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <2 x i16> @llvm.mips.addqh.ph(<2 x i16>, <2 x i16>) nounwind readnone
+
+define { i32 } @test__builtin_mips_addqh_r_ph1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind readnone {
+entry:
+; CHECK: addqh_r.ph
+
+ %0 = bitcast i32 %a0.coerce to <2 x i16>
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = tail call <2 x i16> @llvm.mips.addqh.r.ph(<2 x i16> %0, <2 x i16> %1)
+ %3 = bitcast <2 x i16> %2 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <2 x i16> @llvm.mips.addqh.r.ph(<2 x i16>, <2 x i16>) nounwind readnone
+
+define i32 @test__builtin_mips_addqh_w1(i32 %i0, i32 %a0, i32 %a1) nounwind readnone {
+entry:
+; CHECK: addqh.w
+
+ %0 = tail call i32 @llvm.mips.addqh.w(i32 %a0, i32 %a1)
+ ret i32 %0
+}
+
+declare i32 @llvm.mips.addqh.w(i32, i32) nounwind readnone
+
+define i32 @test__builtin_mips_addqh_r_w1(i32 %i0, i32 %a0, i32 %a1) nounwind readnone {
+entry:
+; CHECK: addqh_r.w
+
+ %0 = tail call i32 @llvm.mips.addqh.r.w(i32 %a0, i32 %a1)
+ ret i32 %0
+}
+
+declare i32 @llvm.mips.addqh.r.w(i32, i32) nounwind readnone
+
+define { i32 } @test__builtin_mips_subqh_ph1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind readnone {
+entry:
+; CHECK: subqh.ph
+
+ %0 = bitcast i32 %a0.coerce to <2 x i16>
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = tail call <2 x i16> @llvm.mips.subqh.ph(<2 x i16> %0, <2 x i16> %1)
+ %3 = bitcast <2 x i16> %2 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <2 x i16> @llvm.mips.subqh.ph(<2 x i16>, <2 x i16>) nounwind readnone
+
+define { i32 } @test__builtin_mips_subqh_r_ph1(i32 %i0, i32 %a0.coerce, i32 %a1.coerce) nounwind readnone {
+entry:
+; CHECK: subqh_r.ph
+
+ %0 = bitcast i32 %a0.coerce to <2 x i16>
+ %1 = bitcast i32 %a1.coerce to <2 x i16>
+ %2 = tail call <2 x i16> @llvm.mips.subqh.r.ph(<2 x i16> %0, <2 x i16> %1)
+ %3 = bitcast <2 x i16> %2 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0
+ ret { i32 } %.fca.0.insert
+}
+
+declare <2 x i16> @llvm.mips.subqh.r.ph(<2 x i16>, <2 x i16>) nounwind readnone
+
+define i32 @test__builtin_mips_subqh_w1(i32 %i0, i32 %a0, i32 %a1) nounwind readnone {
+entry:
+; CHECK: subqh.w
+
+ %0 = tail call i32 @llvm.mips.subqh.w(i32 %a0, i32 %a1)
+ ret i32 %0
+}
+
+declare i32 @llvm.mips.subqh.w(i32, i32) nounwind readnone
+
+define i32 @test__builtin_mips_subqh_r_w1(i32 %i0, i32 %a0, i32 %a1) nounwind readnone {
+entry:
+; CHECK: subqh_r.w
+
+ %0 = tail call i32 @llvm.mips.subqh.r.w(i32 %a0, i32 %a1)
+ ret i32 %0
+}
+
+declare i32 @llvm.mips.subqh.r.w(i32, i32) nounwind readnone
+
+define i32 @test__builtin_mips_append1(i32 %i0, i32 %a0, i32 %a1) nounwind readnone {
+entry:
+; CHECK: append ${{[0-9]+}}
+
+ %0 = tail call i32 @llvm.mips.append(i32 %a0, i32 %a1, i32 15)
+ ret i32 %0
+}
+
+declare i32 @llvm.mips.append(i32, i32, i32) nounwind readnone
+
+define i32 @test__builtin_mips_balign1(i32 %i0, i32 %a0, i32 %a1) nounwind readnone {
+entry:
+; CHECK: balign ${{[0-9]+}}
+
+ %0 = tail call i32 @llvm.mips.balign(i32 %a0, i32 %a1, i32 1)
+ ret i32 %0
+}
+
+declare i32 @llvm.mips.balign(i32, i32, i32) nounwind readnone
+
+define i32 @test__builtin_mips_prepend1(i32 %i0, i32 %a0, i32 %a1) nounwind readnone {
+entry:
+; CHECK: prepend ${{[0-9]+}}
+
+ %0 = tail call i32 @llvm.mips.prepend(i32 %a0, i32 %a1, i32 15)
+ ret i32 %0
+}
+
+declare i32 @llvm.mips.prepend(i32, i32, i32) nounwind readnone
Added: llvm/branches/R600/test/CodeGen/Mips/vector-load-store.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/Mips/vector-load-store.ll?rev=165011&view=auto
==============================================================================
--- llvm/branches/R600/test/CodeGen/Mips/vector-load-store.ll (added)
+++ llvm/branches/R600/test/CodeGen/Mips/vector-load-store.ll Tue Oct 2 09:15:33 2012
@@ -0,0 +1,27 @@
+; RUN: llc -march=mipsel -mattr=+dsp < %s | FileCheck %s
+
+ at g1 = common global <2 x i16> zeroinitializer, align 4
+ at g0 = common global <2 x i16> zeroinitializer, align 4
+ at g3 = common global <4 x i8> zeroinitializer, align 4
+ at g2 = common global <4 x i8> zeroinitializer, align 4
+
+define void @func_v2i16() nounwind {
+entry:
+; CHECK: lw
+; CHECK: sw
+
+ %0 = load <2 x i16>* @g1, align 4
+ store <2 x i16> %0, <2 x i16>* @g0, align 4
+ ret void
+}
+
+define void @func_v4i8() nounwind {
+entry:
+; CHECK: lw
+; CHECK: sw
+
+ %0 = load <4 x i8>* @g3, align 4
+ store <4 x i8> %0, <4 x i8>* @g2, align 4
+ ret void
+}
+
Modified: llvm/branches/R600/test/CodeGen/Thumb2/cortex-fp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/Thumb2/cortex-fp.ll?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/Thumb2/cortex-fp.ll (original)
+++ llvm/branches/R600/test/CodeGen/Thumb2/cortex-fp.ll Tue Oct 2 09:15:33 2012
@@ -7,8 +7,8 @@
entry:
; CHECK: foo
; CORTEXM3: blx ___mulsf3
-; CORTEXM4: vmul.f32 s0, s1, s0
-; CORTEXA8: vmul.f32 d0, d1, d0
+; CORTEXM4: vmul.f32 s0, s2, s0
+; CORTEXA8: vmul.f32 d
%0 = fmul float %a, %b
ret float %0
}
@@ -19,6 +19,6 @@
%0 = fmul double %a, %b
; CORTEXM3: blx ___muldf3
; CORTEXM4: blx ___muldf3
-; CORTEXA8: vmul.f64 d16, d17, d16
+; CORTEXA8: vmul.f64 d
ret double %0
}
Modified: llvm/branches/R600/test/CodeGen/Thumb2/div.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/Thumb2/div.ll?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/Thumb2/div.ll (original)
+++ llvm/branches/R600/test/CodeGen/Thumb2/div.ll Tue Oct 2 09:15:33 2012
@@ -2,6 +2,8 @@
; RUN: | FileCheck %s -check-prefix=CHECK-THUMB
; RUN: llc < %s -march=thumb -mcpu=cortex-m3 -mattr=+thumb2 \
; RUN: | FileCheck %s -check-prefix=CHECK-THUMBV7M
+; RUN: llc < %s -march=thumb -mcpu=swift \
+; RUN: | FileCheck %s -check-prefix=CHECK-SWIFT-T2
define i32 @f1(i32 %a, i32 %b) {
entry:
@@ -9,6 +11,8 @@
; CHECK-THUMB: __divsi3
; CHECK-THUMBV7M: f1
; CHECK-THUMBV7M: sdiv
+; CHECK-SWIFT-T2: f1
+; CHECK-SWIFT-T2: sdiv
%tmp1 = sdiv i32 %a, %b ; <i32> [#uses=1]
ret i32 %tmp1
}
@@ -19,6 +23,8 @@
; CHECK-THUMB: __udivsi3
; CHECK-THUMBV7M: f2
; CHECK-THUMBV7M: udiv
+; CHECK-SWIFT-T2: f2
+; CHECK-SWIFT-T2: udiv
%tmp1 = udiv i32 %a, %b ; <i32> [#uses=1]
ret i32 %tmp1
}
@@ -29,6 +35,8 @@
; CHECK-THUMB: __modsi3
; CHECK-THUMBV7M: f3
; CHECK-THUMBV7M: sdiv
+; CHECK-SWIFT-T2: f3
+; CHECK-SWIFT-T2: sdiv
%tmp1 = srem i32 %a, %b ; <i32> [#uses=1]
ret i32 %tmp1
}
@@ -39,6 +47,8 @@
; CHECK-THUMB: __umodsi3
; CHECK-THUMBV7M: f4
; CHECK-THUMBV7M: udiv
+; CHECK-SWIFT-T2: f4
+; CHECK-SWIFT-T2: udiv
%tmp1 = urem i32 %a, %b ; <i32> [#uses=1]
ret i32 %tmp1
}
Modified: llvm/branches/R600/test/CodeGen/Thumb2/thumb2-mla.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/Thumb2/thumb2-mla.ll?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/Thumb2/thumb2-mla.ll (original)
+++ llvm/branches/R600/test/CodeGen/Thumb2/thumb2-mla.ll Tue Oct 2 09:15:33 2012
@@ -1,4 +1,5 @@
; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mattr=+thumb2 -arm-use-mulops=false | FileCheck %s -check-prefix=NO_MULOPS
define i32 @f1(i32 %a, i32 %b, i32 %c) {
%tmp1 = mul i32 %a, %b
@@ -7,6 +8,9 @@
}
; CHECK: f1:
; CHECK: mla r0, r0, r1, r2
+; NO_MULOPS: f1:
+; NO_MULOPS: muls r0, r1, r0
+; NO_MULOPS-NEXT: add r0, r2
define i32 @f2(i32 %a, i32 %b, i32 %c) {
%tmp1 = mul i32 %a, %b
@@ -15,3 +19,6 @@
}
; CHECK: f2:
; CHECK: mla r0, r0, r1, r2
+; NO_MULOPS: f2:
+; NO_MULOPS: muls r0, r1, r0
+; NO_MULOPS-NEXT: add r0, r2
Modified: llvm/branches/R600/test/CodeGen/Thumb2/thumb2-smla.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/Thumb2/thumb2-smla.ll?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/Thumb2/thumb2-smla.ll (original)
+++ llvm/branches/R600/test/CodeGen/Thumb2/thumb2-smla.ll Tue Oct 2 09:15:33 2012
@@ -1,8 +1,12 @@
; RUN: llc < %s -march=thumb -mattr=+thumb2,+t2xtpk,+t2dsp | FileCheck %s
+; RUN: llc < %s -march=thumb -mattr=+thumb2,+t2xtpk,+t2dsp -arm-use-mulops=false | FileCheck %s -check-prefix=NO_MULOPS
define i32 @f3(i32 %a, i16 %x, i32 %y) {
; CHECK: f3
; CHECK: smlabt r0, r1, r2, r0
+; NO_MULOPS: f3
+; NO_MULOPS: smultb r1, r2, r1
+; NO_MULOPS-NEXT: add r0, r1
%tmp = sext i16 %x to i32 ; <i32> [#uses=1]
%tmp2 = ashr i32 %y, 16 ; <i32> [#uses=1]
%tmp3 = mul i32 %tmp2, %tmp ; <i32> [#uses=1]
Added: llvm/branches/R600/test/CodeGen/X86/2012-09-28-CGPBug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/X86/2012-09-28-CGPBug.ll?rev=165011&view=auto
==============================================================================
--- llvm/branches/R600/test/CodeGen/X86/2012-09-28-CGPBug.ll (added)
+++ llvm/branches/R600/test/CodeGen/X86/2012-09-28-CGPBug.ll Tue Oct 2 09:15:33 2012
@@ -0,0 +1,53 @@
+; RUN: llc -mtriple=i386-apple-macosx < %s | FileCheck %s
+; rdar://12396696
+
+ at JT = global [4 x i32] [i32 sub (i32 ptrtoint (i8* blockaddress(@h, %18) to i32), i32 ptrtoint (i8* blockaddress(@h, %11) to i32)), i32 sub (i32 ptrtoint (i8* blockaddress(@h, %17) to i32), i32 ptrtoint (i8* blockaddress(@h, %11) to i32)), i32 sub (i32 ptrtoint (i8* blockaddress(@h, %22) to i32), i32 ptrtoint (i8* blockaddress(@h, %18) to i32)), i32 sub (i32 ptrtoint (i8* blockaddress(@h, %22) to i32), i32 ptrtoint (i8* blockaddress(@h, %17) to i32))]
+ at gGlobalLock = external global i8*
+ at .str40 = external global [35 x i8]
+
+; CHECK: _JT:
+; CHECK-NOT: .long Ltmp{{[0-9]+}}-1
+; CHECK-NOT: .long 1-Ltmp{{[0-9]+}}
+; CHECK: .long Ltmp{{[0-9]+}}-Ltmp{{[0-9]+}}
+; CHECK: .long Ltmp{{[0-9]+}}-Ltmp{{[0-9]+}}
+; CHECK: .long Ltmp{{[0-9]+}}-Ltmp{{[0-9]+}}
+; CHECK: .long Ltmp{{[0-9]+}}-Ltmp{{[0-9]+}}
+
+define void @h(i8*) nounwind ssp {
+ %2 = alloca i8*
+ store i8* %0, i8** %2
+ %3 = load i8** %2
+ %4 = bitcast i8* %3 to { i32, i32 }*
+ %5 = getelementptr { i32, i32 }* %4, i32 0, i32 0
+ %6 = load i32* %5
+ %7 = srem i32 %6, 2
+ %8 = icmp slt i32 %6, 2
+ %9 = select i1 %8, i32 %6, i32 %7
+ %10 = icmp eq i32 %9, 0
+ br label %11
+
+; <label>:11 ; preds = %1
+ %12 = zext i1 %10 to i32
+ %13 = getelementptr [4 x i32]* @JT, i32 0, i32 %12
+ %14 = load i32* %13
+ %15 = add i32 %14, ptrtoint (i8* blockaddress(@h, %11) to i32)
+ %16 = inttoptr i32 %15 to i8*
+ indirectbr i8* %16, [label %17, label %18]
+
+; <label>:17 ; preds = %11
+ tail call void (i8*, ...)* @g(i8* getelementptr inbounds ([35 x i8]* @.str40, i32 0, i32 0))
+ br label %22
+
+; <label>:18 ; preds = %11
+ %19 = call i32 @f(i32 -1037694186) nounwind
+ %20 = inttoptr i32 %19 to i32 (i8**)*
+ %21 = tail call i32 %20(i8** @gGlobalLock)
+ br label %22
+
+; <label>:22 ; preds = %18, %17
+ ret void
+}
+
+declare i32 @f(i32)
+
+declare void @g(i8*, ...)
Modified: llvm/branches/R600/test/CodeGen/X86/crash.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/X86/crash.ll?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/X86/crash.ll (original)
+++ llvm/branches/R600/test/CodeGen/X86/crash.ll Tue Oct 2 09:15:33 2012
@@ -442,3 +442,38 @@
ret void
}
declare void @_Z6PrintFz(...)
+
+ at a = external global i32, align 4
+ at fn1.g = private unnamed_addr constant [9 x i32*] [i32* null, i32* @a, i32* null, i32* null, i32* null, i32* null, i32* null, i32* null, i32* null], align 16
+ at e = external global i32, align 4
+
+define void @pr13943() nounwind uwtable ssp {
+entry:
+ %srcval = load i576* bitcast ([9 x i32*]* @fn1.g to i576*), align 16
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %g.0 = phi i576 [ %srcval, %entry ], [ %ins, %for.inc ]
+ %0 = load i32* @e, align 4
+ %1 = lshr i576 %g.0, 64
+ %2 = trunc i576 %1 to i64
+ %3 = inttoptr i64 %2 to i32*
+ %cmp = icmp eq i32* undef, %3
+ %conv2 = zext i1 %cmp to i32
+ %and = and i32 %conv2, %0
+ tail call void (...)* @fn3(i32 %and) nounwind
+ %tobool = icmp eq i32 undef, 0
+ br i1 %tobool, label %for.inc, label %if.then
+
+if.then: ; preds = %for.cond
+ ret void
+
+for.inc: ; preds = %for.cond
+ %4 = shl i576 %1, 384
+ %mask = and i576 %g.0, -726838724295606890509921801691610055141362320587174446476410459910173841445449629921945328942266354949348255351381262292727973638307841
+ %5 = and i576 %4, 726838724295606890509921801691610055141362320587174446476410459910173841445449629921945328942266354949348255351381262292727973638307840
+ %ins = or i576 %5, %mask
+ br label %for.cond
+}
+
+declare void @fn3(...)
Modified: llvm/branches/R600/test/CodeGen/X86/jump_sign.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/X86/jump_sign.ll?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/X86/jump_sign.ll (original)
+++ llvm/branches/R600/test/CodeGen/X86/jump_sign.ll Tue Oct 2 09:15:33 2012
@@ -278,3 +278,31 @@
%cond = select i1 %cmp, i32 %add, i32 0
ret i32 %cond
}
+
+; PR13966
+ at b = common global i32 0, align 4
+ at a = common global i32 0, align 4
+define i32 @test1(i32 %p1) nounwind uwtable {
+entry:
+; CHECK: test1:
+; CHECK: testb
+; CHECK: j
+; CHECK: ret
+ %0 = load i32* @b, align 4
+ %cmp = icmp ult i32 %0, %p1
+ %conv = zext i1 %cmp to i32
+ %1 = load i32* @a, align 4
+ %and = and i32 %conv, %1
+ %conv1 = trunc i32 %and to i8
+ %2 = urem i8 %conv1, 3
+ %tobool = icmp eq i8 %2, 0
+ br i1 %tobool, label %if.end, label %if.then
+
+if.then:
+ %dec = add nsw i32 %1, -1
+ store i32 %dec, i32* @a, align 4
+ br label %if.end
+
+if.end:
+ ret i32 undef
+}
Added: llvm/branches/R600/test/CodeGen/X86/mulx32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/X86/mulx32.ll?rev=165011&view=auto
==============================================================================
--- llvm/branches/R600/test/CodeGen/X86/mulx32.ll (added)
+++ llvm/branches/R600/test/CodeGen/X86/mulx32.ll Tue Oct 2 09:15:33 2012
@@ -0,0 +1,22 @@
+; RUN: llc -mcpu=core-avx2 -march=x86 < %s | FileCheck %s
+
+define i64 @f1(i32 %a, i32 %b) {
+ %x = zext i32 %a to i64
+ %y = zext i32 %b to i64
+ %r = mul i64 %x, %y
+; CHECK: f1
+; CHECK: mulxl
+; CHECK: ret
+ ret i64 %r
+}
+
+define i64 @f2(i32 %a, i32* %p) {
+ %b = load i32* %p
+ %x = zext i32 %a to i64
+ %y = zext i32 %b to i64
+ %r = mul i64 %x, %y
+; CHECK: f2
+; CHECK: mulxl ({{.+}}), %{{.+}}, %{{.+}}
+; CHECK: ret
+ ret i64 %r
+}
Added: llvm/branches/R600/test/CodeGen/X86/mulx64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/X86/mulx64.ll?rev=165011&view=auto
==============================================================================
--- llvm/branches/R600/test/CodeGen/X86/mulx64.ll (added)
+++ llvm/branches/R600/test/CodeGen/X86/mulx64.ll Tue Oct 2 09:15:33 2012
@@ -0,0 +1,22 @@
+; RUN: llc -mcpu=core-avx2 -march=x86-64 < %s | FileCheck %s
+
+define i128 @f1(i64 %a, i64 %b) {
+ %x = zext i64 %a to i128
+ %y = zext i64 %b to i128
+ %r = mul i128 %x, %y
+; CHECK: f1
+; CHECK: mulxq
+; CHECK: ret
+ ret i128 %r
+}
+
+define i128 @f2(i64 %a, i64* %p) {
+ %b = load i64* %p
+ %x = zext i64 %a to i128
+ %y = zext i64 %b to i128
+ %r = mul i128 %x, %y
+; CHECK: f2
+; CHECK: mulxq ({{.+}}), %{{.+}}, %{{.+}}
+; CHECK: ret
+ ret i128 %r
+}
Modified: llvm/branches/R600/test/CodeGen/X86/phys_subreg_coalesce-3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/X86/phys_subreg_coalesce-3.ll?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/X86/phys_subreg_coalesce-3.ll (original)
+++ llvm/branches/R600/test/CodeGen/X86/phys_subreg_coalesce-3.ll Tue Oct 2 09:15:33 2012
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=corei7 | FileCheck %s
; rdar://5571034
; This requires physreg joining, %vreg13 is live everywhere:
Modified: llvm/branches/R600/test/CodeGen/X86/ptr-rotate.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/X86/ptr-rotate.ll?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/X86/ptr-rotate.ll (original)
+++ llvm/branches/R600/test/CodeGen/X86/ptr-rotate.ll Tue Oct 2 09:15:33 2012
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=i386-apple-darwin -o - < %s | FileCheck %s
+; RUN: llc -mtriple=i386-apple-darwin -mcpu=corei7 -o - < %s | FileCheck %s
define i32 @func(i8* %A) nounwind readnone {
entry:
Modified: llvm/branches/R600/test/CodeGen/X86/rot32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/X86/rot32.ll?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/X86/rot32.ll (original)
+++ llvm/branches/R600/test/CodeGen/X86/rot32.ll Tue Oct 2 09:15:33 2012
@@ -1,4 +1,5 @@
-; RUN: llc < %s -march=x86 | FileCheck %s
+; RUN: llc < %s -march=x86 -mcpu=corei7 | FileCheck %s
+; RUN: llc < %s -march=x86 -mcpu=core-avx2 | FileCheck %s --check-prefix=BMI2
define i32 @foo(i32 %x, i32 %y, i32 %z) nounwind readnone {
entry:
@@ -48,12 +49,25 @@
entry:
; CHECK: xfoo:
; CHECK: roll $7
+; BMI2: xfoo:
+; BMI2: rorxl $25
%0 = lshr i32 %x, 25
%1 = shl i32 %x, 7
%2 = or i32 %0, %1
ret i32 %2
}
+define i32 @xfoop(i32* %p) nounwind readnone {
+entry:
+; BMI2: xfoop:
+; BMI2: rorxl $25, ({{.+}}), %{{.+}}
+ %x = load i32* %p
+ %a = lshr i32 %x, 25
+ %b = shl i32 %x, 7
+ %c = or i32 %a, %b
+ ret i32 %c
+}
+
define i32 @xbar(i32 %x, i32 %y, i32 %z) nounwind readnone {
entry:
; CHECK: xbar:
@@ -68,12 +82,25 @@
entry:
; CHECK: xun:
; CHECK: roll $25
+; BMI2: xun:
+; BMI2: rorxl $7
%0 = lshr i32 %x, 7
%1 = shl i32 %x, 25
%2 = or i32 %0, %1
ret i32 %2
}
+define i32 @xunp(i32* %p) nounwind readnone {
+entry:
+; BMI2: xunp:
+; BMI2: rorxl $7, ({{.+}}), %{{.+}}
+ %x = load i32* %p
+ %a = lshr i32 %x, 7
+ %b = shl i32 %x, 25
+ %c = or i32 %a, %b
+ ret i32 %c
+}
+
define i32 @xbu(i32 %x, i32 %y, i32 %z) nounwind readnone {
entry:
; CHECK: xbu:
Modified: llvm/branches/R600/test/CodeGen/X86/rot64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/X86/rot64.ll?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/X86/rot64.ll (original)
+++ llvm/branches/R600/test/CodeGen/X86/rot64.ll Tue Oct 2 09:15:33 2012
@@ -1,8 +1,9 @@
-; RUN: llc < %s -march=x86-64 > %t
-; RUN: grep rol %t | count 3
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 > %t
+; RUN: grep rol %t | count 5
; RUN: grep ror %t | count 1
; RUN: grep shld %t | count 2
; RUN: grep shrd %t | count 2
+; RUN: llc < %s -march=x86-64 -mcpu=core-avx2 | FileCheck %s --check-prefix=BMI2
define i64 @foo(i64 %x, i64 %y, i64 %z) nounwind readnone {
entry:
@@ -42,12 +43,25 @@
define i64 @xfoo(i64 %x, i64 %y, i64 %z) nounwind readnone {
entry:
+; BMI2: xfoo:
+; BMI2: rorxq $57
%0 = lshr i64 %x, 57
%1 = shl i64 %x, 7
%2 = or i64 %0, %1
ret i64 %2
}
+define i64 @xfoop(i64* %p) nounwind readnone {
+entry:
+; BMI2: xfoop:
+; BMI2: rorxq $57, ({{.+}}), %{{.+}}
+ %x = load i64* %p
+ %a = lshr i64 %x, 57
+ %b = shl i64 %x, 7
+ %c = or i64 %a, %b
+ ret i64 %c
+}
+
define i64 @xbar(i64 %x, i64 %y, i64 %z) nounwind readnone {
entry:
%0 = shl i64 %y, 7
@@ -58,12 +72,25 @@
define i64 @xun(i64 %x, i64 %y, i64 %z) nounwind readnone {
entry:
+; BMI2: xun:
+; BMI2: rorxq $7
%0 = lshr i64 %x, 7
%1 = shl i64 %x, 57
%2 = or i64 %0, %1
ret i64 %2
}
+define i64 @xunp(i64* %p) nounwind readnone {
+entry:
+; BMI2: xunp:
+; BMI2: rorxq $7, ({{.+}}), %{{.+}}
+ %x = load i64* %p
+ %a = lshr i64 %x, 7
+ %b = shl i64 %x, 57
+ %c = or i64 %a, %b
+ ret i64 %c
+}
+
define i64 @xbu(i64 %x, i64 %y, i64 %z) nounwind readnone {
entry:
%0 = lshr i64 %y, 7
Modified: llvm/branches/R600/test/CodeGen/X86/rotate2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/X86/rotate2.ll?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/X86/rotate2.ll (original)
+++ llvm/branches/R600/test/CodeGen/X86/rotate2.ll Tue Oct 2 09:15:33 2012
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 | grep rol | count 2
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 | grep rol | count 2
define i64 @test1(i64 %x) nounwind {
entry:
Added: llvm/branches/R600/test/CodeGen/X86/shift-bmi2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/X86/shift-bmi2.ll?rev=165011&view=auto
==============================================================================
--- llvm/branches/R600/test/CodeGen/X86/shift-bmi2.ll (added)
+++ llvm/branches/R600/test/CodeGen/X86/shift-bmi2.ll Tue Oct 2 09:15:33 2012
@@ -0,0 +1,178 @@
+; RUN: llc -mtriple=i386-unknown-unknown -mcpu=core-avx2 < %s | FileCheck --check-prefix=BMI2 %s
+; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=core-avx2 < %s | FileCheck --check-prefix=BMI264 %s
+
+define i32 @shl32(i32 %x, i32 %shamt) nounwind uwtable readnone {
+entry:
+ %shl = shl i32 %x, %shamt
+; BMI2: shl32
+; BMI2: shlxl
+; BMI2: ret
+; BMI264: shl32
+; BMI264: shlxl
+; BMI264: ret
+ ret i32 %shl
+}
+
+define i32 @shl32i(i32 %x) nounwind uwtable readnone {
+entry:
+ %shl = shl i32 %x, 5
+; BMI2: shl32i
+; BMI2-NOT: shlxl
+; BMI2: ret
+; BMI264: shl32i
+; BMI264-NOT: shlxl
+; BMI264: ret
+ ret i32 %shl
+}
+
+define i32 @shl32p(i32* %p, i32 %shamt) nounwind uwtable readnone {
+entry:
+ %x = load i32* %p
+ %shl = shl i32 %x, %shamt
+; BMI2: shl32p
+; BMI2: shlxl %{{.+}}, ({{.+}}), %{{.+}}
+; BMI2: ret
+; BMI264: shl32p
+; BMI264: shlxl %{{.+}}, ({{.+}}), %{{.+}}
+; BMI264: ret
+ ret i32 %shl
+}
+
+define i32 @shl32pi(i32* %p) nounwind uwtable readnone {
+entry:
+ %x = load i32* %p
+ %shl = shl i32 %x, 5
+; BMI2: shl32pi
+; BMI2-NOT: shlxl
+; BMI2: ret
+; BMI264: shl32pi
+; BMI264-NOT: shlxl
+; BMI264: ret
+ ret i32 %shl
+}
+
+define i64 @shl64(i64 %x, i64 %shamt) nounwind uwtable readnone {
+entry:
+ %shl = shl i64 %x, %shamt
+; BMI264: shl64
+; BMI264: shlxq
+; BMI264: ret
+ ret i64 %shl
+}
+
+define i64 @shl64i(i64 %x) nounwind uwtable readnone {
+entry:
+ %shl = shl i64 %x, 7
+; BMI264: shl64i
+; BMI264-NOT: shlxq
+; BMI264: ret
+ ret i64 %shl
+}
+
+define i64 @shl64p(i64* %p, i64 %shamt) nounwind uwtable readnone {
+entry:
+ %x = load i64* %p
+ %shl = shl i64 %x, %shamt
+; BMI264: shl64p
+; BMI264: shlxq %{{.+}}, ({{.+}}), %{{.+}}
+; BMI264: ret
+ ret i64 %shl
+}
+
+define i64 @shl64pi(i64* %p) nounwind uwtable readnone {
+entry:
+ %x = load i64* %p
+ %shl = shl i64 %x, 7
+; BMI264: shl64p
+; BMI264-NOT: shlxq
+; BMI264: ret
+ ret i64 %shl
+}
+
+define i32 @lshr32(i32 %x, i32 %shamt) nounwind uwtable readnone {
+entry:
+ %shl = lshr i32 %x, %shamt
+; BMI2: lshr32
+; BMI2: shrxl
+; BMI2: ret
+; BMI264: lshr32
+; BMI264: shrxl
+; BMI264: ret
+ ret i32 %shl
+}
+
+define i32 @lshr32p(i32* %p, i32 %shamt) nounwind uwtable readnone {
+entry:
+ %x = load i32* %p
+ %shl = lshr i32 %x, %shamt
+; BMI2: lshr32p
+; BMI2: shrxl %{{.+}}, ({{.+}}), %{{.+}}
+; BMI2: ret
+; BMI264: lshr32
+; BMI264: shrxl %{{.+}}, ({{.+}}), %{{.+}}
+; BMI264: ret
+ ret i32 %shl
+}
+
+define i64 @lshr64(i64 %x, i64 %shamt) nounwind uwtable readnone {
+entry:
+ %shl = lshr i64 %x, %shamt
+; BMI264: lshr64
+; BMI264: shrxq
+; BMI264: ret
+ ret i64 %shl
+}
+
+define i64 @lshr64p(i64* %p, i64 %shamt) nounwind uwtable readnone {
+entry:
+ %x = load i64* %p
+ %shl = lshr i64 %x, %shamt
+; BMI264: lshr64p
+; BMI264: shrxq %{{.+}}, ({{.+}}), %{{.+}}
+; BMI264: ret
+ ret i64 %shl
+}
+
+define i32 @ashr32(i32 %x, i32 %shamt) nounwind uwtable readnone {
+entry:
+ %shl = ashr i32 %x, %shamt
+; BMI2: ashr32
+; BMI2: sarxl
+; BMI2: ret
+; BMI264: ashr32
+; BMI264: sarxl
+; BMI264: ret
+ ret i32 %shl
+}
+
+define i32 @ashr32p(i32* %p, i32 %shamt) nounwind uwtable readnone {
+entry:
+ %x = load i32* %p
+ %shl = ashr i32 %x, %shamt
+; BMI2: ashr32p
+; BMI2: sarxl %{{.+}}, ({{.+}}), %{{.+}}
+; BMI2: ret
+; BMI264: ashr32
+; BMI264: sarxl %{{.+}}, ({{.+}}), %{{.+}}
+; BMI264: ret
+ ret i32 %shl
+}
+
+define i64 @ashr64(i64 %x, i64 %shamt) nounwind uwtable readnone {
+entry:
+ %shl = ashr i64 %x, %shamt
+; BMI264: ashr64
+; BMI264: sarxq
+; BMI264: ret
+ ret i64 %shl
+}
+
+define i64 @ashr64p(i64* %p, i64 %shamt) nounwind uwtable readnone {
+entry:
+ %x = load i64* %p
+ %shl = ashr i64 %x, %shamt
+; BMI264: ashr64p
+; BMI264: sarxq %{{.+}}, ({{.+}}), %{{.+}}
+; BMI264: ret
+ ret i64 %shl
+}
Modified: llvm/branches/R600/test/CodeGen/X86/targetLoweringGeneric.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/X86/targetLoweringGeneric.ll?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/test/CodeGen/X86/targetLoweringGeneric.ll (original)
+++ llvm/branches/R600/test/CodeGen/X86/targetLoweringGeneric.ll Tue Oct 2 09:15:33 2012
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=i386-apple-darwin9 -fast-isel=false -O0 < %s | FileCheck %s
+; RUN: llc -mtriple=i386-apple-darwin9 -mcpu=corei7 -fast-isel=false -O0 < %s | FileCheck %s
; Gather non-machine specific tests for the transformations in
; CodeGen/SelectionDAG/TargetLowering. Currently, these
Modified: llvm/branches/R600/test/DebugInfo/bug_null_debuginfo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/DebugInfo/bug_null_debuginfo.ll?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/test/DebugInfo/bug_null_debuginfo.ll (original)
+++ llvm/branches/R600/test/DebugInfo/bug_null_debuginfo.ll Tue Oct 2 09:15:33 2012
@@ -1,5 +1,4 @@
-; RUN: llc
-
+; RUN: llc < %s
!llvm.dbg.cu = !{!0}
Added: llvm/branches/R600/test/MC/MachO/i386-large-relocations.s
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/MC/MachO/i386-large-relocations.s?rev=165011&view=auto
==============================================================================
--- llvm/branches/R600/test/MC/MachO/i386-large-relocations.s (added)
+++ llvm/branches/R600/test/MC/MachO/i386-large-relocations.s Tue Oct 2 09:15:33 2012
@@ -0,0 +1,36 @@
+// RUN: llvm-mc -triple i386-apple-darwin10 %s -filetype=obj -o - | macho-dump | FileCheck %s
+
+.space 0x1ed280
+ .section __DATA,__const
+ .align 4
+.space 0x5181020
+_foo:
+ .long _bar
+ .long 0
+ .long _bar+8
+ .long _bar+24
+ .long 0
+ .long _bar+16
+
+.zerofill __DATA,__bss,__dummy,0x5d780
+.zerofill __DATA,__bss,_bar,48,4
+
+// Normally scattered relocations are used for sym+offset expressions. When
+// the value exceeds 24-bits, however, it's outside what MachO can encode,
+// so the assembler falls back to non-scattered relocations.
+// rdar://12358909
+
+// CHECK: ('_relocations', [
+// CHECK: # Relocation 0
+// CHECK: (('word-0', 0x5181034),
+// CHECK: ('word-1', 0x4000003)),
+// CHECK: # Relocation 1
+// CHECK: (('word-0', 0x518102c),
+// CHECK: ('word-1', 0x4000003)),
+// CHECK: # Relocation 2
+// CHECK: (('word-0', 0x5181028),
+// CHECK: ('word-1', 0x4000003)),
+// CHECK: # Relocation 3
+// CHECK: (('word-0', 0x5181020),
+// CHECK: ('word-1', 0x4000003)),
+// CHECK: ])
Modified: llvm/branches/R600/test/Other/lint.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Other/lint.ll?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/test/Other/lint.ll (original)
+++ llvm/branches/R600/test/Other/lint.ll Tue Oct 2 09:15:33 2012
@@ -9,6 +9,7 @@
declare void @one_arg(i32)
@CG = constant i32 7
+ at E = external global i8
define i32 @foo() noreturn {
%buf = alloca i8
@@ -79,6 +80,18 @@
; CHECK: Write to read-only memory
call void @llvm.memcpy.p0i8.p0i8.i64(i8* bitcast (i32* @CG to i8*), i8* bitcast (i32* @CG to i8*), i64 1, i32 1, i1 0)
+; CHECK: Undefined behavior: Buffer overflow
+ %wider = bitcast i8* %buf to i16*
+ store i16 0, i16* %wider
+; CHECK: Undefined behavior: Buffer overflow
+ %inner = getelementptr {i8, i8}* %buf2, i32 0, i32 1
+ %wider2 = bitcast i8* %inner to i16*
+ store i16 0, i16* %wider2
+; CHECK: Undefined behavior: Buffer overflow
+ %before = getelementptr i8* %buf, i32 -1
+ %wider3 = bitcast i8* %before to i16*
+ store i16 0, i16* %wider3
+
br label %next
next:
@@ -88,6 +101,10 @@
ret i32 0
foo:
+; CHECK-NOT: Undefined behavior: Buffer overflow
+; CHECK-NOT: Memory reference address is misaligned
+ %e = bitcast i8* @E to i64*
+ store i64 0, i64* %e
%z = add i32 0, 0
; CHECK: unreachable immediately preceded by instruction without side effects
unreachable
Modified: llvm/branches/R600/test/Transforms/CorrelatedValuePropagation/crash.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/CorrelatedValuePropagation/crash.ll?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/test/Transforms/CorrelatedValuePropagation/crash.ll (original)
+++ llvm/branches/R600/test/Transforms/CorrelatedValuePropagation/crash.ll Tue Oct 2 09:15:33 2012
@@ -35,3 +35,28 @@
func_29.exit:
ret void
}
+
+; PR13972
+define void @test3() nounwind {
+for.body:
+ br label %return
+
+for.cond.i: ; preds = %if.else.i, %for.body.i
+ %e.2.i = phi i32 [ %e.2.i, %if.else.i ], [ -8, %for.body.i ]
+ br i1 undef, label %return, label %for.body.i
+
+for.body.i: ; preds = %for.cond.i
+ switch i32 %e.2.i, label %for.cond3.i [
+ i32 -3, label %if.else.i
+ i32 0, label %for.cond.i
+ ]
+
+for.cond3.i: ; preds = %for.cond3.i, %for.body.i
+ br label %for.cond3.i
+
+if.else.i: ; preds = %for.body.i
+ br label %for.cond.i
+
+return: ; preds = %for.cond.i, %for.body
+ ret void
+}
Modified: llvm/branches/R600/test/Transforms/GlobalOpt/load-store-global.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/GlobalOpt/load-store-global.ll?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/test/Transforms/GlobalOpt/load-store-global.ll (original)
+++ llvm/branches/R600/test/Transforms/GlobalOpt/load-store-global.ll Tue Oct 2 09:15:33 2012
@@ -1,15 +1,38 @@
-; RUN: opt < %s -globalopt -S | not grep G
+; RUN: opt < %s -globalopt -S | FileCheck %s
@G = internal global i32 17 ; <i32*> [#uses=3]
+; CHECK-NOT: @G
define void @foo() {
%V = load i32* @G ; <i32> [#uses=1]
store i32 %V, i32* @G
ret void
+; CHECK: @foo
+; CHECK-NEXT: ret void
}
define i32 @bar() {
%X = load i32* @G ; <i32> [#uses=1]
ret i32 %X
+; CHECK: @bar
+; CHECK-NEXT: ret i32 17
+}
+
+ at a = internal global i64* null, align 8
+; CHECK-NOT: @a
+
+; PR13968
+define void @qux() nounwind {
+ %b = bitcast i64** @a to i8*
+ %g = getelementptr i64** @a, i32 1
+ %cmp = icmp ne i8* null, %b
+ %cmp2 = icmp eq i8* null, %b
+ %cmp3 = icmp eq i64** null, %g
+ store i64* inttoptr (i64 1 to i64*), i64** @a, align 8
+ %l = load i64** @a, align 8
+ ret void
+; CHECK: @qux
+; CHECK-NOT: store
+; CHECK-NOT: load
}
Added: llvm/branches/R600/test/Transforms/InstCombine/memcpy_chk-1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/InstCombine/memcpy_chk-1.ll?rev=165011&view=auto
==============================================================================
--- llvm/branches/R600/test/Transforms/InstCombine/memcpy_chk-1.ll (added)
+++ llvm/branches/R600/test/Transforms/InstCombine/memcpy_chk-1.ll Tue Oct 2 09:15:33 2012
@@ -0,0 +1,60 @@
+; Test lib call simplification of __memcpy_chk calls with various values
+; for dstlen and len.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+%struct.T1 = type { [100 x i32], [100 x i32], [1024 x i8] }
+%struct.T2 = type { [100 x i32], [100 x i32], [1024 x i8] }
+%struct.T3 = type { [100 x i32], [100 x i32], [2048 x i8] }
+
+ at t1 = common global %struct.T1 zeroinitializer
+ at t2 = common global %struct.T2 zeroinitializer
+ at t3 = common global %struct.T3 zeroinitializer
+
+; Check cases where dstlen >= len.
+
+define void @test_simplify1() {
+; CHECK: @test_simplify1
+ %dst = bitcast %struct.T1* @t1 to i8*
+ %src = bitcast %struct.T2* @t2 to i8*
+
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64
+ call i8* @__memcpy_chk(i8* %dst, i8* %src, i64 1824, i64 1824)
+ ret void
+}
+
+define void @test_simplify2() {
+; CHECK: @test_simplify2
+ %dst = bitcast %struct.T1* @t1 to i8*
+ %src = bitcast %struct.T3* @t3 to i8*
+
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64
+ call i8* @__memcpy_chk(i8* %dst, i8* %src, i64 1824, i64 2848)
+ ret void
+}
+
+; Check cases where dstlen < len.
+
+define void @test_no_simplify1() {
+; CHECK: @test_no_simplify1
+ %dst = bitcast %struct.T3* @t3 to i8*
+ %src = bitcast %struct.T1* @t1 to i8*
+
+; CHECK-NEXT: call i8* @__memcpy_chk
+ call i8* @__memcpy_chk(i8* %dst, i8* %src, i64 2848, i64 1824)
+ ret void
+}
+
+define void @test_no_simplify2() {
+; CHECK: @test_no_simplify2
+ %dst = bitcast %struct.T1* @t1 to i8*
+ %src = bitcast %struct.T2* @t2 to i8*
+
+; CHECK-NEXT: call i8* @__memcpy_chk
+ call i8* @__memcpy_chk(i8* %dst, i8* %src, i64 1024, i64 0)
+ ret void
+}
+
+declare i8* @__memcpy_chk(i8*, i8*, i64, i64)
Added: llvm/branches/R600/test/Transforms/InstCombine/memcpy_chk-2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/InstCombine/memcpy_chk-2.ll?rev=165011&view=auto
==============================================================================
--- llvm/branches/R600/test/Transforms/InstCombine/memcpy_chk-2.ll (added)
+++ llvm/branches/R600/test/Transforms/InstCombine/memcpy_chk-2.ll Tue Oct 2 09:15:33 2012
@@ -0,0 +1,24 @@
+; Test that lib call simplification doesn't simplify __memcpy_chk calls
+; with the wrong prototype.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+%struct.T1 = type { [100 x i32], [100 x i32], [1024 x i8] }
+%struct.T2 = type { [100 x i32], [100 x i32], [1024 x i8] }
+
+ at t1 = common global %struct.T1 zeroinitializer
+ at t2 = common global %struct.T2 zeroinitializer
+
+define void @test_no_simplify() {
+; CHECK: @test_no_simplify
+ %dst = bitcast %struct.T1* @t1 to i8*
+ %src = bitcast %struct.T2* @t2 to i8*
+
+; CHECK-NEXT: call i8* @__memcpy_chk
+ call i8* @__memcpy_chk(i8* %dst, i8* %src, i64 1824)
+ ret void
+}
+
+declare i8* @__memcpy_chk(i8*, i8*, i64)
Added: llvm/branches/R600/test/Transforms/InstCombine/memmove_chk-1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/InstCombine/memmove_chk-1.ll?rev=165011&view=auto
==============================================================================
--- llvm/branches/R600/test/Transforms/InstCombine/memmove_chk-1.ll (added)
+++ llvm/branches/R600/test/Transforms/InstCombine/memmove_chk-1.ll Tue Oct 2 09:15:33 2012
@@ -0,0 +1,60 @@
+; Test lib call simplification of __memmove_chk calls with various values
+; for dstlen and len.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+%struct.T1 = type { [100 x i32], [100 x i32], [1024 x i8] }
+%struct.T2 = type { [100 x i32], [100 x i32], [1024 x i8] }
+%struct.T3 = type { [100 x i32], [100 x i32], [2048 x i8] }
+
+ at t1 = common global %struct.T1 zeroinitializer
+ at t2 = common global %struct.T2 zeroinitializer
+ at t3 = common global %struct.T3 zeroinitializer
+
+; Check cases where dstlen >= len.
+
+define void @test_simplify1() {
+; CHECK: @test_simplify1
+ %dst = bitcast %struct.T1* @t1 to i8*
+ %src = bitcast %struct.T2* @t2 to i8*
+
+; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64
+ call i8* @__memmove_chk(i8* %dst, i8* %src, i64 1824, i64 1824)
+ ret void
+}
+
+define void @test_simplify2() {
+; CHECK: @test_simplify2
+ %dst = bitcast %struct.T1* @t1 to i8*
+ %src = bitcast %struct.T3* @t3 to i8*
+
+; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64
+ call i8* @__memmove_chk(i8* %dst, i8* %src, i64 1824, i64 2848)
+ ret void
+}
+
+; Check cases where dstlen < len.
+
+define void @test_no_simplify1() {
+; CHECK: @test_no_simplify1
+ %dst = bitcast %struct.T3* @t3 to i8*
+ %src = bitcast %struct.T1* @t1 to i8*
+
+; CHECK-NEXT: call i8* @__memmove_chk
+ call i8* @__memmove_chk(i8* %dst, i8* %src, i64 2848, i64 1824)
+ ret void
+}
+
+define void @test_no_simplify2() {
+; CHECK: @test_no_simplify2
+ %dst = bitcast %struct.T1* @t1 to i8*
+ %src = bitcast %struct.T2* @t2 to i8*
+
+; CHECK-NEXT: call i8* @__memmove_chk
+ call i8* @__memmove_chk(i8* %dst, i8* %src, i64 1024, i64 0)
+ ret void
+}
+
+declare i8* @__memmove_chk(i8*, i8*, i64, i64)
Added: llvm/branches/R600/test/Transforms/InstCombine/memmove_chk-2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/InstCombine/memmove_chk-2.ll?rev=165011&view=auto
==============================================================================
--- llvm/branches/R600/test/Transforms/InstCombine/memmove_chk-2.ll (added)
+++ llvm/branches/R600/test/Transforms/InstCombine/memmove_chk-2.ll Tue Oct 2 09:15:33 2012
@@ -0,0 +1,24 @@
+; Test that lib call simplification doesn't simplify __memmove_chk calls
+; with the wrong prototype.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+%struct.T1 = type { [100 x i32], [100 x i32], [1024 x i8] }
+%struct.T2 = type { [100 x i32], [100 x i32], [1024 x i8] }
+
+ at t1 = common global %struct.T1 zeroinitializer
+ at t2 = common global %struct.T2 zeroinitializer
+
+define void @test_no_simplify() {
+; CHECK: @test_no_simplify
+ %dst = bitcast %struct.T1* @t1 to i8*
+ %src = bitcast %struct.T2* @t2 to i8*
+
+; CHECK-NEXT: call i8* @__memmove_chk
+ call i8* @__memmove_chk(i8* %dst, i8* %src, i64 1824)
+ ret void
+}
+
+declare i8* @__memmove_chk(i8*, i8*, i64)
Added: llvm/branches/R600/test/Transforms/InstCombine/memset_chk-1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/InstCombine/memset_chk-1.ll?rev=165011&view=auto
==============================================================================
--- llvm/branches/R600/test/Transforms/InstCombine/memset_chk-1.ll (added)
+++ llvm/branches/R600/test/Transforms/InstCombine/memset_chk-1.ll Tue Oct 2 09:15:33 2012
@@ -0,0 +1,61 @@
+; Test lib call simplification of __memset_chk calls with various values
+; for dstlen and len.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; rdar://7719085
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+%struct.T = type { [100 x i32], [100 x i32], [1024 x i8] }
+ at t = common global %struct.T zeroinitializer
+
+; Check cases where dstlen >= len.
+
+define void @test_simplify1() {
+; CHECK: @test_simplify1
+ %dst = bitcast %struct.T* @t to i8*
+
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64
+ call i8* @__memset_chk(i8* %dst, i32 0, i64 1824, i64 1824)
+ ret void
+}
+
+define void @test_simplify2() {
+; CHECK: @test_simplify2
+ %dst = bitcast %struct.T* @t to i8*
+
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64
+ call i8* @__memset_chk(i8* %dst, i32 0, i64 1824, i64 3648)
+ ret void
+}
+
+define void @test_simplify3() {
+; CHECK: @test_simplify3
+ %dst = bitcast %struct.T* @t to i8*
+
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64
+ call i8* @__memset_chk(i8* %dst, i32 0, i64 1824, i64 -1)
+ ret void
+}
+
+; Check cases where dstlen < len.
+
+define void @test_no_simplify1() {
+; CHECK: @test_no_simplify1
+ %dst = bitcast %struct.T* @t to i8*
+
+; CHECK-NEXT: call i8* @__memset_chk
+ call i8* @__memset_chk(i8* %dst, i32 0, i64 1824, i64 400)
+ ret void
+}
+
+define void @test_no_simplify2() {
+; CHECK: @test_no_simplify2
+ %dst = bitcast %struct.T* @t to i8*
+
+; CHECK-NEXT: call i8* @__memset_chk
+ call i8* @__memset_chk(i8* %dst, i32 0, i64 1824, i64 0)
+ ret void
+}
+
+declare i8* @__memset_chk(i8*, i32, i64, i64)
Added: llvm/branches/R600/test/Transforms/InstCombine/memset_chk-2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/InstCombine/memset_chk-2.ll?rev=165011&view=auto
==============================================================================
--- llvm/branches/R600/test/Transforms/InstCombine/memset_chk-2.ll (added)
+++ llvm/branches/R600/test/Transforms/InstCombine/memset_chk-2.ll Tue Oct 2 09:15:33 2012
@@ -0,0 +1,20 @@
+; Test that lib call simplification doesn't simplify __memset_chk calls
+; with the wrong prototype.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+%struct.T = type { [100 x i32], [100 x i32], [1024 x i8] }
+ at t = common global %struct.T zeroinitializer
+
+define void @test_no_simplify() {
+; CHECK: @test_no_simplify
+ %dst = bitcast %struct.T* @t to i8*
+
+; CHECK-NEXT: call i8* @__memset_chk
+ call i8* @__memset_chk(i8* %dst, i32 0, i64 1824)
+ ret void
+}
+
+declare i8* @__memset_chk(i8*, i32, i64)
Removed: llvm/branches/R600/test/Transforms/InstCombine/memset_chk.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/InstCombine/memset_chk.ll?rev=165010&view=auto
==============================================================================
--- llvm/branches/R600/test/Transforms/InstCombine/memset_chk.ll (original)
+++ llvm/branches/R600/test/Transforms/InstCombine/memset_chk.ll (removed)
@@ -1,18 +0,0 @@
-; RUN: opt < %s -instcombine -S | FileCheck %s
-; rdar://7719085
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-
-%struct.data = type { [100 x i32], [100 x i32], [1024 x i8] }
-
-define i32 @t() nounwind ssp {
-; CHECK: @t
-; CHECK: @llvm.memset.p0i8.i64
-entry:
- %0 = alloca %struct.data, align 8 ; <%struct.data*> [#uses=1]
- %1 = bitcast %struct.data* %0 to i8* ; <i8*> [#uses=1]
- %2 = call i8* @__memset_chk(i8* %1, i32 0, i64 1824, i64 1824) nounwind ; <i8*> [#uses=0]
- ret i32 0
-}
-
-declare i8* @__memset_chk(i8*, i32, i64, i64) nounwind
Added: llvm/branches/R600/test/Transforms/InstCombine/strcpy_chk-1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/InstCombine/strcpy_chk-1.ll?rev=165011&view=auto
==============================================================================
--- llvm/branches/R600/test/Transforms/InstCombine/strcpy_chk-1.ll (added)
+++ llvm/branches/R600/test/Transforms/InstCombine/strcpy_chk-1.ll Tue Oct 2 09:15:33 2012
@@ -0,0 +1,88 @@
+; Test lib call simplification of __strcpy_chk calls with various values
+; for src, dst, and slen.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+ at a = common global [60 x i8] zeroinitializer, align 1
+ at b = common global [60 x i8] zeroinitializer, align 1
+ at .str = private constant [8 x i8] c"abcdefg\00"
+
+; Check cases where slen >= strlen (src).
+
+define void @test_simplify1() {
+; CHECK: @test_simplify1
+ %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
+ %src = getelementptr inbounds [8 x i8]* @.str, i32 0, i32 0
+
+; CHECK-NEXT: call i8* @strcpy
+ call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 60)
+ ret void
+}
+
+define void @test_simplify2() {
+; CHECK: @test_simplify2
+ %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
+ %src = getelementptr inbounds [8 x i8]* @.str, i32 0, i32 0
+
+; CHECK-NEXT: call i8* @strcpy
+ call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 8)
+ ret void
+}
+
+define void @test_simplify3() {
+; CHECK: @test_simplify3
+ %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
+ %src = getelementptr inbounds [8 x i8]* @.str, i32 0, i32 0
+
+; CHECK-NEXT: call i8* @strcpy
+ call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 -1)
+ ret void
+}
+
+; Check cases where there are no string constants.
+
+define void @test_simplify4() {
+; CHECK: @test_simplify4
+ %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
+ %src = getelementptr inbounds [60 x i8]* @b, i32 0, i32 0
+
+; CHECK-NEXT: call i8* @strcpy
+ call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 -1)
+ ret void
+}
+
+define void @test_no_simplify1() {
+; CHECK: @test_no_simplify1
+ %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
+ %src = getelementptr inbounds [60 x i8]* @b, i32 0, i32 0
+
+; CHECK-NEXT: call i8* @__strcpy_chk
+ call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 8)
+ ret void
+}
+
+; Check case were slen < strlen (src).
+
+define void @test_no_simplify2() {
+; CHECK: @test_no_simplify2
+ %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
+ %src = getelementptr inbounds [8 x i8]* @.str, i32 0, i32 0
+
+; CHECK-NEXT: call i8* @__strcpy_chk
+ call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 3)
+ ret void
+}
+
+define void @test_no_simplify3() {
+; CHECK: @test_no_simplify3
+ %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
+ %src = getelementptr inbounds [8 x i8]* @.str, i32 0, i32 0
+
+; CHECK-NEXT: call i8* @__strcpy_chk
+ call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 0)
+ ret void
+}
+
+declare i8* @__strcpy_chk(i8*, i8*, i32) nounwind
Added: llvm/branches/R600/test/Transforms/InstCombine/strcpy_chk-2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/InstCombine/strcpy_chk-2.ll?rev=165011&view=auto
==============================================================================
--- llvm/branches/R600/test/Transforms/InstCombine/strcpy_chk-2.ll (added)
+++ llvm/branches/R600/test/Transforms/InstCombine/strcpy_chk-2.ll Tue Oct 2 09:15:33 2012
@@ -0,0 +1,21 @@
+; Test that lib call simplification doesn't simplify __strcpy_chk calls
+; with the wrong prototype.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+ at a = common global [60 x i16] zeroinitializer, align 1
+ at .str = private constant [8 x i8] c"abcdefg\00"
+
+define void @test_no_simplify() {
+; CHECK: @test_no_simplify
+ %dst = getelementptr inbounds [60 x i16]* @a, i32 0, i32 0
+ %src = getelementptr inbounds [8 x i8]* @.str, i32 0, i32 0
+
+; CHECK-NEXT: call i16* @__strcpy_chk
+ call i16* @__strcpy_chk(i16* %dst, i8* %src, i32 8)
+ ret void
+}
+
+declare i16* @__strcpy_chk(i16*, i8*, i32)
Removed: llvm/branches/R600/test/Transforms/InstCombine/strcpy_chk.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/InstCombine/strcpy_chk.ll?rev=165010&view=auto
==============================================================================
--- llvm/branches/R600/test/Transforms/InstCombine/strcpy_chk.ll (original)
+++ llvm/branches/R600/test/Transforms/InstCombine/strcpy_chk.ll (removed)
@@ -1,13 +0,0 @@
-; RUN: opt < %s -instcombine -S | FileCheck %s
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
- at a = common global [60 x i8] zeroinitializer, align 1 ; <[60 x i8]*> [#uses=1]
- at .str = private constant [8 x i8] c"abcdefg\00" ; <[8 x i8]*> [#uses=1]
-
-define i8* @foo() nounwind {
-; CHECK: @foo
-; CHECK-NEXT: call i8* @strcpy
- %call = call i8* @__strcpy_chk(i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0), i8* getelementptr inbounds ([8 x i8]* @.str, i32 0, i32 0), i32 60) ; <i8*> [#uses=1]
- ret i8* %call
-}
-
-declare i8* @__strcpy_chk(i8*, i8*, i32) nounwind
Added: llvm/branches/R600/test/Transforms/InstCombine/strncpy_chk-1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/InstCombine/strncpy_chk-1.ll?rev=165011&view=auto
==============================================================================
--- llvm/branches/R600/test/Transforms/InstCombine/strncpy_chk-1.ll (added)
+++ llvm/branches/R600/test/Transforms/InstCombine/strncpy_chk-1.ll Tue Oct 2 09:15:33 2012
@@ -0,0 +1,66 @@
+; Test lib call simplification of __strncpy_chk calls with various values
+; for len and dstlen.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+ at a = common global [60 x i8] zeroinitializer, align 1
+ at b = common global [60 x i8] zeroinitializer, align 1
+ at .str = private constant [8 x i8] c"abcdefg\00"
+
+; Check cases where dstlen >= len
+
+define void @test_simplify1() {
+; CHECK: @test_simplify1
+ %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
+ %src = getelementptr inbounds [8 x i8]* @.str, i32 0, i32 0
+
+; CHECK-NEXT: call i8* @strncpy
+ call i8* @__strncpy_chk(i8* %dst, i8* %src, i32 8, i32 60)
+ ret void
+}
+
+define void @test_simplify2() {
+; CHECK: @test_simplify2
+ %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
+ %src = getelementptr inbounds [8 x i8]* @.str, i32 0, i32 0
+
+; CHECK-NEXT: call i8* @strncpy
+ call i8* @__strncpy_chk(i8* %dst, i8* %src, i32 8, i32 8)
+ ret void
+}
+
+define void @test_simplify3() {
+; CHECK: @test_simplify3
+ %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
+ %src = getelementptr inbounds [60 x i8]* @b, i32 0, i32 0
+
+; CHECK-NEXT: call i8* @strncpy
+ call i8* @__strncpy_chk(i8* %dst, i8* %src, i32 8, i32 60)
+ ret void
+}
+
+; Check cases where dstlen < len
+
+define void @test_no_simplify1() {
+; CHECK: @test_no_simplify1
+ %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
+ %src = getelementptr inbounds [8 x i8]* @.str, i32 0, i32 0
+
+; CHECK-NEXT: call i8* @__strncpy_chk
+ call i8* @__strncpy_chk(i8* %dst, i8* %src, i32 8, i32 4)
+ ret void
+}
+
+define void @test_no_simplify2() {
+; CHECK: @test_no_simplify2
+ %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
+ %src = getelementptr inbounds [60 x i8]* @b, i32 0, i32 0
+
+; CHECK-NEXT: call i8* @__strncpy_chk
+ call i8* @__strncpy_chk(i8* %dst, i8* %src, i32 8, i32 0)
+ ret void
+}
+
+declare i8* @__strncpy_chk(i8*, i8*, i32, i32)
Added: llvm/branches/R600/test/Transforms/InstCombine/strncpy_chk-2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/InstCombine/strncpy_chk-2.ll?rev=165011&view=auto
==============================================================================
--- llvm/branches/R600/test/Transforms/InstCombine/strncpy_chk-2.ll (added)
+++ llvm/branches/R600/test/Transforms/InstCombine/strncpy_chk-2.ll Tue Oct 2 09:15:33 2012
@@ -0,0 +1,21 @@
+; Test that lib call simplification doesn't simplify __strncpy_chk calls
+; with the wrong prototype.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+ at a = common global [60 x i16] zeroinitializer, align 1
+ at b = common global [60 x i16] zeroinitializer, align 1
+
+define void @test_no_simplify() {
+; CHECK: @test_no_simplify
+ %dst = getelementptr inbounds [60 x i16]* @a, i32 0, i32 0
+ %src = getelementptr inbounds [60 x i16]* @b, i32 0, i32 0
+
+; CHECK-NEXT: call i16* @__strncpy_chk
+ call i16* @__strncpy_chk(i16* %dst, i16* %src, i32 60, i32 60)
+ ret void
+}
+
+declare i16* @__strncpy_chk(i16*, i16*, i32, i32)
Modified: llvm/branches/R600/test/Transforms/InstCombine/vec_demanded_elts.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/InstCombine/vec_demanded_elts.ll?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/test/Transforms/InstCombine/vec_demanded_elts.ll (original)
+++ llvm/branches/R600/test/Transforms/InstCombine/vec_demanded_elts.ll Tue Oct 2 09:15:33 2012
@@ -196,7 +196,7 @@
; CHECK-NOT: insertelement
; CHECK: %a3 = insertelement <4 x float> %a0, float 3.000000e+00, i32 3
; CHECK-NOT: insertelement
-; CHECK: %ret = select <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x float> %a3, <4 x float> <float undef, float 4.000000e+00, float 5.000000e+00, float undef>
+; CHECK: shufflevector <4 x float> %a3, <4 x float> <float undef, float 4.000000e+00, float 5.000000e+00, float undef>, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
%a0 = insertelement <4 x float> undef, float %f, i32 0
%a1 = insertelement <4 x float> %a0, float 1.000000e+00, i32 1
%a2 = insertelement <4 x float> %a1, float 2.000000e+00, i32 2
Modified: llvm/branches/R600/test/Transforms/InstCombine/vec_shuffle.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/InstCombine/vec_shuffle.ll?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/test/Transforms/InstCombine/vec_shuffle.ll (original)
+++ llvm/branches/R600/test/Transforms/InstCombine/vec_shuffle.ll Tue Oct 2 09:15:33 2012
@@ -153,3 +153,46 @@
ret <8 x i8> %tmp3
}
+; We should form a shuffle out of a select with constant condition.
+define <4 x i16> @test13a(<4 x i16> %lhs, <4 x i16> %rhs) {
+; CHECK: @test13a
+; CHECK-NEXT: shufflevector <4 x i16> %lhs, <4 x i16> %rhs, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+; CHECK-NEXT: ret
+ %A = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>,
+ <4 x i16> %lhs, <4 x i16> %rhs
+ ret <4 x i16> %A
+}
+
+define <4 x i16> @test13b(<4 x i16> %lhs, <4 x i16> %rhs) {
+; CHECK: @test13b
+; CHECK-NEXT: ret <4 x i16> %lhs
+ %A = select <4 x i1> <i1 true, i1 undef, i1 true, i1 true>,
+ <4 x i16> %lhs, <4 x i16> %rhs
+ ret <4 x i16> %A
+}
+
+define <4 x i16> @test13c(<4 x i16> %lhs, <4 x i16> %rhs) {
+; CHECK: @test13c
+; CHECK-NEXT: shufflevector <4 x i16> %lhs, <4 x i16> %rhs, <4 x i32> <i32 0, i32 undef, i32 2, i32 7>
+; CHECK-NEXT: ret
+ %A = select <4 x i1> <i1 true, i1 undef, i1 true, i1 false>,
+ <4 x i16> %lhs, <4 x i16> %rhs
+ ret <4 x i16> %A
+}
+
+define <4 x i16> @test13d(<4 x i16> %lhs, <4 x i16> %rhs) {
+; CHECK: @test13d
+; CHECK: select
+; CHECK-NEXT: ret
+ %A = select <4 x i1> <i1 true, i1 icmp ugt (<4 x i16>(<4 x i16>, <4 x i16>)* @test13a, <4 x i16>(<4 x i16>, <4 x i16>)* @test13b), i1 true, i1 false>,
+ <4 x i16> %lhs, <4 x i16> %rhs
+ ret <4 x i16> %A
+}
+
+define <4 x i16> @test13e(<4 x i16> %lhs, <4 x i16> %rhs) {
+; CHECK: @test13e
+; CHECK-NEXT: ret <4 x i16> %rhs
+ %A = select <4 x i1> <i1 false, i1 false, i1 false, i1 false>,
+ <4 x i16> %lhs, <4 x i16> %rhs
+ ret <4 x i16> %A
+}
Modified: llvm/branches/R600/test/Transforms/LoopUnroll/pr11361.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/LoopUnroll/pr11361.ll?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/test/Transforms/LoopUnroll/pr11361.ll (original)
+++ llvm/branches/R600/test/Transforms/LoopUnroll/pr11361.ll Tue Oct 2 09:15:33 2012
@@ -1,4 +1,4 @@
-; RUN: opt -loop-unroll -disable-output
+; RUN: opt -loop-unroll -disable-output < %s
; PR11361
; This tests for an iterator invalidation issue.
Modified: llvm/branches/R600/test/Transforms/LoopUnswitch/2011-06-02-CritSwitch.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/LoopUnswitch/2011-06-02-CritSwitch.ll?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/test/Transforms/LoopUnswitch/2011-06-02-CritSwitch.ll (original)
+++ llvm/branches/R600/test/Transforms/LoopUnswitch/2011-06-02-CritSwitch.ll Tue Oct 2 09:15:33 2012
@@ -1,4 +1,4 @@
-; RUN: opt -loop-unswitch -disable-output
+; RUN: opt -loop-unswitch -disable-output < %s
; PR10031
define i32 @test(i32 %command) {
Added: llvm/branches/R600/test/Transforms/PhaseOrdering/gdce.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/PhaseOrdering/gdce.ll?rev=165011&view=auto
==============================================================================
--- llvm/branches/R600/test/Transforms/PhaseOrdering/gdce.ll (added)
+++ llvm/branches/R600/test/Transforms/PhaseOrdering/gdce.ll Tue Oct 2 09:15:33 2012
@@ -0,0 +1,106 @@
+; RUN: opt -O2 -S %s | FileCheck %s
+
+; Run global DCE to eliminate unused ctor and dtor.
+; rdar://9142819
+
+; CHECK: main
+; CHECK-NOT: _ZN4BaseC1Ev
+; CHECK-NOT: _ZN4BaseD1Ev
+; CHECK-NOT: _ZN4BaseD2Ev
+; CHECK-NOT: _ZN4BaseC2Ev
+; CHECK-NOT: _ZN4BaseD0Ev
+
+%class.Base = type { i32 (...)** }
+
+ at _ZTV4Base = linkonce_odr unnamed_addr constant [4 x i8*] [i8* null, i8* bitcast ({ i8*, i8* }* @_ZTI4Base to i8*), i8* bitcast (void (%class.Base*)* @_ZN4BaseD1Ev to i8*), i8* bitcast (void (%class.Base*)* @_ZN4BaseD0Ev to i8*)]
+ at _ZTVN10__cxxabiv117__class_type_infoE = external global i8*
+ at _ZTS4Base = linkonce_odr constant [6 x i8] c"4Base\00"
+ at _ZTI4Base = linkonce_odr unnamed_addr constant { i8*, i8* } { i8* bitcast (i8** getelementptr inbounds (i8** @_ZTVN10__cxxabiv117__class_type_infoE, i64 2) to i8*), i8* getelementptr inbounds ([6 x i8]* @_ZTS4Base, i32 0, i32 0) }
+
+define i32 @main() uwtable ssp {
+entry:
+ %retval = alloca i32, align 4
+ %b = alloca %class.Base, align 8
+ %cleanup.dest.slot = alloca i32
+ store i32 0, i32* %retval
+ call void @_ZN4BaseC1Ev(%class.Base* %b)
+ store i32 0, i32* %retval
+ store i32 1, i32* %cleanup.dest.slot
+ call void @_ZN4BaseD1Ev(%class.Base* %b)
+ %0 = load i32* %retval
+ ret i32 %0
+}
+
+define linkonce_odr void @_ZN4BaseC1Ev(%class.Base* %this) unnamed_addr uwtable ssp align 2 {
+entry:
+ %this.addr = alloca %class.Base*, align 8
+ store %class.Base* %this, %class.Base** %this.addr, align 8
+ %this1 = load %class.Base** %this.addr
+ call void @_ZN4BaseC2Ev(%class.Base* %this1)
+ ret void
+}
+
+define linkonce_odr void @_ZN4BaseD1Ev(%class.Base* %this) unnamed_addr uwtable ssp align 2 {
+entry:
+ %this.addr = alloca %class.Base*, align 8
+ store %class.Base* %this, %class.Base** %this.addr, align 8
+ %this1 = load %class.Base** %this.addr
+ call void @_ZN4BaseD2Ev(%class.Base* %this1)
+ ret void
+}
+
+define linkonce_odr void @_ZN4BaseD2Ev(%class.Base* %this) unnamed_addr nounwind uwtable ssp align 2 {
+entry:
+ %this.addr = alloca %class.Base*, align 8
+ store %class.Base* %this, %class.Base** %this.addr, align 8
+ %this1 = load %class.Base** %this.addr
+ ret void
+}
+
+define linkonce_odr void @_ZN4BaseC2Ev(%class.Base* %this) unnamed_addr nounwind uwtable ssp align 2 {
+entry:
+ %this.addr = alloca %class.Base*, align 8
+ store %class.Base* %this, %class.Base** %this.addr, align 8
+ %this1 = load %class.Base** %this.addr
+ %0 = bitcast %class.Base* %this1 to i8***
+ store i8** getelementptr inbounds ([4 x i8*]* @_ZTV4Base, i64 0, i64 2), i8*** %0
+ ret void
+}
+
+define linkonce_odr void @_ZN4BaseD0Ev(%class.Base* %this) unnamed_addr uwtable ssp align 2 {
+entry:
+ %this.addr = alloca %class.Base*, align 8
+ %exn.slot = alloca i8*
+ %ehselector.slot = alloca i32
+ store %class.Base* %this, %class.Base** %this.addr, align 8
+ %this1 = load %class.Base** %this.addr
+ invoke void @_ZN4BaseD1Ev(%class.Base* %this1)
+ to label %invoke.cont unwind label %lpad
+
+invoke.cont: ; preds = %entry
+ %0 = bitcast %class.Base* %this1 to i8*
+ call void @_ZdlPv(i8* %0) nounwind
+ ret void
+
+lpad: ; preds = %entry
+ %1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ cleanup
+ %2 = extractvalue { i8*, i32 } %1, 0
+ store i8* %2, i8** %exn.slot
+ %3 = extractvalue { i8*, i32 } %1, 1
+ store i32 %3, i32* %ehselector.slot
+ %4 = bitcast %class.Base* %this1 to i8*
+ call void @_ZdlPv(i8* %4) nounwind
+ br label %eh.resume
+
+eh.resume: ; preds = %lpad
+ %exn = load i8** %exn.slot
+ %sel = load i32* %ehselector.slot
+ %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn, 0
+ %lpad.val2 = insertvalue { i8*, i32 } %lpad.val, i32 %sel, 1
+ resume { i8*, i32 } %lpad.val2
+}
+
+declare i32 @__gxx_personality_v0(...)
+
+declare void @_ZdlPv(i8*) nounwind
Added: llvm/branches/R600/test/Transforms/SROA/alignment.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/SROA/alignment.ll?rev=165011&view=auto
==============================================================================
--- llvm/branches/R600/test/Transforms/SROA/alignment.ll (added)
+++ llvm/branches/R600/test/Transforms/SROA/alignment.ll Tue Oct 2 09:15:33 2012
@@ -0,0 +1,116 @@
+; RUN: opt < %s -sroa -S | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1)
+
+define void @test1({ i8, i8 }* %a, { i8, i8 }* %b) {
+; CHECK: @test1
+; CHECK: %[[gep_a0:.*]] = getelementptr inbounds { i8, i8 }* %a, i64 0, i32 0
+; CHECK: %[[a0:.*]] = load i8* %[[gep_a0]], align 16
+; CHECK: %[[gep_a1:.*]] = getelementptr inbounds { i8, i8 }* %a, i64 0, i32 1
+; CHECK: %[[a1:.*]] = load i8* %[[gep_a1]], align 1
+; CHECK: %[[gep_b0:.*]] = getelementptr inbounds { i8, i8 }* %b, i64 0, i32 0
+; CHECK: store i8 %[[a0]], i8* %[[gep_b0]], align 16
+; CHECK: %[[gep_b1:.*]] = getelementptr inbounds { i8, i8 }* %b, i64 0, i32 1
+; CHECK: store i8 %[[a1]], i8* %[[gep_b1]], align 1
+; CHECK: ret void
+
+entry:
+ %alloca = alloca { i8, i8 }, align 16
+ %gep_a = getelementptr { i8, i8 }* %a, i32 0, i32 0
+ %gep_alloca = getelementptr { i8, i8 }* %alloca, i32 0, i32 0
+ %gep_b = getelementptr { i8, i8 }* %b, i32 0, i32 0
+
+ store i8 420, i8* %gep_alloca, align 16
+
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %gep_alloca, i8* %gep_a, i32 2, i32 16, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %gep_b, i8* %gep_alloca, i32 2, i32 16, i1 false)
+ ret void
+}
+
+define void @test2() {
+; CHECK: @test2
+; CHECK: alloca i16
+; CHECK: load i8* %{{.*}}, align 1
+; CHECK: store i8 42, i8* %{{.*}}, align 1
+; CHECK: ret void
+
+entry:
+ %a = alloca { i8, i8, i8, i8 }, align 2
+ %gep1 = getelementptr { i8, i8, i8, i8 }* %a, i32 0, i32 1
+ %cast1 = bitcast i8* %gep1 to i16*
+ store volatile i16 0, i16* %cast1
+ %gep2 = getelementptr { i8, i8, i8, i8 }* %a, i32 0, i32 2
+ %result = load i8* %gep2, align 2
+ store i8 42, i8* %gep2, align 2
+ ret void
+}
+
+define void @PR13920(<2 x i64>* %a, i16* %b) {
+; Test that alignments on memcpy intrinsics get propagated to loads and stores.
+; CHECK: @PR13920
+; CHECK: load <2 x i64>* %a, align 2
+; CHECK: store <2 x i64> {{.*}}, <2 x i64>* {{.*}}, align 2
+; CHECK: ret void
+
+entry:
+ %aa = alloca <2 x i64>, align 16
+ %aptr = bitcast <2 x i64>* %a to i8*
+ %aaptr = bitcast <2 x i64>* %aa to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %aaptr, i8* %aptr, i32 16, i32 2, i1 false)
+ %bptr = bitcast i16* %b to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %bptr, i8* %aaptr, i32 16, i32 2, i1 false)
+ ret void
+}
+
+define void @test3(i8* %x) {
+; Test that when we promote an alloca to a type with lower ABI alignment, we
+; provide the needed explicit alignment that code using the alloca may be
+; expecting. However, also check that any offset within an alloca can in turn
+; reduce the alignment.
+; CHECK: @test3
+; CHECK: alloca [22 x i8], align 8
+; CHECK: alloca [18 x i8], align 2
+; CHECK: ret void
+
+entry:
+ %a = alloca { i8*, i8*, i8* }
+ %b = alloca { i8*, i8*, i8* }
+ %a_raw = bitcast { i8*, i8*, i8* }* %a to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a_raw, i8* %x, i32 22, i32 8, i1 false)
+ %b_raw = bitcast { i8*, i8*, i8* }* %b to i8*
+ %b_gep = getelementptr i8* %b_raw, i32 6
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %b_gep, i8* %x, i32 18, i32 2, i1 false)
+ ret void
+}
+
+%struct.S = type { i8, { i64 } }
+
+define void @test4() {
+; This test case triggered very strange alignment behavior with memcpy due to
+; strange splitting. Reported by Duncan.
+; CHECK: @test4
+
+entry:
+ %D.2113 = alloca %struct.S
+ %Op = alloca %struct.S
+ %D.2114 = alloca %struct.S
+ %gep1 = getelementptr inbounds %struct.S* %Op, i32 0, i32 0
+ store i8 0, i8* %gep1, align 8
+ %gep2 = getelementptr inbounds %struct.S* %Op, i32 0, i32 1, i32 0
+ %cast = bitcast i64* %gep2 to double*
+ store double 0.000000e+00, double* %cast, align 8
+ store i64 0, i64* %gep2, align 8
+ %dst1 = bitcast %struct.S* %D.2114 to i8*
+ %src1 = bitcast %struct.S* %Op to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst1, i8* %src1, i32 16, i32 8, i1 false)
+ %dst2 = bitcast %struct.S* %D.2113 to i8*
+ %src2 = bitcast %struct.S* %D.2114 to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst2, i8* %src2, i32 16, i32 8, i1 false)
+; We get 3 memcpy calls with various reasons to shrink their alignment to 1.
+; CHECK: @llvm.memcpy.p0i8.p0i8.i32(i8* %{{.*}}, i8* %{{.*}}, i32 3, i32 1, i1 false)
+; CHECK: @llvm.memcpy.p0i8.p0i8.i32(i8* %{{.*}}, i8* %{{.*}}, i32 8, i32 1, i1 false)
+; CHECK: @llvm.memcpy.p0i8.p0i8.i32(i8* %{{.*}}, i8* %{{.*}}, i32 11, i32 1, i1 false)
+
+ ret void
+}
Modified: llvm/branches/R600/test/Transforms/SROA/basictest.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/SROA/basictest.ll?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/test/Transforms/SROA/basictest.ll (original)
+++ llvm/branches/R600/test/Transforms/SROA/basictest.ll Tue Oct 2 09:15:33 2012
@@ -855,3 +855,45 @@
%result = or i8 %load, %load2
ret i8 %result
}
+
+%PR13916.struct = type { i8 }
+
+define void @PR13916.1() {
+; Ensure that we handle overlapping memcpy intrinsics correctly, especially in
+; the case where there is a directly identical value for both source and dest.
+; CHECK: @PR13916.1
+; FIXME: We shouldn't leave this alloca around.
+; CHECK: alloca
+; CHECK: ret void
+
+entry:
+ %a = alloca i8
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a, i8* %a, i32 1, i32 1, i1 false)
+ %tmp2 = load i8* %a
+ ret void
+}
+
+define void @PR13916.2() {
+; Check whether we continue to handle them correctly when they start off with
+; different pointer value chains, but during rewriting we coalesce them into the
+; same value.
+; CHECK: @PR13916.2
+; FIXME: We shouldn't leave this alloca around.
+; CHECK: alloca
+; CHECK: ret void
+
+entry:
+ %a = alloca %PR13916.struct, align 1
+ br i1 undef, label %if.then, label %if.end
+
+if.then:
+ %tmp0 = bitcast %PR13916.struct* %a to i8*
+ %tmp1 = bitcast %PR13916.struct* %a to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp0, i8* %tmp1, i32 1, i32 1, i1 false)
+ br label %if.end
+
+if.end:
+ %gep = getelementptr %PR13916.struct* %a, i32 0, i32 0
+ %tmp2 = load i8* %gep
+ ret void
+}
Modified: llvm/branches/R600/test/Transforms/SROA/phi-and-select.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/SROA/phi-and-select.ll?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/test/Transforms/SROA/phi-and-select.ll (original)
+++ llvm/branches/R600/test/Transforms/SROA/phi-and-select.ll Tue Oct 2 09:15:33 2012
@@ -59,15 +59,24 @@
%a = alloca [2 x i32]
; CHECK-NOT: alloca
+ ; Note that we build redundant GEPs here to ensure that having different GEPs
+ ; into the same alloca partation continues to work with PHI speculation. This
+ ; was the underlying cause of PR13926.
%a0 = getelementptr [2 x i32]* %a, i64 0, i32 0
+ %a0b = getelementptr [2 x i32]* %a, i64 0, i32 0
%a1 = getelementptr [2 x i32]* %a, i64 0, i32 1
+ %a1b = getelementptr [2 x i32]* %a, i64 0, i32 1
store i32 0, i32* %a0
store i32 1, i32* %a1
; CHECK-NOT: store
switch i32 %x, label %bb0 [ i32 1, label %bb1
i32 2, label %bb2
- i32 3, label %bb3 ]
+ i32 3, label %bb3
+ i32 4, label %bb4
+ i32 5, label %bb5
+ i32 6, label %bb6
+ i32 7, label %bb7 ]
bb0:
br label %exit
@@ -77,10 +86,19 @@
br label %exit
bb3:
br label %exit
+bb4:
+ br label %exit
+bb5:
+ br label %exit
+bb6:
+ br label %exit
+bb7:
+ br label %exit
exit:
- %phi = phi i32* [ %a1, %bb0 ], [ %a0, %bb1 ], [ %a0, %bb2 ], [ %a1, %bb3 ]
-; CHECK: phi i32 [ 1, %{{.*}} ], [ 0, %{{.*}} ], [ 0, %{{.*}} ], [ 1, %{{.*}} ]
+ %phi = phi i32* [ %a1, %bb0 ], [ %a0, %bb1 ], [ %a0, %bb2 ], [ %a1, %bb3 ],
+ [ %a1b, %bb4 ], [ %a0b, %bb5 ], [ %a0b, %bb6 ], [ %a1b, %bb7 ]
+; CHECK: phi i32 [ 1, %{{.*}} ], [ 0, %{{.*}} ], [ 0, %{{.*}} ], [ 1, %{{.*}} ], [ 1, %{{.*}} ], [ 0, %{{.*}} ], [ 0, %{{.*}} ], [ 1, %{{.*}} ]
%result = load i32* %phi
ret i32 %result
Modified: llvm/branches/R600/test/Transforms/SimplifyCFG/switch_to_lookup_table.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/Transforms/SimplifyCFG/switch_to_lookup_table.ll?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/test/Transforms/SimplifyCFG/switch_to_lookup_table.ll (original)
+++ llvm/branches/R600/test/Transforms/SimplifyCFG/switch_to_lookup_table.ll Tue Oct 2 09:15:33 2012
@@ -6,17 +6,14 @@
; The table for @f
; CHECK: @switch.table = private unnamed_addr constant [7 x i32] [i32 55, i32 123, i32 0, i32 -1, i32 27, i32 62, i32 1]
-; The int table for @h
-; CHECK: @switch.table1 = private unnamed_addr constant [4 x i8] c"*\09X\05"
-
; The float table for @h
-; CHECK: @switch.table2 = private unnamed_addr constant [4 x float] [float 0x40091EB860000000, float 0x3FF3BE76C0000000, float 0x4012449BA0000000, float 0x4001AE1480000000]
+; CHECK: @switch.table1 = private unnamed_addr constant [4 x float] [float 0x40091EB860000000, float 0x3FF3BE76C0000000, float 0x4012449BA0000000, float 0x4001AE1480000000]
; The table for @foostring
-; CHECK: @switch.table3 = private unnamed_addr constant [4 x i8*] [i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str1, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str2, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str3, i64 0, i64 0)]
+; CHECK: @switch.table2 = private unnamed_addr constant [4 x i8*] [i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str1, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str2, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str3, i64 0, i64 0)]
; The table for @earlyreturncrash
-; CHECK: @switch.table4 = private unnamed_addr constant [4 x i32] [i32 42, i32 9, i32 88, i32 5]
+; CHECK: @switch.table3 = private unnamed_addr constant [4 x i32] [i32 42, i32 9, i32 88, i32 5]
; A simple int-to-int selection switch.
; It is dense enough to be replaced by table lookup.
@@ -88,14 +85,15 @@
; CHECK-NEXT: %0 = icmp ult i32 %switch.tableidx, 4
; CHECK-NEXT: br i1 %0, label %switch.lookup, label %sw.epilog
; CHECK: switch.lookup:
-; CHECK-NEXT: %switch.gep = getelementptr inbounds [4 x i8]* @switch.table1, i32 0, i32 %switch.tableidx
-; CHECK-NEXT: %switch.load = load i8* %switch.gep
-; CHECK-NEXT: %switch.gep1 = getelementptr inbounds [4 x float]* @switch.table2, i32 0, i32 %switch.tableidx
-; CHECK-NEXT: %switch.load2 = load float* %switch.gep1
+; CHECK-NEXT: %switch.shiftamt = mul i32 %switch.tableidx, 8
+; CHECK-NEXT: %switch.downshift = lshr i32 89655594, %switch.shiftamt
+; CHECK-NEXT: %switch.masked = trunc i32 %switch.downshift to i8
+; CHECK-NEXT: %switch.gep = getelementptr inbounds [4 x float]* @switch.table1, i32 0, i32 %switch.tableidx
+; CHECK-NEXT: %switch.load = load float* %switch.gep
; CHECK-NEXT: br label %sw.epilog
; CHECK: sw.epilog:
-; CHECK-NEXT: %a.0 = phi i8 [ %switch.load, %switch.lookup ], [ 7, %entry ]
-; CHECK-NEXT: %b.0 = phi float [ %switch.load2, %switch.lookup ], [ 0x4023FAE140000000, %entry ]
+; CHECK-NEXT: %a.0 = phi i8 [ %switch.masked, %switch.lookup ], [ 7, %entry ]
+; CHECK-NEXT: %b.0 = phi float [ %switch.load, %switch.lookup ], [ 0x4023FAE140000000, %entry ]
; CHECK-NEXT: call void @dummy(i8 signext %a.0, float %b.0)
; CHECK-NEXT: ret void
}
@@ -137,7 +135,7 @@
; CHECK-NEXT: %0 = icmp ult i32 %switch.tableidx, 4
; CHECK-NEXT: br i1 %0, label %switch.lookup, label %return
; CHECK: switch.lookup:
-; CHECK-NEXT: %switch.gep = getelementptr inbounds [4 x i8*]* @switch.table3, i32 0, i32 %switch.tableidx
+; CHECK-NEXT: %switch.gep = getelementptr inbounds [4 x i8*]* @switch.table2, i32 0, i32 %switch.tableidx
; CHECK-NEXT: %switch.load = load i8** %switch.gep
; CHECK-NEXT: ret i8* %switch.load
}
@@ -166,9 +164,132 @@
; CHECK: @earlyreturncrash
; CHECK: switch.lookup:
-; CHECK-NEXT: %switch.gep = getelementptr inbounds [4 x i32]* @switch.table4, i32 0, i32 %switch.tableidx
+; CHECK-NEXT: %switch.gep = getelementptr inbounds [4 x i32]* @switch.table3, i32 0, i32 %switch.tableidx
; CHECK-NEXT: %switch.load = load i32* %switch.gep
; CHECK-NEXT: ret i32 %switch.load
; CHECK: sw.epilog:
; CHECK-NEXT: ret i32 7
}
+
+
+; Example 7 from http://blog.regehr.org/archives/320
+; It is not dense enough for a regular table, but the results
+; can be packed into a bitmap.
+
+define i32 @crud(i8 zeroext %c) {
+entry:
+ %cmp = icmp ult i8 %c, 33
+ br i1 %cmp, label %lor.end, label %switch.early.test
+
+switch.early.test:
+ switch i8 %c, label %lor.rhs [
+ i8 92, label %lor.end
+ i8 62, label %lor.end
+ i8 60, label %lor.end
+ i8 59, label %lor.end
+ i8 58, label %lor.end
+ i8 46, label %lor.end
+ i8 44, label %lor.end
+ i8 34, label %lor.end
+ i8 39, label %switch.edge
+ ]
+
+switch.edge: br label %lor.end
+lor.rhs: br label %lor.end
+
+lor.end:
+ %0 = phi i1 [ true, %switch.early.test ],
+ [ false, %lor.rhs ],
+ [ true, %entry ],
+ [ true, %switch.early.test ],
+ [ true, %switch.early.test ],
+ [ true, %switch.early.test ],
+ [ true, %switch.early.test ],
+ [ true, %switch.early.test ],
+ [ true, %switch.early.test ],
+ [ true, %switch.early.test ],
+ [ true, %switch.edge ]
+ %lor.ext = zext i1 %0 to i32
+ ret i32 %lor.ext
+
+; CHECK: @crud
+; CHECK: entry:
+; CHECK-NEXT: %cmp = icmp ult i8 %c, 33
+; CHECK-NEXT: br i1 %cmp, label %lor.end, label %switch.early.test
+; CHECK: switch.early.test:
+; CHECK-NEXT: %switch.tableidx = sub i8 %c, 34
+; CHECK-NEXT: %0 = icmp ult i8 %switch.tableidx, 59
+; CHECK-NEXT: br i1 %0, label %switch.lookup, label %lor.end
+; CHECK: switch.lookup:
+; CHECK-NEXT: %switch.cast = zext i8 %switch.tableidx to i59
+; CHECK-NEXT: %switch.shiftamt = mul i59 %switch.cast, 1
+; CHECK-NEXT: %switch.downshift = lshr i59 -288230375765830623, %switch.shiftamt
+; CHECK-NEXT: %switch.masked = trunc i59 %switch.downshift to i1
+; CHECK-NEXT: br label %lor.end
+; CHECK: lor.end:
+; CHECK-NEXT: %1 = phi i1 [ true, %entry ], [ %switch.masked, %switch.lookup ], [ false, %switch.early.test ]
+; CHECK-NEXT: %lor.ext = zext i1 %1 to i32
+; CHECK-NEXT: ret i32 %lor.ext
+}
+
+; PR13946
+define i32 @overflow(i32 %type) nounwind {
+entry:
+ switch i32 %type, label %sw.default [
+ i32 -2147483648, label %sw.bb
+ i32 0, label %sw.bb
+ i32 1, label %sw.bb1
+ i32 2, label %sw.bb2
+ i32 -2147483645, label %sw.bb3
+ i32 3, label %sw.bb3
+ ]
+
+sw.bb:
+ br label %if.end
+
+sw.bb1:
+ br label %if.end
+
+sw.bb2:
+ br label %if.end
+
+sw.bb3:
+ br label %if.end
+
+sw.default:
+ br label %if.end
+
+if.else:
+ br label %if.end
+
+if.end:
+ %dirent_type.0 = phi i32 [ 3, %sw.default ], [ 6, %sw.bb3 ], [ 5, %sw.bb2 ], [ 0, %sw.bb1 ], [ 3, %sw.bb ], [ 0, %if.else ]
+ ret i32 %dirent_type.0
+; CHECK: define i32 @overflow
+; CHECK: switch
+; CHECK: phi
+}
+
+; PR13985
+define i1 @undef(i32 %tmp) uwtable ssp {
+bb:
+ switch i32 %tmp, label %bb3 [
+ i32 0, label %bb1
+ i32 1, label %bb1
+ i32 7, label %bb2
+ i32 8, label %bb2
+ ]
+
+bb1: ; preds = %bb, %bb
+ br label %bb3
+
+bb2: ; preds = %bb, %bb
+ br label %bb3
+
+bb3: ; preds = %bb2, %bb1, %bb
+ %tmp4 = phi i1 [ undef, %bb ], [ false, %bb2 ], [ true, %bb1 ]
+ ret i1 %tmp4
+; CHECK: define i1 @undef
+; CHECK: %switch.cast = trunc i32 %switch.tableidx to i9
+; CHECK: %switch.downshift = lshr i9 3, %switch.shiftamt
+}
Modified: llvm/branches/R600/tools/lli/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/tools/lli/CMakeLists.txt?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/tools/lli/CMakeLists.txt (original)
+++ llvm/branches/R600/tools/lli/CMakeLists.txt Tue Oct 2 09:15:33 2012
@@ -1,6 +1,4 @@
-link_directories( ${LLVM_INTEL_JITEVENTS_LIBDIR} )
-
set(LLVM_LINK_COMPONENTS mcjit jit interpreter nativecodegen bitreader asmparser selectiondag)
if( LLVM_USE_OPROFILE )
Modified: llvm/branches/R600/unittests/ADT/APFloatTest.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/unittests/ADT/APFloatTest.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/unittests/ADT/APFloatTest.cpp (original)
+++ llvm/branches/R600/unittests/ADT/APFloatTest.cpp Tue Oct 2 09:15:33 2012
@@ -689,6 +689,23 @@
P = R;
P.roundToIntegral(APFloat::rmNearestTiesToEven);
EXPECT_EQ(R.convertToDouble(), P.convertToDouble());
+
+ P = APFloat::getZero(APFloat::IEEEdouble);
+ P.roundToIntegral(APFloat::rmTowardZero);
+ EXPECT_EQ(0.0, P.convertToDouble());
+ P = APFloat::getZero(APFloat::IEEEdouble, true);
+ P.roundToIntegral(APFloat::rmTowardZero);
+ EXPECT_EQ(-0.0, P.convertToDouble());
+ P = APFloat::getNaN(APFloat::IEEEdouble);
+ P.roundToIntegral(APFloat::rmTowardZero);
+ EXPECT_TRUE(IsNAN(P.convertToDouble()));
+ P = APFloat::getInf(APFloat::IEEEdouble);
+ P.roundToIntegral(APFloat::rmTowardZero);
+ EXPECT_TRUE(IsInf(P.convertToDouble()) && P.convertToDouble() > 0.0);
+ P = APFloat::getInf(APFloat::IEEEdouble, true);
+ P.roundToIntegral(APFloat::rmTowardZero);
+ EXPECT_TRUE(IsInf(P.convertToDouble()) && P.convertToDouble() < 0.0);
+
}
TEST(APFloatTest, getLargest) {
Modified: llvm/branches/R600/unittests/ExecutionEngine/JIT/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/unittests/ExecutionEngine/JIT/CMakeLists.txt?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/unittests/ExecutionEngine/JIT/CMakeLists.txt (original)
+++ llvm/branches/R600/unittests/ExecutionEngine/JIT/CMakeLists.txt Tue Oct 2 09:15:33 2012
@@ -14,8 +14,6 @@
)
if( LLVM_USE_INTEL_JITEVENTS )
- include_directories( ${LLVM_INTEL_JITEVENTS_INCDIR} )
- link_directories( ${LLVM_INTEL_JITEVENTS_LIBDIR} )
set(ProfileTestSources
IntelJITEventListenerTest.cpp
)
Modified: llvm/branches/R600/unittests/ExecutionEngine/JIT/IntelJITEventListenerTest.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/unittests/ExecutionEngine/JIT/IntelJITEventListenerTest.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/unittests/ExecutionEngine/JIT/IntelJITEventListenerTest.cpp (original)
+++ llvm/branches/R600/unittests/ExecutionEngine/JIT/IntelJITEventListenerTest.cpp Tue Oct 2 09:15:33 2012
@@ -11,7 +11,10 @@
using namespace llvm;
-#include "llvm/ExecutionEngine/IntelJITEventsWrapper.h"
+// Because we want to keep the implementation details of the Intel API used to
+// communicate with Amplifier out of the public header files, the header below
+// is included from the source tree instead.
+#include "../../../lib/ExecutionEngine/IntelJITEvents/IntelJITEventsWrapper.h"
#include <map>
#include <list>
@@ -80,7 +83,7 @@
EXPECT_TRUE(0 != MockWrapper);
Listener.reset(JITEventListener::createIntelJITEventListener(
- MockWrapper.get()));
+ MockWrapper.take()));
EXPECT_TRUE(0 != Listener);
EE->RegisterJITEventListener(Listener.get());
}
Modified: llvm/branches/R600/unittests/Transforms/Utils/IntegerDivision.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/unittests/Transforms/Utils/IntegerDivision.cpp?rev=165011&r1=165010&r2=165011&view=diff
==============================================================================
--- llvm/branches/R600/unittests/Transforms/Utils/IntegerDivision.cpp (original)
+++ llvm/branches/R600/unittests/Transforms/Utils/IntegerDivision.cpp Tue Oct 2 09:15:33 2012
@@ -47,8 +47,96 @@
Instruction* Quotient = dyn_cast<Instruction>(cast<User>(Ret)->getOperand(0));
EXPECT_TRUE(Quotient && Quotient->getOpcode() == Instruction::Sub);
+}
+
+TEST(IntegerDivision, UDiv) {
+ LLVMContext &C(getGlobalContext());
+ Module M("test division", C);
+ IRBuilder<> Builder(C);
+
+ SmallVector<Type*, 2> ArgTys(2, Builder.getInt32Ty());
+ Function *F = Function::Create(FunctionType::get(Builder.getInt32Ty(),
+ ArgTys, false),
+ GlobalValue::ExternalLinkage, "F", &M);
+ assert(F->getArgumentList().size() == 2);
+
+ BasicBlock *BB = BasicBlock::Create(C, "", F);
+ Builder.SetInsertPoint(BB);
+
+ Function::arg_iterator AI = F->arg_begin();
+ Value *A = AI++;
+ Value *B = AI++;
+
+ Value *Div = Builder.CreateUDiv(A, B);
+ EXPECT_TRUE(BB->front().getOpcode() == Instruction::UDiv);
+
+ Value *Ret = Builder.CreateRet(Div);
+
+ expandDivision(cast<BinaryOperator>(Div));
+ EXPECT_TRUE(BB->front().getOpcode() == Instruction::ICmp);
+
+ Instruction* Quotient = dyn_cast<Instruction>(cast<User>(Ret)->getOperand(0));
+ EXPECT_TRUE(Quotient && Quotient->getOpcode() == Instruction::PHI);
+}
+
+TEST(IntegerDivision, SRem) {
+ LLVMContext &C(getGlobalContext());
+ Module M("test remainder", C);
+ IRBuilder<> Builder(C);
+
+ SmallVector<Type*, 2> ArgTys(2, Builder.getInt32Ty());
+ Function *F = Function::Create(FunctionType::get(Builder.getInt32Ty(),
+ ArgTys, false),
+ GlobalValue::ExternalLinkage, "F", &M);
+ assert(F->getArgumentList().size() == 2);
+
+ BasicBlock *BB = BasicBlock::Create(C, "", F);
+ Builder.SetInsertPoint(BB);
+
+ Function::arg_iterator AI = F->arg_begin();
+ Value *A = AI++;
+ Value *B = AI++;
+
+ Value *Rem = Builder.CreateSRem(A, B);
+ EXPECT_TRUE(BB->front().getOpcode() == Instruction::SRem);
+
+ Value *Ret = Builder.CreateRet(Rem);
+
+ expandRemainder(cast<BinaryOperator>(Rem));
+ EXPECT_TRUE(BB->front().getOpcode() == Instruction::AShr);
+
+ Instruction* Remainder = dyn_cast<Instruction>(cast<User>(Ret)->getOperand(0));
+ EXPECT_TRUE(Remainder && Remainder->getOpcode() == Instruction::Sub);
+}
+
+TEST(IntegerDivision, URem) {
+ LLVMContext &C(getGlobalContext());
+ Module M("test remainder", C);
+ IRBuilder<> Builder(C);
+
+ SmallVector<Type*, 2> ArgTys(2, Builder.getInt32Ty());
+ Function *F = Function::Create(FunctionType::get(Builder.getInt32Ty(),
+ ArgTys, false),
+ GlobalValue::ExternalLinkage, "F", &M);
+ assert(F->getArgumentList().size() == 2);
+
+ BasicBlock *BB = BasicBlock::Create(C, "", F);
+ Builder.SetInsertPoint(BB);
+
+ Function::arg_iterator AI = F->arg_begin();
+ Value *A = AI++;
+ Value *B = AI++;
+
+ Value *Rem = Builder.CreateURem(A, B);
+ EXPECT_TRUE(BB->front().getOpcode() == Instruction::URem);
+
+ Value *Ret = Builder.CreateRet(Rem);
+
+ expandRemainder(cast<BinaryOperator>(Rem));
+ EXPECT_TRUE(BB->front().getOpcode() == Instruction::ICmp);
- Builder.SetInsertPoint(BB->end());
+ Instruction* Remainder = dyn_cast<Instruction>(cast<User>(Ret)->getOperand(0));
+ EXPECT_TRUE(Remainder && Remainder->getOpcode() == Instruction::Sub);
}
}
More information about the llvm-branch-commits
mailing list