[clang] f0eb558 - Remove support for 3DNow!, both intrinsics and builtins. (#96246)

via cfe-commits cfe-commits at lists.llvm.org
Tue Jul 16 09:08:52 PDT 2024


Author: James Y Knight
Date: 2024-07-16T12:08:48-04:00
New Revision: f0eb5587ceeb641445b64cb264c822b4751de04a

URL: https://github.com/llvm/llvm-project/commit/f0eb5587ceeb641445b64cb264c822b4751de04a
DIFF: https://github.com/llvm/llvm-project/commit/f0eb5587ceeb641445b64cb264c822b4751de04a.diff

LOG: Remove support for 3DNow!, both intrinsics and builtins. (#96246)

This set of instructions was only supported by AMD chips starting in
the K6-2 (introduced 1998), and before the "Bulldozer" family
(2011). They were never much used, as they were effectively superseded
by the more-widely-implemented SSE (first implemented on the AMD side
in Athlon XP in 2001).

This is being done as a predecessor towards general removal of MMX
register usage. Since there is almost no usage of the 3DNow!
intrinsics, and no modern hardware even implements them, simple
removal seems like the best option.

(Clang half originally uploaded in https://reviews.llvm.org/D94213)

Works towards issue #41665 and issue #98272.

Added: 
    

Modified: 
    clang/docs/ReleaseNotes.rst
    clang/include/clang/Basic/BuiltinsX86.def
    clang/include/clang/Driver/Options.td
    clang/lib/Basic/Targets/X86.cpp
    clang/lib/Basic/Targets/X86.h
    clang/lib/CodeGen/CGBuiltin.cpp
    clang/lib/Driver/ToolChains/Arch/X86.cpp
    clang/lib/Headers/mm3dnow.h
    clang/lib/Headers/x86intrin.h
    clang/test/CodeGen/builtins-x86.c
    clang/test/Driver/x86-target-features.c
    clang/test/Headers/mm3dnow.c
    clang/test/Preprocessor/predefined-arch-macros.c
    clang/test/Preprocessor/x86_target_features.c
    llvm/docs/ReleaseNotes.rst
    llvm/include/llvm/IR/IntrinsicsX86.td
    llvm/lib/Target/X86/X86.td
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/lib/Target/X86/X86Instr3DNow.td
    llvm/lib/Target/X86/X86InstrPredicates.td
    llvm/lib/Target/X86/X86Subtarget.cpp
    llvm/lib/Target/X86/X86Subtarget.h
    llvm/test/CodeGen/X86/expand-vr64-gr64-copy.mir
    llvm/test/CodeGen/X86/pr35982.ll
    llvm/test/CodeGen/X86/prefetch.ll

Removed: 
    clang/test/CodeGen/X86/3dnow-builtins.c
    llvm/test/CodeGen/X86/3dnow-intrinsics.ll
    llvm/test/CodeGen/X86/commute-3dnow.ll
    llvm/test/CodeGen/X86/stack-folding-3dnow.ll


################################################################################
diff  --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index cb96c5a16f261..e51dc8d76ac0d 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -1078,6 +1078,25 @@ X86 Support
 ^^^^^^^^^^^
 
 - Remove knl/knm specific ISA supports: AVX512PF, AVX512ER, PREFETCHWT1
+- Support has been removed for the AMD "3DNow!" instruction-set.
+  Neither modern AMD CPUs, nor any Intel CPUs implement these
+  instructions, and they were never widely used.
+
+  * The options ``-m3dnow`` and ``-m3dnowa`` are no longer honored, and will emit a warning if used.
+  * The macros ``__3dNOW__`` and ``__3dNOW_A__`` are no longer ever set by the compiler.
+  * The header ``<mm3dnow.h>`` is deprecated, and emits a warning if included.
+  * The 3dNow intrinsic functions have been removed: ``_m_femms``,
+    ``_m_pavgusb``, ``_m_pf2id``, ``_m_pfacc``, ``_m_pfadd``,
+    ``_m_pfcmpeq``, ``_m_pfcmpge``, ``_m_pfcmpgt``, ``_m_pfmax``,
+    ``_m_pfmin``, ``_m_pfmul``, ``_m_pfrcp``, ``_m_pfrcpit1``,
+    ``_m_pfrcpit2``, ``_m_pfrsqrt``, ``_m_pfrsqrtit1``, ``_m_pfsub``,
+    ``_m_pfsubr``, ``_m_pi2fd``, ``_m_pmulhrw``, ``_m_pf2iw``,
+    ``_m_pfnacc``, ``_m_pfpnacc``, ``_m_pi2fw``, ``_m_pswapdsf``,
+    ``_m_pswapdsi``.
+  * The compiler builtins corresponding to each of the above
+    intrinsics have also been removed  (``__builtin_ia32_femms``, and so on).
+  * "3DNow!" instructions remain supported in assembly code, including
+    inside inline-assembly.
 
 Arm and AArch64 Support
 ^^^^^^^^^^^^^^^^^^^^^^^

diff  --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index 7074479786b97..a85e7918f4d7e 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -37,36 +37,6 @@ TARGET_BUILTIN(__builtin_ia32_undef512, "V8d", "ncV:512:", "")
 TARGET_BUILTIN(__builtin_ia32_readeflags_u32, "Ui", "n", "")
 TARGET_BUILTIN(__builtin_ia32_writeeflags_u32, "vUi", "n", "")
 
-// 3DNow!
-//
-TARGET_BUILTIN(__builtin_ia32_femms, "v", "n", "3dnow")
-TARGET_BUILTIN(__builtin_ia32_pavgusb, "V8cV8cV8c", "ncV:64:", "3dnow")
-TARGET_BUILTIN(__builtin_ia32_pf2id, "V2iV2f", "ncV:64:", "3dnow")
-TARGET_BUILTIN(__builtin_ia32_pfacc, "V2fV2fV2f", "ncV:64:", "3dnow")
-TARGET_BUILTIN(__builtin_ia32_pfadd, "V2fV2fV2f", "ncV:64:", "3dnow")
-TARGET_BUILTIN(__builtin_ia32_pfcmpeq, "V2iV2fV2f", "ncV:64:", "3dnow")
-TARGET_BUILTIN(__builtin_ia32_pfcmpge, "V2iV2fV2f", "ncV:64:", "3dnow")
-TARGET_BUILTIN(__builtin_ia32_pfcmpgt, "V2iV2fV2f", "ncV:64:", "3dnow")
-TARGET_BUILTIN(__builtin_ia32_pfmax, "V2fV2fV2f", "ncV:64:", "3dnow")
-TARGET_BUILTIN(__builtin_ia32_pfmin, "V2fV2fV2f", "ncV:64:", "3dnow")
-TARGET_BUILTIN(__builtin_ia32_pfmul, "V2fV2fV2f", "ncV:64:", "3dnow")
-TARGET_BUILTIN(__builtin_ia32_pfrcp, "V2fV2f", "ncV:64:", "3dnow")
-TARGET_BUILTIN(__builtin_ia32_pfrcpit1, "V2fV2fV2f", "ncV:64:", "3dnow")
-TARGET_BUILTIN(__builtin_ia32_pfrcpit2, "V2fV2fV2f", "ncV:64:", "3dnow")
-TARGET_BUILTIN(__builtin_ia32_pfrsqrt, "V2fV2f", "ncV:64:", "3dnow")
-TARGET_BUILTIN(__builtin_ia32_pfrsqit1, "V2fV2fV2f", "ncV:64:", "3dnow")
-TARGET_BUILTIN(__builtin_ia32_pfsub, "V2fV2fV2f", "ncV:64:", "3dnow")
-TARGET_BUILTIN(__builtin_ia32_pfsubr, "V2fV2fV2f", "ncV:64:", "3dnow")
-TARGET_BUILTIN(__builtin_ia32_pi2fd, "V2fV2i", "ncV:64:", "3dnow")
-TARGET_BUILTIN(__builtin_ia32_pmulhrw, "V4sV4sV4s", "ncV:64:", "3dnow")
-// 3DNow! Extensions (3dnowa).
-TARGET_BUILTIN(__builtin_ia32_pf2iw, "V2iV2f", "ncV:64:", "3dnowa")
-TARGET_BUILTIN(__builtin_ia32_pfnacc, "V2fV2fV2f", "ncV:64:", "3dnowa")
-TARGET_BUILTIN(__builtin_ia32_pfpnacc, "V2fV2fV2f", "ncV:64:", "3dnowa")
-TARGET_BUILTIN(__builtin_ia32_pi2fw, "V2fV2i", "ncV:64:", "3dnowa")
-TARGET_BUILTIN(__builtin_ia32_pswapdsf, "V2fV2f", "ncV:64:", "3dnowa")
-TARGET_BUILTIN(__builtin_ia32_pswapdsi, "V2iV2i", "ncV:64:", "3dnowa")
-
 // MMX
 //
 // All MMX instructions will be generated via builtins. Any MMX vector

diff  --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 561aa28a52b1f..2400b193d4d38 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -6135,10 +6135,6 @@ def mno_80387 : Flag<["-"], "mno-80387">, Alias<mno_x87>;
 def mno_fp_ret_in_387 : Flag<["-"], "mno-fp-ret-in-387">, Alias<mno_x87>;
 def mmmx : Flag<["-"], "mmmx">, Group<m_x86_Features_Group>;
 def mno_mmx : Flag<["-"], "mno-mmx">, Group<m_x86_Features_Group>;
-def m3dnow : Flag<["-"], "m3dnow">, Group<m_x86_Features_Group>;
-def mno_3dnow : Flag<["-"], "mno-3dnow">, Group<m_x86_Features_Group>;
-def m3dnowa : Flag<["-"], "m3dnowa">, Group<m_x86_Features_Group>;
-def mno_3dnowa : Flag<["-"], "mno-3dnowa">, Group<m_x86_Features_Group>;
 def mamx_bf16 : Flag<["-"], "mamx-bf16">, Group<m_x86_Features_Group>;
 def mno_amx_bf16 : Flag<["-"], "mno-amx-bf16">, Group<m_x86_Features_Group>;
 def mamx_complex : Flag<["-"], "mamx-complex">, Group<m_x86_Features_Group>;
@@ -6372,6 +6368,12 @@ def mvevpu : Flag<["-"], "mvevpu">, Group<m_ve_Features_Group>,
 def mno_vevpu : Flag<["-"], "mno-vevpu">, Group<m_ve_Features_Group>;
 } // let Flags = [TargetSpecific]
 
+// Unsupported X86 feature flags (triggers a warning)
+def m3dnow : Flag<["-"], "m3dnow">;
+def mno_3dnow : Flag<["-"], "mno-3dnow">;
+def m3dnowa : Flag<["-"], "m3dnowa">;
+def mno_3dnowa : Flag<["-"], "mno-3dnowa">;
+
 // These are legacy user-facing driver-level option spellings. They are always
 // aliases for options that are spelled using the more common Unix / GNU flag
 // style of double-dash and equals-joined flags.

diff  --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp
index 1f6fc842ddd95..121a2c2d795fe 100644
--- a/clang/lib/Basic/Targets/X86.cpp
+++ b/clang/lib/Basic/Targets/X86.cpp
@@ -258,7 +258,9 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
     if (Feature[0] != '+')
       continue;
 
-    if (Feature == "+aes") {
+    if (Feature == "+mmx") {
+      HasMMX = true;
+    } else if (Feature == "+aes") {
       HasAES = true;
     } else if (Feature == "+vaes") {
       HasVAES = true;
@@ -487,13 +489,6 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
     // for bfloat16 arithmetic operations in the front-end.
     HasBFloat16 = SSELevel >= SSE2;
 
-    MMX3DNowEnum ThreeDNowLevel = llvm::StringSwitch<MMX3DNowEnum>(Feature)
-                                      .Case("+3dnowa", AMD3DNowAthlon)
-                                      .Case("+3dnow", AMD3DNow)
-                                      .Case("+mmx", MMX)
-                                      .Default(NoMMX3DNow);
-    MMX3DNowLevel = std::max(MMX3DNowLevel, ThreeDNowLevel);
-
     XOPEnum XLevel = llvm::StringSwitch<XOPEnum>(Feature)
                          .Case("+xop", XOP)
                          .Case("+fma4", FMA4)
@@ -1031,18 +1026,8 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
   }
 
   // Each case falls through to the previous one here.
-  switch (MMX3DNowLevel) {
-  case AMD3DNowAthlon:
-    Builder.defineMacro("__3dNOW_A__");
-    [[fallthrough]];
-  case AMD3DNow:
-    Builder.defineMacro("__3dNOW__");
-    [[fallthrough]];
-  case MMX:
+  if (HasMMX) {
     Builder.defineMacro("__MMX__");
-    [[fallthrough]];
-  case NoMMX3DNow:
-    break;
   }
 
   if (CPU >= CK_i486 || CPU == CK_None) {
@@ -1061,8 +1046,6 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
 
 bool X86TargetInfo::isValidFeatureName(StringRef Name) const {
   return llvm::StringSwitch<bool>(Name)
-      .Case("3dnow", true)
-      .Case("3dnowa", true)
       .Case("adx", true)
       .Case("aes", true)
       .Case("amx-bf16", true)
@@ -1232,9 +1215,7 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
       .Case("widekl", HasWIDEKL)
       .Case("lwp", HasLWP)
       .Case("lzcnt", HasLZCNT)
-      .Case("mm3dnow", MMX3DNowLevel >= AMD3DNow)
-      .Case("mm3dnowa", MMX3DNowLevel >= AMD3DNowAthlon)
-      .Case("mmx", MMX3DNowLevel >= MMX)
+      .Case("mmx", HasMMX)
       .Case("movbe", HasMOVBE)
       .Case("movdiri", HasMOVDIRI)
       .Case("movdir64b", HasMOVDIR64B)

diff  --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h
index a70711f4ae2bb..cdec41afd1a4b 100644
--- a/clang/lib/Basic/Targets/X86.h
+++ b/clang/lib/Basic/Targets/X86.h
@@ -67,12 +67,7 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
     AVX2,
     AVX512F
   } SSELevel = NoSSE;
-  enum MMX3DNowEnum {
-    NoMMX3DNow,
-    MMX,
-    AMD3DNow,
-    AMD3DNowAthlon
-  } MMX3DNowLevel = NoMMX3DNow;
+  bool HasMMX = false;
   enum XOPEnum { NoXOP, SSE4A, FMA4, XOP } XOPLevel = NoXOP;
   enum AddrSpace { ptr32_sptr = 270, ptr32_uptr = 271, ptr64 = 272 };
 
@@ -348,8 +343,7 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
       return "avx512";
     if (getTriple().getArch() == llvm::Triple::x86_64 && SSELevel >= AVX)
       return "avx";
-    if (getTriple().getArch() == llvm::Triple::x86 &&
-        MMX3DNowLevel == NoMMX3DNow)
+    if (getTriple().getArch() == llvm::Triple::x86 && !HasMMX)
       return "no-mmx";
     return "";
   }

diff  --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index c0297476391d2..67027f8aa93f3 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -15969,14 +15969,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
     return Builder.CreateCall(F, {Ops[0]});
   }
 
-  // 3DNow!
-  case X86::BI__builtin_ia32_pswapdsf:
-  case X86::BI__builtin_ia32_pswapdsi: {
-    llvm::Type *MMXTy = llvm::Type::getX86_MMXTy(getLLVMContext());
-    Ops[0] = Builder.CreateBitCast(Ops[0], MMXTy, "cast");
-    llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_3dnowa_pswapd);
-    return Builder.CreateCall(F, Ops, "pswapd");
-  }
   case X86::BI__builtin_ia32_rdrand16_step:
   case X86::BI__builtin_ia32_rdrand32_step:
   case X86::BI__builtin_ia32_rdrand64_step:

diff  --git a/clang/lib/Driver/ToolChains/Arch/X86.cpp b/clang/lib/Driver/ToolChains/Arch/X86.cpp
index 9fca7864b2546..2f63333b732f6 100644
--- a/clang/lib/Driver/ToolChains/Arch/X86.cpp
+++ b/clang/lib/Driver/ToolChains/Arch/X86.cpp
@@ -310,4 +310,17 @@ void x86::getX86TargetFeatures(const Driver &D, const llvm::Triple &Triple,
     Features.push_back("+prefer-no-scatter");
   if (Args.hasArg(options::OPT_mapx_inline_asm_use_gpr32))
     Features.push_back("+inline-asm-use-gpr32");
+
+  // Warn for removed 3dnow support
+  if (const Arg *A =
+          Args.getLastArg(options::OPT_m3dnowa, options::OPT_mno_3dnowa,
+                          options::OPT_mno_3dnow)) {
+    if (A->getOption().matches(options::OPT_m3dnowa))
+      D.Diag(diag::warn_drv_clang_unsupported) << A->getAsString(Args);
+  }
+  if (const Arg *A =
+          Args.getLastArg(options::OPT_m3dnow, options::OPT_mno_3dnow)) {
+    if (A->getOption().matches(options::OPT_m3dnow))
+      D.Diag(diag::warn_drv_clang_unsupported) << A->getAsString(Args);
+  }
 }

diff  --git a/clang/lib/Headers/mm3dnow.h b/clang/lib/Headers/mm3dnow.h
index 22ab13aa33409..afffba3a9c75e 100644
--- a/clang/lib/Headers/mm3dnow.h
+++ b/clang/lib/Headers/mm3dnow.h
@@ -7,151 +7,16 @@
  *===-----------------------------------------------------------------------===
  */
 
+// 3dNow intrinsics are no longer supported.
+
 #ifndef _MM3DNOW_H_INCLUDED
 #define _MM3DNOW_H_INCLUDED
 
+#ifndef _CLANG_DISABLE_CRT_DEPRECATION_WARNINGS
+#warning "The <mm3dnow.h> header is deprecated, and 3dNow! intrinsics are unsupported. For other intrinsics, include <x86intrin.h>, instead."
+#endif
+
 #include <mmintrin.h>
 #include <prfchwintrin.h>
 
-typedef float __v2sf __attribute__((__vector_size__(8)));
-
-/* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("3dnow"), __min_vector_width__(64)))
-
-static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("3dnow")))
-_m_femms(void) {
-  __builtin_ia32_femms();
-}
-
-static __inline__ __m64 __DEFAULT_FN_ATTRS
-_m_pavgusb(__m64 __m1, __m64 __m2) {
-  return (__m64)__builtin_ia32_pavgusb((__v8qi)__m1, (__v8qi)__m2);
-}
-
-static __inline__ __m64 __DEFAULT_FN_ATTRS
-_m_pf2id(__m64 __m) {
-  return (__m64)__builtin_ia32_pf2id((__v2sf)__m);
-}
-
-static __inline__ __m64 __DEFAULT_FN_ATTRS
-_m_pfacc(__m64 __m1, __m64 __m2) {
-  return (__m64)__builtin_ia32_pfacc((__v2sf)__m1, (__v2sf)__m2);
-}
-
-static __inline__ __m64 __DEFAULT_FN_ATTRS
-_m_pfadd(__m64 __m1, __m64 __m2) {
-  return (__m64)__builtin_ia32_pfadd((__v2sf)__m1, (__v2sf)__m2);
-}
-
-static __inline__ __m64 __DEFAULT_FN_ATTRS
-_m_pfcmpeq(__m64 __m1, __m64 __m2) {
-  return (__m64)__builtin_ia32_pfcmpeq((__v2sf)__m1, (__v2sf)__m2);
-}
-
-static __inline__ __m64 __DEFAULT_FN_ATTRS
-_m_pfcmpge(__m64 __m1, __m64 __m2) {
-  return (__m64)__builtin_ia32_pfcmpge((__v2sf)__m1, (__v2sf)__m2);
-}
-
-static __inline__ __m64 __DEFAULT_FN_ATTRS
-_m_pfcmpgt(__m64 __m1, __m64 __m2) {
-  return (__m64)__builtin_ia32_pfcmpgt((__v2sf)__m1, (__v2sf)__m2);
-}
-
-static __inline__ __m64 __DEFAULT_FN_ATTRS
-_m_pfmax(__m64 __m1, __m64 __m2) {
-  return (__m64)__builtin_ia32_pfmax((__v2sf)__m1, (__v2sf)__m2);
-}
-
-static __inline__ __m64 __DEFAULT_FN_ATTRS
-_m_pfmin(__m64 __m1, __m64 __m2) {
-  return (__m64)__builtin_ia32_pfmin((__v2sf)__m1, (__v2sf)__m2);
-}
-
-static __inline__ __m64 __DEFAULT_FN_ATTRS
-_m_pfmul(__m64 __m1, __m64 __m2) {
-  return (__m64)__builtin_ia32_pfmul((__v2sf)__m1, (__v2sf)__m2);
-}
-
-static __inline__ __m64 __DEFAULT_FN_ATTRS
-_m_pfrcp(__m64 __m) {
-  return (__m64)__builtin_ia32_pfrcp((__v2sf)__m);
-}
-
-static __inline__ __m64 __DEFAULT_FN_ATTRS
-_m_pfrcpit1(__m64 __m1, __m64 __m2) {
-  return (__m64)__builtin_ia32_pfrcpit1((__v2sf)__m1, (__v2sf)__m2);
-}
-
-static __inline__ __m64 __DEFAULT_FN_ATTRS
-_m_pfrcpit2(__m64 __m1, __m64 __m2) {
-  return (__m64)__builtin_ia32_pfrcpit2((__v2sf)__m1, (__v2sf)__m2);
-}
-
-static __inline__ __m64 __DEFAULT_FN_ATTRS
-_m_pfrsqrt(__m64 __m) {
-  return (__m64)__builtin_ia32_pfrsqrt((__v2sf)__m);
-}
-
-static __inline__ __m64 __DEFAULT_FN_ATTRS
-_m_pfrsqrtit1(__m64 __m1, __m64 __m2) {
-  return (__m64)__builtin_ia32_pfrsqit1((__v2sf)__m1, (__v2sf)__m2);
-}
-
-static __inline__ __m64 __DEFAULT_FN_ATTRS
-_m_pfsub(__m64 __m1, __m64 __m2) {
-  return (__m64)__builtin_ia32_pfsub((__v2sf)__m1, (__v2sf)__m2);
-}
-
-static __inline__ __m64 __DEFAULT_FN_ATTRS
-_m_pfsubr(__m64 __m1, __m64 __m2) {
-  return (__m64)__builtin_ia32_pfsubr((__v2sf)__m1, (__v2sf)__m2);
-}
-
-static __inline__ __m64 __DEFAULT_FN_ATTRS
-_m_pi2fd(__m64 __m) {
-  return (__m64)__builtin_ia32_pi2fd((__v2si)__m);
-}
-
-static __inline__ __m64 __DEFAULT_FN_ATTRS
-_m_pmulhrw(__m64 __m1, __m64 __m2) {
-  return (__m64)__builtin_ia32_pmulhrw((__v4hi)__m1, (__v4hi)__m2);
-}
-
-/* Handle the 3dnowa instructions here. */
-#undef __DEFAULT_FN_ATTRS
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("3dnowa"), __min_vector_width__(64)))
-
-static __inline__ __m64 __DEFAULT_FN_ATTRS
-_m_pf2iw(__m64 __m) {
-  return (__m64)__builtin_ia32_pf2iw((__v2sf)__m);
-}
-
-static __inline__ __m64 __DEFAULT_FN_ATTRS
-_m_pfnacc(__m64 __m1, __m64 __m2) {
-  return (__m64)__builtin_ia32_pfnacc((__v2sf)__m1, (__v2sf)__m2);
-}
-
-static __inline__ __m64 __DEFAULT_FN_ATTRS
-_m_pfpnacc(__m64 __m1, __m64 __m2) {
-  return (__m64)__builtin_ia32_pfpnacc((__v2sf)__m1, (__v2sf)__m2);
-}
-
-static __inline__ __m64 __DEFAULT_FN_ATTRS
-_m_pi2fw(__m64 __m) {
-  return (__m64)__builtin_ia32_pi2fw((__v2si)__m);
-}
-
-static __inline__ __m64 __DEFAULT_FN_ATTRS
-_m_pswapdsf(__m64 __m) {
-  return (__m64)__builtin_ia32_pswapdsf((__v2sf)__m);
-}
-
-static __inline__ __m64 __DEFAULT_FN_ATTRS
-_m_pswapdsi(__m64 __m) {
-  return (__m64)__builtin_ia32_pswapdsi((__v2si)__m);
-}
-
-#undef __DEFAULT_FN_ATTRS
-
 #endif

diff  --git a/clang/lib/Headers/x86intrin.h b/clang/lib/Headers/x86intrin.h
index c20bfbb8fe46e..f42e9e580f883 100644
--- a/clang/lib/Headers/x86intrin.h
+++ b/clang/lib/Headers/x86intrin.h
@@ -14,10 +14,6 @@
 
 #include <immintrin.h>
 
-#if !defined(__SCE__) || __has_feature(modules) || defined(__3dNOW__)
-#include <mm3dnow.h>
-#endif
-
 #if !defined(__SCE__) || __has_feature(modules) || defined(__PRFCHW__)
 #include <prfchwintrin.h>
 #endif

diff  --git a/clang/test/CodeGen/X86/3dnow-builtins.c b/clang/test/CodeGen/X86/3dnow-builtins.c
deleted file mode 100644
index af754b71555c4..0000000000000
--- a/clang/test/CodeGen/X86/3dnow-builtins.c
+++ /dev/null
@@ -1,181 +0,0 @@
-// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +3dnowa -emit-llvm -o - -Wall -Werror | FileCheck %s -check-prefix=GCC -check-prefix=CHECK
-// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-scei-ps4 -target-feature +3dnowa -emit-llvm -o - -Wall -Werror | FileCheck %s -check-prefix=PS4 -check-prefix=CHECK
-// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-sie-ps5  -target-feature +3dnowa -emit-llvm -o - -Wall -Werror | FileCheck %s -check-prefix=PS4 -check-prefix=CHECK
-
-
-#include <x86intrin.h>
-
-__m64 test_m_pavgusb(__m64 m1, __m64 m2) {
-  // PS4-LABEL: define{{.*}} i64 @test_m_pavgusb
-  // GCC-LABEL: define{{.*}} double @test_m_pavgusb
-  // CHECK: @llvm.x86.3dnow.pavgusb
-  return _m_pavgusb(m1, m2);
-}
-
-__m64 test_m_pf2id(__m64 m) {
-  // PS4-LABEL: define{{.*}} i64 @test_m_pf2id
-  // GCC-LABEL: define{{.*}} double @test_m_pf2id
-  // CHECK: @llvm.x86.3dnow.pf2id
-  return _m_pf2id(m);
-}
-
-__m64 test_m_pfacc(__m64 m1, __m64 m2) {
-  // PS4-LABEL: define{{.*}} i64 @test_m_pfacc
-  // GCC-LABEL: define{{.*}} double @test_m_pfacc
-  // CHECK: @llvm.x86.3dnow.pfacc
-  return _m_pfacc(m1, m2);
-}
-
-__m64 test_m_pfadd(__m64 m1, __m64 m2) {
-  // PS4-LABEL: define{{.*}} i64 @test_m_pfadd
-  // GCC-LABEL: define{{.*}} double @test_m_pfadd
-  // CHECK: @llvm.x86.3dnow.pfadd
-  return _m_pfadd(m1, m2);
-}
-
-__m64 test_m_pfcmpeq(__m64 m1, __m64 m2) {
-  // PS4-LABEL: define{{.*}} i64 @test_m_pfcmpeq
-  // GCC-LABEL: define{{.*}} double @test_m_pfcmpeq
-  // CHECK: @llvm.x86.3dnow.pfcmpeq
-  return _m_pfcmpeq(m1, m2);
-}
-
-__m64 test_m_pfcmpge(__m64 m1, __m64 m2) {
-  // PS4-LABEL: define{{.*}} i64 @test_m_pfcmpge
-  // GCC-LABEL: define{{.*}} double @test_m_pfcmpge
-  // CHECK: @llvm.x86.3dnow.pfcmpge
-  return _m_pfcmpge(m1, m2);
-}
-
-__m64 test_m_pfcmpgt(__m64 m1, __m64 m2) {
-  // PS4-LABEL: define{{.*}} i64 @test_m_pfcmpgt
-  // GCC-LABEL: define{{.*}} double @test_m_pfcmpgt
-  // CHECK: @llvm.x86.3dnow.pfcmpgt
-  return _m_pfcmpgt(m1, m2);
-}
-
-__m64 test_m_pfmax(__m64 m1, __m64 m2) {
-  // PS4-LABEL: define{{.*}} i64 @test_m_pfmax
-  // GCC-LABEL: define{{.*}} double @test_m_pfmax
-  // CHECK: @llvm.x86.3dnow.pfmax
-  return _m_pfmax(m1, m2);
-}
-
-__m64 test_m_pfmin(__m64 m1, __m64 m2) {
-  // PS4-LABEL: define{{.*}} i64 @test_m_pfmin
-  // GCC-LABEL: define{{.*}} double @test_m_pfmin
-  // CHECK: @llvm.x86.3dnow.pfmin
-  return _m_pfmin(m1, m2);
-}
-
-__m64 test_m_pfmul(__m64 m1, __m64 m2) {
-  // PS4-LABEL: define{{.*}} i64 @test_m_pfmul
-  // GCC-LABEL: define{{.*}} double @test_m_pfmul
-  // CHECK: @llvm.x86.3dnow.pfmul
-  return _m_pfmul(m1, m2);
-}
-
-__m64 test_m_pfrcp(__m64 m) {
-  // PS4-LABEL: define{{.*}} i64 @test_m_pfrcp
-  // GCC-LABEL: define{{.*}} double @test_m_pfrcp
-  // CHECK: @llvm.x86.3dnow.pfrcp
-  return _m_pfrcp(m);
-}
-
-__m64 test_m_pfrcpit1(__m64 m1, __m64 m2) {
-  // PS4-LABEL: define{{.*}} i64 @test_m_pfrcpit1
-  // GCC-LABEL: define{{.*}} double @test_m_pfrcpit1
-  // CHECK: @llvm.x86.3dnow.pfrcpit1
-  return _m_pfrcpit1(m1, m2);
-}
-
-__m64 test_m_pfrcpit2(__m64 m1, __m64 m2) {
-  // PS4-LABEL: define{{.*}} i64 @test_m_pfrcpit2
-  // GCC-LABEL: define{{.*}} double @test_m_pfrcpit2
-  // CHECK: @llvm.x86.3dnow.pfrcpit2
-  return _m_pfrcpit2(m1, m2);
-}
-
-__m64 test_m_pfrsqrt(__m64 m) {
-  // PS4-LABEL: define{{.*}} i64 @test_m_pfrsqrt
-  // GCC-LABEL: define{{.*}} double @test_m_pfrsqrt
-  // CHECK: @llvm.x86.3dnow.pfrsqrt
-  return _m_pfrsqrt(m);
-}
-
-__m64 test_m_pfrsqrtit1(__m64 m1, __m64 m2) {
-  // PS4-LABEL: define{{.*}} i64 @test_m_pfrsqrtit1
-  // GCC-LABEL: define{{.*}} double @test_m_pfrsqrtit1
-  // CHECK: @llvm.x86.3dnow.pfrsqit1
-  return _m_pfrsqrtit1(m1, m2);
-}
-
-__m64 test_m_pfsub(__m64 m1, __m64 m2) {
-  // PS4-LABEL: define{{.*}} i64 @test_m_pfsub
-  // GCC-LABEL: define{{.*}} double @test_m_pfsub
-  // CHECK: @llvm.x86.3dnow.pfsub
-  return _m_pfsub(m1, m2);
-}
-
-__m64 test_m_pfsubr(__m64 m1, __m64 m2) {
-  // PS4-LABEL: define{{.*}} i64 @test_m_pfsubr
-  // GCC-LABEL: define{{.*}} double @test_m_pfsubr
-  // CHECK: @llvm.x86.3dnow.pfsubr
-  return _m_pfsubr(m1, m2);
-}
-
-__m64 test_m_pi2fd(__m64 m) {
-  // PS4-LABEL: define{{.*}} i64 @test_m_pi2fd
-  // GCC-LABEL: define{{.*}} double @test_m_pi2fd
-  // CHECK: @llvm.x86.3dnow.pi2fd
-  return _m_pi2fd(m);
-}
-
-__m64 test_m_pmulhrw(__m64 m1, __m64 m2) {
-  // PS4-LABEL: define{{.*}} i64 @test_m_pmulhrw
-  // GCC-LABEL: define{{.*}} double @test_m_pmulhrw
-  // CHECK: @llvm.x86.3dnow.pmulhrw
-  return _m_pmulhrw(m1, m2);
-}
-
-__m64 test_m_pf2iw(__m64 m) {
-  // PS4-LABEL: define{{.*}} i64 @test_m_pf2iw
-  // GCC-LABEL: define{{.*}} double @test_m_pf2iw
-  // CHECK: @llvm.x86.3dnowa.pf2iw
-  return _m_pf2iw(m);
-}
-
-__m64 test_m_pfnacc(__m64 m1, __m64 m2) {
-  // PS4-LABEL: define{{.*}} i64 @test_m_pfnacc
-  // GCC-LABEL: define{{.*}} double @test_m_pfnacc
-  // CHECK: @llvm.x86.3dnowa.pfnacc
-  return _m_pfnacc(m1, m2);
-}
-
-__m64 test_m_pfpnacc(__m64 m1, __m64 m2) {
-  // PS4-LABEL: define{{.*}} i64 @test_m_pfpnacc
-  // GCC-LABEL: define{{.*}} double @test_m_pfpnacc
-  // CHECK: @llvm.x86.3dnowa.pfpnacc
-  return _m_pfpnacc(m1, m2);
-}
-
-__m64 test_m_pi2fw(__m64 m) {
-  // PS4-LABEL: define{{.*}} i64 @test_m_pi2fw
-  // GCC-LABEL: define{{.*}} double @test_m_pi2fw
-  // CHECK: @llvm.x86.3dnowa.pi2fw
-  return _m_pi2fw(m);
-}
-
-__m64 test_m_pswapdsf(__m64 m) {
-  // PS4-LABEL: define{{.*}} i64 @test_m_pswapdsf
-  // GCC-LABEL: define{{.*}} double @test_m_pswapdsf
-  // CHECK: @llvm.x86.3dnowa.pswapd
-  return _m_pswapdsf(m);
-}
-
-__m64 test_m_pswapdsi(__m64 m) {
-  // PS4-LABEL: define{{.*}} i64 @test_m_pswapdsi
-  // GCC-LABEL: define{{.*}} double @test_m_pswapdsi
-  // CHECK: @llvm.x86.3dnowa.pswapd
-  return _m_pswapdsi(m);
-}

diff  --git a/clang/test/CodeGen/builtins-x86.c b/clang/test/CodeGen/builtins-x86.c
index e0f220dbeafcc..de31a4db5b0c1 100644
--- a/clang/test/CodeGen/builtins-x86.c
+++ b/clang/test/CodeGen/builtins-x86.c
@@ -3,7 +3,6 @@
 // RUN: %clang_cc1 -DUSE_64 -DOPENCL -x cl -cl-std=CL2.0 -triple x86_64-unknown-unknown -target-feature +fxsr -target-feature +avx -target-feature +xsaveopt -target-feature +xsaves -target-feature +xsavec -target-feature +mwaitx -target-feature +clzero -target-feature +shstk -target-feature +wbnoinvd -target-feature +cldemote -emit-llvm -o %t %s
 
 #ifdef USE_ALL
-#define USE_3DNOW
 #define USE_64
 #define USE_SSE4
 #endif
@@ -96,9 +95,6 @@ void f0(void) {
   V4s    tmp_V4s;
   V2i    tmp_V2i;
   V1LLi  tmp_V1LLi;
-#ifdef USE_3DNOW
-  V2f    tmp_V2f;
-#endif
 
   // 128-bit
   V16c   tmp_V16c;
@@ -513,33 +509,7 @@ void f0(void) {
   __builtin_ia32_maskstorepd256(tmp_V4dp, tmp_V4LLi, tmp_V4d);
   __builtin_ia32_maskstoreps256(tmp_V8fp, tmp_V8i, tmp_V8f);
 
-#ifdef USE_3DNOW
-  tmp_V8c = __builtin_ia32_pavgusb(tmp_V8c, tmp_V8c);
-  tmp_V2i = __builtin_ia32_pf2id(tmp_V2f);
-  tmp_V2f = __builtin_ia32_pfacc(tmp_V2f, tmp_V2f);
-  tmp_V2f = __builtin_ia32_pfadd(tmp_V2f, tmp_V2f);
-  tmp_V2i = __builtin_ia32_pfcmpeq(tmp_V2f, tmp_V2f);
-  tmp_V2i = __builtin_ia32_pfcmpge(tmp_V2f, tmp_V2f);
-  tmp_V2i = __builtin_ia32_pfcmpgt(tmp_V2f, tmp_V2f);
-  tmp_V2f = __builtin_ia32_pfmax(tmp_V2f, tmp_V2f);
-  tmp_V2f = __builtin_ia32_pfmin(tmp_V2f, tmp_V2f);
-  tmp_V2f = __builtin_ia32_pfmul(tmp_V2f, tmp_V2f);
-  tmp_V2f = __builtin_ia32_pfrcp(tmp_V2f);
-  tmp_V2f = __builtin_ia32_pfrcpit1(tmp_V2f, tmp_V2f);
-  tmp_V2f = __builtin_ia32_pfrcpit2(tmp_V2f, tmp_V2f);
-  tmp_V2f = __builtin_ia32_pfrsqrt(tmp_V2f);
-  tmp_V2f = __builtin_ia32_pfrsqit1(tmp_V2f, tmp_V2f);
-  tmp_V2f = __builtin_ia32_pfsub(tmp_V2f, tmp_V2f);
-  tmp_V2f = __builtin_ia32_pfsubr(tmp_V2f, tmp_V2f);
-  tmp_V2f = __builtin_ia32_pi2fd(tmp_V2i);
-  tmp_V4s = __builtin_ia32_pmulhrw(tmp_V4s, tmp_V4s);
-  tmp_V2i = __builtin_ia32_pf2iw(tmp_V2f);
-  tmp_V2f = __builtin_ia32_pfnacc(tmp_V2f, tmp_V2f);
-  tmp_V2f = __builtin_ia32_pfpnacc(tmp_V2f, tmp_V2f);
-  tmp_V2f = __builtin_ia32_pi2fw(tmp_V2i);
-  tmp_V2f = __builtin_ia32_pswapdsf(tmp_V2f);
-  tmp_V2i = __builtin_ia32_pswapdsi(tmp_V2i);
-
+#if USE_ALL
   tmp_V4i = __builtin_ia32_sha1rnds4(tmp_V4i, tmp_V4i, imm_i_0_4);
   tmp_V4i = __builtin_ia32_sha1nexte(tmp_V4i, tmp_V4i);
   tmp_V4i = __builtin_ia32_sha1msg1(tmp_V4i, tmp_V4i);

diff  --git a/clang/test/Driver/x86-target-features.c b/clang/test/Driver/x86-target-features.c
index 6773571556bd4..ba61457102a57 100644
--- a/clang/test/Driver/x86-target-features.c
+++ b/clang/test/Driver/x86-target-features.c
@@ -8,8 +8,13 @@
 
 // RUN: %clang --target=i386 -march=i386 -mmmx -m3dnow -m3dnowa %s -### 2>&1 | FileCheck -check-prefix=MMX %s
 // RUN: %clang --target=i386 -march=i386 -mno-mmx -mno-3dnow -mno-3dnowa %s -### 2>&1 | FileCheck -check-prefix=NO-MMX %s
-// MMX: "-target-feature" "+mmx" "-target-feature" "+3dnow" "-target-feature" "+3dnowa"
-// NO-MMX: "-target-feature" "-mmx" "-target-feature" "-3dnow" "-target-feature" "-3dnowa"
+// MMX: warning: the clang compiler does not support '-m3dnowa'
+// MMX: warning: the clang compiler does not support '-m3dnow'
+// MMX-NOT: "3dnow"
+// MMX: "-target-feature" "+mmx"
+// MMX-NOT: "3dnow"
+// NO-MMX-NOT: warning
+// NO-MMX: "-target-feature" "-mmx"
 
 // RUN: %clang --target=i386 -march=i386 -msse -msse2 -msse3 -mssse3 -msse4a -msse4.1 -msse4.2 %s -### 2>&1 | FileCheck -check-prefix=SSE %s
 // RUN: %clang --target=i386 -march=i386 -mno-sse -mno-sse2 -mno-sse3 -mno-ssse3 -mno-sse4a -mno-sse4.1 -mno-sse4.2 %s -### 2>&1 | FileCheck -check-prefix=NO-SSE %s

diff  --git a/clang/test/Headers/mm3dnow.c b/clang/test/Headers/mm3dnow.c
index 255483cb9b836..a9b6dd88f8034 100644
--- a/clang/test/Headers/mm3dnow.c
+++ b/clang/test/Headers/mm3dnow.c
@@ -1,16 +1,21 @@
 // RUN: %clang_cc1 -fsyntax-only -ffreestanding %s -verify
+// RUN: %clang_cc1 -fsyntax-only -D_CLANG_DISABLE_CRT_DEPRECATION_WARNINGS -ffreestanding %s -verify
 // RUN: %clang_cc1 -fsyntax-only -ffreestanding -x c++ %s -verify
-// expected-no-diagnostics
 
 #if defined(i386) || defined(__x86_64__)
+#ifndef _CLANG_DISABLE_CRT_DEPRECATION_WARNINGS
+// expected-warning at mm3dnow.h:*{{The <mm3dnow.h> header is deprecated}}
+#else
+// expected-no-diagnostics
+#endif
+
 #include <mm3dnow.h>
 
-int __attribute__((__target__(("3dnow")))) foo(int a) {
-  _m_femms();
+int foo(void *x) {
+  _m_prefetch(x);
+  _m_prefetchw(x);
   return 4;
 }
-
-__m64 __attribute__((__target__(("3dnowa")))) bar(__m64 a) {
-  return _m_pf2iw(a);
-}
+#else
+// expected-no-diagnostics
 #endif

diff  --git a/clang/test/Preprocessor/predefined-arch-macros.c b/clang/test/Preprocessor/predefined-arch-macros.c
index f0a2ef851287f..6f470d85ca563 100644
--- a/clang/test/Preprocessor/predefined-arch-macros.c
+++ b/clang/test/Preprocessor/predefined-arch-macros.c
@@ -99,7 +99,6 @@
 // RUN: %clang -march=winchip2 -m32 -E -dM %s -o - 2>&1 \
 // RUN:     -target i386-unknown-linux \
 // RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_WINCHIP2_M32
-// CHECK_WINCHIP2_M32: #define __3dNOW__ 1
 // CHECK_WINCHIP2_M32: #define __MMX__ 1
 // CHECK_WINCHIP2_M32: #define __i386 1
 // CHECK_WINCHIP2_M32: #define __i386__ 1
@@ -115,7 +114,6 @@
 // RUN: %clang -march=c3 -m32 -E -dM %s -o - 2>&1 \
 // RUN:     -target i386-unknown-linux \
 // RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_C3_M32
-// CHECK_C3_M32: #define __3dNOW__ 1
 // CHECK_C3_M32: #define __MMX__ 1
 // CHECK_C3_M32: #define __i386 1
 // CHECK_C3_M32: #define __i386__ 1
@@ -2707,8 +2705,6 @@
 // RUN: %clang -march=geode -m32 -E -dM %s -o - 2>&1 \
 // RUN:     -target i386-unknown-linux \
 // RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_GEODE_M32
-// CHECK_GEODE_M32: #define __3dNOW_A__ 1
-// CHECK_GEODE_M32: #define __3dNOW__ 1
 // CHECK_GEODE_M32: #define __MMX__ 1
 // CHECK_GEODE_M32: #define __geode 1
 // CHECK_GEODE_M32: #define __geode__ 1
@@ -2739,7 +2735,6 @@
 // RUN: %clang -march=k6-2 -m32 -E -dM %s -o - 2>&1 \
 // RUN:     -target i386-unknown-linux \
 // RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_K6_2_M32
-// CHECK_K6_2_M32: #define __3dNOW__ 1
 // CHECK_K6_2_M32: #define __MMX__ 1
 // CHECK_K6_2_M32: #define __i386 1
 // CHECK_K6_2_M32: #define __i386__ 1
@@ -2757,7 +2752,6 @@
 // RUN: %clang -march=k6-3 -m32 -E -dM %s -o - 2>&1 \
 // RUN:     -target i386-unknown-linux \
 // RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_K6_3_M32
-// CHECK_K6_3_M32: #define __3dNOW__ 1
 // CHECK_K6_3_M32: #define __MMX__ 1
 // CHECK_K6_3_M32: #define __i386 1
 // CHECK_K6_3_M32: #define __i386__ 1
@@ -2775,8 +2769,6 @@
 // RUN: %clang -march=athlon -m32 -E -dM %s -o - 2>&1 \
 // RUN:     -target i386-unknown-linux \
 // RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_ATHLON_M32
-// CHECK_ATHLON_M32: #define __3dNOW_A__ 1
-// CHECK_ATHLON_M32: #define __3dNOW__ 1
 // CHECK_ATHLON_M32: #define __MMX__ 1
 // CHECK_ATHLON_M32: #define __athlon 1
 // CHECK_ATHLON_M32: #define __athlon__ 1
@@ -2792,8 +2784,6 @@
 // RUN: %clang -march=athlon-tbird -m32 -E -dM %s -o - 2>&1 \
 // RUN:     -target i386-unknown-linux \
 // RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_ATHLON_TBIRD_M32
-// CHECK_ATHLON_TBIRD_M32: #define __3dNOW_A__ 1
-// CHECK_ATHLON_TBIRD_M32: #define __3dNOW__ 1
 // CHECK_ATHLON_TBIRD_M32: #define __MMX__ 1
 // CHECK_ATHLON_TBIRD_M32: #define __athlon 1
 // CHECK_ATHLON_TBIRD_M32: #define __athlon__ 1
@@ -2809,8 +2799,6 @@
 // RUN: %clang -march=athlon-4 -m32 -E -dM %s -o - 2>&1 \
 // RUN:     -target i386-unknown-linux \
 // RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_ATHLON_4_M32
-// CHECK_ATHLON_4_M32: #define __3dNOW_A__ 1
-// CHECK_ATHLON_4_M32: #define __3dNOW__ 1
 // CHECK_ATHLON_4_M32: #define __MMX__ 1
 // CHECK_ATHLON_4_M32: #define __SSE__ 1
 // CHECK_ATHLON_4_M32: #define __athlon 1
@@ -2829,8 +2817,6 @@
 // RUN: %clang -march=athlon-xp -m32 -E -dM %s -o - 2>&1 \
 // RUN:     -target i386-unknown-linux \
 // RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_ATHLON_XP_M32
-// CHECK_ATHLON_XP_M32: #define __3dNOW_A__ 1
-// CHECK_ATHLON_XP_M32: #define __3dNOW__ 1
 // CHECK_ATHLON_XP_M32: #define __MMX__ 1
 // CHECK_ATHLON_XP_M32: #define __SSE__ 1
 // CHECK_ATHLON_XP_M32: #define __athlon 1
@@ -2849,8 +2835,6 @@
 // RUN: %clang -march=athlon-mp -m32 -E -dM %s -o - 2>&1 \
 // RUN:     -target i386-unknown-linux \
 // RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_ATHLON_MP_M32
-// CHECK_ATHLON_MP_M32: #define __3dNOW_A__ 1
-// CHECK_ATHLON_MP_M32: #define __3dNOW__ 1
 // CHECK_ATHLON_MP_M32: #define __MMX__ 1
 // CHECK_ATHLON_MP_M32: #define __SSE__ 1
 // CHECK_ATHLON_MP_M32: #define __athlon 1
@@ -2881,8 +2865,6 @@
 // RUN: %clang -march=k8 -m32 -E -dM %s -o - 2>&1 \
 // RUN:     -target i386-unknown-linux \
 // RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_K8_M32
-// CHECK_K8_M32: #define __3dNOW_A__ 1
-// CHECK_K8_M32: #define __3dNOW__ 1
 // CHECK_K8_M32: #define __MMX__ 1
 // CHECK_K8_M32: #define __SSE2__ 1
 // CHECK_K8_M32: #define __SSE__ 1
@@ -2896,8 +2878,6 @@
 // RUN: %clang -march=k8 -m64 -E -dM %s -o - 2>&1 \
 // RUN:     -target i386-unknown-linux \
 // RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_K8_M64
-// CHECK_K8_M64: #define __3dNOW_A__ 1
-// CHECK_K8_M64: #define __3dNOW__ 1
 // CHECK_K8_M64: #define __MMX__ 1
 // CHECK_K8_M64: #define __SSE2_MATH__ 1
 // CHECK_K8_M64: #define __SSE2__ 1
@@ -2914,8 +2894,6 @@
 // RUN: %clang -march=k8-sse3 -m32 -E -dM %s -o - 2>&1 \
 // RUN:     -target i386-unknown-linux \
 // RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_K8_SSE3_M32
-// CHECK_K8_SSE3_M32: #define __3dNOW_A__ 1
-// CHECK_K8_SSE3_M32: #define __3dNOW__ 1
 // CHECK_K8_SSE3_M32: #define __MMX__ 1
 // CHECK_K8_SSE3_M32: #define __SSE2__ 1
 // CHECK_K8_SSE3_M32: #define __SSE3__ 1
@@ -2930,8 +2908,6 @@
 // RUN: %clang -march=k8-sse3 -m64 -E -dM %s -o - 2>&1 \
 // RUN:     -target i386-unknown-linux \
 // RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_K8_SSE3_M64
-// CHECK_K8_SSE3_M64: #define __3dNOW_A__ 1
-// CHECK_K8_SSE3_M64: #define __3dNOW__ 1
 // CHECK_K8_SSE3_M64: #define __MMX__ 1
 // CHECK_K8_SSE3_M64: #define __SSE2_MATH__ 1
 // CHECK_K8_SSE3_M64: #define __SSE2__ 1
@@ -2949,8 +2925,6 @@
 // RUN: %clang -march=opteron -m32 -E -dM %s -o - 2>&1 \
 // RUN:     -target i386-unknown-linux \
 // RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_OPTERON_M32
-// CHECK_OPTERON_M32: #define __3dNOW_A__ 1
-// CHECK_OPTERON_M32: #define __3dNOW__ 1
 // CHECK_OPTERON_M32: #define __MMX__ 1
 // CHECK_OPTERON_M32: #define __SSE2__ 1
 // CHECK_OPTERON_M32: #define __SSE__ 1
@@ -2964,8 +2938,6 @@
 // RUN: %clang -march=opteron -m64 -E -dM %s -o - 2>&1 \
 // RUN:     -target i386-unknown-linux \
 // RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_OPTERON_M64
-// CHECK_OPTERON_M64: #define __3dNOW_A__ 1
-// CHECK_OPTERON_M64: #define __3dNOW__ 1
 // CHECK_OPTERON_M64: #define __MMX__ 1
 // CHECK_OPTERON_M64: #define __SSE2_MATH__ 1
 // CHECK_OPTERON_M64: #define __SSE2__ 1
@@ -2982,8 +2954,6 @@
 // RUN: %clang -march=opteron-sse3 -m32 -E -dM %s -o - 2>&1 \
 // RUN:     -target i386-unknown-linux \
 // RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_OPTERON_SSE3_M32
-// CHECK_OPTERON_SSE3_M32: #define __3dNOW_A__ 1
-// CHECK_OPTERON_SSE3_M32: #define __3dNOW__ 1
 // CHECK_OPTERON_SSE3_M32: #define __MMX__ 1
 // CHECK_OPTERON_SSE3_M32: #define __SSE2__ 1
 // CHECK_OPTERON_SSE3_M32: #define __SSE3__ 1
@@ -2998,8 +2968,6 @@
 // RUN: %clang -march=opteron-sse3 -m64 -E -dM %s -o - 2>&1 \
 // RUN:     -target i386-unknown-linux \
 // RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_OPTERON_SSE3_M64
-// CHECK_OPTERON_SSE3_M64: #define __3dNOW_A__ 1
-// CHECK_OPTERON_SSE3_M64: #define __3dNOW__ 1
 // CHECK_OPTERON_SSE3_M64: #define __MMX__ 1
 // CHECK_OPTERON_SSE3_M64: #define __SSE2_MATH__ 1
 // CHECK_OPTERON_SSE3_M64: #define __SSE2__ 1
@@ -3017,8 +2985,6 @@
 // RUN: %clang -march=athlon64 -m32 -E -dM %s -o - 2>&1 \
 // RUN:     -target i386-unknown-linux \
 // RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_ATHLON64_M32
-// CHECK_ATHLON64_M32: #define __3dNOW_A__ 1
-// CHECK_ATHLON64_M32: #define __3dNOW__ 1
 // CHECK_ATHLON64_M32: #define __MMX__ 1
 // CHECK_ATHLON64_M32: #define __SSE2__ 1
 // CHECK_ATHLON64_M32: #define __SSE__ 1
@@ -3032,8 +2998,6 @@
 // RUN: %clang -march=athlon64 -m64 -E -dM %s -o - 2>&1 \
 // RUN:     -target i386-unknown-linux \
 // RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_ATHLON64_M64
-// CHECK_ATHLON64_M64: #define __3dNOW_A__ 1
-// CHECK_ATHLON64_M64: #define __3dNOW__ 1
 // CHECK_ATHLON64_M64: #define __MMX__ 1
 // CHECK_ATHLON64_M64: #define __SSE2_MATH__ 1
 // CHECK_ATHLON64_M64: #define __SSE2__ 1
@@ -3050,8 +3014,6 @@
 // RUN: %clang -march=athlon64-sse3 -m32 -E -dM %s -o - 2>&1 \
 // RUN:     -target i386-unknown-linux \
 // RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_ATHLON64_SSE3_M32
-// CHECK_ATHLON64_SSE3_M32: #define __3dNOW_A__ 1
-// CHECK_ATHLON64_SSE3_M32: #define __3dNOW__ 1
 // CHECK_ATHLON64_SSE3_M32: #define __MMX__ 1
 // CHECK_ATHLON64_SSE3_M32: #define __SSE2__ 1
 // CHECK_ATHLON64_SSE3_M32: #define __SSE3__ 1
@@ -3066,8 +3028,6 @@
 // RUN: %clang -march=athlon64-sse3 -m64 -E -dM %s -o - 2>&1 \
 // RUN:     -target i386-unknown-linux \
 // RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_ATHLON64_SSE3_M64
-// CHECK_ATHLON64_SSE3_M64: #define __3dNOW_A__ 1
-// CHECK_ATHLON64_SSE3_M64: #define __3dNOW__ 1
 // CHECK_ATHLON64_SSE3_M64: #define __MMX__ 1
 // CHECK_ATHLON64_SSE3_M64: #define __SSE2_MATH__ 1
 // CHECK_ATHLON64_SSE3_M64: #define __SSE2__ 1
@@ -3085,8 +3045,6 @@
 // RUN: %clang -march=athlon-fx -m32 -E -dM %s -o - 2>&1 \
 // RUN:     -target i386-unknown-linux \
 // RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_ATHLON_FX_M32
-// CHECK_ATHLON_FX_M32: #define __3dNOW_A__ 1
-// CHECK_ATHLON_FX_M32: #define __3dNOW__ 1
 // CHECK_ATHLON_FX_M32: #define __MMX__ 1
 // CHECK_ATHLON_FX_M32: #define __SSE2__ 1
 // CHECK_ATHLON_FX_M32: #define __SSE__ 1
@@ -3100,8 +3058,6 @@
 // RUN: %clang -march=athlon-fx -m64 -E -dM %s -o - 2>&1 \
 // RUN:     -target i386-unknown-linux \
 // RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_ATHLON_FX_M64
-// CHECK_ATHLON_FX_M64: #define __3dNOW_A__ 1
-// CHECK_ATHLON_FX_M64: #define __3dNOW__ 1
 // CHECK_ATHLON_FX_M64: #define __MMX__ 1
 // CHECK_ATHLON_FX_M64: #define __SSE2_MATH__ 1
 // CHECK_ATHLON_FX_M64: #define __SSE2__ 1
@@ -3118,8 +3074,6 @@
 // RUN: %clang -march=amdfam10 -m32 -E -dM %s -o - 2>&1 \
 // RUN:     -target i386-unknown-linux \
 // RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_AMDFAM10_M32
-// CHECK_AMDFAM10_M32: #define __3dNOW_A__ 1
-// CHECK_AMDFAM10_M32: #define __3dNOW__ 1
 // CHECK_AMDFAM10_M32: #define __LAHF_SAHF__ 1
 // CHECK_AMDFAM10_M32: #define __LZCNT__ 1
 // CHECK_AMDFAM10_M32: #define __MMX__ 1
@@ -3141,8 +3095,6 @@
 // RUN: %clang -march=amdfam10 -m64 -E -dM %s -o - 2>&1 \
 // RUN:     -target i386-unknown-linux \
 // RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_AMDFAM10_M64
-// CHECK_AMDFAM10_M64: #define __3dNOW_A__ 1
-// CHECK_AMDFAM10_M64: #define __3dNOW__ 1
 // CHECK_AMDFAM10_M64: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_16 1
 // CHECK_AMDFAM10_M64: #define __LAHF_SAHF__ 1
 // CHECK_AMDFAM10_M64: #define __LZCNT__ 1
@@ -3167,8 +3119,6 @@
 // RUN: %clang -march=btver1 -m32 -E -dM %s -o - 2>&1 \
 // RUN:     -target i386-unknown-linux \
 // RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_BTVER1_M32
-// CHECK_BTVER1_M32-NOT: #define __3dNOW_A__ 1
-// CHECK_BTVER1_M32-NOT: #define __3dNOW__ 1
 // CHECK_BTVER1_M32: #define __LAHF_SAHF__ 1
 // CHECK_BTVER1_M32: #define __LZCNT__ 1
 // CHECK_BTVER1_M32: #define __MMX__ 1
@@ -3190,8 +3140,6 @@
 // RUN: %clang -march=btver1 -m64 -E -dM %s -o - 2>&1 \
 // RUN:     -target i386-unknown-linux \
 // RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_BTVER1_M64
-// CHECK_BTVER1_M64-NOT: #define __3dNOW_A__ 1
-// CHECK_BTVER1_M64-NOT: #define __3dNOW__ 1
 // CHECK_BTVER1_M64: #define __LAHF_SAHF__ 1
 // CHECK_BTVER1_M64: #define __LZCNT__ 1
 // CHECK_BTVER1_M64: #define __MMX__ 1
@@ -3215,8 +3163,6 @@
 // RUN: %clang -march=btver2 -m32 -E -dM %s -o - 2>&1 \
 // RUN:     -target i386-unknown-linux \
 // RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_BTVER2_M32
-// CHECK_BTVER2_M32-NOT: #define __3dNOW_A__ 1
-// CHECK_BTVER2_M32-NOT: #define __3dNOW__ 1
 // CHECK_BTVER2_M32: #define __AES__ 1
 // CHECK_BTVER2_M32: #define __AVX__ 1
 // CHECK_BTVER2_M32: #define __BMI__ 1
@@ -3245,8 +3191,6 @@
 // RUN: %clang -march=btver2 -m64 -E -dM %s -o - 2>&1 \
 // RUN:     -target i386-unknown-linux \
 // RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_BTVER2_M64
-// CHECK_BTVER2_M64-NOT: #define __3dNOW_A__ 1
-// CHECK_BTVER2_M64-NOT: #define __3dNOW__ 1
 // CHECK_BTVER2_M64: #define __AES__ 1
 // CHECK_BTVER2_M64: #define __AVX__ 1
 // CHECK_BTVER2_M64: #define __BMI__ 1
@@ -3277,8 +3221,6 @@
 // RUN: %clang -march=bdver1 -m32 -E -dM %s -o - 2>&1 \
 // RUN:     -target i386-unknown-linux \
 // RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_BDVER1_M32
-// CHECK_BDVER1_M32-NOT: #define __3dNOW_A__ 1
-// CHECK_BDVER1_M32-NOT: #define __3dNOW__ 1
 // CHECK_BDVER1_M32: #define __AES__ 1
 // CHECK_BDVER1_M32: #define __AVX__ 1
 // CHECK_BDVER1_M32: #define __FMA4__ 1
@@ -3308,8 +3250,6 @@
 // RUN: %clang -march=bdver1 -m64 -E -dM %s -o - 2>&1 \
 // RUN:     -target i386-unknown-linux \
 // RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_BDVER1_M64
-// CHECK_BDVER1_M64-NOT: #define __3dNOW_A__ 1
-// CHECK_BDVER1_M64-NOT: #define __3dNOW__ 1
 // CHECK_BDVER1_M64: #define __AES__ 1
 // CHECK_BDVER1_M64: #define __AVX__ 1
 // CHECK_BDVER1_M64: #define __FMA4__ 1
@@ -3341,8 +3281,6 @@
 // RUN: %clang -march=bdver2 -m32 -E -dM %s -o - 2>&1 \
 // RUN:     -target i386-unknown-linux \
 // RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_BDVER2_M32
-// CHECK_BDVER2_M32-NOT: #define __3dNOW_A__ 1
-// CHECK_BDVER2_M32-NOT: #define __3dNOW__ 1
 // CHECK_BDVER2_M32: #define __AES__ 1
 // CHECK_BDVER2_M32: #define __AVX__ 1
 // CHECK_BDVER2_M32: #define __BMI__ 1
@@ -3376,8 +3314,6 @@
 // RUN: %clang -march=bdver2 -m64 -E -dM %s -o - 2>&1 \
 // RUN:     -target i386-unknown-linux \
 // RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_BDVER2_M64
-// CHECK_BDVER2_M64-NOT: #define __3dNOW_A__ 1
-// CHECK_BDVER2_M64-NOT: #define __3dNOW__ 1
 // CHECK_BDVER2_M64: #define __AES__ 1
 // CHECK_BDVER2_M64: #define __AVX__ 1
 // CHECK_BDVER2_M64: #define __BMI__ 1
@@ -3413,8 +3349,6 @@
 // RUN: %clang -march=bdver3 -m32 -E -dM %s -o - 2>&1 \
 // RUN:     -target i386-unknown-linux \
 // RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_BDVER3_M32
-// CHECK_BDVER3_M32-NOT: #define __3dNOW_A__ 1
-// CHECK_BDVER3_M32-NOT: #define __3dNOW__ 1
 // CHECK_BDVER3_M32: #define __AES__ 1
 // CHECK_BDVER3_M32: #define __AVX__ 1
 // CHECK_BDVER3_M32: #define __BMI__ 1
@@ -3450,8 +3384,6 @@
 // RUN: %clang -march=bdver3 -m64 -E -dM %s -o - 2>&1 \
 // RUN:     -target i386-unknown-linux \
 // RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_BDVER3_M64
-// CHECK_BDVER3_M64-NOT: #define __3dNOW_A__ 1
-// CHECK_BDVER3_M64-NOT: #define __3dNOW__ 1
 // CHECK_BDVER3_M64: #define __AES__ 1
 // CHECK_BDVER3_M64: #define __AVX__ 1
 // CHECK_BDVER3_M64: #define __BMI__ 1
@@ -3489,8 +3421,6 @@
 // RUN: %clang -march=bdver4 -m32 -E -dM %s -o - 2>&1 \
 // RUN:     -target i386-unknown-linux \
 // RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_BDVER4_M32
-// CHECK_BDVER4_M32-NOT: #define __3dNOW_A__ 1
-// CHECK_BDVER4_M32-NOT: #define __3dNOW__ 1
 // CHECK_BDVER4_M32: #define __AES__ 1
 // CHECK_BDVER4_M32: #define __AVX2__ 1
 // CHECK_BDVER4_M32: #define __AVX__ 1
@@ -3529,8 +3459,6 @@
 // RUN: %clang -march=bdver4 -m64 -E -dM %s -o - 2>&1 \
 // RUN:     -target i386-unknown-linux \
 // RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_BDVER4_M64
-// CHECK_BDVER4_M64-NOT: #define __3dNOW_A__ 1
-// CHECK_BDVER4_M64-NOT: #define __3dNOW__ 1
 // CHECK_BDVER4_M64: #define __AES__ 1
 // CHECK_BDVER4_M64: #define __AVX2__ 1
 // CHECK_BDVER4_M64: #define __AVX__ 1
@@ -3571,8 +3499,6 @@
 // RUN: %clang -march=znver1 -m32 -E -dM %s -o - 2>&1 \
 // RUN:     -target i386-unknown-linux \
 // RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_ZNVER1_M32
-// CHECK_ZNVER1_M32-NOT: #define __3dNOW_A__ 1
-// CHECK_ZNVER1_M32-NOT: #define __3dNOW__ 1
 // CHECK_ZNVER1_M32: #define __ADX__ 1
 // CHECK_ZNVER1_M32: #define __AES__ 1
 // CHECK_ZNVER1_M32: #define __AVX2__ 1
@@ -3618,8 +3544,6 @@
 // RUN: %clang -march=znver1 -m64 -E -dM %s -o - 2>&1 \
 // RUN:     -target i386-unknown-linux \
 // RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_ZNVER1_M64
-// CHECK_ZNVER1_M64-NOT: #define __3dNOW_A__ 1
-// CHECK_ZNVER1_M64-NOT: #define __3dNOW__ 1
 // CHECK_ZNVER1_M64: #define __ADX__ 1
 // CHECK_ZNVER1_M64: #define __AES__ 1
 // CHECK_ZNVER1_M64: #define __AVX2__ 1
@@ -3668,8 +3592,6 @@
 // RUN: %clang -march=znver2 -m32 -E -dM %s -o - 2>&1 \
 // RUN:     -target i386-unknown-linux \
 // RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_ZNVER2_M32
-// CHECK_ZNVER2_M32-NOT: #define __3dNOW_A__ 1
-// CHECK_ZNVER2_M32-NOT: #define __3dNOW__ 1
 // CHECK_ZNVER2_M32: #define __ADX__ 1
 // CHECK_ZNVER2_M32: #define __AES__ 1
 // CHECK_ZNVER2_M32: #define __AVX2__ 1
@@ -3719,8 +3641,6 @@
 // RUN: %clang -march=znver2 -m64 -E -dM %s -o - 2>&1 \
 // RUN:     -target i386-unknown-linux \
 // RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_ZNVER2_M64
-// CHECK_ZNVER2_M64-NOT: #define __3dNOW_A__ 1
-// CHECK_ZNVER2_M64-NOT: #define __3dNOW__ 1
 // CHECK_ZNVER2_M64: #define __ADX__ 1
 // CHECK_ZNVER2_M64: #define __AES__ 1
 // CHECK_ZNVER2_M64: #define __AVX2__ 1
@@ -3772,8 +3692,6 @@
 // RUN: %clang -march=znver3 -m32 -E -dM %s -o - 2>&1 \
 // RUN:     -target i386-unknown-linux \
 // RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_ZNVER3_M32
-// CHECK_ZNVER3_M32-NOT: #define __3dNOW_A__ 1
-// CHECK_ZNVER3_M32-NOT: #define __3dNOW__ 1
 // CHECK_ZNVER3_M32: #define __ADX__ 1
 // CHECK_ZNVER3_M32: #define __AES__ 1
 // CHECK_ZNVER3_M32: #define __AVX2__ 1
@@ -3823,8 +3741,6 @@
 // RUN: %clang -march=znver3 -m64 -E -dM %s -o - 2>&1 \
 // RUN:     -target i386-unknown-linux \
 // RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_ZNVER3_M64
-// CHECK_ZNVER3_M64-NOT: #define __3dNOW_A__ 1
-// CHECK_ZNVER3_M64-NOT: #define __3dNOW__ 1
 // CHECK_ZNVER3_M64: #define __ADX__ 1
 // CHECK_ZNVER3_M64: #define __AES__ 1
 // CHECK_ZNVER3_M64: #define __AVX2__ 1
@@ -3878,8 +3794,6 @@
 // RUN: %clang -march=znver4 -m32 -E -dM %s -o - 2>&1 \
 // RUN:     -target i386-unknown-linux \
 // RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_ZNVER4_M32
-// CHECK_ZNVER4_M32-NOT: #define __3dNOW_A__ 1
-// CHECK_ZNVER4_M32-NOT: #define __3dNOW__ 1
 // CHECK_ZNVER4_M32: #define __ADX__ 1
 // CHECK_ZNVER4_M32: #define __AES__ 1
 // CHECK_ZNVER4_M32: #define __AVX2__ 1
@@ -3944,8 +3858,6 @@
 // RUN: %clang -march=znver4 -m64 -E -dM %s -o - 2>&1 \
 // RUN:     -target i386-unknown-linux \
 // RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_ZNVER4_M64
-// CHECK_ZNVER4_M64-NOT: #define __3dNOW_A__ 1
-// CHECK_ZNVER4_M64-NOT: #define __3dNOW__ 1
 // CHECK_ZNVER4_M64: #define __ADX__ 1
 // CHECK_ZNVER4_M64: #define __AES__ 1
 // CHECK_ZNVER4_M64: #define __AVX2__ 1

diff  --git a/clang/test/Preprocessor/x86_target_features.c b/clang/test/Preprocessor/x86_target_features.c
index 6e9c968273a46..5c0b815c8ae6f 100644
--- a/clang/test/Preprocessor/x86_target_features.c
+++ b/clang/test/Preprocessor/x86_target_features.c
@@ -348,12 +348,12 @@
 
 // RUN: %clang -target i386-unknown-unknown -march=atom -m3dnow -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=3DNOWPRFCHW %s
 
-// 3DNOWPRFCHW: #define __3dNOW__ 1
+// 3DNOWPRFCHW-NOT: #define __3dNOW__ 1
 // 3DNOWPRFCHW-NOT: #define __PRFCHW__ 1
 
 // RUN: %clang -target i386-unknown-unknown -march=atom -mno-prfchw -m3dnow -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=3DNOWNOPRFCHW %s
 
-// 3DNOWNOPRFCHW: #define __3dNOW__ 1
+// 3DNOWNOPRFCHW-NOT: #define __3dNOW__ 1
 // 3DNOWNOPRFCHW-NOT: #define __PRFCHW__ 1
 
 // RUN: %clang -target i386-unknown-unknown -march=atom -mprfchw -mno-3dnow -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=NO3DNOWPRFCHW %s

diff  --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst
index e2d9e2cd4e51e..311ae0ea255ef 100644
--- a/llvm/docs/ReleaseNotes.rst
+++ b/llvm/docs/ReleaseNotes.rst
@@ -218,6 +218,9 @@ Changes to the X86 Backend
 - Removed knl/knm specific ISA intrinsics: AVX512PF, AVX512ER, PREFETCHWT1,
   while assembly encoding/decoding supports are kept.
 
+- Removed ``3DNow!``-specific ISA intrinsics and codegen support. The ``3dnow`` and ``3dnowa`` target features are no longer supported. The intrinsics ``llvm.x86.3dnow.*``, ``llvm.x86.3dnowa.*``, and ``llvm.x86.mmx.femms`` have been removed. Assembly encoding/decoding for the corresponding instructions remains supported.
+
+
 Changes to the OCaml bindings
 -----------------------------
 

diff  --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td
index aee804047e1b0..adc46f9789ebb 100644
--- a/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -129,83 +129,6 @@ let TargetPrefix = "x86" in {
               Intrinsic<[], [llvm_ptr_ty], []>;
 }
 
-//===----------------------------------------------------------------------===//
-// 3DNow!
-
-let TargetPrefix = "x86" in {
-  def int_x86_3dnow_pavgusb : ClangBuiltin<"__builtin_ia32_pavgusb">,
-      DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
-                            [IntrNoMem]>;
-  def int_x86_3dnow_pf2id : ClangBuiltin<"__builtin_ia32_pf2id">,
-      DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
-  def int_x86_3dnow_pfacc : ClangBuiltin<"__builtin_ia32_pfacc">,
-      DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
-                            [IntrNoMem]>;
-  def int_x86_3dnow_pfadd : ClangBuiltin<"__builtin_ia32_pfadd">,
-      DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
-                            [IntrNoMem]>;
-  def int_x86_3dnow_pfcmpeq : ClangBuiltin<"__builtin_ia32_pfcmpeq">,
-      DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
-                            [IntrNoMem]>;
-  def int_x86_3dnow_pfcmpge : ClangBuiltin<"__builtin_ia32_pfcmpge">,
-      DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
-                            [IntrNoMem]>;
-  def int_x86_3dnow_pfcmpgt : ClangBuiltin<"__builtin_ia32_pfcmpgt">,
-      DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
-                            [IntrNoMem]>;
-  def int_x86_3dnow_pfmax : ClangBuiltin<"__builtin_ia32_pfmax">,
-      DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
-                            [IntrNoMem]>;
-  def int_x86_3dnow_pfmin : ClangBuiltin<"__builtin_ia32_pfmin">,
-      DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
-                            [IntrNoMem]>;
-  def int_x86_3dnow_pfmul : ClangBuiltin<"__builtin_ia32_pfmul">,
-      DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
-                            [IntrNoMem]>;
-  def int_x86_3dnow_pfrcp : ClangBuiltin<"__builtin_ia32_pfrcp">,
-      DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
-  def int_x86_3dnow_pfrcpit1 : ClangBuiltin<"__builtin_ia32_pfrcpit1">,
-      DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
-                            [IntrNoMem]>;
-  def int_x86_3dnow_pfrcpit2 : ClangBuiltin<"__builtin_ia32_pfrcpit2">,
-      DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
-                            [IntrNoMem]>;
-  def int_x86_3dnow_pfrsqrt : ClangBuiltin<"__builtin_ia32_pfrsqrt">,
-      DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
-  def int_x86_3dnow_pfrsqit1 : ClangBuiltin<"__builtin_ia32_pfrsqit1">,
-      DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
-                            [IntrNoMem]>;
-  def int_x86_3dnow_pfsub : ClangBuiltin<"__builtin_ia32_pfsub">,
-      DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
-                            [IntrNoMem]>;
-  def int_x86_3dnow_pfsubr : ClangBuiltin<"__builtin_ia32_pfsubr">,
-      DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
-                            [IntrNoMem]>;
-  def int_x86_3dnow_pi2fd : ClangBuiltin<"__builtin_ia32_pi2fd">,
-      DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
-  def int_x86_3dnow_pmulhrw : ClangBuiltin<"__builtin_ia32_pmulhrw">,
-      DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
-                            [IntrNoMem]>;
-}
-
-//===----------------------------------------------------------------------===//
-// 3DNow! extensions
-
-let TargetPrefix = "x86" in {
-  def int_x86_3dnowa_pf2iw : ClangBuiltin<"__builtin_ia32_pf2iw">,
-      DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
-  def int_x86_3dnowa_pfnacc : ClangBuiltin<"__builtin_ia32_pfnacc">,
-      DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
-                            [IntrNoMem]>;
-  def int_x86_3dnowa_pfpnacc : ClangBuiltin<"__builtin_ia32_pfpnacc">,
-      DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
-                            [IntrNoMem]>;
-  def int_x86_3dnowa_pi2fw : ClangBuiltin<"__builtin_ia32_pi2fw">,
-      DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
-  def int_x86_3dnowa_pswapd :
-      DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
-}
-
 //===----------------------------------------------------------------------===//
 // SSE1
 
@@ -2332,8 +2255,6 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   def int_x86_mmx_emms  : ClangBuiltin<"__builtin_ia32_emms">,
               Intrinsic<[], [], []>;
-  def int_x86_mmx_femms : ClangBuiltin<"__builtin_ia32_femms">,
-              Intrinsic<[], [], []>;
 }
 
 // Integer arithmetic ops.

diff  --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index 8cb003b838d06..9dafd5e628ca8 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -86,14 +86,8 @@ def FeatureSSE42   : SubtargetFeature<"sse4.2", "X86SSELevel", "SSE42",
 // The MMX subtarget feature is separate from the rest of the SSE features
 // because it's important (for odd compatibility reasons) to be able to
 // turn it off explicitly while allowing SSE+ to be on.
-def FeatureMMX     : SubtargetFeature<"mmx","X863DNowLevel", "MMX",
+def FeatureMMX     : SubtargetFeature<"mmx","HasMMX", "true",
                                       "Enable MMX instructions">;
-def Feature3DNow   : SubtargetFeature<"3dnow", "X863DNowLevel", "ThreeDNow",
-                                      "Enable 3DNow! instructions",
-                                      [FeatureMMX]>;
-def Feature3DNowA  : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA",
-                                      "Enable 3DNow! Athlon instructions",
-                                      [Feature3DNow]>;
 // All x86-64 hardware has SSE2, but we don't mark SSE2 as an implied
 // feature, because SSE2 can be disabled (e.g. for compiling OS kernels)
 // without disabling 64-bit mode. Nothing should imply this feature bit. It
@@ -1341,7 +1335,6 @@ def ProcessorFeatures {
   list<SubtargetFeature> BarcelonaFeatures = [FeatureX87,
                                               FeatureCX8,
                                               FeatureSSE4A,
-                                              Feature3DNowA,
                                               FeatureFXSR,
                                               FeatureNOPL,
                                               FeatureCX16,
@@ -1834,32 +1827,32 @@ def : ProcModel<P, SapphireRapidsModel,
 
 def : Proc<"k6",   [FeatureX87, FeatureCX8, FeatureMMX],
                    [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
-def : Proc<"k6-2", [FeatureX87, FeatureCX8, Feature3DNow],
+def : Proc<"k6-2", [FeatureX87, FeatureCX8, FeatureMMX, FeaturePRFCHW],
                    [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
-def : Proc<"k6-3", [FeatureX87, FeatureCX8, Feature3DNow],
+def : Proc<"k6-3", [FeatureX87, FeatureCX8, FeatureMMX, FeaturePRFCHW],
                    [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
 
 foreach P = ["athlon", "athlon-tbird"] in {
-  def : Proc<P, [FeatureX87, FeatureCX8, FeatureCMOV, Feature3DNowA,
+  def : Proc<P, [FeatureX87, FeatureCX8, FeatureCMOV, FeatureMMX, FeaturePRFCHW,
                  FeatureNOPL],
                 [TuningSlowSHLD, TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
 }
 
 foreach P = ["athlon-4", "athlon-xp", "athlon-mp"] in {
   def : Proc<P, [FeatureX87, FeatureCX8, FeatureCMOV,
-                 FeatureSSE1, Feature3DNowA, FeatureFXSR, FeatureNOPL],
+                 FeatureSSE1, FeatureMMX, FeaturePRFCHW, FeatureFXSR, FeatureNOPL],
                 [TuningSlowSHLD, TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
 }
 
 foreach P = ["k8", "opteron", "athlon64", "athlon-fx"] in {
-  def : Proc<P, [FeatureX87, FeatureCX8, FeatureSSE2, Feature3DNowA,
+  def : Proc<P, [FeatureX87, FeatureCX8, FeatureSSE2, FeatureMMX, FeaturePRFCHW,
                  FeatureFXSR, FeatureNOPL, FeatureX86_64, FeatureCMOV],
                 [TuningFastScalarShiftMasks, TuningSlowSHLD, TuningSlowUAMem16,
                  TuningSBBDepBreaking, TuningInsertVZEROUPPER]>;
 }
 
 foreach P = ["k8-sse3", "opteron-sse3", "athlon64-sse3"] in {
-  def : Proc<P, [FeatureX87, FeatureCX8, FeatureSSE3, Feature3DNowA,
+  def : Proc<P, [FeatureX87, FeatureCX8, FeatureSSE3, FeatureMMX, FeaturePRFCHW,
                  FeatureFXSR, FeatureNOPL, FeatureCX16, FeatureCMOV,
                  FeatureX86_64],
                 [TuningFastScalarShiftMasks, TuningSlowSHLD, TuningSlowUAMem16,
@@ -1900,14 +1893,14 @@ def : ProcModel<"znver3", Znver3Model, ProcessorFeatures.ZN3Features,
 def : ProcModel<"znver4", Znver4Model, ProcessorFeatures.ZN4Features,
            ProcessorFeatures.ZN4Tuning>;
 
-def : Proc<"geode",           [FeatureX87, FeatureCX8, Feature3DNowA],
+def : Proc<"geode",           [FeatureX87, FeatureCX8, FeatureMMX, FeaturePRFCHW],
                               [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
 
 def : Proc<"winchip-c6",      [FeatureX87, FeatureMMX],
                               [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
-def : Proc<"winchip2",        [FeatureX87, Feature3DNow],
+def : Proc<"winchip2",        [FeatureX87, FeatureMMX, FeaturePRFCHW],
                               [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
-def : Proc<"c3",              [FeatureX87, Feature3DNow],
+def : Proc<"c3",              [FeatureX87, FeatureMMX, FeaturePRFCHW],
                               [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
 def : Proc<"c3-2",            [FeatureX87, FeatureCX8, FeatureMMX,
                                FeatureSSE1, FeatureFXSR, FeatureCMOV],

diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 34f7c005efef0..9d651d4db6731 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -530,7 +530,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::SRL_PARTS, VT, Custom);
   }
 
-  if (Subtarget.hasSSEPrefetch() || Subtarget.hasThreeDNow())
+  if (Subtarget.hasSSEPrefetch())
     setOperationAction(ISD::PREFETCH      , MVT::Other, Custom);
 
   setOperationAction(ISD::ATOMIC_FENCE  , MVT::Other, Custom);

diff  --git a/llvm/lib/Target/X86/X86Instr3DNow.td b/llvm/lib/Target/X86/X86Instr3DNow.td
index 03612de0fad94..13fe7d2ccbe77 100644
--- a/llvm/lib/Target/X86/X86Instr3DNow.td
+++ b/llvm/lib/Target/X86/X86Instr3DNow.td
@@ -12,7 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 class I3DNow<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pat>
-      : I<o, F, outs, ins, asm, pat>, Requires<[Has3DNow]> {
+      : I<o, F, outs, ins, asm, pat> {
 }
 
 class I3DNow_binop<bits<8> o, Format F, dag ins, string Mnemonic, list<dag> pat>
@@ -25,66 +25,60 @@ class I3DNow_conv<bits<8> o, Format F, dag ins, string Mnemonic, list<dag> pat>
       : I3DNow<o, F, (outs VR64:$dst), ins,
           !strconcat(Mnemonic, "\t{$src, $dst|$dst, $src}"), pat>, ThreeDNow;
 
-multiclass I3DNow_binop_rm_int<bits<8> opc, string Mn,
-                               X86FoldableSchedWrite sched, bit Commutable = 0,
-                               string Ver = ""> {
-  let isCommutable = Commutable in
-  def rr : I3DNow_binop<opc, MRMSrcReg, (ins VR64:$src1, VR64:$src2), Mn,
-    [(set VR64:$dst, (!cast<Intrinsic>(
-      !strconcat("int_x86_3dnow", Ver, "_", Mn)) VR64:$src1, VR64:$src2))]>,
-      Sched<[sched]>;
-  def rm : I3DNow_binop<opc, MRMSrcMem, (ins VR64:$src1, i64mem:$src2), Mn,
-    [(set VR64:$dst, (!cast<Intrinsic>(
-      !strconcat("int_x86_3dnow", Ver, "_", Mn)) VR64:$src1,
-        (bitconvert (load_mmx addr:$src2))))]>,
-        Sched<[sched.Folded, sched.ReadAfterFold]>;
+multiclass I3DNow_binop_rm<bits<8> opc, string Mn,
+                           X86FoldableSchedWrite sched, bit Commutable = 0> {
+  let mayStore=0, hasSideEffects=0 in {
+    let isCommutable = Commutable, mayLoad=0 in
+    def rr : I3DNow_binop<opc, MRMSrcReg, (ins VR64:$src1, VR64:$src2), Mn,
+      []>, Sched<[sched]>;
+    let mayLoad=1 in
+    def rm : I3DNow_binop<opc, MRMSrcMem, (ins VR64:$src1, i64mem:$src2), Mn,
+      []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
+  }
 }
 
-multiclass I3DNow_conv_rm_int<bits<8> opc, string Mn,
-                              X86FoldableSchedWrite sched, string Ver = ""> {
-  def rr : I3DNow_conv<opc, MRMSrcReg, (ins VR64:$src), Mn,
-    [(set VR64:$dst, (!cast<Intrinsic>(
-      !strconcat("int_x86_3dnow", Ver, "_", Mn)) VR64:$src))]>,
-      Sched<[sched]>;
-  def rm : I3DNow_conv<opc, MRMSrcMem, (ins i64mem:$src), Mn,
-    [(set VR64:$dst, (!cast<Intrinsic>(
-      !strconcat("int_x86_3dnow", Ver, "_", Mn))
-        (bitconvert (load_mmx addr:$src))))]>,
-        Sched<[sched.Folded, sched.ReadAfterFold]>;
+multiclass I3DNow_conv_rm<bits<8> opc, string Mn,
+                              X86FoldableSchedWrite sched> {
+  let mayStore=0, hasSideEffects=0 in {
+    let mayLoad=0 in
+    def rr : I3DNow_conv<opc, MRMSrcReg, (ins VR64:$src), Mn,
+      []>, Sched<[sched]>;
+    let mayLoad=1 in
+    def rm : I3DNow_conv<opc, MRMSrcMem, (ins i64mem:$src), Mn,
+      []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
+  }
 }
 
-defm PAVGUSB  : I3DNow_binop_rm_int<0xBF, "pavgusb", SchedWriteVecALU.MMX, 1>;
-defm PF2ID    : I3DNow_conv_rm_int<0x1D, "pf2id", WriteCvtPS2I>;
-defm PFACC    : I3DNow_binop_rm_int<0xAE, "pfacc", WriteFAdd>;
-defm PFADD    : I3DNow_binop_rm_int<0x9E, "pfadd", WriteFAdd, 1>;
-defm PFCMPEQ  : I3DNow_binop_rm_int<0xB0, "pfcmpeq", WriteFAdd, 1>;
-defm PFCMPGE  : I3DNow_binop_rm_int<0x90, "pfcmpge", WriteFAdd>;
-defm PFCMPGT  : I3DNow_binop_rm_int<0xA0, "pfcmpgt", WriteFAdd>;
-defm PFMAX    : I3DNow_binop_rm_int<0xA4, "pfmax", WriteFAdd>;
-defm PFMIN    : I3DNow_binop_rm_int<0x94, "pfmin", WriteFAdd>;
-defm PFMUL    : I3DNow_binop_rm_int<0xB4, "pfmul", WriteFAdd, 1>;
-defm PFRCP    : I3DNow_conv_rm_int<0x96, "pfrcp", WriteFAdd>;
-defm PFRCPIT1 : I3DNow_binop_rm_int<0xA6, "pfrcpit1", WriteFAdd>;
-defm PFRCPIT2 : I3DNow_binop_rm_int<0xB6, "pfrcpit2", WriteFAdd>;
-defm PFRSQIT1 : I3DNow_binop_rm_int<0xA7, "pfrsqit1", WriteFAdd>;
-defm PFRSQRT  : I3DNow_conv_rm_int<0x97, "pfrsqrt", WriteFAdd>;
-defm PFSUB    : I3DNow_binop_rm_int<0x9A, "pfsub", WriteFAdd, 1>;
-defm PFSUBR   : I3DNow_binop_rm_int<0xAA, "pfsubr", WriteFAdd, 1>;
-defm PI2FD    : I3DNow_conv_rm_int<0x0D, "pi2fd", WriteCvtI2PS>;
-defm PMULHRW  : I3DNow_binop_rm_int<0xB7, "pmulhrw", SchedWriteVecIMul.MMX, 1>;
+defm PAVGUSB  : I3DNow_binop_rm<0xBF, "pavgusb", SchedWriteVecALU.MMX, 1>;
+defm PF2ID    : I3DNow_conv_rm<0x1D, "pf2id", WriteCvtPS2I>;
+defm PFACC    : I3DNow_binop_rm<0xAE, "pfacc", WriteFAdd>;
+defm PFADD    : I3DNow_binop_rm<0x9E, "pfadd", WriteFAdd, 1>;
+defm PFCMPEQ  : I3DNow_binop_rm<0xB0, "pfcmpeq", WriteFAdd, 1>;
+defm PFCMPGE  : I3DNow_binop_rm<0x90, "pfcmpge", WriteFAdd>;
+defm PFCMPGT  : I3DNow_binop_rm<0xA0, "pfcmpgt", WriteFAdd>;
+defm PFMAX    : I3DNow_binop_rm<0xA4, "pfmax", WriteFAdd>;
+defm PFMIN    : I3DNow_binop_rm<0x94, "pfmin", WriteFAdd>;
+defm PFMUL    : I3DNow_binop_rm<0xB4, "pfmul", WriteFAdd, 1>;
+defm PFRCP    : I3DNow_conv_rm<0x96, "pfrcp", WriteFAdd>;
+defm PFRCPIT1 : I3DNow_binop_rm<0xA6, "pfrcpit1", WriteFAdd>;
+defm PFRCPIT2 : I3DNow_binop_rm<0xB6, "pfrcpit2", WriteFAdd>;
+defm PFRSQIT1 : I3DNow_binop_rm<0xA7, "pfrsqit1", WriteFAdd>;
+defm PFRSQRT  : I3DNow_conv_rm<0x97, "pfrsqrt", WriteFAdd>;
+defm PFSUB    : I3DNow_binop_rm<0x9A, "pfsub", WriteFAdd, 1>;
+defm PFSUBR   : I3DNow_binop_rm<0xAA, "pfsubr", WriteFAdd, 1>;
+defm PI2FD    : I3DNow_conv_rm<0x0D, "pi2fd", WriteCvtI2PS>;
+defm PMULHRW  : I3DNow_binop_rm<0xB7, "pmulhrw", SchedWriteVecIMul.MMX, 1>;
 
-let SchedRW = [WriteEMMS],
-    Defs = [MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
-            ST0, ST1, ST2, ST3, ST4, ST5, ST6, ST7] in
+let SchedRW = [WriteEMMS], mayLoad=1, mayStore=1, hasSideEffects=1 in
 def FEMMS : I3DNow<0x0E, RawFrm, (outs), (ins), "femms",
-                   [(int_x86_mmx_femms)]>, TB;
+                   []>, TB;
 
-let SchedRW = [WriteLoad] in {
-let Predicates = [Has3DNow, NoSSEPrefetch] in
+let SchedRW = [WriteLoad], mayLoad=1, mayStore=1, hasSideEffects=0 in {
 def PREFETCH : I3DNow<0x0D, MRM0m, (outs), (ins i8mem:$addr),
                       "prefetch\t$addr",
-                      [(prefetch addr:$addr, timm, timm, (i32 1))]>, TB;
+                      []>, TB;
 
+// Note: PREFETCHW is the only instruction in this file which is NOT specific to 3DNow!
 def PREFETCHW : I<0x0D, MRM1m, (outs), (ins i8mem:$addr), "prefetchw\t$addr",
                   [(prefetch addr:$addr, (i32 1), (i32 PrefetchWLevel), (i32 1))]>,
                   TB, Requires<[HasPrefetchW]>;
@@ -94,8 +88,8 @@ def PREFETCHWT1 : I<0x0D, MRM2m, (outs), (ins i8mem:$addr), "prefetchwt1\t$addr"
 }
 
 // "3DNowA" instructions
-defm PF2IW    : I3DNow_conv_rm_int<0x1C, "pf2iw", WriteCvtPS2I, "a">;
-defm PI2FW    : I3DNow_conv_rm_int<0x0C, "pi2fw", WriteCvtI2PS, "a">;
-defm PFNACC   : I3DNow_binop_rm_int<0x8A, "pfnacc", WriteFAdd, 0, "a">;
-defm PFPNACC  : I3DNow_binop_rm_int<0x8E, "pfpnacc", WriteFAdd, 0, "a">;
-defm PSWAPD   : I3DNow_conv_rm_int<0xBB, "pswapd", SchedWriteShuffle.MMX, "a">;
+defm PF2IW    : I3DNow_conv_rm<0x1C, "pf2iw", WriteCvtPS2I>;
+defm PI2FW    : I3DNow_conv_rm<0x0C, "pi2fw", WriteCvtI2PS>;
+defm PFNACC   : I3DNow_binop_rm<0x8A, "pfnacc", WriteFAdd, 0>;
+defm PFPNACC  : I3DNow_binop_rm<0x8E, "pfpnacc", WriteFAdd, 0>;
+defm PSWAPD   : I3DNow_conv_rm<0xBB, "pswapd", SchedWriteShuffle.MMX>;

diff  --git a/llvm/lib/Target/X86/X86InstrPredicates.td b/llvm/lib/Target/X86/X86InstrPredicates.td
index 419ff9e6f5c0f..f6038cf7a94cb 100644
--- a/llvm/lib/Target/X86/X86InstrPredicates.td
+++ b/llvm/lib/Target/X86/X86InstrPredicates.td
@@ -50,8 +50,6 @@ def HasCMOV      : Predicate<"Subtarget->canUseCMOV()">;
 def NoCMOV       : Predicate<"!Subtarget->canUseCMOV()">;
 def HasNOPL      : Predicate<"Subtarget->hasNOPL()">;
 def HasMMX       : Predicate<"Subtarget->hasMMX()">;
-def Has3DNow     : Predicate<"Subtarget->hasThreeDNow()">;
-def Has3DNowA    : Predicate<"Subtarget->hasThreeDNowA()">;
 def HasSSE1      : Predicate<"Subtarget->hasSSE1()">;
 def UseSSE1      : Predicate<"Subtarget->hasSSE1() && !Subtarget->hasAVX()">;
 def HasSSE2      : Predicate<"Subtarget->hasSSE2()">;
@@ -141,7 +139,6 @@ def HasSGX       : Predicate<"Subtarget->hasSGX()">;
 def HasSM3       : Predicate<"Subtarget->hasSM3()">;
 def HasRDSEED    : Predicate<"Subtarget->hasRDSEED()">;
 def HasSSEPrefetch : Predicate<"Subtarget->hasSSEPrefetch()">;
-def NoSSEPrefetch : Predicate<"!Subtarget->hasSSEPrefetch()">;
 def HasPRFCHW    : Predicate<"Subtarget->hasPRFCHW()">;
 def HasPREFETCHI : Predicate<"Subtarget->hasPREFETCHI()">;
 def HasPrefetchW : Predicate<"Subtarget->hasPrefetchW()">;

diff  --git a/llvm/lib/Target/X86/X86Subtarget.cpp b/llvm/lib/Target/X86/X86Subtarget.cpp
index 6c9a94c949590..4e8e04b1112c0 100644
--- a/llvm/lib/Target/X86/X86Subtarget.cpp
+++ b/llvm/lib/Target/X86/X86Subtarget.cpp
@@ -290,8 +290,7 @@ void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef TuneCPU,
     IsUnalignedMem16Slow = false;
 
   LLVM_DEBUG(dbgs() << "Subtarget features: SSELevel " << X86SSELevel
-                    << ", 3DNowLevel " << X863DNowLevel << ", 64bit "
-                    << HasX86_64 << "\n");
+                    << ", MMX " << HasMMX << ", 64bit " << HasX86_64 << "\n");
   if (Is64Bit && !HasX86_64)
     report_fatal_error("64-bit code requested on a subtarget that doesn't "
                        "support it!");

diff  --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h
index 4532db134fcb4..e3cb9ee8ce190 100644
--- a/llvm/lib/Target/X86/X86Subtarget.h
+++ b/llvm/lib/Target/X86/X86Subtarget.h
@@ -55,10 +55,6 @@ class X86Subtarget final : public X86GenSubtargetInfo {
     NoSSE, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, AVX, AVX2, AVX512
   };
 
-  enum X863DNowEnum {
-    NoThreeDNow, MMX, ThreeDNow, ThreeDNowA
-  };
-
   /// Which PIC style to use
   PICStyles::Style PICStyle;
 
@@ -67,9 +63,6 @@ class X86Subtarget final : public X86GenSubtargetInfo {
   /// SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, or none supported.
   X86SSEEnum X86SSELevel = NoSSE;
 
-  /// MMX, 3DNow, 3DNow Athlon, or none supported.
-  X863DNowEnum X863DNowLevel = NoThreeDNow;
-
 #define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER)                    \
   bool ATTRIBUTE = DEFAULT;
 #include "X86GenSubtargetInfo.inc"
@@ -207,21 +200,16 @@ class X86Subtarget final : public X86GenSubtargetInfo {
   bool hasAVX2() const { return X86SSELevel >= AVX2; }
   bool hasAVX512() const { return X86SSELevel >= AVX512; }
   bool hasInt256() const { return hasAVX2(); }
-  bool hasMMX() const { return X863DNowLevel >= MMX; }
-  bool hasThreeDNow() const { return X863DNowLevel >= ThreeDNow; }
-  bool hasThreeDNowA() const { return X863DNowLevel >= ThreeDNowA; }
   bool hasAnyFMA() const { return hasFMA() || hasFMA4(); }
   bool hasPrefetchW() const {
     // The PREFETCHW instruction was added with 3DNow but later CPUs gave it
-    // its own CPUID bit as part of deprecating 3DNow. We assume the
-    // L1 version exists if the L2 version does.
-    return hasThreeDNow() || hasPRFCHW();
+    // its own CPUID bit as part of deprecating 3DNow.
+    return hasPRFCHW();
   }
   bool hasSSEPrefetch() const {
-    // We implicitly enable these when we have a write prefix supporting cache
-    // level OR if we have prfchw, but don't already have a read prefetch from
-    // 3dnow.
-    return hasSSE1() || (hasPRFCHW() && !hasThreeDNow()) || hasPREFETCHI();
+    // We also implicitly enable these when we have a write prefix supporting
+    // cache level OR if we have prfchw.
+    return hasSSE1() || hasPRFCHW() || hasPREFETCHI();
   }
   bool canUseLAHFSAHF() const { return hasLAHFSAHF64() || !is64Bit(); }
   // These are generic getters that OR together all of the thunk types

diff  --git a/llvm/test/CodeGen/X86/3dnow-intrinsics.ll b/llvm/test/CodeGen/X86/3dnow-intrinsics.ll
deleted file mode 100644
index a82f705b77d84..0000000000000
--- a/llvm/test/CodeGen/X86/3dnow-intrinsics.ll
+++ /dev/null
@@ -1,896 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+3dnow | FileCheck %s --check-prefix=X86
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+3dnow | FileCheck %s --check-prefix=X64
-
-define <8 x i8> @test_pavgusb(x86_mmx %a.coerce, x86_mmx %b.coerce) nounwind readnone {
-; X86-LABEL: test_pavgusb:
-; X86:       # %bb.0: # %entry
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    pavgusb %mm1, %mm0
-; X86-NEXT:    movq %mm0, (%eax)
-; X86-NEXT:    retl $4
-;
-; X64-LABEL: test_pavgusb:
-; X64:       # %bb.0: # %entry
-; X64-NEXT:    pavgusb %mm1, %mm0
-; X64-NEXT:    movq2dq %mm0, %xmm0
-; X64-NEXT:    retq
-entry:
-  %0 = bitcast x86_mmx %a.coerce to <8 x i8>
-  %1 = bitcast x86_mmx %b.coerce to <8 x i8>
-  %2 = bitcast <8 x i8> %0 to x86_mmx
-  %3 = bitcast <8 x i8> %1 to x86_mmx
-  %4 = call x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx %2, x86_mmx %3)
-  %5 = bitcast x86_mmx %4 to <8 x i8>
-  ret <8 x i8> %5
-}
-
-declare x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx, x86_mmx) nounwind readnone
-
-define <2 x i32> @test_pf2id(<2 x float> %a) nounwind readnone {
-; X86-LABEL: test_pf2id:
-; X86:       # %bb.0: # %entry
-; X86-NEXT:    pushl %ebp
-; X86-NEXT:    movl %esp, %ebp
-; X86-NEXT:    andl $-8, %esp
-; X86-NEXT:    subl $8, %esp
-; X86-NEXT:    movd 12(%ebp), %mm0
-; X86-NEXT:    movd 8(%ebp), %mm1
-; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
-; X86-NEXT:    pf2id %mm1, %mm0
-; X86-NEXT:    movq %mm0, (%esp)
-; X86-NEXT:    movl (%esp), %eax
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movl %ebp, %esp
-; X86-NEXT:    popl %ebp
-; X86-NEXT:    retl
-;
-; X64-LABEL: test_pf2id:
-; X64:       # %bb.0: # %entry
-; X64-NEXT:    movdq2q %xmm0, %mm0
-; X64-NEXT:    pf2id %mm0, %mm0
-; X64-NEXT:    movq2dq %mm0, %xmm0
-; X64-NEXT:    retq
-entry:
-  %0 = bitcast <2 x float> %a to x86_mmx
-  %1 = tail call x86_mmx @llvm.x86.3dnow.pf2id(x86_mmx %0)
-  %2 = bitcast x86_mmx %1 to <2 x i32>
-  ret <2 x i32> %2
-}
-
-declare x86_mmx @llvm.x86.3dnow.pf2id(x86_mmx) nounwind readnone
-
-define <2 x float> @test_pfacc(<2 x float> %a, <2 x float> %b) nounwind readnone {
-; X86-LABEL: test_pfacc:
-; X86:       # %bb.0: # %entry
-; X86-NEXT:    pushl %ebp
-; X86-NEXT:    movl %esp, %ebp
-; X86-NEXT:    andl $-8, %esp
-; X86-NEXT:    subl $8, %esp
-; X86-NEXT:    movd 20(%ebp), %mm0
-; X86-NEXT:    movd 16(%ebp), %mm1
-; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
-; X86-NEXT:    movd 12(%ebp), %mm0
-; X86-NEXT:    movd 8(%ebp), %mm2
-; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
-; X86-NEXT:    pfacc %mm1, %mm2
-; X86-NEXT:    movq %mm2, (%esp)
-; X86-NEXT:    flds {{[0-9]+}}(%esp)
-; X86-NEXT:    flds (%esp)
-; X86-NEXT:    movl %ebp, %esp
-; X86-NEXT:    popl %ebp
-; X86-NEXT:    retl
-;
-; X64-LABEL: test_pfacc:
-; X64:       # %bb.0: # %entry
-; X64-NEXT:    movdq2q %xmm1, %mm0
-; X64-NEXT:    movdq2q %xmm0, %mm1
-; X64-NEXT:    pfacc %mm0, %mm1
-; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
-; X64-NEXT:    retq
-entry:
-  %0 = bitcast <2 x float> %a to x86_mmx
-  %1 = bitcast <2 x float> %b to x86_mmx
-  %2 = tail call x86_mmx @llvm.x86.3dnow.pfacc(x86_mmx %0, x86_mmx %1)
-  %3 = bitcast x86_mmx %2 to <2 x float>
-  ret <2 x float> %3
-}
-
-declare x86_mmx @llvm.x86.3dnow.pfacc(x86_mmx, x86_mmx) nounwind readnone
-
-define <2 x float> @test_pfadd(<2 x float> %a, <2 x float> %b) nounwind readnone {
-; X86-LABEL: test_pfadd:
-; X86:       # %bb.0: # %entry
-; X86-NEXT:    pushl %ebp
-; X86-NEXT:    movl %esp, %ebp
-; X86-NEXT:    andl $-8, %esp
-; X86-NEXT:    subl $8, %esp
-; X86-NEXT:    movd 20(%ebp), %mm0
-; X86-NEXT:    movd 16(%ebp), %mm1
-; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
-; X86-NEXT:    movd 12(%ebp), %mm0
-; X86-NEXT:    movd 8(%ebp), %mm2
-; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
-; X86-NEXT:    pfadd %mm1, %mm2
-; X86-NEXT:    movq %mm2, (%esp)
-; X86-NEXT:    flds {{[0-9]+}}(%esp)
-; X86-NEXT:    flds (%esp)
-; X86-NEXT:    movl %ebp, %esp
-; X86-NEXT:    popl %ebp
-; X86-NEXT:    retl
-;
-; X64-LABEL: test_pfadd:
-; X64:       # %bb.0: # %entry
-; X64-NEXT:    movdq2q %xmm1, %mm0
-; X64-NEXT:    movdq2q %xmm0, %mm1
-; X64-NEXT:    pfadd %mm0, %mm1
-; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
-; X64-NEXT:    retq
-entry:
-  %0 = bitcast <2 x float> %a to x86_mmx
-  %1 = bitcast <2 x float> %b to x86_mmx
-  %2 = tail call x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx %0, x86_mmx %1)
-  %3 = bitcast x86_mmx %2 to <2 x float>
-  ret <2 x float> %3
-}
-
-declare x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx, x86_mmx) nounwind readnone
-
-define <2 x i32> @test_pfcmpeq(<2 x float> %a, <2 x float> %b) nounwind readnone {
-; X86-LABEL: test_pfcmpeq:
-; X86:       # %bb.0: # %entry
-; X86-NEXT:    pushl %ebp
-; X86-NEXT:    movl %esp, %ebp
-; X86-NEXT:    andl $-8, %esp
-; X86-NEXT:    subl $8, %esp
-; X86-NEXT:    movd 20(%ebp), %mm0
-; X86-NEXT:    movd 16(%ebp), %mm1
-; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
-; X86-NEXT:    movd 12(%ebp), %mm0
-; X86-NEXT:    movd 8(%ebp), %mm2
-; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
-; X86-NEXT:    pfcmpeq %mm1, %mm2
-; X86-NEXT:    movq %mm2, (%esp)
-; X86-NEXT:    movl (%esp), %eax
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movl %ebp, %esp
-; X86-NEXT:    popl %ebp
-; X86-NEXT:    retl
-;
-; X64-LABEL: test_pfcmpeq:
-; X64:       # %bb.0: # %entry
-; X64-NEXT:    movdq2q %xmm1, %mm0
-; X64-NEXT:    movdq2q %xmm0, %mm1
-; X64-NEXT:    pfcmpeq %mm0, %mm1
-; X64-NEXT:    movq2dq %mm1, %xmm0
-; X64-NEXT:    retq
-entry:
-  %0 = bitcast <2 x float> %a to x86_mmx
-  %1 = bitcast <2 x float> %b to x86_mmx
-  %2 = tail call x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx %0, x86_mmx %1)
-  %3 = bitcast x86_mmx %2 to <2 x i32>
-  ret <2 x i32> %3
-}
-
-declare x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx, x86_mmx) nounwind readnone
-
-define <2 x i32> @test_pfcmpge(<2 x float> %a, <2 x float> %b) nounwind readnone {
-; X86-LABEL: test_pfcmpge:
-; X86:       # %bb.0: # %entry
-; X86-NEXT:    pushl %ebp
-; X86-NEXT:    movl %esp, %ebp
-; X86-NEXT:    andl $-8, %esp
-; X86-NEXT:    subl $8, %esp
-; X86-NEXT:    movd 20(%ebp), %mm0
-; X86-NEXT:    movd 16(%ebp), %mm1
-; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
-; X86-NEXT:    movd 12(%ebp), %mm0
-; X86-NEXT:    movd 8(%ebp), %mm2
-; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
-; X86-NEXT:    pfcmpge %mm1, %mm2
-; X86-NEXT:    movq %mm2, (%esp)
-; X86-NEXT:    movl (%esp), %eax
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movl %ebp, %esp
-; X86-NEXT:    popl %ebp
-; X86-NEXT:    retl
-;
-; X64-LABEL: test_pfcmpge:
-; X64:       # %bb.0: # %entry
-; X64-NEXT:    movdq2q %xmm1, %mm0
-; X64-NEXT:    movdq2q %xmm0, %mm1
-; X64-NEXT:    pfcmpge %mm0, %mm1
-; X64-NEXT:    movq2dq %mm1, %xmm0
-; X64-NEXT:    retq
-entry:
-  %0 = bitcast <2 x float> %a to x86_mmx
-  %1 = bitcast <2 x float> %b to x86_mmx
-  %2 = tail call x86_mmx @llvm.x86.3dnow.pfcmpge(x86_mmx %0, x86_mmx %1)
-  %3 = bitcast x86_mmx %2 to <2 x i32>
-  ret <2 x i32> %3
-}
-
-declare x86_mmx @llvm.x86.3dnow.pfcmpge(x86_mmx, x86_mmx) nounwind readnone
-
-define <2 x i32> @test_pfcmpgt(<2 x float> %a, <2 x float> %b) nounwind readnone {
-; X86-LABEL: test_pfcmpgt:
-; X86:       # %bb.0: # %entry
-; X86-NEXT:    pushl %ebp
-; X86-NEXT:    movl %esp, %ebp
-; X86-NEXT:    andl $-8, %esp
-; X86-NEXT:    subl $8, %esp
-; X86-NEXT:    movd 20(%ebp), %mm0
-; X86-NEXT:    movd 16(%ebp), %mm1
-; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
-; X86-NEXT:    movd 12(%ebp), %mm0
-; X86-NEXT:    movd 8(%ebp), %mm2
-; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
-; X86-NEXT:    pfcmpgt %mm1, %mm2
-; X86-NEXT:    movq %mm2, (%esp)
-; X86-NEXT:    movl (%esp), %eax
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movl %ebp, %esp
-; X86-NEXT:    popl %ebp
-; X86-NEXT:    retl
-;
-; X64-LABEL: test_pfcmpgt:
-; X64:       # %bb.0: # %entry
-; X64-NEXT:    movdq2q %xmm1, %mm0
-; X64-NEXT:    movdq2q %xmm0, %mm1
-; X64-NEXT:    pfcmpgt %mm0, %mm1
-; X64-NEXT:    movq2dq %mm1, %xmm0
-; X64-NEXT:    retq
-entry:
-  %0 = bitcast <2 x float> %a to x86_mmx
-  %1 = bitcast <2 x float> %b to x86_mmx
-  %2 = tail call x86_mmx @llvm.x86.3dnow.pfcmpgt(x86_mmx %0, x86_mmx %1)
-  %3 = bitcast x86_mmx %2 to <2 x i32>
-  ret <2 x i32> %3
-}
-
-declare x86_mmx @llvm.x86.3dnow.pfcmpgt(x86_mmx, x86_mmx) nounwind readnone
-
-define <2 x float> @test_pfmax(<2 x float> %a, <2 x float> %b) nounwind readnone {
-; X86-LABEL: test_pfmax:
-; X86:       # %bb.0: # %entry
-; X86-NEXT:    pushl %ebp
-; X86-NEXT:    movl %esp, %ebp
-; X86-NEXT:    andl $-8, %esp
-; X86-NEXT:    subl $8, %esp
-; X86-NEXT:    movd 20(%ebp), %mm0
-; X86-NEXT:    movd 16(%ebp), %mm1
-; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
-; X86-NEXT:    movd 12(%ebp), %mm0
-; X86-NEXT:    movd 8(%ebp), %mm2
-; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
-; X86-NEXT:    pfmax %mm1, %mm2
-; X86-NEXT:    movq %mm2, (%esp)
-; X86-NEXT:    flds {{[0-9]+}}(%esp)
-; X86-NEXT:    flds (%esp)
-; X86-NEXT:    movl %ebp, %esp
-; X86-NEXT:    popl %ebp
-; X86-NEXT:    retl
-;
-; X64-LABEL: test_pfmax:
-; X64:       # %bb.0: # %entry
-; X64-NEXT:    movdq2q %xmm1, %mm0
-; X64-NEXT:    movdq2q %xmm0, %mm1
-; X64-NEXT:    pfmax %mm0, %mm1
-; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
-; X64-NEXT:    retq
-entry:
-  %0 = bitcast <2 x float> %a to x86_mmx
-  %1 = bitcast <2 x float> %b to x86_mmx
-  %2 = tail call x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx %0, x86_mmx %1)
-  %3 = bitcast x86_mmx %2 to <2 x float>
-  ret <2 x float> %3
-}
-
-declare x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx, x86_mmx) nounwind readnone
-
-define <2 x float> @test_pfmin(<2 x float> %a, <2 x float> %b) nounwind readnone {
-; X86-LABEL: test_pfmin:
-; X86:       # %bb.0: # %entry
-; X86-NEXT:    pushl %ebp
-; X86-NEXT:    movl %esp, %ebp
-; X86-NEXT:    andl $-8, %esp
-; X86-NEXT:    subl $8, %esp
-; X86-NEXT:    movd 20(%ebp), %mm0
-; X86-NEXT:    movd 16(%ebp), %mm1
-; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
-; X86-NEXT:    movd 12(%ebp), %mm0
-; X86-NEXT:    movd 8(%ebp), %mm2
-; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
-; X86-NEXT:    pfmin %mm1, %mm2
-; X86-NEXT:    movq %mm2, (%esp)
-; X86-NEXT:    flds {{[0-9]+}}(%esp)
-; X86-NEXT:    flds (%esp)
-; X86-NEXT:    movl %ebp, %esp
-; X86-NEXT:    popl %ebp
-; X86-NEXT:    retl
-;
-; X64-LABEL: test_pfmin:
-; X64:       # %bb.0: # %entry
-; X64-NEXT:    movdq2q %xmm1, %mm0
-; X64-NEXT:    movdq2q %xmm0, %mm1
-; X64-NEXT:    pfmin %mm0, %mm1
-; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
-; X64-NEXT:    retq
-entry:
-  %0 = bitcast <2 x float> %a to x86_mmx
-  %1 = bitcast <2 x float> %b to x86_mmx
-  %2 = tail call x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx %0, x86_mmx %1)
-  %3 = bitcast x86_mmx %2 to <2 x float>
-  ret <2 x float> %3
-}
-
-declare x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx, x86_mmx) nounwind readnone
-
-define <2 x float> @test_pfmul(<2 x float> %a, <2 x float> %b) nounwind readnone {
-; X86-LABEL: test_pfmul:
-; X86:       # %bb.0: # %entry
-; X86-NEXT:    pushl %ebp
-; X86-NEXT:    movl %esp, %ebp
-; X86-NEXT:    andl $-8, %esp
-; X86-NEXT:    subl $8, %esp
-; X86-NEXT:    movd 20(%ebp), %mm0
-; X86-NEXT:    movd 16(%ebp), %mm1
-; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
-; X86-NEXT:    movd 12(%ebp), %mm0
-; X86-NEXT:    movd 8(%ebp), %mm2
-; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
-; X86-NEXT:    pfmul %mm1, %mm2
-; X86-NEXT:    movq %mm2, (%esp)
-; X86-NEXT:    flds {{[0-9]+}}(%esp)
-; X86-NEXT:    flds (%esp)
-; X86-NEXT:    movl %ebp, %esp
-; X86-NEXT:    popl %ebp
-; X86-NEXT:    retl
-;
-; X64-LABEL: test_pfmul:
-; X64:       # %bb.0: # %entry
-; X64-NEXT:    movdq2q %xmm1, %mm0
-; X64-NEXT:    movdq2q %xmm0, %mm1
-; X64-NEXT:    pfmul %mm0, %mm1
-; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
-; X64-NEXT:    retq
-entry:
-  %0 = bitcast <2 x float> %a to x86_mmx
-  %1 = bitcast <2 x float> %b to x86_mmx
-  %2 = tail call x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx %0, x86_mmx %1)
-  %3 = bitcast x86_mmx %2 to <2 x float>
-  ret <2 x float> %3
-}
-
-declare x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx, x86_mmx) nounwind readnone
-
-define <2 x float> @test_pfrcp(<2 x float> %a) nounwind readnone {
-; X86-LABEL: test_pfrcp:
-; X86:       # %bb.0: # %entry
-; X86-NEXT:    pushl %ebp
-; X86-NEXT:    movl %esp, %ebp
-; X86-NEXT:    andl $-8, %esp
-; X86-NEXT:    subl $8, %esp
-; X86-NEXT:    movd 12(%ebp), %mm0
-; X86-NEXT:    movd 8(%ebp), %mm1
-; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
-; X86-NEXT:    pfrcp %mm1, %mm0
-; X86-NEXT:    movq %mm0, (%esp)
-; X86-NEXT:    flds {{[0-9]+}}(%esp)
-; X86-NEXT:    flds (%esp)
-; X86-NEXT:    movl %ebp, %esp
-; X86-NEXT:    popl %ebp
-; X86-NEXT:    retl
-;
-; X64-LABEL: test_pfrcp:
-; X64:       # %bb.0: # %entry
-; X64-NEXT:    movdq2q %xmm0, %mm0
-; X64-NEXT:    pfrcp %mm0, %mm0
-; X64-NEXT:    movq %mm0, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
-; X64-NEXT:    retq
-entry:
-  %0 = bitcast <2 x float> %a to x86_mmx
-  %1 = tail call x86_mmx @llvm.x86.3dnow.pfrcp(x86_mmx %0)
-  %2 = bitcast x86_mmx %1 to <2 x float>
-  ret <2 x float> %2
-}
-
-declare x86_mmx @llvm.x86.3dnow.pfrcp(x86_mmx) nounwind readnone
-
-define <2 x float> @test_pfrcpit1(<2 x float> %a, <2 x float> %b) nounwind readnone {
-; X86-LABEL: test_pfrcpit1:
-; X86:       # %bb.0: # %entry
-; X86-NEXT:    pushl %ebp
-; X86-NEXT:    movl %esp, %ebp
-; X86-NEXT:    andl $-8, %esp
-; X86-NEXT:    subl $8, %esp
-; X86-NEXT:    movd 20(%ebp), %mm0
-; X86-NEXT:    movd 16(%ebp), %mm1
-; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
-; X86-NEXT:    movd 12(%ebp), %mm0
-; X86-NEXT:    movd 8(%ebp), %mm2
-; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
-; X86-NEXT:    pfrcpit1 %mm1, %mm2
-; X86-NEXT:    movq %mm2, (%esp)
-; X86-NEXT:    flds {{[0-9]+}}(%esp)
-; X86-NEXT:    flds (%esp)
-; X86-NEXT:    movl %ebp, %esp
-; X86-NEXT:    popl %ebp
-; X86-NEXT:    retl
-;
-; X64-LABEL: test_pfrcpit1:
-; X64:       # %bb.0: # %entry
-; X64-NEXT:    movdq2q %xmm1, %mm0
-; X64-NEXT:    movdq2q %xmm0, %mm1
-; X64-NEXT:    pfrcpit1 %mm0, %mm1
-; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
-; X64-NEXT:    retq
-entry:
-  %0 = bitcast <2 x float> %a to x86_mmx
-  %1 = bitcast <2 x float> %b to x86_mmx
-  %2 = tail call x86_mmx @llvm.x86.3dnow.pfrcpit1(x86_mmx %0, x86_mmx %1)
-  %3 = bitcast x86_mmx %2 to <2 x float>
-  ret <2 x float> %3
-}
-
-declare x86_mmx @llvm.x86.3dnow.pfrcpit1(x86_mmx, x86_mmx) nounwind readnone
-
-define <2 x float> @test_pfrcpit2(<2 x float> %a, <2 x float> %b) nounwind readnone {
-; X86-LABEL: test_pfrcpit2:
-; X86:       # %bb.0: # %entry
-; X86-NEXT:    pushl %ebp
-; X86-NEXT:    movl %esp, %ebp
-; X86-NEXT:    andl $-8, %esp
-; X86-NEXT:    subl $8, %esp
-; X86-NEXT:    movd 20(%ebp), %mm0
-; X86-NEXT:    movd 16(%ebp), %mm1
-; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
-; X86-NEXT:    movd 12(%ebp), %mm0
-; X86-NEXT:    movd 8(%ebp), %mm2
-; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
-; X86-NEXT:    pfrcpit2 %mm1, %mm2
-; X86-NEXT:    movq %mm2, (%esp)
-; X86-NEXT:    flds {{[0-9]+}}(%esp)
-; X86-NEXT:    flds (%esp)
-; X86-NEXT:    movl %ebp, %esp
-; X86-NEXT:    popl %ebp
-; X86-NEXT:    retl
-;
-; X64-LABEL: test_pfrcpit2:
-; X64:       # %bb.0: # %entry
-; X64-NEXT:    movdq2q %xmm1, %mm0
-; X64-NEXT:    movdq2q %xmm0, %mm1
-; X64-NEXT:    pfrcpit2 %mm0, %mm1
-; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
-; X64-NEXT:    retq
-entry:
-  %0 = bitcast <2 x float> %a to x86_mmx
-  %1 = bitcast <2 x float> %b to x86_mmx
-  %2 = tail call x86_mmx @llvm.x86.3dnow.pfrcpit2(x86_mmx %0, x86_mmx %1)
-  %3 = bitcast x86_mmx %2 to <2 x float>
-  ret <2 x float> %3
-}
-
-declare x86_mmx @llvm.x86.3dnow.pfrcpit2(x86_mmx, x86_mmx) nounwind readnone
-
-define <2 x float> @test_pfrsqrt(<2 x float> %a) nounwind readnone {
-; X86-LABEL: test_pfrsqrt:
-; X86:       # %bb.0: # %entry
-; X86-NEXT:    pushl %ebp
-; X86-NEXT:    movl %esp, %ebp
-; X86-NEXT:    andl $-8, %esp
-; X86-NEXT:    subl $8, %esp
-; X86-NEXT:    movd 12(%ebp), %mm0
-; X86-NEXT:    movd 8(%ebp), %mm1
-; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
-; X86-NEXT:    pfrsqrt %mm1, %mm0
-; X86-NEXT:    movq %mm0, (%esp)
-; X86-NEXT:    flds {{[0-9]+}}(%esp)
-; X86-NEXT:    flds (%esp)
-; X86-NEXT:    movl %ebp, %esp
-; X86-NEXT:    popl %ebp
-; X86-NEXT:    retl
-;
-; X64-LABEL: test_pfrsqrt:
-; X64:       # %bb.0: # %entry
-; X64-NEXT:    movdq2q %xmm0, %mm0
-; X64-NEXT:    pfrsqrt %mm0, %mm0
-; X64-NEXT:    movq %mm0, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
-; X64-NEXT:    retq
-entry:
-  %0 = bitcast <2 x float> %a to x86_mmx
-  %1 = tail call x86_mmx @llvm.x86.3dnow.pfrsqrt(x86_mmx %0)
-  %2 = bitcast x86_mmx %1 to <2 x float>
-  ret <2 x float> %2
-}
-
-declare x86_mmx @llvm.x86.3dnow.pfrsqrt(x86_mmx) nounwind readnone
-
-define <2 x float> @test_pfrsqit1(<2 x float> %a, <2 x float> %b) nounwind readnone {
-; X86-LABEL: test_pfrsqit1:
-; X86:       # %bb.0: # %entry
-; X86-NEXT:    pushl %ebp
-; X86-NEXT:    movl %esp, %ebp
-; X86-NEXT:    andl $-8, %esp
-; X86-NEXT:    subl $8, %esp
-; X86-NEXT:    movd 20(%ebp), %mm0
-; X86-NEXT:    movd 16(%ebp), %mm1
-; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
-; X86-NEXT:    movd 12(%ebp), %mm0
-; X86-NEXT:    movd 8(%ebp), %mm2
-; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
-; X86-NEXT:    pfrsqit1 %mm1, %mm2
-; X86-NEXT:    movq %mm2, (%esp)
-; X86-NEXT:    flds {{[0-9]+}}(%esp)
-; X86-NEXT:    flds (%esp)
-; X86-NEXT:    movl %ebp, %esp
-; X86-NEXT:    popl %ebp
-; X86-NEXT:    retl
-;
-; X64-LABEL: test_pfrsqit1:
-; X64:       # %bb.0: # %entry
-; X64-NEXT:    movdq2q %xmm1, %mm0
-; X64-NEXT:    movdq2q %xmm0, %mm1
-; X64-NEXT:    pfrsqit1 %mm0, %mm1
-; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
-; X64-NEXT:    retq
-entry:
-  %0 = bitcast <2 x float> %a to x86_mmx
-  %1 = bitcast <2 x float> %b to x86_mmx
-  %2 = tail call x86_mmx @llvm.x86.3dnow.pfrsqit1(x86_mmx %0, x86_mmx %1)
-  %3 = bitcast x86_mmx %2 to <2 x float>
-  ret <2 x float> %3
-}
-
-declare x86_mmx @llvm.x86.3dnow.pfrsqit1(x86_mmx, x86_mmx) nounwind readnone
-
-define <2 x float> @test_pfsub(<2 x float> %a, <2 x float> %b) nounwind readnone {
-; X86-LABEL: test_pfsub:
-; X86:       # %bb.0: # %entry
-; X86-NEXT:    pushl %ebp
-; X86-NEXT:    movl %esp, %ebp
-; X86-NEXT:    andl $-8, %esp
-; X86-NEXT:    subl $8, %esp
-; X86-NEXT:    movd 20(%ebp), %mm0
-; X86-NEXT:    movd 16(%ebp), %mm1
-; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
-; X86-NEXT:    movd 12(%ebp), %mm0
-; X86-NEXT:    movd 8(%ebp), %mm2
-; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
-; X86-NEXT:    pfsub %mm1, %mm2
-; X86-NEXT:    movq %mm2, (%esp)
-; X86-NEXT:    flds {{[0-9]+}}(%esp)
-; X86-NEXT:    flds (%esp)
-; X86-NEXT:    movl %ebp, %esp
-; X86-NEXT:    popl %ebp
-; X86-NEXT:    retl
-;
-; X64-LABEL: test_pfsub:
-; X64:       # %bb.0: # %entry
-; X64-NEXT:    movdq2q %xmm1, %mm0
-; X64-NEXT:    movdq2q %xmm0, %mm1
-; X64-NEXT:    pfsub %mm0, %mm1
-; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
-; X64-NEXT:    retq
-entry:
-  %0 = bitcast <2 x float> %a to x86_mmx
-  %1 = bitcast <2 x float> %b to x86_mmx
-  %2 = tail call x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx %0, x86_mmx %1)
-  %3 = bitcast x86_mmx %2 to <2 x float>
-  ret <2 x float> %3
-}
-
-declare x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx, x86_mmx) nounwind readnone
-
-define <2 x float> @test_pfsubr(<2 x float> %a, <2 x float> %b) nounwind readnone {
-; X86-LABEL: test_pfsubr:
-; X86:       # %bb.0: # %entry
-; X86-NEXT:    pushl %ebp
-; X86-NEXT:    movl %esp, %ebp
-; X86-NEXT:    andl $-8, %esp
-; X86-NEXT:    subl $8, %esp
-; X86-NEXT:    movd 20(%ebp), %mm0
-; X86-NEXT:    movd 16(%ebp), %mm1
-; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
-; X86-NEXT:    movd 12(%ebp), %mm0
-; X86-NEXT:    movd 8(%ebp), %mm2
-; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
-; X86-NEXT:    pfsubr %mm1, %mm2
-; X86-NEXT:    movq %mm2, (%esp)
-; X86-NEXT:    flds {{[0-9]+}}(%esp)
-; X86-NEXT:    flds (%esp)
-; X86-NEXT:    movl %ebp, %esp
-; X86-NEXT:    popl %ebp
-; X86-NEXT:    retl
-;
-; X64-LABEL: test_pfsubr:
-; X64:       # %bb.0: # %entry
-; X64-NEXT:    movdq2q %xmm1, %mm0
-; X64-NEXT:    movdq2q %xmm0, %mm1
-; X64-NEXT:    pfsubr %mm0, %mm1
-; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
-; X64-NEXT:    retq
-entry:
-  %0 = bitcast <2 x float> %a to x86_mmx
-  %1 = bitcast <2 x float> %b to x86_mmx
-  %2 = tail call x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx %0, x86_mmx %1)
-  %3 = bitcast x86_mmx %2 to <2 x float>
-  ret <2 x float> %3
-}
-
-declare x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx, x86_mmx) nounwind readnone
-
-define <2 x float> @test_pi2fd(x86_mmx %a.coerce) nounwind readnone {
-; X86-LABEL: test_pi2fd:
-; X86:       # %bb.0: # %entry
-; X86-NEXT:    pushl %ebp
-; X86-NEXT:    movl %esp, %ebp
-; X86-NEXT:    andl $-8, %esp
-; X86-NEXT:    subl $8, %esp
-; X86-NEXT:    pi2fd %mm0, %mm0
-; X86-NEXT:    movq %mm0, (%esp)
-; X86-NEXT:    flds {{[0-9]+}}(%esp)
-; X86-NEXT:    flds (%esp)
-; X86-NEXT:    movl %ebp, %esp
-; X86-NEXT:    popl %ebp
-; X86-NEXT:    retl
-;
-; X64-LABEL: test_pi2fd:
-; X64:       # %bb.0: # %entry
-; X64-NEXT:    pi2fd %mm0, %mm0
-; X64-NEXT:    movq %mm0, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
-; X64-NEXT:    retq
-entry:
-  %0 = bitcast x86_mmx %a.coerce to <2 x i32>
-  %1 = bitcast <2 x i32> %0 to x86_mmx
-  %2 = call x86_mmx @llvm.x86.3dnow.pi2fd(x86_mmx %1)
-  %3 = bitcast x86_mmx %2 to <2 x float>
-  ret <2 x float> %3
-}
-
-declare x86_mmx @llvm.x86.3dnow.pi2fd(x86_mmx) nounwind readnone
-
-define <4 x i16> @test_pmulhrw(x86_mmx %a.coerce, x86_mmx %b.coerce) nounwind readnone {
-; X86-LABEL: test_pmulhrw:
-; X86:       # %bb.0: # %entry
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    pmulhrw %mm1, %mm0
-; X86-NEXT:    movq %mm0, (%eax)
-; X86-NEXT:    retl $4
-;
-; X64-LABEL: test_pmulhrw:
-; X64:       # %bb.0: # %entry
-; X64-NEXT:    pmulhrw %mm1, %mm0
-; X64-NEXT:    movq2dq %mm0, %xmm0
-; X64-NEXT:    retq
-entry:
-  %0 = bitcast x86_mmx %a.coerce to <4 x i16>
-  %1 = bitcast x86_mmx %b.coerce to <4 x i16>
-  %2 = bitcast <4 x i16> %0 to x86_mmx
-  %3 = bitcast <4 x i16> %1 to x86_mmx
-  %4 = call x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx %2, x86_mmx %3)
-  %5 = bitcast x86_mmx %4 to <4 x i16>
-  ret <4 x i16> %5
-}
-
-declare x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx, x86_mmx) nounwind readnone
-
-define <2 x i32> @test_pf2iw(<2 x float> %a) nounwind readnone {
-; X86-LABEL: test_pf2iw:
-; X86:       # %bb.0: # %entry
-; X86-NEXT:    pushl %ebp
-; X86-NEXT:    movl %esp, %ebp
-; X86-NEXT:    andl $-8, %esp
-; X86-NEXT:    subl $8, %esp
-; X86-NEXT:    movd 12(%ebp), %mm0
-; X86-NEXT:    movd 8(%ebp), %mm1
-; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
-; X86-NEXT:    pf2iw %mm1, %mm0
-; X86-NEXT:    movq %mm0, (%esp)
-; X86-NEXT:    movl (%esp), %eax
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movl %ebp, %esp
-; X86-NEXT:    popl %ebp
-; X86-NEXT:    retl
-;
-; X64-LABEL: test_pf2iw:
-; X64:       # %bb.0: # %entry
-; X64-NEXT:    movdq2q %xmm0, %mm0
-; X64-NEXT:    pf2iw %mm0, %mm0
-; X64-NEXT:    movq2dq %mm0, %xmm0
-; X64-NEXT:    retq
-entry:
-  %0 = bitcast <2 x float> %a to x86_mmx
-  %1 = tail call x86_mmx @llvm.x86.3dnowa.pf2iw(x86_mmx %0)
-  %2 = bitcast x86_mmx %1 to <2 x i32>
-  ret <2 x i32> %2
-}
-
-declare x86_mmx @llvm.x86.3dnowa.pf2iw(x86_mmx) nounwind readnone
-
-define <2 x float> @test_pfnacc(<2 x float> %a, <2 x float> %b) nounwind readnone {
-; X86-LABEL: test_pfnacc:
-; X86:       # %bb.0: # %entry
-; X86-NEXT:    pushl %ebp
-; X86-NEXT:    movl %esp, %ebp
-; X86-NEXT:    andl $-8, %esp
-; X86-NEXT:    subl $8, %esp
-; X86-NEXT:    movd 20(%ebp), %mm0
-; X86-NEXT:    movd 16(%ebp), %mm1
-; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
-; X86-NEXT:    movd 12(%ebp), %mm0
-; X86-NEXT:    movd 8(%ebp), %mm2
-; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
-; X86-NEXT:    pfnacc %mm1, %mm2
-; X86-NEXT:    movq %mm2, (%esp)
-; X86-NEXT:    flds {{[0-9]+}}(%esp)
-; X86-NEXT:    flds (%esp)
-; X86-NEXT:    movl %ebp, %esp
-; X86-NEXT:    popl %ebp
-; X86-NEXT:    retl
-;
-; X64-LABEL: test_pfnacc:
-; X64:       # %bb.0: # %entry
-; X64-NEXT:    movdq2q %xmm1, %mm0
-; X64-NEXT:    movdq2q %xmm0, %mm1
-; X64-NEXT:    pfnacc %mm0, %mm1
-; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
-; X64-NEXT:    retq
-entry:
-  %0 = bitcast <2 x float> %a to x86_mmx
-  %1 = bitcast <2 x float> %b to x86_mmx
-  %2 = tail call x86_mmx @llvm.x86.3dnowa.pfnacc(x86_mmx %0, x86_mmx %1)
-  %3 = bitcast x86_mmx %2 to <2 x float>
-  ret <2 x float> %3
-}
-
-declare x86_mmx @llvm.x86.3dnowa.pfnacc(x86_mmx, x86_mmx) nounwind readnone
-
-define <2 x float> @test_pfpnacc(<2 x float> %a, <2 x float> %b) nounwind readnone {
-; X86-LABEL: test_pfpnacc:
-; X86:       # %bb.0: # %entry
-; X86-NEXT:    pushl %ebp
-; X86-NEXT:    movl %esp, %ebp
-; X86-NEXT:    andl $-8, %esp
-; X86-NEXT:    subl $8, %esp
-; X86-NEXT:    movd 20(%ebp), %mm0
-; X86-NEXT:    movd 16(%ebp), %mm1
-; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
-; X86-NEXT:    movd 12(%ebp), %mm0
-; X86-NEXT:    movd 8(%ebp), %mm2
-; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
-; X86-NEXT:    pfpnacc %mm1, %mm2
-; X86-NEXT:    movq %mm2, (%esp)
-; X86-NEXT:    flds {{[0-9]+}}(%esp)
-; X86-NEXT:    flds (%esp)
-; X86-NEXT:    movl %ebp, %esp
-; X86-NEXT:    popl %ebp
-; X86-NEXT:    retl
-;
-; X64-LABEL: test_pfpnacc:
-; X64:       # %bb.0: # %entry
-; X64-NEXT:    movdq2q %xmm1, %mm0
-; X64-NEXT:    movdq2q %xmm0, %mm1
-; X64-NEXT:    pfpnacc %mm0, %mm1
-; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
-; X64-NEXT:    retq
-entry:
-  %0 = bitcast <2 x float> %a to x86_mmx
-  %1 = bitcast <2 x float> %b to x86_mmx
-  %2 = tail call x86_mmx @llvm.x86.3dnowa.pfpnacc(x86_mmx %0, x86_mmx %1)
-  %3 = bitcast x86_mmx %2 to <2 x float>
-  ret <2 x float> %3
-}
-
-declare x86_mmx @llvm.x86.3dnowa.pfpnacc(x86_mmx, x86_mmx) nounwind readnone
-
-define <2 x float> @test_pi2fw(x86_mmx %a.coerce) nounwind readnone {
-; X86-LABEL: test_pi2fw:
-; X86:       # %bb.0: # %entry
-; X86-NEXT:    pushl %ebp
-; X86-NEXT:    movl %esp, %ebp
-; X86-NEXT:    andl $-8, %esp
-; X86-NEXT:    subl $8, %esp
-; X86-NEXT:    pi2fw %mm0, %mm0
-; X86-NEXT:    movq %mm0, (%esp)
-; X86-NEXT:    flds {{[0-9]+}}(%esp)
-; X86-NEXT:    flds (%esp)
-; X86-NEXT:    movl %ebp, %esp
-; X86-NEXT:    popl %ebp
-; X86-NEXT:    retl
-;
-; X64-LABEL: test_pi2fw:
-; X64:       # %bb.0: # %entry
-; X64-NEXT:    pi2fw %mm0, %mm0
-; X64-NEXT:    movq %mm0, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
-; X64-NEXT:    retq
-entry:
-  %0 = bitcast x86_mmx %a.coerce to <2 x i32>
-  %1 = bitcast <2 x i32> %0 to x86_mmx
-  %2 = call x86_mmx @llvm.x86.3dnowa.pi2fw(x86_mmx %1)
-  %3 = bitcast x86_mmx %2 to <2 x float>
-  ret <2 x float> %3
-}
-
-declare x86_mmx @llvm.x86.3dnowa.pi2fw(x86_mmx) nounwind readnone
-
-define <2 x float> @test_pswapdsf(<2 x float> %a) nounwind readnone {
-; X86-LABEL: test_pswapdsf:
-; X86:       # %bb.0: # %entry
-; X86-NEXT:    pushl %ebp
-; X86-NEXT:    movl %esp, %ebp
-; X86-NEXT:    andl $-8, %esp
-; X86-NEXT:    subl $8, %esp
-; X86-NEXT:    movd 12(%ebp), %mm0
-; X86-NEXT:    movd 8(%ebp), %mm1
-; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
-; X86-NEXT:    pswapd %mm1, %mm0 # mm0 = mm1[1,0]
-; X86-NEXT:    movq %mm0, (%esp)
-; X86-NEXT:    flds {{[0-9]+}}(%esp)
-; X86-NEXT:    flds (%esp)
-; X86-NEXT:    movl %ebp, %esp
-; X86-NEXT:    popl %ebp
-; X86-NEXT:    retl
-;
-; X64-LABEL: test_pswapdsf:
-; X64:       # %bb.0: # %entry
-; X64-NEXT:    movdq2q %xmm0, %mm0
-; X64-NEXT:    pswapd %mm0, %mm0 # mm0 = mm0[1,0]
-; X64-NEXT:    movq %mm0, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
-; X64-NEXT:    retq
-entry:
-  %0 = bitcast <2 x float> %a to x86_mmx
-  %1 = tail call x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx %0)
-  %2 = bitcast x86_mmx %1 to <2 x float>
-  ret <2 x float> %2
-}
-
-define <2 x i32> @test_pswapdsi(<2 x i32> %a) nounwind readnone {
-; X86-LABEL: test_pswapdsi:
-; X86:       # %bb.0: # %entry
-; X86-NEXT:    pushl %ebp
-; X86-NEXT:    movl %esp, %ebp
-; X86-NEXT:    andl $-8, %esp
-; X86-NEXT:    subl $8, %esp
-; X86-NEXT:    movd 12(%ebp), %mm0
-; X86-NEXT:    movd 8(%ebp), %mm1
-; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
-; X86-NEXT:    pswapd %mm1, %mm0 # mm0 = mm1[1,0]
-; X86-NEXT:    movq %mm0, (%esp)
-; X86-NEXT:    movl (%esp), %eax
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movl %ebp, %esp
-; X86-NEXT:    popl %ebp
-; X86-NEXT:    retl
-;
-; X64-LABEL: test_pswapdsi:
-; X64:       # %bb.0: # %entry
-; X64-NEXT:    movdq2q %xmm0, %mm0
-; X64-NEXT:    pswapd %mm0, %mm0 # mm0 = mm0[1,0]
-; X64-NEXT:    movq2dq %mm0, %xmm0
-; X64-NEXT:    retq
-entry:
-  %0 = bitcast <2 x i32> %a to x86_mmx
-  %1 = tail call x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx %0)
-  %2 = bitcast x86_mmx %1 to <2 x i32>
-  ret <2 x i32> %2
-}
-
-declare x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx) nounwind readnone

diff  --git a/llvm/test/CodeGen/X86/commute-3dnow.ll b/llvm/test/CodeGen/X86/commute-3dnow.ll
deleted file mode 100644
index dc3910920365d..0000000000000
--- a/llvm/test/CodeGen/X86/commute-3dnow.ll
+++ /dev/null
@@ -1,270 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+mmx,+3dnow | FileCheck %s --check-prefix=X86
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+3dnow | FileCheck %s --check-prefix=X64
-
-define void @commute_m_pfadd(ptr%a0, ptr%a1, ptr%a2) nounwind {
-; X86-LABEL: commute_m_pfadd:
-; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movq (%edx), %mm0
-; X86-NEXT:    pfadd (%eax), %mm0
-; X86-NEXT:    pfadd (%ecx), %mm0
-; X86-NEXT:    movq %mm0, (%ecx)
-; X86-NEXT:    retl
-;
-; X64-LABEL: commute_m_pfadd:
-; X64:       # %bb.0:
-; X64-NEXT:    movq (%rdi), %mm0
-; X64-NEXT:    pfadd (%rsi), %mm0
-; X64-NEXT:    pfadd (%rdx), %mm0
-; X64-NEXT:    movq %mm0, (%rdx)
-; X64-NEXT:    retq
-  %1 = load x86_mmx, ptr %a0
-  %2 = load x86_mmx, ptr %a1
-  %3 = load x86_mmx, ptr %a2
-  %4 = tail call x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx %1, x86_mmx %2)
-  %5 = tail call x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx %3, x86_mmx %4)
-  store x86_mmx %5, ptr %a2
-  ret void
-}
-declare x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx, x86_mmx)
-
-define void @commute_m_pfsub(ptr%a0, ptr%a1, ptr%a2) nounwind {
-; X86-LABEL: commute_m_pfsub:
-; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movq (%edx), %mm0
-; X86-NEXT:    pfsub (%eax), %mm0
-; X86-NEXT:    pfsubr (%ecx), %mm0
-; X86-NEXT:    movq %mm0, (%ecx)
-; X86-NEXT:    retl
-;
-; X64-LABEL: commute_m_pfsub:
-; X64:       # %bb.0:
-; X64-NEXT:    movq (%rdi), %mm0
-; X64-NEXT:    pfsub (%rsi), %mm0
-; X64-NEXT:    pfsubr (%rdx), %mm0
-; X64-NEXT:    movq %mm0, (%rdx)
-; X64-NEXT:    retq
-  %1 = load x86_mmx, ptr %a0
-  %2 = load x86_mmx, ptr %a1
-  %3 = load x86_mmx, ptr %a2
-  %4 = tail call x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx %1, x86_mmx %2)
-  %5 = tail call x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx %3, x86_mmx %4)
-  store x86_mmx %5, ptr %a2
-  ret void
-}
-declare x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx, x86_mmx)
-
-define void @commute_m_pfsubr(ptr%a0, ptr%a1, ptr%a2) nounwind {
-; X86-LABEL: commute_m_pfsubr:
-; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movq (%edx), %mm0
-; X86-NEXT:    pfsubr (%eax), %mm0
-; X86-NEXT:    pfsub (%ecx), %mm0
-; X86-NEXT:    movq %mm0, (%ecx)
-; X86-NEXT:    retl
-;
-; X64-LABEL: commute_m_pfsubr:
-; X64:       # %bb.0:
-; X64-NEXT:    movq (%rdi), %mm0
-; X64-NEXT:    pfsubr (%rsi), %mm0
-; X64-NEXT:    pfsub (%rdx), %mm0
-; X64-NEXT:    movq %mm0, (%rdx)
-; X64-NEXT:    retq
-  %1 = load x86_mmx, ptr %a0
-  %2 = load x86_mmx, ptr %a1
-  %3 = load x86_mmx, ptr %a2
-  %4 = tail call x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx %1, x86_mmx %2)
-  %5 = tail call x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx %3, x86_mmx %4)
-  store x86_mmx %5, ptr %a2
-  ret void
-}
-declare x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx, x86_mmx)
-
-define void @commute_m_pfmul(ptr%a0, ptr%a1, ptr%a2) nounwind {
-; X86-LABEL: commute_m_pfmul:
-; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movq (%edx), %mm0
-; X86-NEXT:    pfmul (%eax), %mm0
-; X86-NEXT:    pfmul (%ecx), %mm0
-; X86-NEXT:    movq %mm0, (%ecx)
-; X86-NEXT:    retl
-;
-; X64-LABEL: commute_m_pfmul:
-; X64:       # %bb.0:
-; X64-NEXT:    movq (%rdi), %mm0
-; X64-NEXT:    pfmul (%rsi), %mm0
-; X64-NEXT:    pfmul (%rdx), %mm0
-; X64-NEXT:    movq %mm0, (%rdx)
-; X64-NEXT:    retq
-  %1 = load x86_mmx, ptr %a0
-  %2 = load x86_mmx, ptr %a1
-  %3 = load x86_mmx, ptr %a2
-  %4 = tail call x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx %1, x86_mmx %2)
-  %5 = tail call x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx %3, x86_mmx %4)
-  store x86_mmx %5, ptr %a2
-  ret void
-}
-declare x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx, x86_mmx)
-
-; PFMAX can't commute without fast-math.
-define void @commute_m_pfmax(ptr%a0, ptr%a1, ptr%a2) nounwind {
-; X86-LABEL: commute_m_pfmax:
-; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movq (%edx), %mm0
-; X86-NEXT:    movq (%ecx), %mm1
-; X86-NEXT:    pfmax (%eax), %mm0
-; X86-NEXT:    pfmax %mm0, %mm1
-; X86-NEXT:    movq %mm1, (%ecx)
-; X86-NEXT:    retl
-;
-; X64-LABEL: commute_m_pfmax:
-; X64:       # %bb.0:
-; X64-NEXT:    movq (%rdi), %mm0
-; X64-NEXT:    movq (%rdx), %mm1
-; X64-NEXT:    pfmax (%rsi), %mm0
-; X64-NEXT:    pfmax %mm0, %mm1
-; X64-NEXT:    movq %mm1, (%rdx)
-; X64-NEXT:    retq
-  %1 = load x86_mmx, ptr %a0
-  %2 = load x86_mmx, ptr %a1
-  %3 = load x86_mmx, ptr %a2
-  %4 = tail call x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx %1, x86_mmx %2)
-  %5 = tail call x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx %3, x86_mmx %4)
-  store x86_mmx %5, ptr %a2
-  ret void
-}
-declare x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx, x86_mmx)
-
-; PFMIN can't commute without fast-math.
-define void @commute_m_pfmin(ptr%a0, ptr%a1, ptr%a2) nounwind {
-; X86-LABEL: commute_m_pfmin:
-; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movq (%edx), %mm0
-; X86-NEXT:    movq (%ecx), %mm1
-; X86-NEXT:    pfmin (%eax), %mm0
-; X86-NEXT:    pfmin %mm0, %mm1
-; X86-NEXT:    movq %mm1, (%ecx)
-; X86-NEXT:    retl
-;
-; X64-LABEL: commute_m_pfmin:
-; X64:       # %bb.0:
-; X64-NEXT:    movq (%rdi), %mm0
-; X64-NEXT:    movq (%rdx), %mm1
-; X64-NEXT:    pfmin (%rsi), %mm0
-; X64-NEXT:    pfmin %mm0, %mm1
-; X64-NEXT:    movq %mm1, (%rdx)
-; X64-NEXT:    retq
-  %1 = load x86_mmx, ptr %a0
-  %2 = load x86_mmx, ptr %a1
-  %3 = load x86_mmx, ptr %a2
-  %4 = tail call x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx %1, x86_mmx %2)
-  %5 = tail call x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx %3, x86_mmx %4)
-  store x86_mmx %5, ptr %a2
-  ret void
-}
-declare x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx, x86_mmx)
-
-define void @commute_m_pfcmpeq(ptr%a0, ptr%a1, ptr%a2) nounwind {
-; X86-LABEL: commute_m_pfcmpeq:
-; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movq (%edx), %mm0
-; X86-NEXT:    pfcmpeq (%eax), %mm0
-; X86-NEXT:    pfcmpeq (%ecx), %mm0
-; X86-NEXT:    movq %mm0, (%ecx)
-; X86-NEXT:    retl
-;
-; X64-LABEL: commute_m_pfcmpeq:
-; X64:       # %bb.0:
-; X64-NEXT:    movq (%rdi), %mm0
-; X64-NEXT:    pfcmpeq (%rsi), %mm0
-; X64-NEXT:    pfcmpeq (%rdx), %mm0
-; X64-NEXT:    movq %mm0, (%rdx)
-; X64-NEXT:    retq
-  %1 = load x86_mmx, ptr %a0
-  %2 = load x86_mmx, ptr %a1
-  %3 = load x86_mmx, ptr %a2
-  %4 = tail call x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx %1, x86_mmx %2)
-  %5 = tail call x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx %3, x86_mmx %4)
-  store x86_mmx %5, ptr %a2
-  ret void
-}
-declare x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx, x86_mmx)
-
-define void @commute_m_pavgusb(ptr%a0, ptr%a1, ptr%a2) nounwind {
-; X86-LABEL: commute_m_pavgusb:
-; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movq (%edx), %mm0
-; X86-NEXT:    pavgusb (%eax), %mm0
-; X86-NEXT:    pavgusb (%ecx), %mm0
-; X86-NEXT:    movq %mm0, (%ecx)
-; X86-NEXT:    retl
-;
-; X64-LABEL: commute_m_pavgusb:
-; X64:       # %bb.0:
-; X64-NEXT:    movq (%rdi), %mm0
-; X64-NEXT:    pavgusb (%rsi), %mm0
-; X64-NEXT:    pavgusb (%rdx), %mm0
-; X64-NEXT:    movq %mm0, (%rdx)
-; X64-NEXT:    retq
-  %1 = load x86_mmx, ptr %a0
-  %2 = load x86_mmx, ptr %a1
-  %3 = load x86_mmx, ptr %a2
-  %4 = tail call x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx %1, x86_mmx %2)
-  %5 = tail call x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx %3, x86_mmx %4)
-  store x86_mmx %5, ptr %a2
-  ret void
-}
-declare x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx, x86_mmx)
-
-define void @commute_m_pmulhrw(ptr%a0, ptr%a1, ptr%a2) nounwind {
-; X86-LABEL: commute_m_pmulhrw:
-; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movq (%edx), %mm0
-; X86-NEXT:    pmulhrw (%eax), %mm0
-; X86-NEXT:    pmulhrw (%ecx), %mm0
-; X86-NEXT:    movq %mm0, (%ecx)
-; X86-NEXT:    retl
-;
-; X64-LABEL: commute_m_pmulhrw:
-; X64:       # %bb.0:
-; X64-NEXT:    movq (%rdi), %mm0
-; X64-NEXT:    pmulhrw (%rsi), %mm0
-; X64-NEXT:    pmulhrw (%rdx), %mm0
-; X64-NEXT:    movq %mm0, (%rdx)
-; X64-NEXT:    retq
-  %1 = load x86_mmx, ptr %a0
-  %2 = load x86_mmx, ptr %a1
-  %3 = load x86_mmx, ptr %a2
-  %4 = tail call x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx %1, x86_mmx %2)
-  %5 = tail call x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx %3, x86_mmx %4)
-  store x86_mmx %5, ptr %a2
-  ret void
-}
-declare x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx, x86_mmx)

diff  --git a/llvm/test/CodeGen/X86/expand-vr64-gr64-copy.mir b/llvm/test/CodeGen/X86/expand-vr64-gr64-copy.mir
index 800af1ce5432e..559560ac20f8a 100644
--- a/llvm/test/CodeGen/X86/expand-vr64-gr64-copy.mir
+++ b/llvm/test/CodeGen/X86/expand-vr64-gr64-copy.mir
@@ -1,36 +1,31 @@
-# RUN: llc -run-pass postrapseudos -mtriple=x86_64-unknown-unknown -mattr=+3dnow -o - %s | FileCheck %s
+# RUN: llc -run-pass postrapseudos -mtriple=x86_64-unknown-unknown -mattr=+mmx -o - %s | FileCheck %s
 # This test verifies that the ExpandPostRA pass expands the GR64 <-> VR64
 # copies into appropriate MMX_MOV instructions.
 
 --- |
 
-  define <2 x i32> @test_pswapdsi(<2 x i32> %a) nounwind readnone {
+  define <2 x i32> @test_paddw(<2 x i32> %a) nounwind readnone {
   entry:
     %0 = bitcast <2 x i32> %a to x86_mmx
-    %1 = tail call x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx %0)
+    %1 = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %0, x86_mmx %0)
     %2 = bitcast x86_mmx %1 to <2 x i32>
     ret <2 x i32> %2
   }
 
-  declare x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx) nounwind readnone
-
 ...
 ---
-name:            test_pswapdsi
+name:            test_paddw
 tracksRegLiveness: true
 body: |
   bb.0.entry:
     liveins: $xmm0
-
-    $xmm0 = PSHUFDri killed $xmm0, -24
-    MOVPQI2QImr $rsp, 1, $noreg, -8, $noreg, killed $xmm0
-    $mm0 = PSWAPDrm $rsp, 1, $noreg, -8, $noreg
+    $mm0 = MMX_MOVDQ2Qrr killed $xmm0
+    $mm0 = MMX_PADDWrr killed $mm0, $mm0
+  ; Inserted dummy copy here, for test:
   ; CHECK:      $rax = MMX_MOVD64from64rr $mm0
   ; CHECK-NEXT: $mm0 = MMX_MOVD64to64rr $rax
     $rax = COPY $mm0
     $mm0 = COPY $rax
-    MMX_MOVQ64mr $rsp, 1, $noreg, -16, $noreg, killed $mm0
-    $xmm0 = MOVQI2PQIrm $rsp, 1, $noreg, -16, $noreg
-    $xmm0 = PSHUFDri killed $xmm0, -44
-    RET64 $xmm0
+    $xmm0 = MMX_MOVQ2DQrr killed $mm0
+    RET 0, $xmm0
 ...

diff  --git a/llvm/test/CodeGen/X86/pr35982.ll b/llvm/test/CodeGen/X86/pr35982.ll
index 4a79a109f8b60..b6022698edaeb 100644
--- a/llvm/test/CodeGen/X86/pr35982.ll
+++ b/llvm/test/CodeGen/X86/pr35982.ll
@@ -46,50 +46,5 @@ define float @PR35982_emms(<1 x i64>) nounwind {
   ret float %11
 }
 
-define float @PR35982_femms(<1 x i64>) nounwind {
-; NO-POSTRA-LABEL: PR35982_femms:
-; NO-POSTRA:       # %bb.0:
-; NO-POSTRA-NEXT:    subl $8, %esp
-; NO-POSTRA-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; NO-POSTRA-NEXT:    movq {{[0-9]+}}(%esp), %mm0
-; NO-POSTRA-NEXT:    punpckhdq %mm0, %mm0 # mm0 = mm0[1,1]
-; NO-POSTRA-NEXT:    movd %mm0, %ecx
-; NO-POSTRA-NEXT:    femms
-; NO-POSTRA-NEXT:    movl %eax, (%esp)
-; NO-POSTRA-NEXT:    fildl (%esp)
-; NO-POSTRA-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
-; NO-POSTRA-NEXT:    fiaddl {{[0-9]+}}(%esp)
-; NO-POSTRA-NEXT:    addl $8, %esp
-; NO-POSTRA-NEXT:    retl
-;
-; POSTRA-LABEL: PR35982_femms:
-; POSTRA:       # %bb.0:
-; POSTRA-NEXT:    subl $8, %esp
-; POSTRA-NEXT:    movq {{[0-9]+}}(%esp), %mm0
-; POSTRA-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; POSTRA-NEXT:    punpckhdq %mm0, %mm0 # mm0 = mm0[1,1]
-; POSTRA-NEXT:    movd %mm0, %ecx
-; POSTRA-NEXT:    femms
-; POSTRA-NEXT:    movl %eax, (%esp)
-; POSTRA-NEXT:    fildl (%esp)
-; POSTRA-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
-; POSTRA-NEXT:    fiaddl {{[0-9]+}}(%esp)
-; POSTRA-NEXT:    addl $8, %esp
-; POSTRA-NEXT:    retl
-  %2 = bitcast <1 x i64> %0 to <2 x i32>
-  %3 = extractelement <2 x i32> %2, i32 0
-  %4 = extractelement <1 x i64> %0, i32 0
-  %5 = bitcast i64 %4 to x86_mmx
-  %6 = tail call x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx %5, x86_mmx %5)
-  %7 = bitcast x86_mmx %6 to <2 x i32>
-  %8 = extractelement <2 x i32> %7, i32 0
-  tail call void @llvm.x86.mmx.femms()
-  %9 = sitofp i32 %3 to float
-  %10 = sitofp i32 %8 to float
-  %11 = fadd float %9, %10
-  ret float %11
-}
-
 declare x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx, x86_mmx)
-declare void @llvm.x86.mmx.femms()
 declare void @llvm.x86.mmx.emms()

diff  --git a/llvm/test/CodeGen/X86/prefetch.ll b/llvm/test/CodeGen/X86/prefetch.ll
index c10e0526787d5..c3551644dfb7f 100644
--- a/llvm/test/CodeGen/X86/prefetch.ll
+++ b/llvm/test/CodeGen/X86/prefetch.ll
@@ -6,16 +6,11 @@
 ; RUN: llc < %s -mtriple=i686-- -mcpu=slm | FileCheck %s -check-prefix=X86-PRFCHWSSE
 ; RUN: llc < %s -mtriple=i686-- -mcpu=btver2 | FileCheck %s -check-prefix=X86-PRFCHWSSE
 ; RUN: llc < %s -mtriple=i686-- -mcpu=btver2 -mattr=-prfchw | FileCheck %s -check-prefix=X86-SSE
-; RUN: llc < %s -mtriple=i686-- -mattr=+3dnow | FileCheck %s -check-prefix=X86-3DNOW
-; RUN: llc < %s -mtriple=i686-- -mattr=+3dnow,+prfchw | FileCheck %s -check-prefix=X86-3DNOW
+; RUN: llc < %s -mtriple=i686-- -mattr=+prfchw | FileCheck %s -check-prefix=X86-PRFCHWSSE
 
 ; Rules:
-; 3dnow by itself get you just the single prefetch instruction with no hints
 ; sse provides prefetch0/1/2/nta
-; supporting prefetchw, but not 3dnow implicitly provides prefetcht0/1/2/nta regardless of sse setting as we need something to fall back to for the non-write hint.
-; 3dnow prefetch instruction will only get used if you have no other prefetch instructions enabled
-
-; rdar://10538297
+; supporting prefetchw implicitly provides prefetcht0/1/2/nta as well, as we need something to fall back to for the non-write hint.
 
 define void @t(ptr %ptr) nounwind  {
 ; X86-SSE-LABEL: t:
@@ -43,19 +38,7 @@ define void @t(ptr %ptr) nounwind  {
 ; X86-PRFCHWSSE-NEXT:    prefetchw (%eax)
 ; X86-PRFCHWSSE-NEXT:    prefetchw (%eax)
 ; X86-PRFCHWSSE-NEXT:    retl
-;
-; X86-3DNOW-LABEL: t:
-; X86-3DNOW:       # %bb.0: # %entry
-; X86-3DNOW-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-3DNOW-NEXT:    prefetch (%eax)
-; X86-3DNOW-NEXT:    prefetch (%eax)
-; X86-3DNOW-NEXT:    prefetch (%eax)
-; X86-3DNOW-NEXT:    prefetch (%eax)
-; X86-3DNOW-NEXT:    prefetchw (%eax)
-; X86-3DNOW-NEXT:    prefetchw (%eax)
-; X86-3DNOW-NEXT:    prefetchw (%eax)
-; X86-3DNOW-NEXT:    prefetchw (%eax)
-; X86-3DNOW-NEXT:    retl
+
 entry:
   tail call void @llvm.prefetch( ptr %ptr, i32 0, i32 1, i32 1 )
   tail call void @llvm.prefetch( ptr %ptr, i32 0, i32 2, i32 1 )

diff  --git a/llvm/test/CodeGen/X86/stack-folding-3dnow.ll b/llvm/test/CodeGen/X86/stack-folding-3dnow.ll
deleted file mode 100644
index 1cbd61567f327..0000000000000
--- a/llvm/test/CodeGen/X86/stack-folding-3dnow.ll
+++ /dev/null
@@ -1,387 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+3dnow | FileCheck %s
-
-define x86_mmx @stack_fold_pavgusb(x86_mmx %a, x86_mmx %b) {
-; CHECK-LABEL: stack_fold_pavgusb:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT:    #APP
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    #NO_APP
-; CHECK-NEXT:    pavgusb {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
-; CHECK-NEXT:    movq2dq %mm0, %xmm0
-; CHECK-NEXT:    retq
-  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
-  %2 = call x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx %a, x86_mmx %b) nounwind readnone
-  ret x86_mmx %2
-}
-declare x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx, x86_mmx) nounwind readnone
-
-define x86_mmx @stack_fold_pf2id(x86_mmx %a) {
-; CHECK-LABEL: stack_fold_pf2id:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    movq %mm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT:    #APP
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    #NO_APP
-; CHECK-NEXT:    pf2id {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
-; CHECK-NEXT:    movq2dq %mm0, %xmm0
-; CHECK-NEXT:    retq
-  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
-  %2 = call x86_mmx @llvm.x86.3dnow.pf2id(x86_mmx %a) nounwind readnone
-  ret x86_mmx %2
-}
-declare x86_mmx @llvm.x86.3dnow.pf2id(x86_mmx) nounwind readnone
-
-define x86_mmx @stack_fold_pf2iw(x86_mmx %a) {
-; CHECK-LABEL: stack_fold_pf2iw:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    movq %mm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT:    #APP
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    #NO_APP
-; CHECK-NEXT:    pf2iw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
-; CHECK-NEXT:    movq2dq %mm0, %xmm0
-; CHECK-NEXT:    retq
-  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
-  %2 = call x86_mmx @llvm.x86.3dnowa.pf2iw(x86_mmx %a) nounwind readnone
-  ret x86_mmx %2
-}
-declare x86_mmx @llvm.x86.3dnowa.pf2iw(x86_mmx) nounwind readnone
-
-define x86_mmx @stack_fold_pfacc(x86_mmx %a, x86_mmx %b) {
-; CHECK-LABEL: stack_fold_pfacc:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT:    #APP
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    #NO_APP
-; CHECK-NEXT:    pfacc {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
-; CHECK-NEXT:    movq2dq %mm0, %xmm0
-; CHECK-NEXT:    retq
-  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
-  %2 = call x86_mmx @llvm.x86.3dnow.pfacc(x86_mmx %a, x86_mmx %b) nounwind readnone
-  ret x86_mmx %2
-}
-declare x86_mmx @llvm.x86.3dnow.pfacc(x86_mmx, x86_mmx) nounwind readnone
-
-define x86_mmx @stack_fold_pfadd(x86_mmx %a, x86_mmx %b) {
-; CHECK-LABEL: stack_fold_pfadd:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT:    #APP
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    #NO_APP
-; CHECK-NEXT:    pfadd {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
-; CHECK-NEXT:    movq2dq %mm0, %xmm0
-; CHECK-NEXT:    retq
-  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
-  %2 = call x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx %a, x86_mmx %b) nounwind readnone
-  ret x86_mmx %2
-}
-declare x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx, x86_mmx) nounwind readnone
-
-define x86_mmx @stack_fold_pfcmpeq(x86_mmx %a, x86_mmx %b) {
-; CHECK-LABEL: stack_fold_pfcmpeq:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT:    #APP
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    #NO_APP
-; CHECK-NEXT:    pfcmpeq {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
-; CHECK-NEXT:    movq2dq %mm0, %xmm0
-; CHECK-NEXT:    retq
-  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
-  %2 = call x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx %a, x86_mmx %b) nounwind readnone
-  ret x86_mmx %2
-}
-declare x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx, x86_mmx) nounwind readnone
-
-define x86_mmx @stack_fold_pfcmpge(x86_mmx %a, x86_mmx %b) {
-; CHECK-LABEL: stack_fold_pfcmpge:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT:    #APP
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    #NO_APP
-; CHECK-NEXT:    pfcmpge {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
-; CHECK-NEXT:    movq2dq %mm0, %xmm0
-; CHECK-NEXT:    retq
-  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
-  %2 = call x86_mmx @llvm.x86.3dnow.pfcmpge(x86_mmx %a, x86_mmx %b) nounwind readnone
-  ret x86_mmx %2
-}
-declare x86_mmx @llvm.x86.3dnow.pfcmpge(x86_mmx, x86_mmx) nounwind readnone
-
-define x86_mmx @stack_fold_pfcmpgt(x86_mmx %a, x86_mmx %b) {
-; CHECK-LABEL: stack_fold_pfcmpgt:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT:    #APP
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    #NO_APP
-; CHECK-NEXT:    pfcmpgt {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
-; CHECK-NEXT:    movq2dq %mm0, %xmm0
-; CHECK-NEXT:    retq
-  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
-  %2 = call x86_mmx @llvm.x86.3dnow.pfcmpgt(x86_mmx %a, x86_mmx %b) nounwind readnone
-  ret x86_mmx %2
-}
-declare x86_mmx @llvm.x86.3dnow.pfcmpgt(x86_mmx, x86_mmx) nounwind readnone
-
-define x86_mmx @stack_fold_pfmax(x86_mmx %a, x86_mmx %b) {
-; CHECK-LABEL: stack_fold_pfmax:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT:    #APP
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    #NO_APP
-; CHECK-NEXT:    pfmax {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
-; CHECK-NEXT:    movq2dq %mm0, %xmm0
-; CHECK-NEXT:    retq
-  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
-  %2 = call x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx %a, x86_mmx %b) nounwind readnone
-  ret x86_mmx %2
-}
-declare x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx, x86_mmx) nounwind readnone
-
-define x86_mmx @stack_fold_pfmin(x86_mmx %a, x86_mmx %b) {
-; CHECK-LABEL: stack_fold_pfmin:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT:    #APP
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    #NO_APP
-; CHECK-NEXT:    pfmin {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
-; CHECK-NEXT:    movq2dq %mm0, %xmm0
-; CHECK-NEXT:    retq
-  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
-  %2 = call x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx %a, x86_mmx %b) nounwind readnone
-  ret x86_mmx %2
-}
-declare x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx, x86_mmx) nounwind readnone
-
-define x86_mmx @stack_fold_pfmul(x86_mmx %a, x86_mmx %b) {
-; CHECK-LABEL: stack_fold_pfmul:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT:    #APP
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    #NO_APP
-; CHECK-NEXT:    pfmul {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
-; CHECK-NEXT:    movq2dq %mm0, %xmm0
-; CHECK-NEXT:    retq
-  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
-  %2 = call x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx %a, x86_mmx %b) nounwind readnone
-  ret x86_mmx %2
-}
-declare x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx, x86_mmx) nounwind readnone
-
-define x86_mmx @stack_fold_pfnacc(x86_mmx %a, x86_mmx %b) {
-; CHECK-LABEL: stack_fold_pfnacc:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT:    #APP
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    #NO_APP
-; CHECK-NEXT:    pfnacc {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
-; CHECK-NEXT:    movq2dq %mm0, %xmm0
-; CHECK-NEXT:    retq
-  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
-  %2 = call x86_mmx @llvm.x86.3dnowa.pfnacc(x86_mmx %a, x86_mmx %b) nounwind readnone
-  ret x86_mmx %2
-}
-declare x86_mmx @llvm.x86.3dnowa.pfnacc(x86_mmx, x86_mmx) nounwind readnone
-
-define x86_mmx @stack_fold_pfpnacc(x86_mmx %a, x86_mmx %b) {
-; CHECK-LABEL: stack_fold_pfpnacc:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT:    #APP
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    #NO_APP
-; CHECK-NEXT:    pfpnacc {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
-; CHECK-NEXT:    movq2dq %mm0, %xmm0
-; CHECK-NEXT:    retq
-  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
-  %2 = call x86_mmx @llvm.x86.3dnowa.pfpnacc(x86_mmx %a, x86_mmx %b) nounwind readnone
-  ret x86_mmx %2
-}
-declare x86_mmx @llvm.x86.3dnowa.pfpnacc(x86_mmx, x86_mmx) nounwind readnone
-
-define x86_mmx @stack_fold_pfrcp(x86_mmx %a) {
-; CHECK-LABEL: stack_fold_pfrcp:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    movq %mm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT:    #APP
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    #NO_APP
-; CHECK-NEXT:    pfrcp {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
-; CHECK-NEXT:    movq2dq %mm0, %xmm0
-; CHECK-NEXT:    retq
-  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
-  %2 = call x86_mmx @llvm.x86.3dnow.pfrcp(x86_mmx %a) nounwind readnone
-  ret x86_mmx %2
-}
-declare x86_mmx @llvm.x86.3dnow.pfrcp(x86_mmx) nounwind readnone
-
-define x86_mmx @stack_fold_pfrcpit1(x86_mmx %a, x86_mmx %b) {
-; CHECK-LABEL: stack_fold_pfrcpit1:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT:    #APP
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    #NO_APP
-; CHECK-NEXT:    pfrcpit1 {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
-; CHECK-NEXT:    movq2dq %mm0, %xmm0
-; CHECK-NEXT:    retq
-  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
-  %2 = call x86_mmx @llvm.x86.3dnow.pfrcpit1(x86_mmx %a, x86_mmx %b) nounwind readnone
-  ret x86_mmx %2
-}
-declare x86_mmx @llvm.x86.3dnow.pfrcpit1(x86_mmx, x86_mmx) nounwind readnone
-
-define x86_mmx @stack_fold_pfrcpit2(x86_mmx %a, x86_mmx %b) {
-; CHECK-LABEL: stack_fold_pfrcpit2:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT:    #APP
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    #NO_APP
-; CHECK-NEXT:    pfrcpit2 {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
-; CHECK-NEXT:    movq2dq %mm0, %xmm0
-; CHECK-NEXT:    retq
-  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
-  %2 = call x86_mmx @llvm.x86.3dnow.pfrcpit2(x86_mmx %a, x86_mmx %b) nounwind readnone
-  ret x86_mmx %2
-}
-declare x86_mmx @llvm.x86.3dnow.pfrcpit2(x86_mmx, x86_mmx) nounwind readnone
-
-define x86_mmx @stack_fold_pfrsqit1(x86_mmx %a, x86_mmx %b) {
-; CHECK-LABEL: stack_fold_pfrsqit1:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT:    #APP
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    #NO_APP
-; CHECK-NEXT:    pfrsqit1 {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
-; CHECK-NEXT:    movq2dq %mm0, %xmm0
-; CHECK-NEXT:    retq
-  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
-  %2 = call x86_mmx @llvm.x86.3dnow.pfrsqit1(x86_mmx %a, x86_mmx %b) nounwind readnone
-  ret x86_mmx %2
-}
-declare x86_mmx @llvm.x86.3dnow.pfrsqit1(x86_mmx, x86_mmx) nounwind readnone
-
-define x86_mmx @stack_fold_pfrsqrt(x86_mmx %a) {
-; CHECK-LABEL: stack_fold_pfrsqrt:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    movq %mm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT:    #APP
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    #NO_APP
-; CHECK-NEXT:    pfrsqrt {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
-; CHECK-NEXT:    movq2dq %mm0, %xmm0
-; CHECK-NEXT:    retq
-  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
-  %2 = call x86_mmx @llvm.x86.3dnow.pfrsqrt(x86_mmx %a) nounwind readnone
-  ret x86_mmx %2
-}
-declare x86_mmx @llvm.x86.3dnow.pfrsqrt(x86_mmx) nounwind readnone
-
-define x86_mmx @stack_fold_pfsub(x86_mmx %a, x86_mmx %b) {
-; CHECK-LABEL: stack_fold_pfsub:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT:    #APP
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    #NO_APP
-; CHECK-NEXT:    pfsub {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
-; CHECK-NEXT:    movq2dq %mm0, %xmm0
-; CHECK-NEXT:    retq
-  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
-  %2 = call x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx %a, x86_mmx %b) nounwind readnone
-  ret x86_mmx %2
-}
-declare x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx, x86_mmx) nounwind readnone
-
-define x86_mmx @stack_fold_pfsubr(x86_mmx %a, x86_mmx %b) {
-; CHECK-LABEL: stack_fold_pfsubr:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT:    #APP
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    #NO_APP
-; CHECK-NEXT:    pfsubr {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
-; CHECK-NEXT:    movq2dq %mm0, %xmm0
-; CHECK-NEXT:    retq
-  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
-  %2 = call x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx %a, x86_mmx %b) nounwind readnone
-  ret x86_mmx %2
-}
-declare x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx, x86_mmx) nounwind readnone
-
-define x86_mmx @stack_fold_pi2fd(x86_mmx %a) {
-; CHECK-LABEL: stack_fold_pi2fd:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    movq %mm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT:    #APP
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    #NO_APP
-; CHECK-NEXT:    pi2fd {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
-; CHECK-NEXT:    movq2dq %mm0, %xmm0
-; CHECK-NEXT:    retq
-  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
-  %2 = call x86_mmx @llvm.x86.3dnow.pi2fd(x86_mmx %a) nounwind readnone
-  ret x86_mmx %2
-}
-declare x86_mmx @llvm.x86.3dnow.pi2fd(x86_mmx) nounwind readnone
-
-define x86_mmx @stack_fold_pi2fw(x86_mmx %a) {
-; CHECK-LABEL: stack_fold_pi2fw:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    movq %mm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT:    #APP
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    #NO_APP
-; CHECK-NEXT:    pi2fw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
-; CHECK-NEXT:    movq2dq %mm0, %xmm0
-; CHECK-NEXT:    retq
-  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
-  %2 = call x86_mmx @llvm.x86.3dnowa.pi2fw(x86_mmx %a) nounwind readnone
-  ret x86_mmx %2
-}
-declare x86_mmx @llvm.x86.3dnowa.pi2fw(x86_mmx) nounwind readnone
-
-define x86_mmx @stack_fold_pmulhrw(x86_mmx %a, x86_mmx %b) {
-; CHECK-LABEL: stack_fold_pmulhrw:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT:    #APP
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    #NO_APP
-; CHECK-NEXT:    pmulhrw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
-; CHECK-NEXT:    movq2dq %mm0, %xmm0
-; CHECK-NEXT:    retq
-  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
-  %2 = call x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx %a, x86_mmx %b) nounwind readnone
-  ret x86_mmx %2
-}
-declare x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx, x86_mmx) nounwind readnone
-
-define x86_mmx @stack_fold_pswapd(x86_mmx %a) {
-; CHECK-LABEL: stack_fold_pswapd:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    movq %mm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT:    #APP
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    #NO_APP
-; CHECK-NEXT:    pswapd {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
-; CHECK-NEXT:    # mm0 = mem[1,0]
-; CHECK-NEXT:    movq2dq %mm0, %xmm0
-; CHECK-NEXT:    retq
-  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
-  %2 = call x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx %a) nounwind readnone
-  ret x86_mmx %2
-}
-declare x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx) nounwind readnone


        


More information about the cfe-commits mailing list