[llvm-commits] [llvm] r126238 - in /llvm/trunk/lib/Target/ARM: ARMBaseInstrInfo.h ARMHazardRecognizer.cpp ARMInstrFormats.td ARMInstrVFP.td MLxExpansionPass.cpp NEONMoveFix.cpp

Tue Feb 22 11:53:14 PST 2011

Author: evancheng
Date: Tue Feb 22 13:53:14 2011
New Revision: 126238

URL: http://llvm.org/viewvc/llvm-project?rev=126238&view=rev
Log:
VFP single-precision arithmetic instructions can go down the NEON pipeline, but only on Cortex-A8.

Modified:
    llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.h
    llvm/trunk/lib/Target/ARM/ARMHazardRecognizer.cpp
    llvm/trunk/lib/Target/ARM/ARMInstrFormats.td
    llvm/trunk/lib/Target/ARM/ARMInstrVFP.td
    llvm/trunk/lib/Target/ARM/MLxExpansionPass.cpp
    llvm/trunk/lib/Target/ARM/NEONMoveFix.cpp

Modified: llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.h?rev=126238&r1=126237&r2=126238&view=diff
==============================================================================

--- llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.h (original)
+++ llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.h Tue Feb 22 13:53:14 2011
@@ -155,10 +155,11 @@
     //===------------------------------------------------------------------===//
     // Code domain.
     DomainShift   = 18,
-    DomainMask    = 3 << DomainShift,
+    DomainMask    = 7 << DomainShift,
     DomainGeneral = 0 << DomainShift,
     DomainVFP     = 1 << DomainShift,
     DomainNEON    = 2 << DomainShift,
+    DomainNEONA8  = 4 << DomainShift,
 
     //===------------------------------------------------------------------===//
     // Field shifts - such shifts are used to set field while generating

Modified: llvm/trunk/lib/Target/ARM/ARMHazardRecognizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMHazardRecognizer.cpp?rev=126238&r1=126237&r2=126238&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMHazardRecognizer.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMHazardRecognizer.cpp Tue Feb 22 13:53:14 2011
@@ -21,17 +21,14 @@
   // FIXME: Detect integer instructions properly.
   const TargetInstrDesc &TID = MI->getDesc();
   unsigned Domain = TID.TSFlags & ARMII::DomainMask;
-  if (Domain == ARMII::DomainVFP) {
-    unsigned Opcode = MI->getOpcode();
-    if (Opcode == ARM::VSTRS || Opcode == ARM::VSTRD ||
-        Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
-      return false;
-  } else if (Domain == ARMII::DomainNEON) {
-    if (MI->getDesc().mayStore() || MI->getDesc().mayLoad())
-      return false;
-  } else
+  if (TID.mayStore())
     return false;
-  return MI->readsRegister(DefMI->getOperand(0).getReg(), &TRI);
+  unsigned Opcode = TID.getOpcode();
+  if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
+    return false;
+  if ((Domain & ARMII::DomainVFP) || (Domain & ARMII::DomainNEON))
+    return MI->readsRegister(DefMI->getOperand(0).getReg(), &TRI);
+  return false;
 }
 
 ScheduleHazardRecognizer::HazardType

Modified: llvm/trunk/lib/Target/ARM/ARMInstrFormats.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrFormats.td?rev=126238&r1=126237&r2=126238&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMInstrFormats.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMInstrFormats.td Tue Feb 22 13:53:14 2011
@@ -127,13 +127,14 @@
 def IndexModeUpd  : IndexMode<3>;
 
 // Instruction execution domain.
-class Domain<bits<2> val> {
-  bits<2> Value = val;
+class Domain<bits<3> val> {
+  bits<3> Value = val;
 }
 def GenericDomain : Domain<0>;
 def VFPDomain     : Domain<1>; // Instructions in VFP domain only
 def NeonDomain    : Domain<2>; // Instructions in Neon domain only
 def VFPNeonDomain : Domain<3>; // Instructions in both VFP & Neon domains
+def VFPNeonA8Domain : Domain<7>; // Instructions in VFP & Neon under A8
 
 //===----------------------------------------------------------------------===//
 // ARM special operands.
@@ -249,7 +250,7 @@
   let TSFlags{15-10} = Form;
   let TSFlags{16}    = isUnaryDataProc;
   let TSFlags{17}    = canXformTo16Bit;
-  let TSFlags{19-18} = D.Value;
+  let TSFlags{20-18} = D.Value;
 
   let Constraints = cstr;
   let Itinerary = itin;

Modified: llvm/trunk/lib/Target/ARM/ARMInstrVFP.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrVFP.td?rev=126238&r1=126237&r2=126238&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMInstrVFP.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMInstrVFP.td Tue Feb 22 13:53:14 2011
@@ -197,9 +197,9 @@
                    (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
                    IIC_fpALU32, "vadd", ".f32\t$Sd, $Sn, $Sm",
                    [(set SPR:$Sd, (fadd SPR:$Sn, SPR:$Sm))]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 def VSUBD  : ADbI<0b11100, 0b11, 1, 0,
@@ -211,9 +211,9 @@
                    (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
                    IIC_fpALU32, "vsub", ".f32\t$Sd, $Sn, $Sm",
                    [(set SPR:$Sd, (fsub SPR:$Sn, SPR:$Sm))]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 def VDIVD  : ADbI<0b11101, 0b00, 0, 0,
@@ -235,9 +235,9 @@
                    (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
                    IIC_fpMUL32, "vmul", ".f32\t$Sd, $Sn, $Sm",
                    [(set SPR:$Sd, (fmul SPR:$Sn, SPR:$Sm))]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 def VNMULD : ADbI<0b11100, 0b10, 1, 0,
@@ -249,9 +249,9 @@
                   (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
                   IIC_fpMUL32, "vnmul", ".f32\t$Sd, $Sn, $Sm",
                   [(set SPR:$Sd, (fneg (fmul SPR:$Sn, SPR:$Sm)))]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 // Match reassociated forms only if not sign dependent rounding.
@@ -271,9 +271,9 @@
                   (outs), (ins SPR:$Sd, SPR:$Sm),
                   IIC_fpCMP32, "vcmpe", ".f32\t$Sd, $Sm",
                   [(arm_cmpfp SPR:$Sd, SPR:$Sm)]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 // FIXME: Verify encoding after integrated assembler is working.
@@ -286,9 +286,9 @@
                   (outs), (ins SPR:$Sd, SPR:$Sm),
                   IIC_fpCMP32, "vcmp", ".f32\t$Sd, $Sm",
                   [/* For disassembly only; pattern left blank */]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 } // Defs = [FPSCR]
 
@@ -305,9 +305,9 @@
                    (outs SPR:$Sd), (ins SPR:$Sm),
                    IIC_fpUNA32, "vabs", ".f32\t$Sd, $Sm",
                    [(set SPR:$Sd, (fabs SPR:$Sm))]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 let Defs = [FPSCR] in {
@@ -326,9 +326,9 @@
   let Inst{3-0} = 0b0000;
   let Inst{5}   = 0;
 
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 // FIXME: Verify encoding after integrated assembler is working.
@@ -347,9 +347,9 @@
   let Inst{3-0} = 0b0000;
   let Inst{5}   = 0;
 
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 } // Defs = [FPSCR]
 
@@ -423,9 +423,9 @@
                    (outs SPR:$Sd), (ins SPR:$Sm),
                    IIC_fpUNA32, "vneg", ".f32\t$Sd, $Sm",
                    [(set SPR:$Sd, (fneg SPR:$Sm))]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 def VSQRTD : ADuI<0b11101, 0b11, 0b0001, 0b11, 0,
@@ -598,9 +598,9 @@
                                 [(set SPR:$Sd, (arm_sitof SPR:$Sm))]> {
   let Inst{7} = 1; // s32
 
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 def VUITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011,
@@ -616,9 +616,9 @@
                                 [(set SPR:$Sd, (arm_uitof SPR:$Sm))]> {
   let Inst{7} = 0; // u32
 
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 // FP -> Int:
@@ -671,9 +671,9 @@
                                  [(set SPR:$Sd, (arm_ftosi SPR:$Sm))]> {
   let Inst{7} = 1; // Z bit
 
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 def VTOUIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011,
@@ -689,9 +689,9 @@
                                  [(set SPR:$Sd, (arm_ftoui SPR:$Sm))]> {
   let Inst{7} = 1; // Z bit
 
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 // And the Z bit '0' variants, i.e. use the rounding mode specified by FPSCR.
@@ -743,36 +743,36 @@
                        (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
                  IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits",
                  [/* For disassembly only; pattern left blank */]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 def VTOUHS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 0,
                        (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
                  IIC_fpCVTSI, "vcvt", ".u16.f32\t$dst, $a, $fbits",
                  [/* For disassembly only; pattern left blank */]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 def VTOSLS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 1,
                        (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
                  IIC_fpCVTSI, "vcvt", ".s32.f32\t$dst, $a, $fbits",
                  [/* For disassembly only; pattern left blank */]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 def VTOULS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 1,
                        (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
                  IIC_fpCVTSI, "vcvt", ".u32.f32\t$dst, $a, $fbits",
                  [/* For disassembly only; pattern left blank */]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 def VTOSHD : AVConv1XI<0b11101, 0b11, 0b1110, 0b1011, 0,
@@ -801,36 +801,36 @@
                        (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
                  IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits",
                  [/* For disassembly only; pattern left blank */]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 def VUHTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 0,
                        (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
                  IIC_fpCVTIS, "vcvt", ".f32.u16\t$dst, $a, $fbits",
                  [/* For disassembly only; pattern left blank */]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 def VSLTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 1,
                        (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
                  IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a, $fbits",
                  [/* For disassembly only; pattern left blank */]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 def VULTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 1,
                        (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
                  IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a, $fbits",
                  [/* For disassembly only; pattern left blank */]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 def VSHTOD : AVConv1XI<0b11101, 0b11, 0b1010, 0b1011, 0,
@@ -874,9 +874,9 @@
                                            SPR:$Sdin))]>,
               RegConstraint<"$Sdin = $Sd">,
               Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
@@ -901,9 +901,9 @@
                                            SPR:$Sdin))]>,
               RegConstraint<"$Sdin = $Sd">,
               Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
@@ -928,9 +928,9 @@
                                            SPR:$Sdin))]>,
                 RegConstraint<"$Sdin = $Sd">,
                 Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin),
@@ -954,9 +954,9 @@
              [(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>,
                          RegConstraint<"$Sdin = $Sd">,
                   Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 
 def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin),
@@ -995,9 +995,9 @@
                     IIC_fpUNA32, "vneg", ".f32\t$Sd, $Sm",
                     [/*(set SPR:$Sd, (ARMcneg SPR:$Sn, SPR:$Sm, imm:$cc))*/]>,
                  RegConstraint<"$Sn = $Sd"> {
-  // Some single precision VFP instructions may be executed on both NEON and VFP
-  // pipelines.
-  let D = VFPNeonDomain;
+  // Some single precision VFP instructions may be executed on both NEON and
+  // VFP pipelines on A8.
+  let D = VFPNeonA8Domain;
 }
 } // neverHasSideEffects
 

Modified: llvm/trunk/lib/Target/ARM/MLxExpansionPass.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/MLxExpansionPass.cpp?rev=126238&r1=126237&r2=126238&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/MLxExpansionPass.cpp (original)
+++ llvm/trunk/lib/Target/ARM/MLxExpansionPass.cpp Tue Feb 22 13:53:14 2011
@@ -132,22 +132,16 @@
 }
 
 bool MLxExpansion::hasRAWHazard(unsigned Reg, MachineInstr *MI) const {
-  const TargetInstrDesc &TID = MI->getDesc();
   // FIXME: Detect integer instructions properly.
+  const TargetInstrDesc &TID = MI->getDesc();
   unsigned Domain = TID.TSFlags & ARMII::DomainMask;
-  if (Domain == ARMII::DomainVFP) {
-    unsigned Opcode = TID.getOpcode();
-    if (Opcode == ARM::VSTRS || Opcode == ARM::VSTRD ||
-        Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
-      return false;
-  } else if (Domain == ARMII::DomainNEON) {
-    if (TID.mayStore() || TID.mayLoad())
-      return false;
-  } else {
+  if (TID.mayStore())
     return false;
-  }
-
-  return MI->readsRegister(Reg, TRI);
+  unsigned Opcode = TID.getOpcode();
+  if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
+    return false;
+  if ((Domain & ARMII::DomainVFP) || (Domain & ARMII::DomainNEON))
+    return MI->readsRegister(Reg, TRI);
   return false;
 }
 

Modified: llvm/trunk/lib/Target/ARM/NEONMoveFix.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/NEONMoveFix.cpp?rev=126238&r1=126237&r2=126238&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/NEONMoveFix.cpp (original)
+++ llvm/trunk/lib/Target/ARM/NEONMoveFix.cpp Tue Feb 22 13:53:14 2011
@@ -35,6 +35,7 @@
   private:
     const TargetRegisterInfo *TRI;
     const ARMBaseInstrInfo *TII;
+    bool isA8;
 
     typedef DenseMap<unsigned, const MachineInstr*> RegMap;
 
@@ -43,6 +44,16 @@
   char NEONMoveFixPass::ID = 0;
 }
 
+static bool inNEONDomain(unsigned Domain, bool isA8) {
+  if (Domain & ARMII::DomainNEON) {
+    // Some instructions only go down NEON pipeline when executed on CortexA8.
+    if (Domain & ARMII::DomainNEONA8)
+      return isA8;
+    return true;
+  }
+  return false;
+}
+
 bool NEONMoveFixPass::InsertMoves(MachineBasicBlock &MBB) {
   RegMap Defs;
   bool Modified = false;
@@ -70,7 +81,7 @@
           Domain = ARMII::DomainNEON;
       }
 
-      if (Domain & ARMII::DomainNEON) {
+      if (inNEONDomain(Domain, isA8)) {
         // Convert VMOVD to VMOVDneon
         unsigned DestReg = MI->getOperand(0).getReg();
 
@@ -123,6 +134,7 @@
 
   TRI = TM.getRegisterInfo();
   TII = static_cast<const ARMBaseInstrInfo*>(TM.getInstrInfo());
+  isA8 = TM.getSubtarget<ARMSubtarget>().isCortexA8();
 
   bool Modified = false;
   for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;