[llvm-commits] [llvm] r85697 - in /llvm/trunk: lib/Target/ARM/ARMInstrNEON.td test/CodeGen/ARM/fmacs.ll test/CodeGen/ARM/fnmacs.ll test/CodeGen/Thumb2/cross-rc-coalescing-2.ll
Jim Grosbach
grosbach at apple.com
Sat Oct 31 15:57:37 PDT 2009
Author: grosbach
Date: Sat Oct 31 17:57:36 2009
New Revision: 85697
URL: http://llvm.org/viewvc/llvm-project?rev=85697&view=rev
Log:
The vml[as].f32 instructions (vmla.f32 and vmls.f32) cause stalls in the Advanced
SIMD instructions that follow them. Avoid using them for scalar floating-point
operations for now.
Modified:
llvm/trunk/lib/Target/ARM/ARMInstrNEON.td
llvm/trunk/test/CodeGen/ARM/fmacs.ll
llvm/trunk/test/CodeGen/ARM/fnmacs.ll
llvm/trunk/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll
Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrNEON.td?rev=85697&r1=85696&r2=85697&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMInstrNEON.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMInstrNEON.td Sat Oct 31 17:57:36 2009
@@ -2841,13 +2841,16 @@
def : N3VDsPat<fmul, VMULfd_sfp>;
// Vector Multiply-Accumulate/Subtract used for single-precision FP
-let neverHasSideEffects = 1 in
-def VMLAfd_sfp : N3VDMulOps<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla.f32", v2f32,fmul,fadd>;
-def : N3VDMulOpsPat<fmul, fadd, VMLAfd_sfp>;
+// vml[as].f32 can cause 4-8 cycle stalls in following ASIMD instructions, so
+// we want to avoid them for now. e.g., alternating vmla/vadd instructions.
-let neverHasSideEffects = 1 in
-def VMLSfd_sfp : N3VDMulOps<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls.f32", v2f32,fmul,fsub>;
-def : N3VDMulOpsPat<fmul, fsub, VMLSfd_sfp>;
+//let neverHasSideEffects = 1 in
+//def VMLAfd_sfp : N3VDMulOps<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla.f32", v2f32,fmul,fadd>;
+//def : N3VDMulOpsPat<fmul, fadd, VMLAfd_sfp>;
+
+//let neverHasSideEffects = 1 in
+//def VMLSfd_sfp : N3VDMulOps<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls.f32", v2f32,fmul,fsub>;
+//def : N3VDMulOpsPat<fmul, fsub, VMLSfd_sfp>;
// Vector Absolute used for single-precision FP
let neverHasSideEffects = 1 in
Modified: llvm/trunk/test/CodeGen/ARM/fmacs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/fmacs.ll?rev=85697&r1=85696&r2=85697&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/fmacs.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/fmacs.ll Sat Oct 31 17:57:36 2009
@@ -1,7 +1,7 @@
; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {fmacs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
-; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {vmla.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {vmul.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {fmacs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {vmla.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {vmul.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {fmacs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
define float @test(float %acc, float %a, float %b) {
Modified: llvm/trunk/test/CodeGen/ARM/fnmacs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/fnmacs.ll?rev=85697&r1=85696&r2=85697&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/fnmacs.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/fnmacs.ll Sat Oct 31 17:57:36 2009
@@ -7,8 +7,10 @@
; VFP2: fnmacs
; NEON: fnmacs
-; NEONFP: vmls
+; NEONFP-NOT: vmls
; NEONFP-NOT: fcpys
+; NEONFP: vmul.f32
+; NEONFP: vsub.f32
; NEONFP: fmrs
%0 = fmul float %a, %b
Modified: llvm/trunk/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll?rev=85697&r1=85696&r2=85697&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll (original)
+++ llvm/trunk/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll Sat Oct 31 17:57:36 2009
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 | grep fcpys | count 5
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 | grep fcpys | count 4
define arm_apcscc void @fht(float* nocapture %fz, i16 signext %n) nounwind {
entry:
More information about the llvm-commits mailing list