[PATCH] D52289: [ARM] Do not fuse VADD and VMUL on the Cortex-M4 and Cortex-M33

Thu Sep 20 01:53:11 PDT 2018

SjoerdMeijer created this revision.
SjoerdMeijer added reviewers: samparker, dmgreen, t.p.northover, john.brawn.
Herald added a reviewer: javed.absar.
Herald added subscribers: chrib, kristof.beyls.

A sequence of VMUL and VADD instructions always gives the same or better
performance than a fused VMLA instruction on the Cortex-M4 and Cortex-M33.
Executing the VMUL and VADD back-to-back takes the same number of cycles as
the VMLA, but having separate instructions allows scheduling to avoid the
hazard between these two instructions.


https://reviews.llvm.org/D52289

Files:
  lib/Target/ARM/ARM.td
  test/CodeGen/ARM/fmacs.ll


Index: test/CodeGen/ARM/fmacs.ll
===================================================================

--- test/CodeGen/ARM/fmacs.ll
+++ test/CodeGen/ARM/fmacs.ll
@@ -3,6 +3,8 @@
 ; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 %s -o - | FileCheck %s -check-prefix=A8
 ; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a9 %s -o - | FileCheck %s -check-prefix=A9
 ; RUN: llc -mtriple=arm-linux-gnueabi -mcpu=cortex-a9 -float-abi=hard %s -o - | FileCheck %s -check-prefix=HARD
+; RUN: llc -mtriple=arm-eabi -mcpu=cortex-m4 %s -o - | FileCheck %s -check-prefix=SLOWVMLX
+; RUN: llc -mtriple=arm-eabi -mcpu=cortex-m33 %s -o - | FileCheck %s -check-prefix=SLOWVMLX
 
 define float @t1(float %acc, float %a, float %b) {
 entry:
@@ -15,6 +17,10 @@
 ; A8-LABEL: t1:
 ; A8: vmul.f32
 ; A8: vadd.f32
+
+; SLOWVMLX-LABEL: t1:
+; SLOWVMLX:       vmul.f32
+; SLOWVMLX-NEXT:  vadd.f32
   %0 = fmul float %a, %b
   %1 = fadd float %acc, %0
 	ret float %1
Index: lib/Target/ARM/ARM.td
===================================================================
--- lib/Target/ARM/ARM.td
+++ lib/Target/ARM/ARM.td
@@ -951,6 +951,7 @@
                                                          FeatureVFPOnlySP,
                                                          FeatureD16,
                                                          FeaturePrefLoopAlign32,
+                                                         FeatureHasSlowFPVMLx,
                                                          FeatureHasNoBranchPredictor]>;
 
 def : ProcNoItin<"cortex-m7",                           [ARMv7em,
@@ -966,6 +967,7 @@
                                                          FeatureD16,
                                                          FeatureVFPOnlySP,
                                                          FeaturePrefLoopAlign32,
+                                                         FeatureHasSlowFPVMLx,
                                                          FeatureHasNoBranchPredictor]>;
 
 def : ProcNoItin<"cortex-a32",                           [ARMv8a,


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D52289.166235.patch
Type: text/x-patch
Size: 2048 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20180920/df32b76f/attachment.bin>