[llvm] ea796e5 - [ARM] Prefer MUL to MULS on some implementations (#112540)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 17 05:53:27 PDT 2024
Author: VladiKrapp-Arm
Date: 2024-10-17T13:53:22+01:00
New Revision: ea796e5237afbbef396b21ac04d4f32557c8db61
URL: https://github.com/llvm/llvm-project/commit/ea796e5237afbbef396b21ac04d4f32557c8db61
DIFF: https://github.com/llvm/llvm-project/commit/ea796e5237afbbef396b21ac04d4f32557c8db61.diff
LOG: [ARM] Prefer MUL to MULS on some implementations (#112540)
MULS adversely affects performance on many implementations. Where this
is the case, we prefer not to shrink MUL to MULS.
Added:
Modified:
llvm/lib/Target/ARM/ARMFeatures.td
llvm/lib/Target/ARM/ARMProcessors.td
llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
llvm/test/CodeGen/Thumb2/avoidmuls.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/ARM/ARMFeatures.td b/llvm/lib/Target/ARM/ARMFeatures.td
index 3a2188adbec33b..bb437698296ce8 100644
--- a/llvm/lib/Target/ARM/ARMFeatures.td
+++ b/llvm/lib/Target/ARM/ARMFeatures.td
@@ -398,6 +398,13 @@ def FeatureAvoidPartialCPSR : SubtargetFeature<"avoid-partial-cpsr",
"AvoidCPSRPartialUpdate", "true",
"Avoid CPSR partial update for OOO execution">;
+/// FeatureAvoidMULS - If true, codegen would avoid using the MULS instruction,
+/// preferring the thumb2 MUL which doesn't set flags.
+def FeatureAvoidMULS : SubtargetFeature<"avoid-muls",
+ "AvoidMULS", "true",
+ "Avoid MULS instructions for M class cores">;
+
+
/// Disable +1 predication cost for instructions updating CPSR.
/// Enabled for Cortex-A57.
/// True if disable +1 predication cost for instructions updating CPSR. Enabled for Cortex-A57.
diff --git a/llvm/lib/Target/ARM/ARMProcessors.td b/llvm/lib/Target/ARM/ARMProcessors.td
index 08f62d12f4a9f1..b94a5fc1614697 100644
--- a/llvm/lib/Target/ARM/ARMProcessors.td
+++ b/llvm/lib/Target/ARM/ARMProcessors.td
@@ -360,6 +360,7 @@ def : ProcessorModel<"cortex-m33", CortexM4Model, [ARMv8mMainline,
FeatureHasSlowFPVFMx,
FeatureUseMISched,
FeatureHasNoBranchPredictor,
+ FeatureAvoidMULS,
FeatureFixCMSE_CVE_2021_35465]>;
def : ProcessorModel<"star-mc1", CortexM4Model, [ARMv8mMainline,
diff --git a/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp b/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
index f572af98600738..f4a9915a78b99d 100644
--- a/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -755,6 +755,9 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
Register Reg1 = MI->getOperand(1).getReg();
// t2MUL is "special". The tied source operand is second, not first.
if (MI->getOpcode() == ARM::t2MUL) {
+ // MULS can be slower than MUL
+ if (!MinimizeSize && STI->avoidMULS())
+ return false;
Register Reg2 = MI->getOperand(2).getReg();
// Early exit if the regs aren't all low regs.
if (!isARMLowRegister(Reg0) || !isARMLowRegister(Reg1)
diff --git a/llvm/test/CodeGen/Thumb2/avoidmuls.mir b/llvm/test/CodeGen/Thumb2/avoidmuls.mir
index 8d5567482d5cd7..865152068fdf7f 100644
--- a/llvm/test/CodeGen/Thumb2/avoidmuls.mir
+++ b/llvm/test/CodeGen/Thumb2/avoidmuls.mir
@@ -1,67 +1,20 @@
-# RUN: llc -run-pass=thumb2-reduce-size %s -o - | FileCheck %s
+# RUN: llc -mtriple=thumbv7m-none-eabi -mcpu=cortex-m33 -run-pass=thumb2-reduce-size %s -o - | FileCheck %s --check-prefix=MUL
+# RUN: llc -mtriple=thumbv7m-none-eabi --run-pass=thumb2-reduce-size %s -o - | FileCheck %s --check-prefix=MULS
---- |
- target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
- target triple = "thumbv8m.main-arm-none-eabi"
-
- ; Function Attrs: norecurse nounwind readnone
- define i32 @test(i32 %x, i32 %y) local_unnamed_addr #0 {
- entry:
- %cmp6 = icmp sgt i32 %y, 0
- br i1 %cmp6, label %for.body.preheader, label %for.cond.cleanup
-
- for.body.preheader: ; preds = %entry
- br label %for.body
-
- for.cond.cleanup: ; preds = %for.body, %entry
- %sum.0.lcssa = phi i32 [ 1, %entry ], [ %mul, %for.body ]
- ret i32 %sum.0.lcssa
-
- for.body: ; preds = %for.body, %for.body.preheader
- %lsr.iv1 = phi i32 [ %lsr.iv.next2, %for.body ], [ %x, %for.body.preheader ]
- %lsr.iv = phi i32 [ %lsr.iv.next, %for.body ], [ %y, %for.body.preheader ]
- %sum.07 = phi i32 [ %mul, %for.body ], [ 1, %for.body.preheader ]
- %mul = mul nsw i32 %lsr.iv1, %sum.07
- %lsr.iv.next = add i32 %lsr.iv, -1
- %lsr.iv.next2 = add i32 %lsr.iv1, 1
- %exitcond = icmp eq i32 %lsr.iv.next, 0
- br i1 %exitcond, label %for.cond.cleanup, label %for.body
- }
-
- attributes #0 = { norecurse nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="cortex-m33" "target-features"="-d32,+dsp,+fp-armv8,-fp64,+hwdiv,+strict-align,+thumb-mode,-crc,-dotprod,-hwdiv-arm,-ras" "unsafe-fp-math"="false" "use-soft-float"="false" }
-
-...
---
name: test
-tracksRegLiveness: true
-liveins:
- - { reg: '$r0', virtual-reg: '' }
- - { reg: '$r1', virtual-reg: '' }
body: |
- bb.0.entry:
- successors: %bb.1.for.body, %bb.2.for.cond.cleanup
- liveins: $r0, $r1
-
+ bb.0:
$r2 = tMOVr $r0, 14, _
$r0 = t2MOVi 1, 14, _, _
- t2CMPri $r1, 1, 14, _, implicit-def $cpsr
- t2Bcc %bb.2.for.cond.cleanup, 11, killed $cpsr
-
- bb.1.for.body:
- successors: %bb.2.for.cond.cleanup, %bb.1.for.body
- liveins: $r0, $r1, $r2
-
$r0 = t2MUL $r2, killed $r0, 14, _
- $r2 = t2ADDri killed $r2, 1, 14, _, _
- $r1 = t2SUBri killed $r1, 1, 14, _, def $cpsr
- t2Bcc %bb.1.for.body, 1, killed $cpsr
-
- bb.2.for.cond.cleanup:
- liveins: $r0
-
tBX_RET 14, _, implicit $r0
...
-# CHECK-LABEL: test
-# CHECK: tMUL
-# CHECK-NOT: t2MUL
+# MUL-LABEL: test
+# MUL: t2MUL
+# MUL-NOT: tMUL
+
+# MULS-LABEL: test
+# MULS: tMUL
+# MULS-NOT: t2MUL
\ No newline at end of file
More information about the llvm-commits
mailing list