[llvm] [ARM] Prefer MUL to MULS on some implementations (PR #112540)

Wed Oct 16 06:18:42 PDT 2024

llvmbot wrote:




@llvm/pr-subscribers-backend-arm

Author: None (VladiKrapp-Arm)

<details>
<summary>Changes</summary>

MULS adversely affects performance on many implementations. Where this is the case, we prefer not to shrink MUL to MULS.

---
Full diff: https://github.com/llvm/llvm-project/pull/112540.diff


4 Files Affected:

- (modified) llvm/lib/Target/ARM/ARMFeatures.td (+7) 
- (modified) llvm/lib/Target/ARM/ARMProcessors.td (+1) 
- (modified) llvm/lib/Target/ARM/Thumb2SizeReduction.cpp (+3) 
- (modified) llvm/test/CodeGen/Thumb2/avoidmuls.mir (+2-2) 


``````````diff

diff --git a/llvm/lib/Target/ARM/ARMFeatures.td b/llvm/lib/Target/ARM/ARMFeatures.td
index 3a2188adbec33b..bb437698296ce8 100644
--- a/llvm/lib/Target/ARM/ARMFeatures.td
+++ b/llvm/lib/Target/ARM/ARMFeatures.td
@@ -398,6 +398,13 @@ def FeatureAvoidPartialCPSR : SubtargetFeature<"avoid-partial-cpsr",
                                                "AvoidCPSRPartialUpdate", "true",
                                  "Avoid CPSR partial update for OOO execution">;
 
+/// FeatureAvoidMULS - If true, codegen would avoid using the MULS instruction,
+/// preferring the thumb2 MUL which doesn't set flags.
+def FeatureAvoidMULS : SubtargetFeature<"avoid-muls",
+                                        "AvoidMULS", "true",
+                                 "Avoid MULS instructions for M class cores">;
+
+
 /// Disable +1 predication cost for instructions updating CPSR.
 /// Enabled for Cortex-A57.
 /// True if disable +1 predication cost for instructions updating CPSR. Enabled for Cortex-A57.
diff --git a/llvm/lib/Target/ARM/ARMProcessors.td b/llvm/lib/Target/ARM/ARMProcessors.td
index 08f62d12f4a9f1..b94a5fc1614697 100644
--- a/llvm/lib/Target/ARM/ARMProcessors.td
+++ b/llvm/lib/Target/ARM/ARMProcessors.td
@@ -360,6 +360,7 @@ def : ProcessorModel<"cortex-m33", CortexM4Model,       [ARMv8mMainline,
                                                          FeatureHasSlowFPVFMx,
                                                          FeatureUseMISched,
                                                          FeatureHasNoBranchPredictor,
+                                                         FeatureAvoidMULS,
                                                          FeatureFixCMSE_CVE_2021_35465]>;
 
 def : ProcessorModel<"star-mc1", CortexM4Model,         [ARMv8mMainline,
diff --git a/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp b/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
index f572af98600738..f4a9915a78b99d 100644
--- a/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -755,6 +755,9 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
   Register Reg1 = MI->getOperand(1).getReg();
   // t2MUL is "special". The tied source operand is second, not first.
   if (MI->getOpcode() == ARM::t2MUL) {
+    // MULS can be slower than MUL
+    if (!MinimizeSize && STI->avoidMULS())
+      return false;
     Register Reg2 = MI->getOperand(2).getReg();
     // Early exit if the regs aren't all low regs.
     if (!isARMLowRegister(Reg0) || !isARMLowRegister(Reg1)
diff --git a/llvm/test/CodeGen/Thumb2/avoidmuls.mir b/llvm/test/CodeGen/Thumb2/avoidmuls.mir
index 8d5567482d5cd7..09b7e62bee04e3 100644
--- a/llvm/test/CodeGen/Thumb2/avoidmuls.mir
+++ b/llvm/test/CodeGen/Thumb2/avoidmuls.mir
@@ -63,5 +63,5 @@ body:             |
 
 ...
 # CHECK-LABEL: test
-# CHECK: tMUL
-# CHECK-NOT: t2MUL
+# CHECK: t2MUL
+# CHECK-NOT: tMUL

``````````

</details>


https://github.com/llvm/llvm-project/pull/112540