[llvm] [ARM] Update costs for ARM insts (PR #142843)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 5 07:48:37 PDT 2025
https://github.com/AZero13 updated https://github.com/llvm/llvm-project/pull/142843
>From b10ff0e680f6cf3460d4ad46e461c0413343c93d Mon Sep 17 00:00:00 2001
From: Rose <gfunni234 at gmail.com>
Date: Wed, 4 Jun 2025 14:04:49 -0400
Subject: [PATCH] Update costs for ARM insts
Constants that can be encoded directly as an instruction immediate should not have a cost.
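
For context, an ARM-mode data-processing immediate is an 8-bit value rotated right by an even amount, so constants such as 255, 0xFF00, or 0xF000000F fold directly into cmp/cmn/add/sub, while a value like 0xff800001 (see the ssat.ll FIXME below) does not fit and has to be materialized. The snippet below is a minimal standalone sketch of the ARM-mode rule only, for illustration; the patch itself relies on the existing ARM_AM::getSOImmVal / ARM_AM::getT2SOImmVal helpers, and isARMModifiedImm is a hypothetical name rather than an LLVM API.

#include <cstdint>
#include <cstdio>

// Hypothetical helper, for illustration only (the patch uses LLVM's
// ARM_AM::getSOImmVal): returns true if V is an ARM-mode "modified
// immediate", i.e. an 8-bit value rotated right by an even amount.
static bool isARMModifiedImm(uint32_t V) {
  for (unsigned Rot = 0; Rot < 32; Rot += 2) {
    // Rotating V left by Rot undoes a rotate-right-by-Rot encoding.
    uint32_t Undone = (Rot == 0) ? V : ((V << Rot) | (V >> (32 - Rot)));
    if (Undone <= 0xFFu)
      return true;
  }
  return false;
}

int main() {
  const uint32_t Tests[] = {255u, 0xFF00u, 0xF000000Fu, 257u, 0xFF800001u};
  for (uint32_t V : Tests)
    std::printf("0x%08x -> %s\n", (unsigned)V,
                isARMModifiedImm(V) ? "encodable" : "needs materialization");
  return 0;
}

Thumb-2 uses a related but slightly different scheme (a rotated 8-bit value plus byte-replicated patterns such as 0x00XY00XY and 0xXYXYXYXY), which is why the Thumb-2 paths in the patch query getT2SOImmVal separately.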
---
.../lib/Target/ARM/ARMTargetTransformInfo.cpp | 88 ++++++++++++++++---
llvm/test/CodeGen/ARM/ssat.ll | 16 ++--
2 files changed, 84 insertions(+), 20 deletions(-)
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 6c3a1ae7e1775..1a054dd228149 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -410,6 +410,22 @@ static bool isFPSatMinMaxPattern(Instruction *Inst, const APInt &Imm) {
return isa<FPToSIInst>(FP);
}
+/// isLegalCmpImmed - Return true if the specified immediate is a legal
+/// icmp immediate, that is, the target has icmp instructions which can
+/// compare a register against the immediate without having to materialize
+/// the immediate into a register.
+static bool isLegalCmpImmed(int64_t Imm, const ARMSubtarget *Subtarget) {
+ // Thumb2 and ARM modes can use cmn for negative immediates.
+ if (!Subtarget->isThumb())
+ return ARM_AM::getSOImmVal((uint32_t)Imm) != -1 ||
+ ARM_AM::getSOImmVal(-(uint32_t)Imm) != -1;
+ if (Subtarget->isThumb2())
+ return ARM_AM::getT2SOImmVal((uint32_t)Imm) != -1 ||
+ ARM_AM::getT2SOImmVal(-(uint32_t)Imm) != -1;
+ // Thumb1 doesn't have cmn, and cmp only takes 8-bit immediates.
+ return Imm >= 0 && Imm <= 255;
+}
+
InstructionCost ARMTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
const APInt &Imm, Type *Ty,
TTI::TargetCostKind CostKind,
@@ -428,6 +444,13 @@ InstructionCost ARMTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
if (Opcode == Instruction::GetElementPtr && Idx != 0)
return 0;
+ if ((Opcode == Instruction::Shl || Opcode == Instruction::LShr ||
+ Opcode == Instruction::AShr) &&
+ Idx == 1) {
+ // Shift amounts always fit in the instruction's immediate field, so they are free.
+ return 0;
+ }
+
if (Opcode == Instruction::And) {
// UXTB/UXTH
if (Imm == 255 || Imm == 65535)
@@ -437,19 +460,41 @@ InstructionCost ARMTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
getIntImmCost(~Imm, Ty, CostKind));
}
- if (Opcode == Instruction::Add)
+ if (Opcode == Instruction::Add || Opcode == Instruction::Sub) {
+ if (Ty->getIntegerBitWidth() <= 32) {
+ int64_t ImmVal = Imm.getSExtValue();
+ if (!ST->isThumb())
+ if (ARM_AM::getSOImmVal((uint32_t)ImmVal) != -1 ||
+ ARM_AM::getSOImmVal(-(uint32_t)ImmVal) != -1)
+ return 0;
+ if (ST->isThumb2())
+ if (ARM_AM::getT2SOImmVal((uint32_t)ImmVal) != -1 ||
+ ARM_AM::getT2SOImmVal(-(uint32_t)ImmVal) != -1)
+ return 0;
+ // Thumb1 ADD/SUB immediates are only 8 bits; a negative immediate can use the opposite operation.
+ ImmVal = ImmVal < 0 ? -ImmVal : ImmVal;
+ if (ImmVal >= 0 && ImmVal <= 255)
+ return 0;
+ }
+
// Conversion to SUB is free, and means we can use -Imm instead.
return std::min(getIntImmCost(Imm, Ty, CostKind),
getIntImmCost(-Imm, Ty, CostKind));
+ }
- if (Opcode == Instruction::ICmp && Imm.isNegative() &&
- Ty->getIntegerBitWidth() == 32) {
- int64_t NegImm = -Imm.getSExtValue();
- if (ST->isThumb2() && NegImm < 1<<12)
- // icmp X, #-C -> cmn X, #C
- return 0;
- if (ST->isThumb() && NegImm < 1<<8)
- // icmp X, #-C -> adds X, #C
+ if (Opcode == Instruction::ICmp && Ty->getIntegerBitWidth() < 64) {
+ int64_t ImmVal = Imm.getSExtValue();
+ if (!ST->isThumb())
+ if (ARM_AM::getSOImmVal((uint32_t)ImmVal) != -1 ||
+ ARM_AM::getSOImmVal(-(uint32_t)ImmVal) != -1)
+ return 0;
+ if (ST->isThumb2())
+ if (ARM_AM::getT2SOImmVal((uint32_t)ImmVal) != -1 ||
+ ARM_AM::getT2SOImmVal(-(uint32_t)ImmVal) != -1)
+ return 0;
+ // Thumb1 doesn't have cmn, but icmp X, #-C can use adds X, #C; immediates are limited to 8 bits.
+ ImmVal = ImmVal < 0 ? -ImmVal : ImmVal;
+ if (ImmVal >= 0 && ImmVal <= 255)
return 0;
}
@@ -470,12 +515,31 @@ InstructionCost ARMTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
if (Inst && ST->hasVFP2Base() && isFPSatMinMaxPattern(Inst, Imm))
return 0;
- // We can convert <= -1 to < 0, which is generally quite cheap.
- if (Inst && Opcode == Instruction::ICmp && Idx == 1 && Imm.isAllOnes()) {
+ // We can convert <= to < (and >= to >) by adjusting the immediate, which is generally quite cheap.
+ if (Inst && Opcode == Instruction::ICmp && Idx == 1 &&
+ ((Ty->getIntegerBitWidth() <= 32 &&
+ (!isLegalCmpImmed(Imm.getSExtValue(), ST))) ||
+ Imm.isAllOnes() || Imm.isOne())) {
ICmpInst::Predicate Pred = cast<ICmpInst>(Inst)->getPredicate();
- if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SLE)
+ if ((Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SLE) &&
+ !Imm.isMaxSignedValue())
+ return std::min(getIntImmCost(Imm, Ty, CostKind),
+ getIntImmCost(Imm + 1, Ty, CostKind));
+
+ if ((Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_ULE) &&
+ !Imm.isAllOnes())
return std::min(getIntImmCost(Imm, Ty, CostKind),
getIntImmCost(Imm + 1, Ty, CostKind));
+
+ if ((Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SGE) &&
+ !Imm.isMinSignedValue())
+ return std::min(getIntImmCost(Imm, Ty, CostKind),
+ getIntImmCost(Imm - 1, Ty, CostKind));
+
+ if ((Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_UGE) &&
+ !Imm.isZero())
+ return std::min(getIntImmCost(Imm, Ty, CostKind),
+ getIntImmCost(Imm - 1, Ty, CostKind));
}
return getIntImmCost(Imm, Ty, CostKind);
diff --git a/llvm/test/CodeGen/ARM/ssat.ll b/llvm/test/CodeGen/ARM/ssat.ll
index ed777f2b1882b..175ed4db1c49e 100644
--- a/llvm/test/CodeGen/ARM/ssat.ll
+++ b/llvm/test/CodeGen/ARM/ssat.ll
@@ -387,15 +387,14 @@ entry:
}
; Lower constant is different in the select and in the compare
+; FIXME: 0xff800001 can be constructed with mov r2, 0x7f, ror 6; or r2, r2, 0xe0, ror 14
define i32 @no_sat_incorrect_constant(i32 %x) #0 {
; V4T-LABEL: no_sat_incorrect_constant:
; V4T: @ %bb.0: @ %entry
-; V4T-NEXT: mov r1, #1065353216
+; V4T-NEXT: ldr r2, .LCPI11_0
; V4T-NEXT: cmn r0, #8388608
-; V4T-NEXT: orr r1, r1, #-1073741824
-; V4T-NEXT: mov r2, r0
-; V4T-NEXT: orrlt r2, r1, #1
-; V4T-NEXT: ldr r1, .LCPI11_0
+; V4T-NEXT: movge r2, r0
+; V4T-NEXT: ldr r1, .LCPI11_1
; V4T-NEXT: cmp r0, #8388608
; V4T-NEXT: movlt r1, r2
; V4T-NEXT: mov r0, r1
@@ -403,15 +402,16 @@ define i32 @no_sat_incorrect_constant(i32 %x) #0 {
; V4T-NEXT: .p2align 2
; V4T-NEXT: @ %bb.1:
; V4T-NEXT: .LCPI11_0:
+; V4T-NEXT: .long 4286578689 @ 0xff800001
+; V4T-NEXT: .LCPI11_1:
; V4T-NEXT: .long 8388607 @ 0x7fffff
;
; V6T2-LABEL: no_sat_incorrect_constant:
; V6T2: @ %bb.0: @ %entry
-; V6T2-NEXT: movw r2, #0
; V6T2-NEXT: cmn r0, #8388608
; V6T2-NEXT: mov r1, r0
-; V6T2-NEXT: movt r2, #65408
-; V6T2-NEXT: orrlt r1, r2, #1
+; V6T2-NEXT: movwlt r1, #1
+; V6T2-NEXT: movtlt r1, #65408
; V6T2-NEXT: cmp r0, #8388608
; V6T2-NEXT: movwge r1, #65535
; V6T2-NEXT: movtge r1, #127
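
An editorial aside on the icmp changes above (not part of the patch): the cheaper-of-two-immediates logic depends on the rewrites x <= C <=> x < C + 1 and x >= C <=> x > C - 1, which only hold as long as C + 1 or C - 1 does not wrap around in the type; that is what the isMaxSignedValue/isMinSignedValue/isAllOnes/isZero guards protect against. A small exhaustive check over 8-bit values, purely as a sanity sketch:

#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  // Exhaustive 8-bit check of the rewrites the cost model assumes are free:
  //   x <= C  <=>  x < C + 1   (invalid when C + 1 wraps)
  //   x >= C  <=>  x > C - 1   (invalid when C - 1 wraps)
  for (int xi = INT8_MIN; xi <= INT8_MAX; ++xi)
    for (int ci = INT8_MIN; ci <= INT8_MAX; ++ci) {
      int8_t x = (int8_t)xi, c = (int8_t)ci;
      int8_t cp1 = (int8_t)(c + 1); // wraps to INT8_MIN when c == INT8_MAX
      int8_t cm1 = (int8_t)(c - 1); // wraps to INT8_MAX when c == INT8_MIN
      if (c != INT8_MAX)
        assert((x <= c) == (x < cp1));
      if (c != INT8_MIN)
        assert((x >= c) == (x > cm1));
    }
  for (unsigned xi = 0; xi <= UINT8_MAX; ++xi)
    for (unsigned ci = 0; ci <= UINT8_MAX; ++ci) {
      uint8_t x = (uint8_t)xi, c = (uint8_t)ci;
      uint8_t cp1 = (uint8_t)(c + 1); // wraps to 0 when c == UINT8_MAX
      uint8_t cm1 = (uint8_t)(c - 1); // wraps to UINT8_MAX when c == 0
      if (c != UINT8_MAX)
        assert((x <= c) == (x < cp1));
      if (c != 0)
        assert((x >= c) == (x > cm1));
    }
  std::puts("comparison-immediate rewrites hold away from the wrap-around points");
  return 0;
}

At the wrap-around points the rewrite changes the comparison's meaning (for example, x <= INT8_MAX is always true while x < INT8_MIN is never true), which is why those immediates are excluded.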
More information about the llvm-commits
mailing list