[llvm] [ARM][AArch64] Allow the CSE to take into consideration uses of the carry and overflow flags in ARM and AArch64 (PR #150803)

via llvm-commits llvm-commits at lists.llvm.org
Sat Jul 26 18:03:53 PDT 2025


https://github.com/AZero13 updated https://github.com/llvm/llvm-project/pull/150803

>From dc83f973095521bb06350c58a88e6b2e88f08380 Mon Sep 17 00:00:00 2001
From: Rose <gfunni234 at gmail.com>
Date: Sat, 26 Jul 2025 18:01:24 -0400
Subject: [PATCH] [ARM][AArch64] Allow the CSE to take into consideration uses
 of the carry and overflow flags in ARM and AArch64

On both of these platforms, the compare can be folded into the flag-setting instruction even when the carry or overflow flag is consumed: for add/sub marked no-signed-wrap, signed overflow would be poison, so the V flag can be relied upon; and ANDS/BICS always clear the V flag outright.
---
 llvm/lib/Target/AArch64/AArch64InstrInfo.cpp  |  48 +++++++-
 llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp      |  30 ++++-
 llvm/test/CodeGen/AArch64/aarch64-icmp-opt.ll | 104 ++++++++++++++++++
 3 files changed, 177 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 8685d7a04ac9c..feb7d969563a7 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -1745,8 +1745,24 @@ static unsigned sForm(MachineInstr &Instr) {
     return AArch64::SBCSXr;
   case AArch64::ANDWri:
     return AArch64::ANDSWri;
+  case AArch64::ANDWrr:
+    return AArch64::ANDSWrr;
+  case AArch64::ANDWrs:
+    return AArch64::ANDSWrs;
+  case AArch64::BICWrr:
+    return AArch64::BICSWrr;
+  case AArch64::BICWrs:
+    return AArch64::BICSWrs;
   case AArch64::ANDXri:
     return AArch64::ANDSXri;
+  case AArch64::ANDXrr:
+    return AArch64::ANDSXrr;
+  case AArch64::ANDXrs:
+    return AArch64::ANDSXrs;
+  case AArch64::BICXrr:
+    return AArch64::BICSXrr;
+  case AArch64::BICXrs:
+    return AArch64::BICSXrs;
   }
 }
 
@@ -1884,6 +1900,23 @@ static bool isSUBSRegImm(unsigned Opcode) {
   return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
 }
 
+static bool isANDSOpcode(MachineInstr &MI) {
+  switch (sForm(MI)) {
+  case AArch64::ANDSWri:
+  case AArch64::ANDSWrr:
+  case AArch64::ANDSWrs:
+  case AArch64::ANDSXri:
+  case AArch64::ANDSXrr:
+  case AArch64::ANDSXrs:
+  case AArch64::BICSWrr:
+  case AArch64::BICSWrs:
+  case AArch64::BICSXrr:
+  case AArch64::BICSXrs:
+    return true;
+  default: return false;
+  }
+}
+
 /// Check if CmpInstr can be substituted by MI.
 ///
 /// CmpInstr can be substituted:
@@ -1912,7 +1945,17 @@ static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr,
          "Caller guarantees that CmpInstr compares with constant 0");
 
   std::optional<UsedNZCV> NZVCUsed = examineCFlagsUse(MI, CmpInstr, TRI);
-  if (!NZVCUsed || NZVCUsed->C)
+  if (!NZVCUsed)
+    return false;
+
+  // Condition flag C is used to indicate unsigned overflow or borrow.
+  // 'ADDS %vreg, 0' always sets C to 0, and ANDS/BICS also always clear C,
+  // so C as seen by CmpInstr's users is preserved only when CmpInstr is an
+  // ADDS with zero and MI is a flag-setting logical (ANDS/BICS) operation.
+  // 'SUBS %vreg, 0' sets C to 1, which no candidate MI is guaranteed to
+  // produce, so any use of C rules out the SUBS form.
+  if (NZVCUsed->C &&
+      !(isADDSRegImm(CmpOpcode) && isANDSOpcode(MI)))
+    return false;
 
   // CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0', and MI is either
@@ -1921,7 +1964,8 @@ static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr,
   // 1) MI and CmpInstr set N and V to the same value.
   // 2) If MI is add/sub with no-signed-wrap, it produces a poison value when
   //    signed overflow occurs, so CmpInstr could still be simplified away.
-  if (NZVCUsed->V && !MI.getFlag(MachineInstr::NoSWrap))
+  // 3) ANDS also always sets V to 0.
+  if (NZVCUsed->V && !MI.getFlag(MachineInstr::NoSWrap) && !isANDSOpcode(MI))
     return false;
 
   AccessKind AccessToCheck = AK_Write;
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 50217c3a047df..e48703b1285e8 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -3089,17 +3089,41 @@ bool ARMBaseInstrInfo::optimizeCompareInstr(
           break;
         case ARMCC::HS: // C
         case ARMCC::LO: // C
-        case ARMCC::VS: // V
-        case ARMCC::VC: // V
         case ARMCC::HI: // C Z
         case ARMCC::LS: // C Z
+          // The instruction uses the C bit which is not safe.
+          return false;
+        case ARMCC::VS: // V
+        case ARMCC::VC: // V
         case ARMCC::GE: // N V
         case ARMCC::LT: // N V
         case ARMCC::GT: // Z N V
         case ARMCC::LE: // Z N V
-          // The instruction uses the V bit or C bit which is not safe.
+        {
+          // We MAY be able to do this if signed overflow is
+          // poison.
+
+          if (I->getFlag(MachineInstr::NoSWrap)) {
+            // Only adds and subs can set the V bit.
+            unsigned Opc = I->getOpcode();
+            bool IsSub = Opc == ARM::SUBrr || Opc == ARM::t2SUBrr ||
+                         Opc == ARM::SUBri || Opc == ARM::t2SUBri ||
+                         Opc == ARM::tSUBrr || Opc == ARM::tSUBi3 ||
+                         Opc == ARM::tSUBi8;
+
+            bool IsAdd = Opc == ARM::ADDrr || Opc == ARM::t2ADDrr ||
+                         Opc == ARM::ADDri || Opc == ARM::t2ADDri ||
+                         Opc == ARM::tADDrr || Opc == ARM::tADDi3 ||
+                         Opc == ARM::tADDi8;
+
+            if (IsSub || IsAdd)
+              break;
+          }
+
+          // The instruction uses the V bit which is not safe.
           return false;
         }
+        }
       }
     }
   }
diff --git a/llvm/test/CodeGen/AArch64/aarch64-icmp-opt.ll b/llvm/test/CodeGen/AArch64/aarch64-icmp-opt.ll
index c24ef372a5907..57ef72959a21a 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-icmp-opt.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-icmp-opt.ll
@@ -110,7 +110,111 @@ define i32 @add_i32(i32 %0, i32 %1) {
   ret i32 %10
 }
 
+define i64 @and_i64(i64 %0, i64 %1) {
+; CHECK-LABEL: and_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ands x0, x1, x0
+; CHECK-NEXT:    b.le .LBB4_2
+; CHECK-NEXT:  // %bb.1:
+; CHECK-NEXT:    b _Z2f4l
+; CHECK-NEXT:  .LBB4_2:
+; CHECK-NEXT:    b _Z2f3l
+  %3 = and i64 %1, %0
+  %4 = icmp slt i64 %3, 1
+  br i1 %4, label %5, label %7
+
+5:
+  %6 = tail call i64 @_Z2f3l(i64 %3)
+  br label %9
+
+7:
+  %8 = tail call i64 @_Z2f4l(i64 %3)
+  br label %9
+
+9:
+  %10 = phi i64 [ %6, %5 ], [ %8, %7 ]
+  ret i64 %10
+}
 
+define i32 @and_i32(i32 %0, i32 %1) {
+; CHECK-LABEL: and_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ands w0, w1, w0
+; CHECK-NEXT:    b.le .LBB5_2
+; CHECK-NEXT:  // %bb.1:
+; CHECK-NEXT:    b _Z2f2i
+; CHECK-NEXT:  .LBB5_2:
+; CHECK-NEXT:    b _Z2f1i
+  %3 = and i32 %1, %0
+  %4 = icmp slt i32 %3, 1
+  br i1 %4, label %5, label %7
+
+5:
+  %6 = tail call i32 @_Z2f1i(i32 %3)
+  br label %9
+
+7:
+  %8 = tail call i32 @_Z2f2i(i32 %3)
+  br label %9
+
+9:
+  %10 = phi i32 [ %6, %5 ], [ %8, %7 ]
+  ret i32 %10
+}
+
+define i64 @and_i64_freeze(i64 %0, i64 %1) {
+; CHECK-LABEL: and_i64_freeze:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ands x0, x1, x0
+; CHECK-NEXT:    b.le .LBB6_2
+; CHECK-NEXT:  // %bb.1:
+; CHECK-NEXT:    b _Z2f4l
+; CHECK-NEXT:  .LBB6_2:
+; CHECK-NEXT:    b _Z2f3l
+  %3 = and i64 %1, %0
+  %freeze = freeze i64 %3
+  %4 = icmp slt i64 %3, 1
+  br i1 %4, label %5, label %7
+
+5:
+  %6 = tail call i64 @_Z2f3l(i64 %freeze)
+  br label %9
+
+7:
+  %8 = tail call i64 @_Z2f4l(i64 %freeze)
+  br label %9
+
+9:
+  %10 = phi i64 [ %6, %5 ], [ %8, %7 ]
+  ret i64 %10
+}
+
+define i32 @and_i32_freeze(i32 %0, i32 %1) {
+; CHECK-LABEL: and_i32_freeze:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ands w0, w1, w0
+; CHECK-NEXT:    b.le .LBB7_2
+; CHECK-NEXT:  // %bb.1:
+; CHECK-NEXT:    b _Z2f2i
+; CHECK-NEXT:  .LBB7_2:
+; CHECK-NEXT:    b _Z2f1i
+  %3 = and i32 %1, %0
+  %freeze = freeze i32 %3
+  %4 = icmp slt i32 %freeze, 1
+  br i1 %4, label %5, label %7
+
+5:
+  %6 = tail call i32 @_Z2f1i(i32 %freeze)
+  br label %9
+
+7:
+  %8 = tail call i32 @_Z2f2i(i32 %freeze)
+  br label %9
+
+9:
+  %10 = phi i32 [ %6, %5 ], [ %8, %7 ]
+  ret i32 %10
+}
 
 declare i32 @_Z2f1i(i32)
 declare i32 @_Z2f2i(i32)



More information about the llvm-commits mailing list