[llvm] [AArch64] Snap 32 and -32 to 31 and -31 if possible for ccmp and ccmn (PR #150640)

Sat Jul 26 08:10:17 PDT 2025

https://github.com/AZero13 updated https://github.com/llvm/llvm-project/pull/150640

>From 38b94f879f4a01183153c2039f60faf7dfd4733d Mon Sep 17 00:00:00 2001
From: Rose <gfunni234 at gmail.com>
Date: Fri, 25 Jul 2025 11:41:19 -0400
Subject: [PATCH 1/2] Pre-commit test (NFC)

---
 llvm/test/CodeGen/AArch64/cmp-chains.ll | 92 +++++++++++++++++++++++++
 1 file changed, 92 insertions(+)

diff --git a/llvm/test/CodeGen/AArch64/cmp-chains.ll b/llvm/test/CodeGen/AArch64/cmp-chains.ll
index 4b816df75a730..7cf82cb52a48c 100644
--- a/llvm/test/CodeGen/AArch64/cmp-chains.ll
+++ b/llvm/test/CodeGen/AArch64/cmp-chains.ll
@@ -501,3 +501,95 @@ entry:
   %land.ext = zext i1 %0 to i32
   ret i32 %land.ext
 }
+
+define i32 @compare_with_neg_32(i32 %a, i32 %b, i32 %c) {
+; SDISEL-LABEL: compare_with_neg_32:
+; SDISEL:       // %bb.0:
+; SDISEL-NEXT:    cmp w0, w2
+; SDISEL-NEXT:    mov w8, #-32 // =0xffffffe0
+; SDISEL-NEXT:    ccmp w1, w8, #4, lt
+; SDISEL-NEXT:    csel w0, w1, w0, gt
+; SDISEL-NEXT:    ret
+;
+; GISEL-LABEL: compare_with_neg_32:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    mov w8, #-32 // =0xffffffe0
+; GISEL-NEXT:    cmp w0, w2
+; GISEL-NEXT:    ccmp w1, w8, #4, lt
+; GISEL-NEXT:    csel w0, w1, w0, gt
+; GISEL-NEXT:    ret
+  %cmp = icmp sgt i32 %b, -32
+  %cmp1 = icmp slt i32 %a, %c
+  %or.cond = and i1 %cmp, %cmp1
+  %cond = select i1 %or.cond, i32 %b, i32 %a
+  ret i32 %cond
+}
+
+define i32 @compare_with_32(i32 %a, i32 %b, i32 %c) {
+; SDISEL-LABEL: compare_with_32:
+; SDISEL:       // %bb.0:
+; SDISEL-NEXT:    cmp w0, w2
+; SDISEL-NEXT:    mov w8, #32 // =0x20
+; SDISEL-NEXT:    ccmp w1, w8, #0, lt
+; SDISEL-NEXT:    csel w0, w1, w0, lt
+; SDISEL-NEXT:    ret
+;
+; GISEL-LABEL: compare_with_32:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    mov w8, #32 // =0x20
+; GISEL-NEXT:    cmp w0, w2
+; GISEL-NEXT:    ccmp w1, w8, #0, lt
+; GISEL-NEXT:    csel w0, w1, w0, lt
+; GISEL-NEXT:    ret
+  %cmp = icmp slt i32 %b, 32
+  %cmp1 = icmp slt i32 %a, %c
+  %or.cond = and i1 %cmp, %cmp1
+  %cond = select i1 %or.cond, i32 %b, i32 %a
+  ret i32 %cond
+}
+
+define i32 @compare_with_neg_32_unsigned(i32 %a, i32 %b, i32 %c) {
+; SDISEL-LABEL: compare_with_neg_32_unsigned:
+; SDISEL:       // %bb.0:
+; SDISEL-NEXT:    cmp w0, w2
+; SDISEL-NEXT:    mov w8, #-32 // =0xffffffe0
+; SDISEL-NEXT:    ccmp w1, w8, #0, lo
+; SDISEL-NEXT:    csel w0, w1, w0, hi
+; SDISEL-NEXT:    ret
+;
+; GISEL-LABEL: compare_with_neg_32_unsigned:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    mov w8, #-32 // =0xffffffe0
+; GISEL-NEXT:    cmp w0, w2
+; GISEL-NEXT:    ccmp w1, w8, #0, lo
+; GISEL-NEXT:    csel w0, w1, w0, hi
+; GISEL-NEXT:    ret
+  %cmp = icmp ugt i32 %b, -32
+  %cmp1 = icmp ult i32 %a, %c
+  %or.cond = and i1 %cmp, %cmp1
+  %cond = select i1 %or.cond, i32 %b, i32 %a
+  ret i32 %cond
+}
+
+define i32 @compare_with_32_unsigned(i32 %a, i32 %b, i32 %c) {
+; SDISEL-LABEL: compare_with_32_unsigned:
+; SDISEL:       // %bb.0:
+; SDISEL-NEXT:    cmp w0, w2
+; SDISEL-NEXT:    mov w8, #32 // =0x20
+; SDISEL-NEXT:    ccmp w1, w8, #2, lo
+; SDISEL-NEXT:    csel w0, w1, w0, lo
+; SDISEL-NEXT:    ret
+;
+; GISEL-LABEL: compare_with_32_unsigned:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    mov w8, #32 // =0x20
+; GISEL-NEXT:    cmp w0, w2
+; GISEL-NEXT:    ccmp w1, w8, #2, lo
+; GISEL-NEXT:    csel w0, w1, w0, lo
+; GISEL-NEXT:    ret
+  %cmp = icmp ult i32 %b, 32
+  %cmp1 = icmp ult i32 %a, %c
+  %or.cond = and i1 %cmp, %cmp1
+  %cond = select i1 %or.cond, i32 %b, i32 %a
+  ret i32 %cond
+}

>From 0f714b9c0c1617d4ab5dfe1caec0dd82696fadf2 Mon Sep 17 00:00:00 2001
From: Rose <gfunni234 at gmail.com>
Date: Fri, 25 Jul 2025 11:42:01 -0400
Subject: [PATCH 2/2] [AArch64] Snap 32 and -32 to 31 and -31 if possible for
 ccmp and ccmn

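ccmp and ccmn can only encode an immediate in the range 0-31, so a
conditional comparison against 32 or -32 previously had to materialize
the constant in a register first. When the predicate allows it, compare
against 31 (ccmp) or -31 (ccmn #31) instead and adjust the condition
code to match, e.g. `x < 32` becomes `x <= 31` and `x > -32` becomes
`x >= -31`. This lets us encode the immediate in the instruction and
drop the extra mov.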
---
 .../Target/AArch64/AArch64ISelLowering.cpp    | 45 +++++++++-
 .../GISel/AArch64InstructionSelector.cpp      | 55 +++++++++++-
 llvm/test/CodeGen/AArch64/cmp-chains.ll       | 84 ++++++-------------
 3 files changed, 118 insertions(+), 66 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 7b49754ee7e1f..2c11776323e02 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -3646,7 +3646,7 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
 static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS,
                                          ISD::CondCode CC, SDValue CCOp,
                                          AArch64CC::CondCode Predicate,
-                                         AArch64CC::CondCode OutCC,
+                                         AArch64CC::CondCode &OutCC,
                                          const SDLoc &DL, SelectionDAG &DAG) {
   unsigned Opcode = 0;
   const bool FullFP16 = DAG.getSubtarget<AArch64Subtarget>().hasFullFP16();
@@ -3661,7 +3661,48 @@ static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS,
     Opcode = AArch64ISD::FCCMP;
   } else if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(RHS)) {
     APInt Imm = Const->getAPIntValue();
-    if (Imm.isNegative() && Imm.sgt(-32)) {
+    if (Imm.getZExtValue() == 32 && (CC == ISD::SETLT || CC == ISD::SETGE ||
+                                     CC == ISD::SETULT || CC == ISD::SETUGE)) {
+      Opcode = AArch64ISD::CCMP;
+      RHS = DAG.getConstant(31, DL, Const->getValueType(0));
+      switch (CC) {
+      case ISD::SETLT:
+        OutCC = AArch64CC::LE;
+        break;
+      case ISD::SETGE:
+        OutCC = AArch64CC::GT;
+        break;
+      case ISD::SETULT:
+        OutCC = AArch64CC::LS;
+        break;
+      case ISD::SETUGE:
+        OutCC = AArch64CC::HI;
+        break;
+      default:
+        llvm_unreachable("Cannot adjust 32 to 31");
+      }
+    } else if (Imm.getSExtValue() == -32 &&
+               (CC == ISD::SETLE || CC == ISD::SETGT || CC == ISD::SETULE ||
+                CC == ISD::SETUGT)) {
+      Opcode = AArch64ISD::CCMN;
+      RHS = DAG.getConstant(31, DL, Const->getValueType(0));
+      switch (CC) {
+      case ISD::SETLE:
+        OutCC = AArch64CC::LT;
+        break;
+      case ISD::SETGT:
+        OutCC = AArch64CC::GE;
+        break;
+      case ISD::SETULE:
+        OutCC = AArch64CC::LO;
+        break;
+      case ISD::SETUGT:
+        OutCC = AArch64CC::HS;
+        break;
+      default:
+        llvm_unreachable("Cannot adjust -32 to -31");
+      }
+    } else if (Imm.isNegative() && Imm.sgt(-32)) {
       Opcode = AArch64ISD::CCMN;
       RHS = DAG.getConstant(Imm.abs(), DL, Const->getValueType(0));
     }
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 1381a9b70df87..b1a2d9ebe7fc2 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -352,7 +352,7 @@ class AArch64InstructionSelector : public InstructionSelector {
   MachineInstr *emitConditionalComparison(Register LHS, Register RHS,
                                           CmpInst::Predicate CC,
                                           AArch64CC::CondCode Predicate,
-                                          AArch64CC::CondCode OutCC,
+                                          AArch64CC::CondCode &OutCC,
                                           MachineIRBuilder &MIB) const;
   MachineInstr *emitConjunctionRec(Register Val, AArch64CC::CondCode &OutCC,
                                    bool Negate, Register CCOp,
@@ -4868,16 +4868,61 @@ static bool canEmitConjunction(Register Val, bool &CanNegate, bool &MustBeFirst,
 
 MachineInstr *AArch64InstructionSelector::emitConditionalComparison(
     Register LHS, Register RHS, CmpInst::Predicate CC,
-    AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC,
+    AArch64CC::CondCode Predicate, AArch64CC::CondCode &OutCC,
     MachineIRBuilder &MIB) const {
   auto &MRI = *MIB.getMRI();
   LLT OpTy = MRI.getType(LHS);
   unsigned CCmpOpc;
   std::optional<ValueAndVReg> C;
+  bool Adjusted = false;
   if (CmpInst::isIntPredicate(CC)) {
     assert(OpTy.getSizeInBits() == 32 || OpTy.getSizeInBits() == 64);
     C = getIConstantVRegValWithLookThrough(RHS, MRI);
-    if (!C || C->Value.sgt(31) || C->Value.slt(-31))
+    if (!C) {
+      CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWr : AArch64::CCMPXr;
+    } else if (C->Value.getZExtValue() == 32 &&
+               (CC == CmpInst::ICMP_SLT || CC == CmpInst::ICMP_SGE ||
+                CC == CmpInst::ICMP_ULT || CC == CmpInst::ICMP_UGE)) {
+      CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWi : AArch64::CCMPXi;
+      switch (CC) {
+      case CmpInst::ICMP_SLT:
+        OutCC = AArch64CC::LE;
+        break;
+      case CmpInst::ICMP_SGE:
+        OutCC = AArch64CC::GT;
+        break;
+      case CmpInst::ICMP_ULT:
+        OutCC = AArch64CC::LS;
+        break;
+      case CmpInst::ICMP_UGE:
+        OutCC = AArch64CC::HI;
+        break;
+      default:
+        llvm_unreachable("Cannot adjust 32 to 31");
+      }
+      Adjusted = true;
+    } else if (C->Value.getSExtValue() == -32 &&
+               (CC == CmpInst::ICMP_SLE || CC == CmpInst::ICMP_SGT ||
+                CC == CmpInst::ICMP_ULE || CC == CmpInst::ICMP_UGT)) {
+      CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMNWi : AArch64::CCMNXi;
+      switch (CC) {
+      case CmpInst::ICMP_SLE:
+        OutCC = AArch64CC::LT;
+        break;
+      case CmpInst::ICMP_SGT:
+        OutCC = AArch64CC::GE;
+        break;
+      case CmpInst::ICMP_ULE:
+        OutCC = AArch64CC::LO;
+        break;
+      case CmpInst::ICMP_UGT:
+        OutCC = AArch64CC::HS;
+        break;
+      default:
+        llvm_unreachable("Cannot adjust -32 to -31");
+      }
+      Adjusted = true;
+    } else if (C->Value.sgt(31) || C->Value.slt(-31))
       CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWr : AArch64::CCMPXr;
     else if (C->Value.ule(31))
       CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWi : AArch64::CCMPXi;
@@ -4905,7 +4950,9 @@ MachineInstr *AArch64InstructionSelector::emitConditionalComparison(
   unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
   auto CCmp =
       MIB.buildInstr(CCmpOpc, {}, {LHS});
-  if (CCmpOpc == AArch64::CCMPWi || CCmpOpc == AArch64::CCMPXi)
+  if (Adjusted) {
+    CCmp.addImm(31);
+  } else if (CCmpOpc == AArch64::CCMPWi || CCmpOpc == AArch64::CCMPXi)
     CCmp.addImm(C->Value.getZExtValue());
   else if (CCmpOpc == AArch64::CCMNWi || CCmpOpc == AArch64::CCMNXi)
     CCmp.addImm(C->Value.abs().getZExtValue());
diff --git a/llvm/test/CodeGen/AArch64/cmp-chains.ll b/llvm/test/CodeGen/AArch64/cmp-chains.ll
index 7cf82cb52a48c..45aa2cc6bf4f4 100644
--- a/llvm/test/CodeGen/AArch64/cmp-chains.ll
+++ b/llvm/test/CodeGen/AArch64/cmp-chains.ll
@@ -503,21 +503,12 @@ entry:
 }
 
 define i32 @compare_with_neg_32(i32 %a, i32 %b, i32 %c) {
-; SDISEL-LABEL: compare_with_neg_32:
-; SDISEL:       // %bb.0:
-; SDISEL-NEXT:    cmp w0, w2
-; SDISEL-NEXT:    mov w8, #-32 // =0xffffffe0
-; SDISEL-NEXT:    ccmp w1, w8, #4, lt
-; SDISEL-NEXT:    csel w0, w1, w0, gt
-; SDISEL-NEXT:    ret
-;
-; GISEL-LABEL: compare_with_neg_32:
-; GISEL:       // %bb.0:
-; GISEL-NEXT:    mov w8, #-32 // =0xffffffe0
-; GISEL-NEXT:    cmp w0, w2
-; GISEL-NEXT:    ccmp w1, w8, #4, lt
-; GISEL-NEXT:    csel w0, w1, w0, gt
-; GISEL-NEXT:    ret
+; CHECK-LABEL: compare_with_neg_32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmp w0, w2
+; CHECK-NEXT:    ccmn w1, #31, #8, lt
+; CHECK-NEXT:    csel w0, w1, w0, ge
+; CHECK-NEXT:    ret
   %cmp = icmp sgt i32 %b, -32
   %cmp1 = icmp slt i32 %a, %c
   %or.cond = and i1 %cmp, %cmp1
@@ -526,21 +517,12 @@ define i32 @compare_with_neg_32(i32 %a, i32 %b, i32 %c) {
 }
 
 define i32 @compare_with_32(i32 %a, i32 %b, i32 %c) {
-; SDISEL-LABEL: compare_with_32:
-; SDISEL:       // %bb.0:
-; SDISEL-NEXT:    cmp w0, w2
-; SDISEL-NEXT:    mov w8, #32 // =0x20
-; SDISEL-NEXT:    ccmp w1, w8, #0, lt
-; SDISEL-NEXT:    csel w0, w1, w0, lt
-; SDISEL-NEXT:    ret
-;
-; GISEL-LABEL: compare_with_32:
-; GISEL:       // %bb.0:
-; GISEL-NEXT:    mov w8, #32 // =0x20
-; GISEL-NEXT:    cmp w0, w2
-; GISEL-NEXT:    ccmp w1, w8, #0, lt
-; GISEL-NEXT:    csel w0, w1, w0, lt
-; GISEL-NEXT:    ret
+; CHECK-LABEL: compare_with_32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmp w0, w2
+; CHECK-NEXT:    ccmp w1, #31, #0, lt
+; CHECK-NEXT:    csel w0, w1, w0, le
+; CHECK-NEXT:    ret
   %cmp = icmp slt i32 %b, 32
   %cmp1 = icmp slt i32 %a, %c
   %or.cond = and i1 %cmp, %cmp1
@@ -549,21 +531,12 @@ define i32 @compare_with_32(i32 %a, i32 %b, i32 %c) {
 }
 
 define i32 @compare_with_neg_32_unsigned(i32 %a, i32 %b, i32 %c) {
-; SDISEL-LABEL: compare_with_neg_32_unsigned:
-; SDISEL:       // %bb.0:
-; SDISEL-NEXT:    cmp w0, w2
-; SDISEL-NEXT:    mov w8, #-32 // =0xffffffe0
-; SDISEL-NEXT:    ccmp w1, w8, #0, lo
-; SDISEL-NEXT:    csel w0, w1, w0, hi
-; SDISEL-NEXT:    ret
-;
-; GISEL-LABEL: compare_with_neg_32_unsigned:
-; GISEL:       // %bb.0:
-; GISEL-NEXT:    mov w8, #-32 // =0xffffffe0
-; GISEL-NEXT:    cmp w0, w2
-; GISEL-NEXT:    ccmp w1, w8, #0, lo
-; GISEL-NEXT:    csel w0, w1, w0, hi
-; GISEL-NEXT:    ret
+; CHECK-LABEL: compare_with_neg_32_unsigned:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmp w0, w2
+; CHECK-NEXT:    ccmn w1, #31, #0, lo
+; CHECK-NEXT:    csel w0, w1, w0, hs
+; CHECK-NEXT:    ret
   %cmp = icmp ugt i32 %b, -32
   %cmp1 = icmp ult i32 %a, %c
   %or.cond = and i1 %cmp, %cmp1
@@ -572,21 +545,12 @@ define i32 @compare_with_neg_32_unsigned(i32 %a, i32 %b, i32 %c) {
 }
 
 define i32 @compare_with_32_unsigned(i32 %a, i32 %b, i32 %c) {
-; SDISEL-LABEL: compare_with_32_unsigned:
-; SDISEL:       // %bb.0:
-; SDISEL-NEXT:    cmp w0, w2
-; SDISEL-NEXT:    mov w8, #32 // =0x20
-; SDISEL-NEXT:    ccmp w1, w8, #2, lo
-; SDISEL-NEXT:    csel w0, w1, w0, lo
-; SDISEL-NEXT:    ret
-;
-; GISEL-LABEL: compare_with_32_unsigned:
-; GISEL:       // %bb.0:
-; GISEL-NEXT:    mov w8, #32 // =0x20
-; GISEL-NEXT:    cmp w0, w2
-; GISEL-NEXT:    ccmp w1, w8, #2, lo
-; GISEL-NEXT:    csel w0, w1, w0, lo
-; GISEL-NEXT:    ret
+; CHECK-LABEL: compare_with_32_unsigned:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmp w0, w2
+; CHECK-NEXT:    ccmp w1, #31, #2, lo
+; CHECK-NEXT:    csel w0, w1, w0, ls
+; CHECK-NEXT:    ret
   %cmp = icmp ult i32 %b, 32
   %cmp1 = icmp ult i32 %a, %c
   %or.cond = and i1 %cmp, %cmp1