[llvm] 6b19ccd - [AArch64] Simplify some masked integer comparisons. (#153783)

via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 30 02:17:11 PDT 2025


Author: Ricardo Jesus
Date: 2025-09-30T10:17:07+01:00
New Revision: 6b19ccdf64a0022b9665b61e4003b4e87643829b

URL: https://github.com/llvm/llvm-project/commit/6b19ccdf64a0022b9665b61e4003b4e87643829b
DIFF: https://github.com/llvm/llvm-project/commit/6b19ccdf64a0022b9665b61e4003b4e87643829b.diff

LOG: [AArch64] Simplify some masked integer comparisons. (#153783)

Specifically, `X & M ?= C --> (C << clz(M)) ?= (X << clz(M))`, where M is
a non-empty sequence of ones starting at the least significant bit with
the remaining bits zero, and C is a constant with `(C & ~M) == 0` that
cannot be materialised into a SUBS (immediate). The shifted form can be
matched into a SUBS (shifted register). Proof:
https://alive2.llvm.org/ce/z/haqdJ4.

This improves the comparison in isinf, for example:
```cpp
int isinf(float x) {
  return __builtin_isinf(x);
}
```

Before:
```
isinf:
  fmov    w9, s0
  mov     w8, #2139095040
  and     w9, w9, #0x7fffffff
  cmp     w9, w8
  cset    w0, eq
  ret
```

After:
```
isinf:
  fmov    w9, s0
  mov     w8, #-16777216
  cmp     w8, w9, lsl #1
  cset    w0, eq
  ret
```
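As a quick illustration of where the new constants come from: for the f32 mask
`M = 0x7fffffff`, `clz(M) = 1`, so `C = 0x7f800000` shifted left by one is
`0xff000000` (`-16777216` as a signed immediate), which is exactly the
`mov`/`cmp ..., lsl #1` pair above. The standalone sketch below (the
`maskedCompare`/`shiftedCompare` helpers are made up for illustration and are
not part of the patch) checks the equivalence over a sweep of inputs:
```cpp
// Illustrative sketch only: f32 isinf constants,
// M = 0x7fffffff, clz(M) = 1, C = 0x7f800000, C << 1 = 0xff000000.
#include <cassert>
#include <cstdint>

static bool maskedCompare(uint32_t x) {  // X & M == C
  return (x & 0x7fffffffu) == 0x7f800000u;
}

static bool shiftedCompare(uint32_t x) { // (C << clz(M)) == (X << clz(M))
  return 0xff000000u == (x << 1);
}

int main() {
  // Sparse sweep over 32-bit inputs; both forms agree on every sampled value.
  for (uint64_t x = 0; x <= 0xffffffffull; x += 9973)
    assert(maskedCompare(uint32_t(x)) == shiftedCompare(uint32_t(x)));
  return 0;
}
```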

Added: 
    llvm/test/CodeGen/AArch64/masked-integer-compare.ll

Modified: 
    llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
    llvm/test/CodeGen/AArch64/isinf.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 899baa9c998ec..9078675da0e95 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -25512,6 +25512,32 @@ SDValue performCONDCombine(SDNode *N,
                                              CmpIndex, CC))
     return Val;
 
+  // X & M ?= C --> (C << clz(M)) ?= (X << clz(M)) where M is a non-empty
+  // sequence of ones starting at the least significant bit with the remainder
+  // zero and C is a constant s.t. (C & ~M) == 0 that cannot be materialised
+  // into a SUBS (immediate). The transformed form can be matched into a SUBS
+  // (shifted register).
+  if ((CC == AArch64CC::EQ || CC == AArch64CC::NE) && AndNode->hasOneUse() &&
+      isa<ConstantSDNode>(AndNode->getOperand(1)) &&
+      isa<ConstantSDNode>(SubsNode->getOperand(1))) {
+    SDValue X = AndNode->getOperand(0);
+    APInt M = AndNode->getConstantOperandAPInt(1);
+    APInt C = SubsNode->getConstantOperandAPInt(1);
+
+    if (M.isMask() && C.isSubsetOf(M) && !isLegalArithImmed(C.getZExtValue())) {
+      SDLoc DL(SubsNode);
+      EVT VT = SubsNode->getValueType(0);
+      unsigned ShiftAmt = M.countl_zero();
+      SDValue ShiftedX = DAG.getNode(
+          ISD::SHL, DL, VT, X, DAG.getShiftAmountConstant(ShiftAmt, VT, DL));
+      SDValue ShiftedC = DAG.getConstant(C << ShiftAmt, DL, VT);
+      SDValue NewSubs = DAG.getNode(AArch64ISD::SUBS, DL, SubsNode->getVTList(),
+                                    ShiftedC, ShiftedX);
+      DCI.CombineTo(SubsNode, NewSubs, NewSubs.getValue(1));
+      return SDValue(N, 0);
+    }
+  }
+
   if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndNode->getOperand(1))) {
     uint32_t CNV = CN->getZExtValue();
     if (CNV == 255)

diff --git a/llvm/test/CodeGen/AArch64/isinf.ll b/llvm/test/CodeGen/AArch64/isinf.ll
index e68539bcf07d9..e8bbaf96395f0 100644
--- a/llvm/test/CodeGen/AArch64/isinf.ll
+++ b/llvm/test/CodeGen/AArch64/isinf.ll
@@ -27,9 +27,8 @@ define i32 @replace_isinf_call_f32(float %x) {
 ; CHECK-LABEL: replace_isinf_call_f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fmov w9, s0
-; CHECK-NEXT:    mov w8, #2139095040 // =0x7f800000
-; CHECK-NEXT:    and w9, w9, #0x7fffffff
-; CHECK-NEXT:    cmp w9, w8
+; CHECK-NEXT:    mov w8, #-16777216 // =0xff000000
+; CHECK-NEXT:    cmp w8, w9, lsl #1
 ; CHECK-NEXT:    cset w0, eq
 ; CHECK-NEXT:    ret
   %abs = tail call float @llvm.fabs.f32(float %x)
@@ -43,9 +42,8 @@ define i32 @replace_isinf_call_f64(double %x) {
 ; CHECK-LABEL: replace_isinf_call_f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fmov x9, d0
-; CHECK-NEXT:    mov x8, #9218868437227405312 // =0x7ff0000000000000
-; CHECK-NEXT:    and x9, x9, #0x7fffffffffffffff
-; CHECK-NEXT:    cmp x9, x8
+; CHECK-NEXT:    mov x8, #-9007199254740992 // =0xffe0000000000000
+; CHECK-NEXT:    cmp x8, x9, lsl #1
 ; CHECK-NEXT:    cset w0, eq
 ; CHECK-NEXT:    ret
   %abs = tail call double @llvm.fabs.f64(double %x)

diff --git a/llvm/test/CodeGen/AArch64/masked-integer-compare.ll b/llvm/test/CodeGen/AArch64/masked-integer-compare.ll
new file mode 100644
index 0000000000000..363cd10c78a94
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/masked-integer-compare.ll
@@ -0,0 +1,178 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64-none-linux-gnu < %s -o -| FileCheck %s
+
+; Test code generation support for SUBS (shifted register) from masked integer
+; compare sequences. These sequences appear in isinf tests, for example.
+
+define i1 @combine_masked_i32(i32 %x) {
+; CHECK-LABEL: combine_masked_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #-16777216 // =0xff000000
+; CHECK-NEXT:    cmp w8, w0, lsl #1
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
+  %and = and i32 %x, u0x7fffffff
+  %sub = sub i32 %and, u0x7f800000
+  %cmp = icmp eq i32 %sub, 0
+  ret i1 %cmp
+}
+
+define i1 @combine_masked_i64(i64 %x) {
+; CHECK-LABEL: combine_masked_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, #-9007199254740992 // =0xffe0000000000000
+; CHECK-NEXT:    cmp x8, x0, lsl #1
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
+  %and = and i64 %x, u0x7fffffffffffffff
+  %sub = sub i64 %and, u0x7ff0000000000000
+  %cmp = icmp eq i64 %sub, 0
+  ret i1 %cmp
+}
+
+define i1 @combine_masked_ne(i32 %x) {
+; CHECK-LABEL: combine_masked_ne:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #-16777216 // =0xff000000
+; CHECK-NEXT:    cmp w8, w0, lsl #1
+; CHECK-NEXT:    cset w0, ne
+; CHECK-NEXT:    ret
+  %and = and i32 %x, u0x7fffffff
+  %cmp = icmp ne i32 %and, u0x7f800000
+  ret i1 %cmp
+}
+
+define i1 @combine_masked_lsl4(i32 %x) {
+; CHECK-LABEL: combine_masked_lsl4:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #-134217728 // =0xf8000000
+; CHECK-NEXT:    cmp w8, w0, lsl #4
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
+  %and = and i32 %x, u0x0fffffff
+  %cmp = icmp eq i32 %and, u0x0f800000
+  ret i1 %cmp
+}
+
+define i1 @dont_combine_not_mask(i32 %x) {
+; CHECK-LABEL: dont_combine_not_mask:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #2139095040 // =0x7f800000
+; CHECK-NEXT:    and w9, w0, #0x7ffffffe
+; CHECK-NEXT:    cmp w9, w8
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
+  %and = and i32 %x, u0x7ffffffe
+  %cmp = icmp eq i32 %and, u0x7f800000
+  ret i1 %cmp
+}
+
+define i1 @dont_combine_cmp_not_masked(i32 %x) {
+; CHECK-LABEL: dont_combine_cmp_not_masked:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #2139095040 // =0x7f800000
+; CHECK-NEXT:    and w9, w0, #0x3fffffff
+; CHECK-NEXT:    cmp w9, w8
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
+  %and = and i32 %x, u0x3fffffff
+  %cmp = icmp eq i32 %and, u0x7f800000
+  ret i1 %cmp
+}
+
+define i1 @dont_combine_not_constant_mask(i32 %x, i32 %m) {
+; CHECK-LABEL: dont_combine_not_constant_mask:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #2139095040 // =0x7f800000
+; CHECK-NEXT:    and w9, w0, w1
+; CHECK-NEXT:    cmp w9, w8
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
+  %and = and i32 %x, %m
+  %cmp = icmp eq i32 %and, u0x7f800000
+  ret i1 %cmp
+}
+
+define i1 @dont_combine_not_constant_cmp(i32 %x, i32 %c) {
+; CHECK-LABEL: dont_combine_not_constant_cmp:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and w8, w0, #0xfffffff
+; CHECK-NEXT:    cmp w8, w1
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
+  %and = and i32 %x, u0x0fffffff
+  %cmp = icmp eq i32 %and, %c
+  ret i1 %cmp
+}
+
+define i1 @dont_combine_subs_imm(i32 %x) {
+; CHECK-LABEL: dont_combine_subs_imm:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and w8, w0, #0x7fffffff
+; CHECK-NEXT:    cmp w8, #291
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
+  %and = and i32 %x, u0x7fffffff
+  %cmp = icmp eq i32 %and, u0x123
+  ret i1 %cmp
+}
+
+define i1 @dont_combine_subs_imm_lsl12(i32 %x) {
+; CHECK-LABEL: dont_combine_subs_imm_lsl12:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and w8, w0, #0x7fffffff
+; CHECK-NEXT:    cmp w8, #291, lsl #12 // =1191936
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
+  %and = and i32 %x, u0x7fffffff
+  %cmp = icmp eq i32 %and, u0x123000
+  ret i1 %cmp
+}
+
+define { i1, i1 } @dont_combine_multi_use_cmp(i32 %x) {
+; CHECK-LABEL: dont_combine_multi_use_cmp:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #2139095040 // =0x7f800000
+; CHECK-NEXT:    and w9, w0, #0x7fffffff
+; CHECK-NEXT:    cmp w9, w8
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    cset w1, lt
+; CHECK-NEXT:    ret
+  %and = and i32 %x, u0x7fffffff
+  %eq = icmp eq i32 %and, u0x7f800000
+  %lt = icmp slt i32 %and, u0x7f800000
+  %r1 = insertvalue { i1, i1 } poison, i1 %eq, 0
+  %r2 = insertvalue { i1, i1 } %r1, i1 %lt, 1
+  ret { i1, i1 } %r2
+}
+
+define { i32, i1 } @dont_combine_multi_use_sub(i32 %x) {
+; CHECK-LABEL: dont_combine_multi_use_sub:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #-2139095040 // =0x80800000
+; CHECK-NEXT:    and w9, w0, #0x7fffffff
+; CHECK-NEXT:    adds w0, w9, w8
+; CHECK-NEXT:    cset w1, eq
+; CHECK-NEXT:    ret
+  %and = and i32 %x, u0x7fffffff
+  %sub = sub i32 %and, u0x7f800000
+  %cmp = icmp eq i32 %sub, 0
+  %r1 = insertvalue { i32, i1 } poison, i32 %sub, 0
+  %r2 = insertvalue { i32, i1 } %r1, i1 %cmp, 1
+  ret { i32, i1 } %r2
+}
+
+define { i32, i1 } @dont_combine_multi_use_and(i32 %x) {
+; CHECK-LABEL: dont_combine_multi_use_and:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #2139095040 // =0x7f800000
+; CHECK-NEXT:    and w0, w0, #0x7fffffff
+; CHECK-NEXT:    cmp w0, w8
+; CHECK-NEXT:    cset w1, eq
+; CHECK-NEXT:    ret
+  %and = and i32 %x, u0x7fffffff
+  %cmp = icmp eq i32 %and, u0x7f800000
+  %r1 = insertvalue { i32, i1 } poison, i32 %and, 0
+  %r2 = insertvalue { i32, i1 } %r1, i1 %cmp, 1
+  ret { i32, i1 } %r2
+}


        

