[llvm] [LLVM][SCEV] Look through common multiplicand when simplifying compares. (PR #141798)
Paul Walker via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 31 08:17:53 PDT 2025
https://github.com/paulwalker-arm updated https://github.com/llvm/llvm-project/pull/141798
>From c3c54ade9baa8b4b4d76cbc314dde016cb09aa84 Mon Sep 17 00:00:00 2001
From: Paul Walker <paul.walker at arm.com>
Date: Fri, 30 May 2025 14:41:18 +0100
Subject: [PATCH 1/4] Add dedicated SCEV tests.
---
.../ScalarEvolution/simplify-icmp-ops.ll | 416 ++++++++++++++++++
1 file changed, 416 insertions(+)
create mode 100644 llvm/test/Analysis/ScalarEvolution/simplify-icmp-ops.ll
diff --git a/llvm/test/Analysis/ScalarEvolution/simplify-icmp-ops.ll b/llvm/test/Analysis/ScalarEvolution/simplify-icmp-ops.ll
new file mode 100644
index 0000000000000..87fedaa5c3556
--- /dev/null
+++ b/llvm/test/Analysis/ScalarEvolution/simplify-icmp-ops.ll
@@ -0,0 +1,416 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=indvars < %s | FileCheck %s
+
+; Verify ScalarEvolution can simplify comparisons of the form:
+; (X * Z) icmp (Y * Z) ==> X icmp Y
+; which allows IndVarSimplify to "remove" control flow.
+
+define void @signed_icmp_mul_common_multiplicand(ptr %loc) vscale_range(1,1073741824) {
+; CHECK-LABEL: define void @signed_icmp_mul_common_multiplicand(
+; CHECK-SAME: ptr [[LOC:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[Z:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT: [[X:%.*]] = mul nsw i32 9, [[Z]]
+; CHECK-NEXT: [[Y:%.*]] = mul nsw i32 5, [[Z]]
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IDX_DEC:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: store i32 [[IDX]], ptr [[LOC]], align 4
+; CHECK-NEXT: [[IDX_DEC]] = add nuw i32 [[IDX]], 1
+; CHECK-NEXT: [[COND:%.*]] = icmp slt i32 [[X]], [[Y]]
+; CHECK-NEXT: br i1 [[COND]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ %z = call i32 @llvm.vscale.i32()
+ %x = mul nsw i32 9, %z
+ %y = mul nsw i32 5, %z
+ br label %loop
+
+loop:
+ %idx = phi i32 [ 0, %entry ], [ %idx.dec, %loop ]
+ store i32 %idx, ptr %loc
+ %idx.dec = add nuw i32 %idx, 1
+ %cond = icmp slt i32 %x, %y
+ br i1 %cond, label %loop, label %exit
+
+exit:
+ ret void
+}
+
+define void @signed_icmp_mul_common_multiplicand_commuted(ptr %loc) vscale_range(1,1073741824) {
+; CHECK-LABEL: define void @signed_icmp_mul_common_multiplicand_commuted(
+; CHECK-SAME: ptr [[LOC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[Z:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT: [[X:%.*]] = mul nsw i32 [[Z]], 9
+; CHECK-NEXT: [[Y:%.*]] = mul nsw i32 [[Z]], 5
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IDX_DEC:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: store i32 [[IDX]], ptr [[LOC]], align 4
+; CHECK-NEXT: [[IDX_DEC]] = add nuw i32 [[IDX]], 1
+; CHECK-NEXT: [[COND:%.*]] = icmp slt i32 [[X]], [[Y]]
+; CHECK-NEXT: br i1 [[COND]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ %z = call i32 @llvm.vscale.i32()
+ %x = mul nsw i32 %z, 9
+ %y = mul nsw i32 %z, 5
+ br label %loop
+
+loop:
+ %idx = phi i32 [ 0, %entry ], [ %idx.dec, %loop ]
+ store i32 %idx, ptr %loc
+ %idx.dec = add nuw i32 %idx, 1
+ %cond = icmp slt i32 %x, %y
+ br i1 %cond, label %loop, label %exit
+
+exit:
+ ret void
+}
+
+define void @signed_icmp_mul_common_multiplicand_mixed_arith(ptr %loc) vscale_range(1,1073741824) {
+; CHECK-LABEL: define void @signed_icmp_mul_common_multiplicand_mixed_arith(
+; CHECK-SAME: ptr [[LOC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[VS1:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT: [[VS2:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT: [[X:%.*]] = mul nsw i32 9, [[VS1]]
+; CHECK-NEXT: [[Y:%.*]] = shl nsw i32 [[VS2]], 2
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IDX_DEC:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: store i32 [[IDX]], ptr [[LOC]], align 4
+; CHECK-NEXT: [[IDX_DEC]] = add nuw i32 [[IDX]], 1
+; CHECK-NEXT: [[COND:%.*]] = icmp slt i32 [[X]], [[Y]]
+; CHECK-NEXT: br i1 [[COND]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ %vs1 = call i32 @llvm.vscale.i32()
+ %vs2 = call i32 @llvm.vscale.i32()
+ %x = mul nsw i32 9, %vs1
+ %y = shl nsw i32 %vs2, 2
+ br label %loop
+
+loop:
+ %idx = phi i32 [ 0, %entry ], [ %idx.dec, %loop ]
+ store i32 %idx, ptr %loc
+ %idx.dec = add nuw i32 %idx, 1
+ %cond = icmp slt i32 %x, %y
+ br i1 %cond, label %loop, label %exit
+
+exit:
+ ret void
+}
+
+define void @signed_icmp_mul_common_multiplicand_potential_wrapping(ptr %loc) vscale_range(1,1073741824) {
+; CHECK-LABEL: define void @signed_icmp_mul_common_multiplicand_potential_wrapping(
+; CHECK-SAME: ptr [[LOC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[Z:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT: [[X:%.*]] = mul nsw i32 5, [[Z]]
+; CHECK-NEXT: [[Y:%.*]] = mul i32 9, [[Z]]
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IDX_DEC:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: store i32 [[IDX]], ptr [[LOC]], align 4
+; CHECK-NEXT: [[IDX_DEC]] = add nuw i32 [[IDX]], 1
+; CHECK-NEXT: [[COND:%.*]] = icmp sgt i32 [[X]], [[Y]]
+; CHECK-NEXT: br i1 [[COND]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ %z = call i32 @llvm.vscale.i32()
+ %x = mul nsw i32 5, %z
+ %y = mul i32 9, %z
+ br label %loop
+
+loop:
+ %idx = phi i32 [ 0, %entry ], [ %idx.dec, %loop ]
+ store i32 %idx, ptr %loc
+ %idx.dec = add nuw i32 %idx, 1
+ %cond = icmp sgt i32 %x, %y
+ br i1 %cond, label %loop, label %exit
+
+exit:
+ ret void
+}
+
+define void @signed_icmp_mul_common_multiplicand_potential_wrapping_2(ptr %loc) vscale_range(1,1073741824) {
+; CHECK-LABEL: define void @signed_icmp_mul_common_multiplicand_potential_wrapping_2(
+; CHECK-SAME: ptr [[LOC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[Z:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT: [[X:%.*]] = mul i32 9, [[Z]]
+; CHECK-NEXT: [[Y:%.*]] = mul nsw i32 5, [[Z]]
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IDX_DEC:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: store i32 [[IDX]], ptr [[LOC]], align 4
+; CHECK-NEXT: [[IDX_DEC]] = add nuw i32 [[IDX]], 1
+; CHECK-NEXT: [[COND:%.*]] = icmp slt i32 [[X]], [[Y]]
+; CHECK-NEXT: br i1 [[COND]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ %z = call i32 @llvm.vscale.i32()
+ %x = mul i32 9, %z
+ %y = mul nsw i32 5, %z
+ br label %loop
+
+loop:
+ %idx = phi i32 [ 0, %entry ], [ %idx.dec, %loop ]
+ store i32 %idx, ptr %loc
+ %idx.dec = add nuw i32 %idx, 1
+ %cond = icmp slt i32 %x, %y
+ br i1 %cond, label %loop, label %exit
+
+exit:
+ ret void
+}
+
+define void @signed_icmp_mul_common_but_potentially_non_positive_multiplicand(ptr %loc, i32 %z) {
+; CHECK-LABEL: define void @signed_icmp_mul_common_but_potentially_non_positive_multiplicand(
+; CHECK-SAME: ptr [[LOC:%.*]], i32 [[Z:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[X:%.*]] = mul nsw i32 9, [[Z]]
+; CHECK-NEXT: [[Y:%.*]] = mul nsw i32 5, [[Z]]
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IDX_DEC:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: store i32 [[IDX]], ptr [[LOC]], align 4
+; CHECK-NEXT: [[IDX_DEC]] = add nuw i32 [[IDX]], 1
+; CHECK-NEXT: [[COND:%.*]] = icmp slt i32 [[X]], [[Y]]
+; CHECK-NEXT: br i1 [[COND]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ %x = mul nsw i32 9, %z
+ %y = mul nsw i32 5, %z
+ br label %loop
+
+loop:
+ %idx = phi i32 [ 0, %entry ], [ %idx.dec, %loop ]
+ store i32 %idx, ptr %loc
+ %idx.dec = add nuw i32 %idx, 1
+ %cond = icmp slt i32 %x, %y
+ br i1 %cond, label %loop, label %exit
+
+exit:
+ ret void
+}
+
+define void @unsigned_icmp_mul_common_multiplicand(ptr %loc) {
+; CHECK-LABEL: define void @unsigned_icmp_mul_common_multiplicand(
+; CHECK-SAME: ptr [[LOC:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[Z:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT: [[X:%.*]] = mul nuw i32 9, [[Z]]
+; CHECK-NEXT: [[Y:%.*]] = mul nuw i32 5, [[Z]]
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IDX_DEC:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: store i32 [[IDX]], ptr [[LOC]], align 4
+; CHECK-NEXT: [[IDX_DEC]] = add nuw i32 [[IDX]], 1
+; CHECK-NEXT: [[COND:%.*]] = icmp ult i32 [[X]], [[Y]]
+; CHECK-NEXT: br i1 [[COND]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ %z = call i32 @llvm.vscale.i32()
+ %x = mul nuw i32 9, %z
+ %y = mul nuw i32 5, %z
+ br label %loop
+
+loop:
+ %idx = phi i32 [ 0, %entry ], [ %idx.dec, %loop ]
+ store i32 %idx, ptr %loc
+ %idx.dec = add nuw i32 %idx, 1
+ %cond = icmp ult i32 %x, %y
+ br i1 %cond, label %loop, label %exit
+
+exit:
+ ret void
+}
+
+define void @unsigned_icmp_mul_common_multiplicand_commuted(ptr %loc) {
+; CHECK-LABEL: define void @unsigned_icmp_mul_common_multiplicand_commuted(
+; CHECK-SAME: ptr [[LOC:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[Z:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT: [[X:%.*]] = mul nuw i32 [[Z]], 9
+; CHECK-NEXT: [[Y:%.*]] = mul nuw i32 [[Z]], 5
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IDX_DEC:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: store i32 [[IDX]], ptr [[LOC]], align 4
+; CHECK-NEXT: [[IDX_DEC]] = add nuw i32 [[IDX]], 1
+; CHECK-NEXT: [[COND:%.*]] = icmp ult i32 [[X]], [[Y]]
+; CHECK-NEXT: br i1 [[COND]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ %z = call i32 @llvm.vscale.i32()
+ %x = mul nuw i32 %z, 9
+ %y = mul nuw i32 %z, 5
+ br label %loop
+
+loop:
+ %idx = phi i32 [ 0, %entry ], [ %idx.dec, %loop ]
+ store i32 %idx, ptr %loc
+ %idx.dec = add nuw i32 %idx, 1
+ %cond = icmp ult i32 %x, %y
+ br i1 %cond, label %loop, label %exit
+
+exit:
+ ret void
+}
+
+define void @unsigned_icmp_mul_common_multiplicand_mixed_arith(ptr %loc) {
+; CHECK-LABEL: define void @unsigned_icmp_mul_common_multiplicand_mixed_arith(
+; CHECK-SAME: ptr [[LOC:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[VS1:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT: [[VS2:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT: [[X:%.*]] = mul nuw i32 9, [[VS1]]
+; CHECK-NEXT: [[Y:%.*]] = shl nuw i32 [[VS2]], 2
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IDX_DEC:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: store i32 [[IDX]], ptr [[LOC]], align 4
+; CHECK-NEXT: [[IDX_DEC]] = add nuw i32 [[IDX]], 1
+; CHECK-NEXT: [[COND:%.*]] = icmp ult i32 [[X]], [[Y]]
+; CHECK-NEXT: br i1 [[COND]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ %vs1 = call i32 @llvm.vscale.i32()
+ %vs2 = call i32 @llvm.vscale.i32()
+ %x = mul nuw i32 9, %vs1
+ %y = shl nuw i32 %vs2, 2
+ br label %loop
+
+loop:
+ %idx = phi i32 [ 0, %entry ], [ %idx.dec, %loop ]
+ store i32 %idx, ptr %loc
+ %idx.dec = add nuw i32 %idx, 1
+ %cond = icmp ult i32 %x, %y
+ br i1 %cond, label %loop, label %exit
+
+exit:
+ ret void
+}
+
+define void @unsigned_icmp_mul_common_multiplicand_potential_wrapping(ptr %loc) {
+; CHECK-LABEL: define void @unsigned_icmp_mul_common_multiplicand_potential_wrapping(
+; CHECK-SAME: ptr [[LOC:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[Z:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT: [[X:%.*]] = mul nuw i32 5, [[Z]]
+; CHECK-NEXT: [[Y:%.*]] = mul i32 9, [[Z]]
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IDX_DEC:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: store i32 [[IDX]], ptr [[LOC]], align 4
+; CHECK-NEXT: [[IDX_DEC]] = add nuw i32 [[IDX]], 1
+; CHECK-NEXT: [[COND:%.*]] = icmp ugt i32 [[X]], [[Y]]
+; CHECK-NEXT: br i1 [[COND]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ %z = call i32 @llvm.vscale.i32()
+ %x = mul nuw i32 5, %z
+ %y = mul i32 9, %z
+ br label %loop
+
+loop:
+ %idx = phi i32 [ 0, %entry ], [ %idx.dec, %loop ]
+ store i32 %idx, ptr %loc
+ %idx.dec = add nuw i32 %idx, 1
+ %cond = icmp ugt i32 %x, %y
+ br i1 %cond, label %loop, label %exit
+
+exit:
+ ret void
+}
+
+define void @unsigned_icmp_mul_common_multiplicand_potential_wrapping_2(ptr %loc) {
+; CHECK-LABEL: define void @unsigned_icmp_mul_common_multiplicand_potential_wrapping_2(
+; CHECK-SAME: ptr [[LOC:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[Z:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT: [[X:%.*]] = mul i32 9, [[Z]]
+; CHECK-NEXT: [[Y:%.*]] = mul nuw i32 5, [[Z]]
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IDX_DEC:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: store i32 [[IDX]], ptr [[LOC]], align 4
+; CHECK-NEXT: [[IDX_DEC]] = add nuw i32 [[IDX]], 1
+; CHECK-NEXT: [[COND:%.*]] = icmp ult i32 [[X]], [[Y]]
+; CHECK-NEXT: br i1 [[COND]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ %z = call i32 @llvm.vscale.i32()
+ %x = mul i32 9, %z
+ %y = mul nuw i32 5, %z
+ br label %loop
+
+loop:
+ %idx = phi i32 [ 0, %entry ], [ %idx.dec, %loop ]
+ store i32 %idx, ptr %loc
+ %idx.dec = add nuw i32 %idx, 1
+ %cond = icmp ult i32 %x, %y
+ br i1 %cond, label %loop, label %exit
+
+exit:
+ ret void
+}
+
+define void @unsigned_icmp_mul_common_but_potentially_zero_multiplicand(ptr %loc, i32 %z) {
+; CHECK-LABEL: define void @unsigned_icmp_mul_common_but_potentially_zero_multiplicand(
+; CHECK-SAME: ptr [[LOC:%.*]], i32 [[Z:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[X:%.*]] = mul nuw i32 9, [[Z]]
+; CHECK-NEXT: [[Y:%.*]] = mul nuw i32 5, [[Z]]
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IDX_DEC:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: store i32 [[IDX]], ptr [[LOC]], align 4
+; CHECK-NEXT: [[IDX_DEC]] = add nuw i32 [[IDX]], 1
+; CHECK-NEXT: [[COND:%.*]] = icmp ult i32 [[X]], [[Y]]
+; CHECK-NEXT: br i1 [[COND]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ %x = mul nuw i32 9, %z
+ %y = mul nuw i32 5, %z
+ br label %loop
+
+loop:
+ %idx = phi i32 [ 0, %entry ], [ %idx.dec, %loop ]
+ store i32 %idx, ptr %loc
+ %idx.dec = add nuw i32 %idx, 1
+ %cond = icmp ult i32 %x, %y
+ br i1 %cond, label %loop, label %exit
+
+exit:
+ ret void
+}
+
+declare i32 @llvm.vscale.i32()
>From e4dcb872fa77abf1f386a5493a3345cc13329bdd Mon Sep 17 00:00:00 2001
From: Paul Walker <paul.walker at arm.com>
Date: Thu, 15 May 2025 17:43:23 +0100
Subject: [PATCH 2/4] [LLVM][SCEV] Look through common multiplicand when
simplifying compares.
My use case is simplifying the control flow generated by LoopVectorize
when vectorising loops whose trip count is a function of the runtime
vector length. This can be problematic because:
* CSE is a pre-LoopVectorize transform and so it's common for an IR
function to include several calls to llvm.vscale(). (NOTE: Code
generation will typically remove the duplicates.)
* Pre-LoopVectorize instcombines will rewrite some multiplies as
shifts. This leads to a mismatch between the VL-based maths of the
scalar loop and that created for the vector loop, which prevents some
obvious simplifications.
SCEV does not suffer these issues because it effectively does CSE
during construction and represents shifts as multiplies.
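As an illustrative sketch (mirroring the new
@unsigned_icmp_mul_common_multiplicand test, so the values are taken from
the patch rather than invented), the fold lets SCEV prove the exit
condition below is loop invariant and false, allowing IndVarSimplify to
replace the conditional branch with br i1 false:

  %z = call i32 @llvm.vscale.i32()
  %x = mul nuw i32 9, %z
  %y = mul nuw i32 5, %z
  ...
  %cond = icmp ult i32 %x, %y   ; (9 * Z) ult (5 * Z) ==> 9 ult 5 ==> false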
---
llvm/lib/Analysis/ScalarEvolution.cpp | 16 +++++++
.../AArch64/sve-vscale-based-trip-counts.ll | 48 ++++++-------------
2 files changed, 31 insertions(+), 33 deletions(-)
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 0990a0daac80c..dac05d7aab2d6 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -10748,6 +10748,22 @@ bool ScalarEvolution::SimplifyICmpOperands(CmpPredicate &Pred, const SCEV *&LHS,
if (Depth >= 3)
return false;
+ // (X * Z) icmp (Y * Z) ==> X icmp Y
+ // when neither multiply wraps and Z is positive.
+ if (isa<SCEVMulExpr>(LHS) && isa<SCEVMulExpr>(RHS)) {
+ const SCEVMulExpr *LMul = cast<SCEVMulExpr>(LHS);
+ const SCEVMulExpr *RMul = cast<SCEVMulExpr>(RHS);
+
+ if (LMul->getNumOperands() == 2 && RMul->getNumOperands() == 2 &&
+ LMul->getOperand(1) == RMul->getOperand(1) &&
+ isKnownPositive(LMul->getOperand(1)) && ICmpInst::isUnsigned(Pred) &&
+ LMul->hasNoUnsignedWrap() && RMul->hasNoUnsignedWrap()) {
+ LHS = LMul->getOperand(0);
+ RHS = RMul->getOperand(0);
+ Changed = true;
+ }
+ }
+
// Canonicalize a constant to the right side.
if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS)) {
// Check for both operands constant.
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vscale-based-trip-counts.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vscale-based-trip-counts.ll
index 352f4fe3dae21..e78011a351f5f 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vscale-based-trip-counts.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vscale-based-trip-counts.ll
@@ -9,8 +9,8 @@ define void @vscale_mul_4(ptr noalias noundef readonly captures(none) %a, ptr no
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP10]], 4
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], [[TMP5]]
@@ -19,8 +19,8 @@ define void @vscale_mul_4(ptr noalias noundef readonly captures(none) %a, ptr no
; CHECK-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 4
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[A]], align 4
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x float>, ptr [[B]], align 4
-; CHECK-NEXT: [[TMP10:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD1]]
-; CHECK-NEXT: store <vscale x 4 x float> [[TMP10]], ptr [[B]], align 4
+; CHECK-NEXT: [[TMP8:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD1]]
+; CHECK-NEXT: store <vscale x 4 x float> [[TMP8]], ptr [[B]], align 4
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY:.*]]
; CHECK: [[FOR_COND_CLEANUP]]:
@@ -134,9 +134,6 @@ define void @vscale_mul_12(ptr noalias noundef readonly captures(none) %a, ptr n
; CHECK-NEXT: [[MUL1:%.*]] = mul nuw nsw i64 [[TMP0]], 12
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 4
-; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[MUL1]], [[TMP2]]
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
-; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[MUL1]], [[TMP4]]
@@ -145,7 +142,7 @@ define void @vscale_mul_12(ptr noalias noundef readonly captures(none) %a, ptr n
; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 4
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP7]], align 4
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[INDEX]]
@@ -153,18 +150,15 @@ define void @vscale_mul_12(ptr noalias noundef readonly captures(none) %a, ptr n
; CHECK-NEXT: [[TMP11:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD1]]
; CHECK-NEXT: store <vscale x 4 x float> [[TMP11]], ptr [[TMP9]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
-; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[MUL1]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_COND_CLEANUP:.*]], label %[[SCALAR_PH]]
-; CHECK: [[SCALAR_PH]]:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
-; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY:.*]]
; CHECK: [[FOR_COND_CLEANUP]]:
; CHECK-NEXT: ret void
; CHECK: [[FOR_BODY]]:
-; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ [[N_VEC]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP13:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[INDVARS_IV]]
@@ -204,9 +198,6 @@ define void @vscale_mul_31(ptr noalias noundef readonly captures(none) %a, ptr n
; CHECK-NEXT: [[MUL1:%.*]] = mul nuw nsw i64 [[TMP0]], 31
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 8
-; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[MUL1]], [[TMP2]]
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
-; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 8
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[MUL1]], [[TMP4]]
@@ -215,7 +206,7 @@ define void @vscale_mul_31(ptr noalias noundef readonly captures(none) %a, ptr n
; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 8
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4
@@ -240,14 +231,11 @@ define void @vscale_mul_31(ptr noalias noundef readonly captures(none) %a, ptr n
; CHECK-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[MUL1]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_COND_CLEANUP:.*]], label %[[SCALAR_PH]]
-; CHECK: [[SCALAR_PH]]:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
-; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY:.*]]
; CHECK: [[FOR_COND_CLEANUP]]:
; CHECK-NEXT: ret void
; CHECK: [[FOR_BODY]]:
-; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ [[N_VEC]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP23:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[INDVARS_IV]]
@@ -287,9 +275,6 @@ define void @vscale_mul_64(ptr noalias noundef readonly captures(none) %a, ptr n
; CHECK-NEXT: [[MUL1:%.*]] = mul nuw nsw i64 [[TMP0]], 64
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 8
-; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[MUL1]], [[TMP2]]
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
-; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 8
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[MUL1]], [[TMP4]]
@@ -298,7 +283,7 @@ define void @vscale_mul_64(ptr noalias noundef readonly captures(none) %a, ptr n
; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 8
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4
@@ -323,14 +308,11 @@ define void @vscale_mul_64(ptr noalias noundef readonly captures(none) %a, ptr n
; CHECK-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[MUL1]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_COND_CLEANUP:.*]], label %[[SCALAR_PH]]
-; CHECK: [[SCALAR_PH]]:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
-; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY:.*]]
; CHECK: [[FOR_COND_CLEANUP]]:
; CHECK-NEXT: ret void
; CHECK: [[FOR_BODY]]:
-; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ [[N_VEC]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP23:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[INDVARS_IV]]
>From 0df38768f25ae3dcf16b8ad76100e09c759d63c6 Mon Sep 17 00:00:00 2001
From: Paul Walker <paul.walker at arm.com>
Date: Fri, 30 May 2025 14:40:11 +0100
Subject: [PATCH 3/4] Extend to cover signed comparisons.
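For signed comparisons the common multiplicand must be known positive, not
merely non-zero, and both multiplies must be nsw. As a worked (illustrative)
counterexample, with Z = -1 we have (9 * Z) slt (5 * Z), i.e. -9 slt -5,
even though 9 slt 5 is false, so a possibly-negative Z would make the
fold unsound.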
---
llvm/lib/Analysis/ScalarEvolution.cpp | 26 +++++--
.../ScalarEvolution/simplify-icmp-ops.ll | 74 +++++--------------
2 files changed, 36 insertions(+), 64 deletions(-)
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index dac05d7aab2d6..d9e0a7f98cabd 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -10748,19 +10748,29 @@ bool ScalarEvolution::SimplifyICmpOperands(CmpPredicate &Pred, const SCEV *&LHS,
if (Depth >= 3)
return false;
- // (X * Z) icmp (Y * Z) ==> X icmp Y
- // when neither multiply wraps and Z is positive.
if (isa<SCEVMulExpr>(LHS) && isa<SCEVMulExpr>(RHS)) {
const SCEVMulExpr *LMul = cast<SCEVMulExpr>(LHS);
const SCEVMulExpr *RMul = cast<SCEVMulExpr>(RHS);
if (LMul->getNumOperands() == 2 && RMul->getNumOperands() == 2 &&
- LMul->getOperand(1) == RMul->getOperand(1) &&
- isKnownPositive(LMul->getOperand(1)) && ICmpInst::isUnsigned(Pred) &&
- LMul->hasNoUnsignedWrap() && RMul->hasNoUnsignedWrap()) {
- LHS = LMul->getOperand(0);
- RHS = RMul->getOperand(0);
- Changed = true;
+ LMul->getOperand(1) == RMul->getOperand(1)) {
+ // (X * Z) uicmp (Y * Z) ==> X uicmp Y
+ // when neither multiply wraps and Z is non-zero.
+ if (ICmpInst::isUnsigned(Pred) && isKnownNonZero(LMul->getOperand(1)) &&
+ LMul->hasNoUnsignedWrap() && RMul->hasNoUnsignedWrap()) {
+ LHS = LMul->getOperand(0);
+ RHS = RMul->getOperand(0);
+ Changed = true;
+ }
+ // (X * Z) sicmp (Y * Z) ==> X sicmp Y
+ // when neither multiply wraps and Z is positive.
+ else if (ICmpInst::isSigned(Pred) &&
+ isKnownPositive(LMul->getOperand(1)) &&
+ LMul->hasNoSignedWrap() && RMul->hasNoSignedWrap()) {
+ LHS = LMul->getOperand(0);
+ RHS = RMul->getOperand(0);
+ Changed = true;
+ }
}
}
diff --git a/llvm/test/Analysis/ScalarEvolution/simplify-icmp-ops.ll b/llvm/test/Analysis/ScalarEvolution/simplify-icmp-ops.ll
index 87fedaa5c3556..3497991ec37d1 100644
--- a/llvm/test/Analysis/ScalarEvolution/simplify-icmp-ops.ll
+++ b/llvm/test/Analysis/ScalarEvolution/simplify-icmp-ops.ll
@@ -8,17 +8,11 @@
define void @signed_icmp_mul_common_multiplicand(ptr %loc) vscale_range(1,1073741824) {
; CHECK-LABEL: define void @signed_icmp_mul_common_multiplicand(
; CHECK-SAME: ptr [[LOC:%.*]]) #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT: [[ENTRY:.*]]:
-; CHECK-NEXT: [[Z:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT: [[X:%.*]] = mul nsw i32 9, [[Z]]
-; CHECK-NEXT: [[Y:%.*]] = mul nsw i32 5, [[Z]]
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
-; CHECK-NEXT: [[IDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IDX_DEC:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: store i32 [[IDX]], ptr [[LOC]], align 4
-; CHECK-NEXT: [[IDX_DEC]] = add nuw i32 [[IDX]], 1
-; CHECK-NEXT: [[COND:%.*]] = icmp slt i32 [[X]], [[Y]]
-; CHECK-NEXT: br i1 [[COND]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK-NEXT: store i32 0, ptr [[LOC]], align 4
+; CHECK-NEXT: br i1 false, label %[[LOOP]], label %[[EXIT:.*]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
@@ -42,17 +36,11 @@ exit:
define void @signed_icmp_mul_common_multiplicand_commuted(ptr %loc) vscale_range(1,1073741824) {
; CHECK-LABEL: define void @signed_icmp_mul_common_multiplicand_commuted(
; CHECK-SAME: ptr [[LOC:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[ENTRY:.*]]:
-; CHECK-NEXT: [[Z:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT: [[X:%.*]] = mul nsw i32 [[Z]], 9
-; CHECK-NEXT: [[Y:%.*]] = mul nsw i32 [[Z]], 5
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
-; CHECK-NEXT: [[IDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IDX_DEC:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: store i32 [[IDX]], ptr [[LOC]], align 4
-; CHECK-NEXT: [[IDX_DEC]] = add nuw i32 [[IDX]], 1
-; CHECK-NEXT: [[COND:%.*]] = icmp slt i32 [[X]], [[Y]]
-; CHECK-NEXT: br i1 [[COND]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK-NEXT: store i32 0, ptr [[LOC]], align 4
+; CHECK-NEXT: br i1 false, label %[[LOOP]], label %[[EXIT:.*]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
@@ -76,18 +64,11 @@ exit:
define void @signed_icmp_mul_common_multiplicand_mixed_arith(ptr %loc) vscale_range(1,1073741824) {
; CHECK-LABEL: define void @signed_icmp_mul_common_multiplicand_mixed_arith(
; CHECK-SAME: ptr [[LOC:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[ENTRY:.*]]:
-; CHECK-NEXT: [[VS1:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT: [[VS2:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT: [[X:%.*]] = mul nsw i32 9, [[VS1]]
-; CHECK-NEXT: [[Y:%.*]] = shl nsw i32 [[VS2]], 2
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
-; CHECK-NEXT: [[IDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IDX_DEC:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: store i32 [[IDX]], ptr [[LOC]], align 4
-; CHECK-NEXT: [[IDX_DEC]] = add nuw i32 [[IDX]], 1
-; CHECK-NEXT: [[COND:%.*]] = icmp slt i32 [[X]], [[Y]]
-; CHECK-NEXT: br i1 [[COND]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK-NEXT: store i32 0, ptr [[LOC]], align 4
+; CHECK-NEXT: br i1 false, label %[[LOOP]], label %[[EXIT:.*]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
@@ -212,17 +193,11 @@ exit:
define void @unsigned_icmp_mul_common_multiplicand(ptr %loc) {
; CHECK-LABEL: define void @unsigned_icmp_mul_common_multiplicand(
; CHECK-SAME: ptr [[LOC:%.*]]) {
-; CHECK-NEXT: [[ENTRY:.*]]:
-; CHECK-NEXT: [[Z:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT: [[X:%.*]] = mul nuw i32 9, [[Z]]
-; CHECK-NEXT: [[Y:%.*]] = mul nuw i32 5, [[Z]]
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
-; CHECK-NEXT: [[IDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IDX_DEC:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: store i32 [[IDX]], ptr [[LOC]], align 4
-; CHECK-NEXT: [[IDX_DEC]] = add nuw i32 [[IDX]], 1
-; CHECK-NEXT: [[COND:%.*]] = icmp ult i32 [[X]], [[Y]]
-; CHECK-NEXT: br i1 [[COND]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK-NEXT: store i32 0, ptr [[LOC]], align 4
+; CHECK-NEXT: br i1 false, label %[[LOOP]], label %[[EXIT:.*]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
@@ -246,17 +221,11 @@ exit:
define void @unsigned_icmp_mul_common_multiplicand_commuted(ptr %loc) {
; CHECK-LABEL: define void @unsigned_icmp_mul_common_multiplicand_commuted(
; CHECK-SAME: ptr [[LOC:%.*]]) {
-; CHECK-NEXT: [[ENTRY:.*]]:
-; CHECK-NEXT: [[Z:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT: [[X:%.*]] = mul nuw i32 [[Z]], 9
-; CHECK-NEXT: [[Y:%.*]] = mul nuw i32 [[Z]], 5
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
-; CHECK-NEXT: [[IDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IDX_DEC:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: store i32 [[IDX]], ptr [[LOC]], align 4
-; CHECK-NEXT: [[IDX_DEC]] = add nuw i32 [[IDX]], 1
-; CHECK-NEXT: [[COND:%.*]] = icmp ult i32 [[X]], [[Y]]
-; CHECK-NEXT: br i1 [[COND]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK-NEXT: store i32 0, ptr [[LOC]], align 4
+; CHECK-NEXT: br i1 false, label %[[LOOP]], label %[[EXIT:.*]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
@@ -280,18 +249,11 @@ exit:
define void @unsigned_icmp_mul_common_multiplicand_mixed_arith(ptr %loc) {
; CHECK-LABEL: define void @unsigned_icmp_mul_common_multiplicand_mixed_arith(
; CHECK-SAME: ptr [[LOC:%.*]]) {
-; CHECK-NEXT: [[ENTRY:.*]]:
-; CHECK-NEXT: [[VS1:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT: [[VS2:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT: [[X:%.*]] = mul nuw i32 9, [[VS1]]
-; CHECK-NEXT: [[Y:%.*]] = shl nuw i32 [[VS2]], 2
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
-; CHECK-NEXT: [[IDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IDX_DEC:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: store i32 [[IDX]], ptr [[LOC]], align 4
-; CHECK-NEXT: [[IDX_DEC]] = add nuw i32 [[IDX]], 1
-; CHECK-NEXT: [[COND:%.*]] = icmp ult i32 [[X]], [[Y]]
-; CHECK-NEXT: br i1 [[COND]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK-NEXT: store i32 0, ptr [[LOC]], align 4
+; CHECK-NEXT: br i1 false, label %[[LOOP]], label %[[EXIT:.*]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
>From 057f9add4a7e154715bc9a8e32ff7a66296adc10 Mon Sep 17 00:00:00 2001
From: Paul Walker <paul.walker at arm.com>
Date: Wed, 18 Jun 2025 16:15:10 +0100
Subject: [PATCH 4/4] Make algorithm independent of operand order.
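SCEVMulExpr operands are kept in a canonical order, so the shared
multiplicand is not guaranteed to sit at the same operand index on both
sides; the new unit test below builds multiplies where the vscale factor
appears at different indices. The following is a simplified restatement of
the search the patch adds (a sketch only, hoisted out of
SimplifyICmpOperands and assuming the usual ScalarEvolutionExpressions.h
and <optional> includes; the real code is the FindCommonFactor lambda in
the diff):

  // Scan both multiplies for a shared operand that satisfies Predicate,
  // returning the operand indices on each side if one is found.
  static std::optional<std::pair<int, int>>
  findCommonFactor(ScalarEvolution &SE, const SCEVMulExpr *L,
                   const SCEVMulExpr *R,
                   bool (ScalarEvolution::*Predicate)(const SCEV *)) {
    for (int I = 0, E = L->getNumOperands(); I != E; ++I)
      for (int J = 0, F = R->getNumOperands(); J != F; ++J)
        if (L->getOperand(I) == R->getOperand(J) &&
            (SE.*Predicate)(L->getOperand(I)))
          return std::make_pair(I, J);
    return std::nullopt;
  }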
---
llvm/lib/Analysis/ScalarEvolution.cpp | 63 ++++++++++++++-----
.../Analysis/ScalarEvolutionTest.cpp | 51 +++++++++++++++
2 files changed, 98 insertions(+), 16 deletions(-)
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index d9e0a7f98cabd..aa1606a0c4620 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -10752,23 +10752,54 @@ bool ScalarEvolution::SimplifyICmpOperands(CmpPredicate &Pred, const SCEV *&LHS,
const SCEVMulExpr *LMul = cast<SCEVMulExpr>(LHS);
const SCEVMulExpr *RMul = cast<SCEVMulExpr>(RHS);
- if (LMul->getNumOperands() == 2 && RMul->getNumOperands() == 2 &&
- LMul->getOperand(1) == RMul->getOperand(1)) {
- // (X * Z) uicmp (Y * Z) ==> X uicmp Y
- // when neither multiply wraps and Z is non-zero.
- if (ICmpInst::isUnsigned(Pred) && isKnownNonZero(LMul->getOperand(1)) &&
- LMul->hasNoUnsignedWrap() && RMul->hasNoUnsignedWrap()) {
- LHS = LMul->getOperand(0);
- RHS = RMul->getOperand(0);
- Changed = true;
+ auto FindCommonFactor =
+ [&](const SCEVMulExpr *LHS, const SCEVMulExpr *RHS,
+ bool (ScalarEvolution::*Predicate)(
+ const SCEV *)) -> std::optional<std::pair<int, int>> {
+ for (int i = 0, e = LHS->getNumOperands(); i != e; ++i)
+ for (int j = 0, e = RHS->getNumOperands(); j != e; ++j)
+ if (LHS->getOperand(i) == RHS->getOperand(j) &&
+ (this->*Predicate)(LHS->getOperand(i)))
+ return std::make_pair(i, j);
+
+ return std::nullopt;
+ };
+
+ // (X * Z) uicmp (Z * Y) ==> X uicmp Y
+ // when neither multiply wraps and Z is non-zero.
+ if (ICmpInst::isUnsigned(Pred)) {
+ if (LMul->hasNoUnsignedWrap() && RMul->hasNoUnsignedWrap()) {
+ if (auto Indices = FindCommonFactor(LMul, RMul,
+ &ScalarEvolution::isKnownNonZero)) {
+ SmallVector<const SCEV *, 2> LHSOps;
+ append_range(LHSOps, LHS->operands().take_front(Indices->first));
+ append_range(LHSOps, LHS->operands().drop_front(Indices->first + 1));
+ LHS = getMulExpr(LHSOps);
+
+ SmallVector<const SCEV *, 2> RHSOps;
+ append_range(RHSOps, RHS->operands().take_front(Indices->second));
+ append_range(RHSOps, RHS->operands().drop_front(Indices->second + 1));
+ RHS = getMulExpr(RHSOps);
+
+ Changed = true;
+ }
}
- // (X * Z) sicmp (Y * Z) ==> X sicmp Y
- // when neither multiply wraps and Z is positive.
- else if (ICmpInst::isSigned(Pred) &&
- isKnownPositive(LMul->getOperand(1)) &&
- LMul->hasNoSignedWrap() && RMul->hasNoSignedWrap()) {
- LHS = LMul->getOperand(0);
- RHS = RMul->getOperand(0);
+ }
+ // (X * Z) sicmp (Z * Y) ==> X sicmp Y
+ // when neither multiply wraps and Z is positive.
+ else if (LMul->hasNoSignedWrap() && RMul->hasNoSignedWrap()) {
+ if (auto Indices =
+ FindCommonFactor(LMul, RMul, &ScalarEvolution::isKnownPositive)) {
+ SmallVector<const SCEV *, 2> LHSOps;
+ append_range(LHSOps, LHS->operands().take_front(Indices->first));
+ append_range(LHSOps, LHS->operands().drop_front(Indices->first + 1));
+ LHS = getMulExpr(LHSOps);
+
+ SmallVector<const SCEV *, 2> RHSOps;
+ append_range(RHSOps, RHS->operands().take_front(Indices->second));
+ append_range(RHSOps, RHS->operands().drop_front(Indices->second + 1));
+ RHS = getMulExpr(RHSOps);
+
Changed = true;
}
}
diff --git a/llvm/unittests/Analysis/ScalarEvolutionTest.cpp b/llvm/unittests/Analysis/ScalarEvolutionTest.cpp
index 678960418d7d7..8eb5ecd2e593d 100644
--- a/llvm/unittests/Analysis/ScalarEvolutionTest.cpp
+++ b/llvm/unittests/Analysis/ScalarEvolutionTest.cpp
@@ -1768,4 +1768,55 @@ TEST_F(ScalarEvolutionsTest, ComplexityComparatorIsStrictWeakOrdering3) {
SE.getSCEV(Or1);
}
+TEST_F(ScalarEvolutionsTest, SimplifyICmpOperandsCommutability) {
+ LLVMContext C;
+ SMDiagnostic Err;
+ std::unique_ptr<Module> M = parseAssemblyString(
+ "define i32 @foo(ptr %loc, i32 range(i32 0, 10) %a,"
+ " i32 range(i32 100, 200) %b) vscale_range(1,1073741824) {"
+ "entry: "
+ " %c = call i32 @llvm.vscale.i32()"
+ " ret i32 %c "
+ "} ",
+ Err, C);
+
+ ASSERT_TRUE(M && "Could not parse module?");
+ ASSERT_TRUE(!verifyModule(*M) && "Must have been well formed!");
+
+ runWithSE(*M, "foo", [](Function &F, LoopInfo &LI, ScalarEvolution &SE) {
+ const SCEV *A = SE.getSCEV(getArgByName(F, "a"));
+ const SCEV *B = SE.getSCEV(getArgByName(F, "b"));
+ const SCEV *VS = SE.getSCEV(getInstructionByName(F, "c"));
+ const SCEV *Two = SE.getConstant(A->getType(), 2);
+ SCEV::NoWrapFlags Flags =
+ ScalarEvolution::setFlags(SCEV::FlagNUW, SCEV::FlagNSW);
+
+ SmallVector<const SCEV *, 2> Ops0 = {A, VS};
+ SmallVector<const SCEV *, 2> Ops1 = {B, VS};
+ SmallVector<const SCEV *, 3> Ops2 = {A, Two, VS};
+ SmallVector<const SCEV *, 3> Ops3 = {B, Two, VS};
+
+ const SCEV *AxVS = SE.getMulExpr(Ops0, Flags);
+ const SCEV *BxVS = SE.getMulExpr(Ops1, Flags);
+ const SCEV *Ax2xVS = SE.getMulExpr(Ops2, Flags);
+ const SCEV *Bx2xVS = SE.getMulExpr(Ops3, Flags);
+
+ // Verify VScale factor is available at different indices.
+ EXPECT_TRUE(isa<SCEVVScale>(cast<SCEVMulExpr>(AxVS)->getOperand(0)) !=
+ isa<SCEVVScale>(cast<SCEVMulExpr>(Ax2xVS)->getOperand(0)));
+ EXPECT_TRUE(isa<SCEVVScale>(cast<SCEVMulExpr>(BxVS)->getOperand(0)) !=
+ isa<SCEVVScale>(cast<SCEVMulExpr>(Bx2xVS)->getOperand(0)));
+
+ // Verify the common factor's position does not impede simplification.
+ EXPECT_TRUE(SE.isKnownPredicate(ICmpInst::ICMP_SLT, AxVS, BxVS));
+ EXPECT_TRUE(SE.isKnownPredicate(ICmpInst::ICMP_SLT, AxVS, Bx2xVS));
+ EXPECT_TRUE(SE.isKnownPredicate(ICmpInst::ICMP_SLT, Ax2xVS, BxVS));
+ EXPECT_TRUE(SE.isKnownPredicate(ICmpInst::ICMP_SLT, Ax2xVS, Bx2xVS));
+ EXPECT_TRUE(SE.isKnownPredicate(ICmpInst::ICMP_ULT, AxVS, BxVS));
+ EXPECT_TRUE(SE.isKnownPredicate(ICmpInst::ICMP_ULT, AxVS, Bx2xVS));
+ EXPECT_TRUE(SE.isKnownPredicate(ICmpInst::ICMP_ULT, Ax2xVS, BxVS));
+ EXPECT_TRUE(SE.isKnownPredicate(ICmpInst::ICMP_ULT, Ax2xVS, Bx2xVS));
+ });
+}
+
} // end namespace llvm