[llvm] [InstSimplify] Fold converted urem to 0 if there's no overlapping bits (PR #71528)

Thu Nov 16 08:33:55 PST 2023

https://github.com/huntergr-arm updated https://github.com/llvm/llvm-project/pull/71528

>From 14831e7ad89654cfbfb31df86a17ddea445ea447 Mon Sep 17 00:00:00 2001
From: Graham Hunter <graham.hunter at arm.com>
Date: Fri, 3 Nov 2023 14:22:57 +0000
Subject: [PATCH] [InstSimplify] Fold converted urem to 0 if there's no
 overlapping bits

When folding urem instructions we can end up not recognizing that
the output will always be 0 due to Value*s being different, despite
generating the same data (in this case, 2 different calls to vscale).

This patch recognizes the (x << N) & (add (x << M), -1) pattern that
instcombine replaces urem with after the two vscale calls have been
reduced to one via CSE, then replaces it with 0 when x is known to be
a power of 2 (or zero) and N >= M.
---
 llvm/lib/Analysis/InstructionSimplify.cpp     | 10 +++
 .../InstSimplify/po2-shift-add-and-to-zero.ll | 82 +++++++++++++++----
 2 files changed, 78 insertions(+), 14 deletions(-)

diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index bb58a446b447b3e..84d629808a2605a 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -2028,6 +2028,16 @@ static Value *simplifyAndCommutative(Value *Op0, Value *Op1,
       isKnownToBeAPowerOfTwo(Op1, Q.DL, /*OrZero*/ true, 0, Q.AC, Q.CxtI, Q.DT))
     return Constant::getNullValue(Op1->getType());
 
+  // (x << N) & ((x << M) - 1) --> 0, where x is known to be a power of 2 or
+  // zero, and M <= N.
+  const APInt *Shift1, *Shift2;
+  if (match(Op0, m_Shl(m_Value(X), m_APInt(Shift1))) &&
+      match(Op1, m_Add(m_Shl(m_Specific(X), m_APInt(Shift2)), m_AllOnes())) &&
+      isKnownToBeAPowerOfTwo(X, Q.DL, /*OrZero*/ true, /*Depth*/ 0, Q.AC,
+                             Q.CxtI) &&
+      Shift1->uge(*Shift2))
+    return Constant::getNullValue(Op0->getType());
+
   if (Value *V =
           simplifyAndOrWithICmpEq(Instruction::And, Op0, Op1, Q, MaxRecurse))
     return V;
diff --git a/llvm/test/Transforms/InstSimplify/po2-shift-add-and-to-zero.ll b/llvm/test/Transforms/InstSimplify/po2-shift-add-and-to-zero.ll
index 7eb8fd1c76bb087..54dd1688ad916aa 100644
--- a/llvm/test/Transforms/InstSimplify/po2-shift-add-and-to-zero.ll
+++ b/llvm/test/Transforms/InstSimplify/po2-shift-add-and-to-zero.ll
@@ -35,12 +35,7 @@ define i64 @f1() #0 {
 ; CHECK-LABEL: define i64 @f1
 ; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[TMP0]], 4
-; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[TMP0]], 3
-; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[TMP2]], -1
-; CHECK-NEXT:    [[REM:%.*]] = and i64 [[TMP1]], [[TMP3]]
-; CHECK-NEXT:    ret i64 [[REM]]
+; CHECK-NEXT:    ret i64 0
 ;
 entry:
   %0 = call i64 @llvm.vscale.i64()
@@ -55,24 +50,19 @@ entry:
 define i64 @test_pow2_or_zero(i64 %arg) {
 ; CHECK-LABEL: define i64 @test_pow2_or_zero
 ; CHECK-SAME: (i64 [[ARG:%.*]]) {
-; CHECK-NEXT:    [[NEG:%.*]] = sub i64 0, [[ARG]]
-; CHECK-NEXT:    [[X:%.*]] = and i64 [[NEG]], [[ARG]]
-; CHECK-NEXT:    [[SHL1:%.*]] = shl i64 [[X]], 4
-; CHECK-NEXT:    [[SHL2:%.*]] = shl i64 [[X]], 3
-; CHECK-NEXT:    [[MASK:%.*]] = add i64 [[SHL2]], -1
-; CHECK-NEXT:    [[REM:%.*]] = and i64 [[SHL1]], [[MASK]]
-; CHECK-NEXT:    ret i64 [[REM]]
+; CHECK-NEXT:    ret i64 0
 ;
   %neg = sub i64 0, %arg
   %x = and i64 %neg, %arg
   %shl1 = shl i64 %x, 4
   %shl2 = shl i64 %x, 3
   %mask = add i64 %shl2, -1
-  %rem = and i64 %shl1, %mask
+  %rem = and i64 %mask, %shl1
   ret i64 %rem
 }
 
 ;; Make sure it doesn't work if the value isn't known to be a power of 2.
+;; In this case a vscale without a `vscale_range` attribute on the function.
 define i64 @no_pow2() {
 ; CHECK-LABEL: define i64 @no_pow2() {
 ; CHECK-NEXT:  entry:
@@ -92,6 +82,70 @@ entry:
   ret i64 %rem
 }
 
+;; Make sure it doesn't work if the shift amount on the -1 side is greater.
+define i64 @minus_shift_greater(i64 %arg) {
+; CHECK-LABEL: define i64 @minus_shift_greater
+; CHECK-SAME: (i64 [[ARG:%.*]]) {
+; CHECK-NEXT:    [[NEG:%.*]] = sub i64 0, [[ARG]]
+; CHECK-NEXT:    [[X:%.*]] = and i64 [[NEG]], [[ARG]]
+; CHECK-NEXT:    [[SHL1:%.*]] = shl i64 [[X]], 3
+; CHECK-NEXT:    [[SHL2:%.*]] = shl i64 [[X]], 4
+; CHECK-NEXT:    [[MASK:%.*]] = add i64 [[SHL2]], -1
+; CHECK-NEXT:    [[REM:%.*]] = and i64 [[SHL1]], [[MASK]]
+; CHECK-NEXT:    ret i64 [[REM]]
+;
+  %neg = sub i64 0, %arg
+  %x = and i64 %neg, %arg
+  %shl1 = shl i64 %x, 3
+  %shl2 = shl i64 %x, 4
+  %mask = add i64 %shl2, -1
+  %rem = and i64 %shl1, %mask
+  ret i64 %rem
+}
+
+;; Make sure it doesn't work if the subtract isn't one.
+define i64 @sub2(i64 %arg) {
+; CHECK-LABEL: define i64 @sub2
+; CHECK-SAME: (i64 [[ARG:%.*]]) {
+; CHECK-NEXT:    [[NEG:%.*]] = sub i64 0, [[ARG]]
+; CHECK-NEXT:    [[X:%.*]] = and i64 [[NEG]], [[ARG]]
+; CHECK-NEXT:    [[SHL1:%.*]] = shl i64 [[X]], 4
+; CHECK-NEXT:    [[SHL2:%.*]] = shl i64 [[X]], 3
+; CHECK-NEXT:    [[MASK:%.*]] = add i64 [[SHL2]], -2
+; CHECK-NEXT:    [[REM:%.*]] = and i64 [[SHL1]], [[MASK]]
+; CHECK-NEXT:    ret i64 [[REM]]
+;
+  %neg = sub i64 0, %arg
+  %x = and i64 %neg, %arg
+  %shl1 = shl i64 %x, 4
+  %shl2 = shl i64 %x, 3
+  %mask = add i64 %shl2, -2
+  %rem = and i64 %shl1, %mask
+  ret i64 %rem
+}
+
+;; Make sure it doesn't work with a right shift feeding the add of -1
+;; (lshr instead of shl on the mask side).
+define i64 @rightshift(i64 %arg) {
+; CHECK-LABEL: define i64 @rightshift
+; CHECK-SAME: (i64 [[ARG:%.*]]) {
+; CHECK-NEXT:    [[NEG:%.*]] = sub i64 0, [[ARG]]
+; CHECK-NEXT:    [[X:%.*]] = and i64 [[NEG]], [[ARG]]
+; CHECK-NEXT:    [[SHL1:%.*]] = shl i64 [[X]], 4
+; CHECK-NEXT:    [[SHL2:%.*]] = lshr i64 [[X]], 3
+; CHECK-NEXT:    [[MASK:%.*]] = add i64 [[SHL2]], -1
+; CHECK-NEXT:    [[REM:%.*]] = and i64 [[SHL1]], [[MASK]]
+; CHECK-NEXT:    ret i64 [[REM]]
+;
+  %neg = sub i64 0, %arg
+  %x = and i64 %neg, %arg
+  %shl1 = shl i64 %x, 4
+  %shl2 = lshr i64 %x, 3
+  %mask = add i64 %shl2, -1
+  %rem = and i64 %shl1, %mask
+  ret i64 %rem
+}
+
 declare i64 @llvm.vscale.i64()
 
 attributes #0 = { vscale_range(1,16) }