[flang-commits] [flang] [libc] [llvm] [clang-tools-extra] [clang] [lldb] [libcxx] [compiler-rt] [InstCombine] Fold converted urem to 0 if there's no overlapping bits (PR #71528)

Wed Nov 8 05:27:49 PST 2023

https://github.com/huntergr-arm updated https://github.com/llvm/llvm-project/pull/71528

>From 754519ad9b37343c827504e7d6bfcfa590f69483 Mon Sep 17 00:00:00 2001
From: Graham Hunter <graham.hunter at arm.com>
Date: Fri, 3 Nov 2023 14:22:57 +0000
Subject: [PATCH] [InstCombine] Fold converted urem to 0 if there's no
 overlapping bits

When folding urem instructions we can fail to recognize that the
result will always be 0 because the operands are different Value*s, even
though they produce the same data (in this case, two calls to vscale).

This patch recognizes the (x << N) & (add (x << M), -1) pattern that
instcombine replaces urem with after the two vscale calls have been
reduced to one via CSE, then replaces it with 0 when x is a non-zero
power of 2 and N >= M.
---
 .../InstCombine/InstCombineAndOrXor.cpp       | 10 ++++
 .../InstCombine/po2-shift-add-and-to-zero.ll  | 52 +++++++++++++++++++
 2 files changed, 62 insertions(+)
 create mode 100644 llvm/test/Transforms/InstCombine/po2-shift-add-and-to-zero.ll

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 46af9bf5eed003a..da38f8039dbc3ca 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -2662,6 +2662,16 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) {
   if (sinkNotIntoOtherHandOfLogicalOp(I))
     return &I;
 
+  // (x << N) & (add (x << M), -1) --> 0, where x is known to be a non-zero
+  // power of 2 and M <= N.
+  const APInt *Shift1, *Shift2;
+  if (match(&I, m_c_And(m_OneUse(m_Shl(m_Value(X), m_APInt(Shift1))),
+                        m_OneUse(m_Add(m_Shl(m_Value(Y), m_APInt(Shift2)),
+                                       m_AllOnes())))) &&
+      X == Y && isKnownToBeAPowerOfTwo(X, /*OrZero*/ false, 0, &I) &&
+      Shift1->uge(*Shift2))
+    return replaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+
   // An and recurrence w/loop invariant step is equivelent to (and start, step)
   PHINode *PN = nullptr;
   Value *Start = nullptr, *Step = nullptr;
diff --git a/llvm/test/Transforms/InstCombine/po2-shift-add-and-to-zero.ll b/llvm/test/Transforms/InstCombine/po2-shift-add-and-to-zero.ll
new file mode 100644
index 000000000000000..4979e7a01972299
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/po2-shift-add-and-to-zero.ll
@@ -0,0 +1,52 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -mtriple unknown -passes=instcombine -S < %s | FileCheck %s
+
+;; The and X, (add Y, -1) pattern is from an earlier instcombine pass which
+;; converted
+
+;; define dso_local i64 @f1() local_unnamed_addr #0 {
+;; entry:
+;;   %0 = call i64 @llvm.aarch64.sve.cntb(i32 31)
+;;   %1 = call i64 @llvm.aarch64.sve.cnth(i32 31)
+;;   %rem = urem i64 %0, %1
+;;   ret i64 %rem
+;; }
+
+;; into
+
+;; define dso_local i64 @f1() local_unnamed_addr #0 {
+;; entry:
+;;   %0 = call i64 @llvm.vscale.i64()
+;;   %1 = shl nuw nsw i64 %0, 4
+;;   %2 = call i64 @llvm.vscale.i64()
+;;   %3 = shl nuw nsw i64 %2, 3
+;;   %4 = add nsw i64 %3, -1
+;;   %rem = and i64 %1, %4
+;;   ret i64 %rem
+;; }
+
+;; InstCombine would have folded the original to returning 0 if the vscale
+;; calls were the same Value*, but since there are two of them it doesn't
+;; work and we convert the urem to add/and. CSE then gets rid of the extra
+;; vscale, leaving us with a new pattern to match. This only works because
+;; vscale is known to be a nonzero power of 2 (assuming there's a defined
+;; range for it).
+
+define dso_local i64 @f1() local_unnamed_addr #0 {
+; CHECK-LABEL: define dso_local i64 @f1
+; CHECK-SAME: () local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret i64 0
+;
+entry:
+  %0 = call i64 @llvm.vscale.i64()
+  %1 = shl nuw nsw i64 %0, 4
+  %2 = shl nuw nsw i64 %0, 3
+  %3 = add nsw i64 %2, -1
+  %rem = and i64 %1, %3
+  ret i64 %rem
+}
+
+declare i64 @llvm.vscale.i64()
+
+attributes #0 = { vscale_range(1,16) }