[llvm] ConstraintSystem: replace data structure with Matrix (PR #98895)

Ramkumar Ramachandra via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 17 13:57:56 PDT 2024


https://github.com/artagnon updated https://github.com/llvm/llvm-project/pull/98895

>From ef1851a4daa5a9b48cdff9bac12a116c6518d39b Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Wed, 10 Jul 2024 18:23:08 +0100
Subject: [PATCH 01/10] ADT/Matrix: two-dimensional Container with View

Leverage the excellent SmallVector infrastructure to write a
two-dimensional container, along with a View that abstracts out
indexing-arithmetic, eliminating memory operations on the underlying
storage. The immediate applicability of Matrix is to replace uses of the
vector-of-vectors idiom, with one caveat: an upper bound on the number
of columns should be known ahead of time.
---
 llvm/include/llvm/ADT/ArrayRef.h  |   3 +
 llvm/include/llvm/ADT/Matrix.h    | 339 ++++++++++++++++++++++++++++++
 llvm/unittests/ADT/CMakeLists.txt |   1 +
 llvm/unittests/ADT/MatrixTest.cpp | 325 ++++++++++++++++++++++++++++
 4 files changed, 668 insertions(+)
 create mode 100644 llvm/include/llvm/ADT/Matrix.h
 create mode 100644 llvm/unittests/ADT/MatrixTest.cpp

diff --git a/llvm/include/llvm/ADT/ArrayRef.h b/llvm/include/llvm/ADT/ArrayRef.h
index 1c6799f1c56ed..cc7f89ceec462 100644
--- a/llvm/include/llvm/ADT/ArrayRef.h
+++ b/llvm/include/llvm/ADT/ArrayRef.h
@@ -25,6 +25,7 @@
 
 namespace llvm {
   template<typename T> class [[nodiscard]] MutableArrayRef;
+  template <typename T> struct [[nodiscard]] MutableRowView;
 
   /// ArrayRef - Represent a constant reference to an array (0 or more elements
   /// consecutively in memory), i.e. a start pointer and a length.  It allows
@@ -59,6 +60,8 @@ namespace llvm {
     /// The number of elements.
     size_type Length = 0;
 
+    friend MutableRowView<T>;
+
   public:
     /// @name Constructors
     /// @{
diff --git a/llvm/include/llvm/ADT/Matrix.h b/llvm/include/llvm/ADT/Matrix.h
new file mode 100644
index 0000000000000..52587352862f9
--- /dev/null
+++ b/llvm/include/llvm/ADT/Matrix.h
@@ -0,0 +1,339 @@
+//===- Matrix.h - Two-dimensional Container with View -----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_MATRIX_H
+#define LLVM_ADT_MATRIX_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
+
+namespace llvm {
+template <typename T, size_t M, size_t NStorageInline> class JaggedArrayView;
+
+/// Due to the SmallVector infrastructure using SmallVectorAlignmentAndOffset
+/// that depends on the exact data layout, no derived classes can have extra
+/// members.
+template <typename T, size_t N>
+struct MatrixStorageBase : public SmallVectorImpl<T>, SmallVectorStorage<T, N> {
+  MatrixStorageBase() : SmallVectorImpl<T>(N) {}
+  MatrixStorageBase(size_t Size) : SmallVectorImpl<T>(N) { resize(Size); }
+  ~MatrixStorageBase() { destroy_range(this->begin(), this->end()); }
+  MatrixStorageBase(const MatrixStorageBase &RHS) : SmallVectorImpl<T>(N) {
+    if (!RHS.empty())
+      SmallVectorImpl<T>::operator=(RHS);
+  }
+  MatrixStorageBase(MatrixStorageBase &&RHS) : SmallVectorImpl<T>(N) {
+    if (!RHS.empty())
+      SmallVectorImpl<T>::operator=(::std::move(RHS));
+  }
+  using SmallVectorImpl<T>::size;
+  using SmallVectorImpl<T>::resize;
+  using SmallVectorImpl<T>::append;
+  using SmallVectorImpl<T>::erase;
+  using SmallVectorImpl<T>::destroy_range;
+
+  T *begin() const { return const_cast<T *>(SmallVectorImpl<T>::begin()); }
+  T *end() const { return const_cast<T *>(SmallVectorImpl<T>::end()); }
+};
+
+/// A two-dimensional container storage, whose upper bound on the number of
+/// columns should be known ahead of time. Not menat to be used directly: the
+/// primary usage API is MatrixView.
+template <typename T,
+          size_t N = CalculateSmallVectorDefaultInlinedElements<T>::value>
+class MatrixStorage {
+public:
+  MatrixStorage() = delete;
+  MatrixStorage(size_t NRows, size_t NCols)
+      : Base(NRows * NCols), NCols(NCols) {}
+  MatrixStorage(size_t NCols) : Base(), NCols(NCols) {}
+
+  size_t size() const { return Base.size(); }
+  bool empty() const { return !size(); }
+  size_t getNumRows() const { return size() / NCols; }
+  size_t getNumCols() const { return NCols; }
+  void setNumCols(size_t NCols) {
+    assert(empty() && "Column-resizing a non-empty MatrixStorage");
+    this->NCols = NCols;
+  }
+  void resize(size_t NRows) { Base.resize(NCols * NRows); }
+
+protected:
+  template <typename U, size_t M, size_t NStorageInline>
+  friend class JaggedArrayView;
+
+  T *begin() const { return Base.begin(); }
+  T *rowFromIdx(size_t RowIdx, size_t Offset = 0) const {
+    return begin() + RowIdx * NCols + Offset;
+  }
+  std::pair<size_t, size_t> idxFromRow(T *Ptr) const {
+    assert(Ptr >= begin() && "Internal error");
+    size_t Offset = (Ptr - begin()) % NCols;
+    return {(Ptr - begin()) / NCols, Offset};
+  }
+
+  // If Arg.size() < NCols, the number of columns won't be changed, and the
+  // difference is default-constructed.
+  void addRow(const SmallVectorImpl<T> &Arg) {
+    assert(Arg.size() <= NCols &&
+           "MatrixStorage has insufficient number of columns");
+    size_t Diff = NCols - Arg.size();
+    Base.append(Arg.begin(), Arg.end());
+    Base.append(Diff, T());
+  }
+
+private:
+  MatrixStorageBase<T, N> Base;
+  size_t NCols;
+};
+
+/// MutableArrayRef with a copy-assign, and extra APIs.
+template <typename T>
+struct [[nodiscard]] MutableRowView : public MutableArrayRef<T> {
+  using pointer = typename MutableArrayRef<T>::pointer;
+  using iterator = typename MutableArrayRef<T>::iterator;
+  using const_iterator = typename MutableArrayRef<T>::const_iterator;
+
+  MutableRowView() = delete;
+  MutableRowView(pointer Data, size_t Length)
+      : MutableArrayRef<T>(Data, Length) {}
+  MutableRowView(iterator Begin, iterator End)
+      : MutableArrayRef<T>(Begin, End) {}
+  MutableRowView(const_iterator Begin, const_iterator End)
+      : MutableArrayRef<T>(Begin, End) {}
+  MutableRowView(MutableArrayRef<T> Other)
+      : MutableArrayRef<T>(Other.data(), Other.size()) {}
+  MutableRowView(const SmallVectorImpl<T> &Vec) : MutableArrayRef<T>(Vec) {}
+
+  using MutableArrayRef<T>::size;
+  using MutableArrayRef<T>::data;
+
+  T &back() const { return MutableArrayRef<T>::back(); }
+  T &front() const { return MutableArrayRef<T>::front(); }
+  MutableRowView<T> drop_back(size_t N = 1) const { // NOLINT
+    return MutableArrayRef<T>::drop_back(N);
+  }
+  MutableRowView<T> drop_front(size_t N = 1) const { // NOLINT
+    return MutableArrayRef<T>::drop_front(N);
+  }
+  // This slice is different from the MutableArrayRef slice, and specifies a
+  // Begin and End index, instead of a Begin and Length.
+  MutableRowView<T> slice(size_t Begin, size_t End) {
+    return MutableArrayRef<T>::slice(Begin, End - Begin);
+  }
+  void pop_back(size_t N = 1) { // NOLINT
+    this->Length -= N;
+  }
+  void pop_front(size_t N = 1) { // NOLINT
+    this->Data += N;
+    this->Length -= N;
+  }
+
+  MutableRowView &operator=(const SmallVectorImpl<T> &Vec) {
+    copy_assign(Vec.begin(), Vec.end());
+    return *this;
+  }
+  MutableRowView &operator=(std::initializer_list<T> IL) {
+    copy_assign(IL.begin(), IL.end());
+    return *this;
+  }
+
+  void swap(MutableRowView<T> &Other) {
+    std::swap(this->Data, Other.Data);
+    std::swap(this->Length, Other.Length);
+  }
+
+protected:
+  void copy_assign(iterator Begin, iterator End) { // NOLINT
+    std::uninitialized_copy(Begin, End, data());
+    this->Length = End - Begin;
+  }
+  void copy_assign(const_iterator Begin, const_iterator End) { // NOLINT
+    std::uninitialized_copy(Begin, End, data());
+    this->Length = End - Begin;
+  }
+};
+
+/// The primary usage API of MatrixStorage. Abstracts out indexing-arithmetic,
+/// eliminating memory operations on the underlying data. Supports
+/// variable-length columns.
+template <typename T,
+          size_t N = CalculateSmallVectorDefaultInlinedElements<T>::value,
+          size_t NStorageInline =
+              CalculateSmallVectorDefaultInlinedElements<T>::value>
+class [[nodiscard]] JaggedArrayView {
+public:
+  using row_type = MutableRowView<T>;
+  using container_type = SmallVector<row_type, N>;
+  using iterator = typename container_type::iterator;
+  using const_iterator = typename container_type::const_iterator;
+
+  constexpr JaggedArrayView(MatrixStorage<T, NStorageInline> &Mat,
+                            size_t RowSpan, size_t ColSpan)
+      : Mat(Mat) {
+    RowView.reserve(RowSpan);
+    for (size_t RowIdx = 0; RowIdx < RowSpan; ++RowIdx) {
+      auto RangeBegin = Mat.begin() + RowIdx * ColSpan;
+      RowView.emplace_back(RangeBegin, RangeBegin + ColSpan);
+    }
+  }
+
+  // Constructor with a full View of the underlying MatrixStorage, if
+  // MatrixStorage has a non-zero number of Columns. Otherwise, creates an empty
+  // view.
+  constexpr JaggedArrayView(MatrixStorage<T, NStorageInline> &Mat)
+      : JaggedArrayView(Mat, Mat.getNumRows(), Mat.getNumCols()) {}
+
+  // Obvious copy-construator is deleted, since the underlying storage could
+  // have changed.
+  constexpr JaggedArrayView(const JaggedArrayView &) = delete;
+
+  // Copy-assignment operator should not be used when the underlying storage
+  // changes.
+  constexpr JaggedArrayView &operator=(const JaggedArrayView &Other) {
+    assert(Mat.begin() == Other.Mat.begin() &&
+           "Underlying storage has changed: use custom copy-constructor");
+    RowView = Other.RowView;
+    return *this;
+  }
+
+  // The actual copy-constructor: to be used when the underlying storage is
+  // copy-constructed.
+  JaggedArrayView(const JaggedArrayView &OldView,
+                  MatrixStorage<T, NStorageInline> &NewMat)
+      : Mat(NewMat) {
+    assert(OldView.Mat.size() == Mat.size() &&
+           "Custom copy-constructor called on non-copied storage");
+
+    // The underlying storage will change. Construct a new RowView by performing
+    // pointer-arithmetic on the underlying storage of OldView, using pointers
+    // from OldVie.
+    for (const auto &R : OldView.RowView) {
+      auto [StorageIdx, StartOffset] = OldView.Mat.idxFromRow(R.data());
+      RowView.emplace_back(Mat.rowFromIdx(StorageIdx, StartOffset), R.size());
+    }
+  }
+
+  void addRow(const SmallVectorImpl<T> &Row) {
+    // The underlying storage may be resized, performing reallocations. The
+    // pointers in RowView will no longer be valid, so save and restore the
+    // data. Construct RestoreData by performing pointer-arithmetic on the
+    // underlying storgge.
+    SmallVector<std::tuple<size_t, size_t, size_t>> RestoreData;
+    RestoreData.reserve(RowView.size());
+    for (const auto &R : RowView) {
+      auto [StorageIdx, StartOffset] = Mat.idxFromRow(R.data());
+      RestoreData.emplace_back(StorageIdx, StartOffset, R.size());
+    }
+
+    Mat.addRow(Row);
+
+    // Restore the RowView by performing pointer-arithmetic on the
+    // possibly-reallocated storage, using information from RestoreData.
+    RowView.clear();
+    for (const auto &[StorageIdx, StartOffset, Len] : RestoreData)
+      RowView.emplace_back(Mat.rowFromIdx(StorageIdx, StartOffset), Len);
+
+    // Finally, add the new row to the VRowView.
+    RowView.emplace_back(Mat.rowFromIdx(Mat.getNumRows() - 1), Row.size());
+  }
+
+  // To support addRow(View[Idx]).
+  void addRow(const row_type &Row) { addRow(SmallVector<T>{Row}); }
+
+  void addRow(std::initializer_list<T> Row) { addRow(SmallVector<T>{Row}); }
+
+  constexpr row_type &operator[](size_t RowIdx) {
+    assert(RowIdx < RowView.size() && "Indexing out of bounds");
+    return RowView[RowIdx];
+  }
+
+  constexpr T *data() const {
+    assert(!empty() && "Non-empty view expected");
+    return RowView.front().data();
+  }
+  size_t size() const { return getRowSpan() * getMaxColSpan(); }
+  bool empty() const { return RowView.empty(); }
+  size_t getRowSpan() const { return RowView.size(); }
+  size_t getColSpan(size_t RowIdx) const {
+    assert(RowIdx < RowView.size() && "Indexing out of bounds");
+    return RowView[RowIdx].size();
+  }
+  constexpr size_t getMaxColSpan() const {
+    return std::max_element(RowView.begin(), RowView.end(),
+                            [](const row_type &RowA, const row_type &RowB) {
+                              return RowA.size() < RowB.size();
+                            })
+        ->size();
+  }
+
+  iterator begin() { return RowView.begin(); }
+  iterator end() { return RowView.end(); }
+  const_iterator begin() const { return RowView.begin(); }
+  const_iterator end() const { return RowView.end(); }
+
+  constexpr JaggedArrayView<T, N, NStorageInline> rowSlice(size_t Begin,
+                                                           size_t End) {
+    assert(Begin < getRowSpan() && End <= getRowSpan() &&
+           "Indexing out of bounds");
+    assert(Begin < End && "Invalid slice");
+    container_type NewRowView;
+    for (size_t RowIdx = Begin; RowIdx < End; ++RowIdx)
+      NewRowView.emplace_back(RowView[RowIdx]);
+    return {Mat, std::move(NewRowView)};
+  }
+
+  constexpr JaggedArrayView<T, N, NStorageInline> colSlice(size_t Begin,
+                                                           size_t End) {
+    assert(Begin < End && "Invalid slice");
+    size_t MinColSpan =
+        std::min_element(RowView.begin(), RowView.end(),
+                         [](const row_type &RowA, const row_type &RowB) {
+                           return RowA.size() < RowB.size();
+                         })
+            ->size();
+    assert(Begin < MinColSpan && End <= MinColSpan && "Indexing out of bounds");
+    container_type NewRowView;
+    for (row_type Row : RowView)
+      NewRowView.emplace_back(Row.slice(Begin, End));
+    return {Mat, std::move(NewRowView)};
+  }
+
+  row_type &lastRow() {
+    assert(!empty() && "Non-empty view expected");
+    return RowView.back();
+  }
+  const row_type &lastRow() const {
+    assert(!empty() && "Non-empty view expected");
+    return RowView.back();
+  }
+  void dropLastRow() {
+    assert(!empty() && "Non-empty view expected");
+    RowView.pop_back();
+  }
+
+protected:
+  // Helper constructor.
+  constexpr JaggedArrayView(MatrixStorage<T, NStorageInline> &Mat,
+                            SmallVectorImpl<row_type> &&RowView)
+      : Mat(Mat), RowView(std::move(RowView)) {}
+
+private:
+  MatrixStorage<T, NStorageInline> &Mat;
+  container_type RowView;
+};
+} // namespace llvm
+
+namespace std {
+template <typename T>
+inline void swap(llvm::MutableRowView<T> &LHS, llvm::MutableRowView<T> &RHS) {
+  LHS.swap(RHS);
+}
+} // end namespace std
+
+#endif
diff --git a/llvm/unittests/ADT/CMakeLists.txt b/llvm/unittests/ADT/CMakeLists.txt
index f48d840a10595..cd04778e86e2d 100644
--- a/llvm/unittests/ADT/CMakeLists.txt
+++ b/llvm/unittests/ADT/CMakeLists.txt
@@ -54,6 +54,7 @@ add_llvm_unittest(ADTTests
   LazyAtomicPointerTest.cpp
   MappedIteratorTest.cpp
   MapVectorTest.cpp
+  MatrixTest.cpp
   PackedVectorTest.cpp
   PagedVectorTest.cpp
   PointerEmbeddedIntTest.cpp
diff --git a/llvm/unittests/ADT/MatrixTest.cpp b/llvm/unittests/ADT/MatrixTest.cpp
new file mode 100644
index 0000000000000..2f6843c0615a1
--- /dev/null
+++ b/llvm/unittests/ADT/MatrixTest.cpp
@@ -0,0 +1,325 @@
+#include "llvm/ADT/Matrix.h"
+#include "llvm/ADT/DynamicAPInt.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+template <typename T> static SmallVector<T> getDummyValues(size_t NValues) {
+  SmallVector<T> Ret(NValues);
+  for (size_t Idx = 0; Idx < NValues; ++Idx)
+    Ret[Idx] = T(Idx + 1);
+  return Ret;
+}
+
+template <typename T> class MatrixTest : public testing::Test {
+protected:
+  MatrixTest()
+      : ColumnInitMatrix(8), SmallMatrix(2, 2), OtherSmall(3, 3),
+        LargeMatrix(16, 16) {}
+  MatrixStorage<T, 16> ColumnInitMatrix;
+  MatrixStorage<T> SmallMatrix;
+  MatrixStorage<T> OtherSmall;
+  MatrixStorage<T> LargeMatrix;
+  SmallVector<T> getDummyRow(size_t NValues) {
+    return getDummyValues<T>(NValues);
+  }
+};
+
+using MatrixTestTypes = ::testing::Types<int64_t, DynamicAPInt>;
+TYPED_TEST_SUITE(MatrixTest, MatrixTestTypes, );
+
+TYPED_TEST(MatrixTest, Construction) {
+  auto &E = this->ColumnInitMatrix;
+  ASSERT_TRUE(E.empty());
+  EXPECT_EQ(E.getNumCols(), 8u);
+  E.setNumCols(3);
+  EXPECT_EQ(E.getNumCols(), 3u);
+  EXPECT_TRUE(E.empty());
+  EXPECT_EQ(E.getNumRows(), 0u);
+  auto &M = this->SmallMatrix;
+  EXPECT_FALSE(M.empty());
+  EXPECT_EQ(M.size(), 4u);
+  EXPECT_EQ(M.getNumRows(), 2u);
+  EXPECT_EQ(M.getNumCols(), 2u);
+}
+
+TYPED_TEST(MatrixTest, CopyConstruction) {
+  auto &OldMat = this->SmallMatrix;
+  auto V = JaggedArrayView<TypeParam>{OldMat};
+  V[0] = this->getDummyRow(2);
+  V[0].pop_back();
+  V[1] = this->getDummyRow(2);
+  V[1].pop_front();
+  ASSERT_EQ(V.getRowSpan(), 2u);
+  ASSERT_EQ(V.getColSpan(0), 1u);
+  ASSERT_EQ(V.getColSpan(1), 1u);
+  EXPECT_EQ(V[0][0], 1);
+  EXPECT_EQ(V[1][0], 2);
+  EXPECT_EQ(JaggedArrayView<TypeParam>{OldMat}[0][0], 1);
+  EXPECT_EQ(JaggedArrayView<TypeParam>{OldMat}[0][1], 2);
+  EXPECT_EQ(JaggedArrayView<TypeParam>{OldMat}[1][0], 1);
+  EXPECT_EQ(JaggedArrayView<TypeParam>{OldMat}[1][1], 2);
+  MatrixStorage<TypeParam> NewMat{OldMat};
+  JaggedArrayView<TypeParam> C{V, NewMat};
+  ASSERT_EQ(C.getRowSpan(), 2u);
+  ASSERT_EQ(C.getColSpan(0), 1u);
+  ASSERT_EQ(C.getColSpan(1), 1u);
+  EXPECT_EQ(C[0][0], 1);
+  EXPECT_EQ(C[1][0], 2);
+  C.addRow(this->getDummyRow(2));
+  EXPECT_EQ(C[2][0], 1);
+  EXPECT_EQ(C[2][1], 2);
+}
+
+TYPED_TEST(MatrixTest, RowOps) {
+  auto &M = this->SmallMatrix;
+  auto &O = this->OtherSmall;
+  JaggedArrayView<TypeParam> V{M};
+  ASSERT_EQ(M.getNumRows(), 2u);
+  ASSERT_EQ(V.getRowSpan(), 2u);
+  V[0] = this->getDummyRow(2);
+  V = JaggedArrayView<TypeParam>{M};
+  EXPECT_EQ(V[0][0], 1);
+  EXPECT_EQ(V[0][1], 2);
+  ASSERT_EQ(M.getNumRows(), 2u);
+  V.addRow({TypeParam(4), TypeParam(5)});
+  ASSERT_EQ(M.getNumRows(), 3u);
+  EXPECT_EQ(V[2][0], 4);
+  EXPECT_EQ(V[2][1], 5);
+  ASSERT_EQ(O.getNumRows(), 3u);
+  JaggedArrayView<TypeParam> W{O};
+  W.addRow(V[0]);
+  ASSERT_EQ(O.getNumCols(), 3u);
+  ASSERT_EQ(O.getNumRows(), 4u);
+  W[3] = {TypeParam(7), TypeParam(8)};
+  auto &WRow3 = W[3];
+  WRow3 = WRow3.drop_back();
+  ASSERT_EQ(WRow3[0], 7);
+  WRow3 = WRow3.drop_back();
+  EXPECT_TRUE(WRow3.empty());
+  ASSERT_EQ(W.getColSpan(3), 0u);
+  EXPECT_EQ(W[0][2], 0);
+  EXPECT_EQ(W[1][2], 0);
+  EXPECT_EQ(W[2][2], 0);
+  W = JaggedArrayView<TypeParam>{O};
+  EXPECT_EQ(W[0][2], 0);
+  EXPECT_EQ(W[1][2], 0);
+  EXPECT_EQ(W[2][2], 0);
+  EXPECT_EQ(W[3][2], 0);
+}
+
+TYPED_TEST(MatrixTest, ResizeAssign) {
+  auto &M = this->SmallMatrix;
+  M.resize(3);
+  ASSERT_EQ(M.getNumRows(), 3u);
+  JaggedArrayView<TypeParam> V{M};
+  V[0] = this->getDummyRow(2);
+  V[1] = this->getDummyRow(2);
+  V[2] = this->getDummyRow(2);
+  ASSERT_EQ(M.getNumRows(), 3u);
+  EXPECT_EQ(V[0][0], V[1][0]);
+  EXPECT_EQ(V[2][1], V[1][1]);
+}
+
+TYPED_TEST(MatrixTest, ColSlice) {
+  auto &M = this->OtherSmall;
+  JaggedArrayView<TypeParam> V{M};
+  V[0] = {TypeParam(3), TypeParam(7)};
+  V[1] = {TypeParam(4), TypeParam(5)};
+  V[2] = {TypeParam(8), TypeParam(9)};
+  auto W = V.colSlice(1, 2);
+  ASSERT_EQ(W.getRowSpan(), 3u);
+  ASSERT_EQ(W.getColSpan(0), 1u);
+  EXPECT_EQ(V[0][0], 3);
+  EXPECT_EQ(V[0][1], 7);
+  EXPECT_EQ(V[1][0], 4);
+  EXPECT_EQ(V[1][1], 5);
+  EXPECT_EQ(V[2][0], 8);
+  EXPECT_EQ(V[2][1], 9);
+  EXPECT_EQ(W[0][0], 7);
+  EXPECT_EQ(W[1][0], 5);
+  EXPECT_EQ(W[2][0], 9);
+}
+
+TYPED_TEST(MatrixTest, RowColSlice) {
+  auto &M = this->OtherSmall;
+  JaggedArrayView<TypeParam> V{M};
+  V[0] = {TypeParam(3), TypeParam(7)};
+  V[2] = {TypeParam(4), TypeParam(5)};
+  auto W = V.rowSlice(0, 2).colSlice(0, 2);
+  ASSERT_EQ(W.getRowSpan(), 2u);
+  ASSERT_EQ(W.getColSpan(0), 2u);
+  EXPECT_EQ(V[0][0], 3);
+  EXPECT_EQ(V[0][1], 7);
+  EXPECT_EQ(V[1][0], 0);
+  EXPECT_EQ(V[1][1], 0);
+  EXPECT_EQ(V[2][0], 4);
+  EXPECT_EQ(V[2][1], 5);
+  EXPECT_EQ(W[0][0], 3);
+  EXPECT_EQ(W[0][1], 7);
+  EXPECT_EQ(W[1][0], 0);
+  EXPECT_EQ(W[1][1], 0);
+}
+
+TYPED_TEST(MatrixTest, LastRowOps) {
+  auto &M = this->SmallMatrix;
+  JaggedArrayView<TypeParam> V{M};
+  V[0] = {TypeParam(3), TypeParam(7)};
+  V[1] = {TypeParam(4), TypeParam(5)};
+  V.dropLastRow();
+  ASSERT_EQ(M.size(), 4u);
+  ASSERT_EQ(V.size(), 2u);
+  auto W = V.lastRow();
+  ASSERT_EQ(W.size(), 2u);
+  EXPECT_EQ(W[0], 3);
+  EXPECT_EQ(W[1], 7);
+  V.lastRow() = {TypeParam(1), TypeParam(2)};
+  EXPECT_EQ(V.lastRow()[0], 1);
+  EXPECT_EQ(V.lastRow()[1], 2);
+  V.dropLastRow();
+  EXPECT_TRUE(V.empty());
+}
+
+TYPED_TEST(MatrixTest, Swap) {
+  auto &M = this->SmallMatrix;
+  JaggedArrayView<TypeParam> V{M};
+  V[0] = {TypeParam(3), TypeParam(7)};
+  V[1] = {TypeParam(4), TypeParam(5)};
+  std::swap(V[0], V[1]);
+  EXPECT_EQ(V.lastRow()[0], 3);
+  EXPECT_EQ(V.lastRow()[1], 7);
+  EXPECT_EQ(V[0][0], 4);
+  EXPECT_EQ(V[0][1], 5);
+  EXPECT_EQ(V[1][0], 3);
+  EXPECT_EQ(V[1][1], 7);
+  auto W = JaggedArrayView<TypeParam>{M};
+  EXPECT_EQ(W[0][0], 3);
+  EXPECT_EQ(W[0][1], 7);
+  EXPECT_EQ(W[1][0], 4);
+  EXPECT_EQ(W[1][1], 5);
+}
+
+TYPED_TEST(MatrixTest, DropLastRow) {
+  auto &M = this->OtherSmall;
+  auto V = JaggedArrayView<TypeParam>{M};
+  ASSERT_EQ(V.getRowSpan(), 3u);
+  V.dropLastRow();
+  ASSERT_EQ(V.getRowSpan(), 2u);
+  V[0] = {TypeParam(19), TypeParam(7), TypeParam(3)};
+  V[1] = {TypeParam(19), TypeParam(7), TypeParam(3)};
+  ASSERT_EQ(V.getColSpan(1), 3u);
+  V.addRow(V.lastRow());
+  ASSERT_EQ(V.getRowSpan(), 3u);
+  ASSERT_EQ(V.getColSpan(2), 3u);
+  EXPECT_EQ(V[1], V.lastRow());
+  V.dropLastRow();
+  ASSERT_EQ(V.getRowSpan(), 2u);
+  EXPECT_EQ(V[0], V.lastRow());
+  V.addRow(this->getDummyRow(3));
+  V.dropLastRow();
+  V.addRow(this->getDummyRow(3));
+  V.dropLastRow();
+  ASSERT_EQ(V.getRowSpan(), 2u);
+  EXPECT_EQ(V[0], V.lastRow());
+  V.dropLastRow();
+  V.dropLastRow();
+  ASSERT_TRUE(V.empty());
+  V.addRow({TypeParam(9), TypeParam(7), TypeParam(3)});
+  V.addRow(this->getDummyRow(3));
+  ASSERT_EQ(V.getRowSpan(), 2u);
+  EXPECT_EQ(V.lastRow()[0], 1);
+  EXPECT_EQ(V.lastRow()[1], 2);
+  EXPECT_EQ(V.lastRow()[2], 3);
+  EXPECT_EQ(V[0][0], 9);
+  EXPECT_EQ(V[0][1], 7);
+  EXPECT_EQ(V[0][2], 3);
+  V.dropLastRow();
+  V.addRow({TypeParam(21), TypeParam(22), TypeParam(23)});
+  ASSERT_EQ(V.getRowSpan(), 2u);
+  EXPECT_EQ(V.lastRow()[0], 21);
+  EXPECT_EQ(V.lastRow()[1], 22);
+  EXPECT_EQ(V.lastRow()[2], 23);
+}
+
+TYPED_TEST(MatrixTest, Iteration) {
+  auto &M = this->SmallMatrix;
+  M.resize(2);
+  JaggedArrayView<TypeParam> V{M};
+  for (const auto &[RowIdx, Row] : enumerate(V)) {
+    V[RowIdx] = this->getDummyRow(2);
+    for (const auto &[ColIdx, Col] : enumerate(Row)) {
+      EXPECT_GT(V[RowIdx][ColIdx], 0);
+    }
+  }
+}
+
+TYPED_TEST(MatrixTest, VariableLengthColumns) {
+  auto &M = this->ColumnInitMatrix;
+  JaggedArrayView<TypeParam, 8, 16> V{M};
+  ASSERT_EQ(V.empty(), true);
+  size_t NumCols = 6;
+  size_t NumRows = 3;
+  SmallVector<TypeParam> ColumnVec;
+  for (size_t Var = 0; Var < NumCols; ++Var)
+    ColumnVec.emplace_back(Var + 1);
+  ASSERT_EQ(ColumnVec.size(), NumCols);
+  for (size_t RowIdx = 0; RowIdx < NumRows; ++RowIdx)
+    V.addRow(ColumnVec);
+  ASSERT_EQ(V.getMaxColSpan(), NumCols);
+  V.addRow({TypeParam(19)});
+  ASSERT_EQ(V.getColSpan(NumRows), 1u);
+  V.dropLastRow();
+  V.addRow(V.lastRow());
+  ASSERT_EQ(V.getColSpan(NumRows), NumCols);
+  EXPECT_EQ(V[NumRows - 1], V[NumRows]);
+  V.dropLastRow();
+  ASSERT_EQ(V.getRowSpan(), NumRows);
+  ASSERT_EQ(M.getNumCols(), 8u);
+  ASSERT_EQ(M.getNumRows(), NumRows + 2);
+  V.dropLastRow();
+  ASSERT_EQ(V.getRowSpan(), NumRows - 1);
+  ASSERT_EQ(M.getNumRows(), NumRows + 2);
+  ASSERT_EQ(V.getColSpan(1), NumCols);
+  V[1][0] = 19;
+  std::swap(V[0], V[1]);
+  V[0].pop_back();
+  V[1] = V[1].drop_back().drop_back();
+  ASSERT_EQ(V.getColSpan(0), NumCols - 1);
+  ASSERT_EQ(V.getColSpan(1), NumCols - 2);
+  EXPECT_EQ(V[0][0], 19);
+  EXPECT_EQ(V[1][0], 1);
+  ASSERT_EQ(V.getMaxColSpan(), NumCols - 1);
+  V = V.colSlice(0, 1).rowSlice(0, 2);
+  ASSERT_EQ(V.getColSpan(0), 1u);
+  ASSERT_EQ(V.getColSpan(1), 1u);
+  EXPECT_EQ(V.getMaxColSpan(), 1u);
+  ASSERT_EQ(V.getRowSpan(), 2u);
+  EXPECT_EQ(V[0][0], 19);
+  EXPECT_EQ(V[1][0], 1);
+  V.dropLastRow();
+  V.dropLastRow();
+  EXPECT_TRUE(V.empty());
+  EXPECT_EQ(M.size(), 40u);
+}
+
+TYPED_TEST(MatrixTest, LargeMatrixOps) {
+  auto &M = this->LargeMatrix;
+  ASSERT_EQ(M.getNumRows(), 16u);
+  ASSERT_EQ(M.getNumCols(), 16u);
+  JaggedArrayView<TypeParam, 16> V{M};
+  V[0] = {TypeParam(1),  TypeParam(2), TypeParam(1),  TypeParam(4),
+          TypeParam(5),  TypeParam(1), TypeParam(1),  TypeParam(8),
+          TypeParam(9),  TypeParam(1), TypeParam(1),  TypeParam(12),
+          TypeParam(13), TypeParam(1), TypeParam(15), TypeParam(1)};
+  V[1] = this->getDummyRow(16);
+  EXPECT_EQ(V[0][14], 15);
+  EXPECT_EQ(V[0][3], 4);
+  EXPECT_EQ(std::count(V[0].begin(), V[0].end(), 1), 8);
+  EXPECT_TRUE(
+      std::all_of(V[0].begin(), V[0].end(), [](auto &El) { return El > 0; }));
+  auto W = V.rowSlice(0, 1).colSlice(2, 4);
+  ASSERT_EQ(W.getRowSpan(), 1u);
+  ASSERT_EQ(W.getColSpan(0), 2u);
+  EXPECT_EQ(W[0][0], 1);
+  EXPECT_EQ(W[0][1], 4);
+}

>From f78bc6008c7dea8acce3a4171dd3ece15103419f Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Mon, 15 Jul 2024 12:34:01 +0100
Subject: [PATCH 02/10] ConstraintSystem: replace data structure with Matrix

ConstraintSystem currently stores constraints in a vector-of-vectors,
performing inefficient memory operations on it, as part of its
operation. Replace this data structure with the newly-minted Matrix,
using MatrixView to eliminate row-swaps and truncation of the underlying
storage.

Since Matrix requires knowing an upper bounds on the number of columns
ahead-of-time, add a cl::opt to ConstraintElimination upper-bounding
this, and change addVariableRowFill to not add constraints whose length
exceeds this upper bound.
---
 llvm/include/llvm/Analysis/ConstraintSystem.h | 66 +++++++++++++------
 llvm/lib/Analysis/ConstraintSystem.cpp        | 42 ++++++------
 .../Scalar/ConstraintElimination.cpp          | 10 ++-
 3 files changed, 76 insertions(+), 42 deletions(-)

diff --git a/llvm/include/llvm/Analysis/ConstraintSystem.h b/llvm/include/llvm/Analysis/ConstraintSystem.h
index 7b02b618f7cb4..6014b2cb64247 100644
--- a/llvm/include/llvm/Analysis/ConstraintSystem.h
+++ b/llvm/include/llvm/Analysis/ConstraintSystem.h
@@ -9,22 +9,19 @@
 #ifndef LLVM_ANALYSIS_CONSTRAINTSYSTEM_H
 #define LLVM_ANALYSIS_CONSTRAINTSYSTEM_H
 
-#include "llvm/ADT/APInt.h"
-#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Matrix.h"
 #include "llvm/Support/MathExtras.h"
 
-#include <string>
-
 namespace llvm {
 
 class Value;
 class ConstraintSystem {
   struct Entry {
-    int64_t Coefficient;
-    uint16_t Id;
+    int64_t Coefficient = 0;
+    uint16_t Id = 0;
 
+    Entry() = default;
     Entry(int64_t Coefficient, uint16_t Id)
         : Coefficient(Coefficient), Id(Id) {}
   };
@@ -48,7 +45,10 @@ class ConstraintSystem {
   /// Current linear constraints in the system.
   /// An entry of the form c0, c1, ... cn represents the following constraint:
   ///   c0 >= v0 * c1 + .... + v{n-1} * cn
-  SmallVector<SmallVector<Entry, 8>, 4> Constraints;
+  MatrixStorage<Entry, 64> Constraints;
+
+  /// Constraints is only ever manipulated via this View.
+  JaggedArrayView<Entry, 16, 64> View;
 
   /// A map of variables (IR values) to their corresponding index in the
   /// constraint system.
@@ -64,18 +64,40 @@ class ConstraintSystem {
   SmallVector<std::string> getVarNamesList() const;
 
 public:
-  ConstraintSystem() {}
-  ConstraintSystem(ArrayRef<Value *> FunctionArgs) {
+  // The Matrix Constraints should always be initialized with an upper-bound
+  // number of columns. The default constructor hard-codes an upper-bound of 6,
+  // as it is only used in unit tests, and not in the actual
+  // ConstraintElimination Analysis.
+  ConstraintSystem() : Constraints(6), View(Constraints) {}
+
+  // This constructor is used by ConstraintElimination, inside ConstraintInfo.
+  // Unfortunately, due to calls to addFact, that adds local variables, it is
+  // impossible to know how many local variables there are in advance.
+  // ConstraintElimination has a fixed upper-bound on the number of columns,
+  // configurable as a cl::opt, so use that number, and don't add the constraint
+  // if it exceeds that number.
+  ConstraintSystem(ArrayRef<Value *> FunctionArgs, size_t NCols)
+      : Constraints(NCols), View(Constraints) {
     NumVariables += FunctionArgs.size();
     for (auto *Arg : FunctionArgs) {
       Value2Index.insert({Arg, Value2Index.size() + 1});
     }
   }
-  ConstraintSystem(const DenseMap<Value *, unsigned> &Value2Index)
-      : NumVariables(Value2Index.size()), Value2Index(Value2Index) {}
+
+  // This constructor is only used by the dump function in
+  // ConstraintElimination.
+  ConstraintSystem(const DenseMap<Value *, unsigned> &Value2Index,
+                   unsigned NVars)
+      : NumVariables(Value2Index.size()),
+        Constraints(std::max(Value2Index.size(), NVars)), View(Constraints),
+        Value2Index(Value2Index) {}
+
+  ConstraintSystem(const ConstraintSystem &Other)
+      : NumVariables(Other.NumVariables), Constraints(Other.Constraints),
+        View(Other.View, Constraints), Value2Index(Other.Value2Index) {}
 
   bool addVariableRow(ArrayRef<int64_t> R) {
-    assert(Constraints.empty() || R.size() == NumVariables);
+    assert(View.empty() || R.size() == NumVariables);
     // If all variable coefficients are 0, the constraint does not provide any
     // usable information.
     if (all_of(ArrayRef(R).drop_front(1), [](int64_t C) { return C == 0; }))
@@ -87,9 +109,10 @@ class ConstraintSystem {
         continue;
       NewRow.emplace_back(C, Idx);
     }
-    if (Constraints.empty())
+    if (View.empty())
       NumVariables = R.size();
-    Constraints.push_back(std::move(NewRow));
+
+    View.addRow(std::move(NewRow));
     return true;
   }
 
@@ -104,6 +127,11 @@ class ConstraintSystem {
     if (all_of(ArrayRef(R).drop_front(1), [](int64_t C) { return C == 0; }))
       return false;
 
+    // There is no correctness issue if we don't add a constraint, for whatever
+    // reason.
+    if (R.size() > Constraints.getNumCols())
+      return false;
+
     NumVariables = std::max(R.size(), NumVariables);
     return addVariableRow(R);
   }
@@ -145,21 +173,21 @@ class ConstraintSystem {
   bool isConditionImplied(SmallVector<int64_t, 8> R) const;
 
   SmallVector<int64_t> getLastConstraint() const {
-    assert(!Constraints.empty() && "Constraint system is empty");
+    assert(!View.empty() && "Constraint system is empty");
     SmallVector<int64_t> Result(NumVariables, 0);
-    for (auto &Entry : Constraints.back())
+    for (auto &Entry : View.lastRow())
       Result[Entry.Id] = Entry.Coefficient;
     return Result;
   }
 
-  void popLastConstraint() { Constraints.pop_back(); }
+  void popLastConstraint() { View.dropLastRow(); }
   void popLastNVariables(unsigned N) {
     assert(NumVariables > N);
     NumVariables -= N;
   }
 
   /// Returns the number of rows in the constraint system.
-  unsigned size() const { return Constraints.size(); }
+  unsigned size() const { return View.getRowSpan(); }
 
   /// Print the constraints in the system.
   void dump() const;
diff --git a/llvm/lib/Analysis/ConstraintSystem.cpp b/llvm/lib/Analysis/ConstraintSystem.cpp
index e4c9dcc7544e9..0b08a8ffeca2e 100644
--- a/llvm/lib/Analysis/ConstraintSystem.cpp
+++ b/llvm/lib/Analysis/ConstraintSystem.cpp
@@ -7,11 +7,12 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/ConstraintSystem.h"
+#include "llvm/ADT/Matrix.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/Support/MathExtras.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/IR/Value.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
 
 #include <string>
 
@@ -26,37 +27,38 @@ bool ConstraintSystem::eliminateUsingFM() {
   //  analysis."
   // Supercomputing'91: Proceedings of the 1991 ACM/
   // IEEE conference on Supercomputing. IEEE, 1991.
-  assert(!Constraints.empty() &&
+  assert(!View.empty() &&
          "should only be called for non-empty constraint systems");
 
   unsigned LastIdx = NumVariables - 1;
 
   // First, either remove the variable in place if it is 0 or add the row to
   // RemainingRows and remove it from the system.
-  SmallVector<SmallVector<Entry, 8>, 4> RemainingRows;
-  for (unsigned R1 = 0; R1 < Constraints.size();) {
-    SmallVector<Entry, 8> &Row1 = Constraints[R1];
+  MatrixStorage<Entry, 32> RemainingRows(View.getMaxColSpan());
+  JaggedArrayView<Entry, 8, 32> RemainingRowsView{RemainingRows};
+  for (unsigned R1 = 0; R1 < View.getRowSpan();) {
+    auto &Row1 = View[R1];
     if (getLastCoefficient(Row1, LastIdx) == 0) {
       if (Row1.size() > 0 && Row1.back().Id == LastIdx)
         Row1.pop_back();
       R1++;
     } else {
-      std::swap(Constraints[R1], Constraints.back());
-      RemainingRows.push_back(std::move(Constraints.back()));
-      Constraints.pop_back();
+      std::swap(View[R1], View.lastRow());
+      RemainingRowsView.addRow(View.lastRow());
+      View.dropLastRow();
     }
   }
 
   // Process rows where the variable is != 0.
-  unsigned NumRemainingConstraints = RemainingRows.size();
+  unsigned NumRemainingConstraints = RemainingRowsView.getRowSpan();
   for (unsigned R1 = 0; R1 < NumRemainingConstraints; R1++) {
     // FIXME do not use copy
     for (unsigned R2 = R1 + 1; R2 < NumRemainingConstraints; R2++) {
       if (R1 == R2)
         continue;
 
-      int64_t UpperLast = getLastCoefficient(RemainingRows[R2], LastIdx);
-      int64_t LowerLast = getLastCoefficient(RemainingRows[R1], LastIdx);
+      int64_t UpperLast = getLastCoefficient(RemainingRowsView[R2], LastIdx);
+      int64_t LowerLast = getLastCoefficient(RemainingRowsView[R1], LastIdx);
       assert(
           UpperLast != 0 && LowerLast != 0 &&
           "RemainingRows should only contain rows where the variable is != 0");
@@ -74,8 +76,8 @@ bool ConstraintSystem::eliminateUsingFM() {
       SmallVector<Entry, 8> NR;
       unsigned IdxUpper = 0;
       unsigned IdxLower = 0;
-      auto &LowerRow = RemainingRows[LowerR];
-      auto &UpperRow = RemainingRows[UpperR];
+      auto &LowerRow = RemainingRowsView[LowerR];
+      auto &UpperRow = RemainingRowsView[UpperR];
       while (true) {
         if (IdxUpper >= UpperRow.size() || IdxLower >= LowerRow.size())
           break;
@@ -112,9 +114,9 @@ bool ConstraintSystem::eliminateUsingFM() {
       }
       if (NR.empty())
         continue;
-      Constraints.push_back(std::move(NR));
+      View.addRow(std::move(NR));
       // Give up if the new system gets too big.
-      if (Constraints.size() > 500)
+      if (size() > 500)
         return false;
     }
   }
@@ -124,15 +126,15 @@ bool ConstraintSystem::eliminateUsingFM() {
 }
 
 bool ConstraintSystem::mayHaveSolutionImpl() {
-  while (!Constraints.empty() && NumVariables > 1) {
+  while (!View.empty() && NumVariables > 1) {
     if (!eliminateUsingFM())
       return true;
   }
 
-  if (Constraints.empty() || NumVariables > 1)
+  if (View.empty() || NumVariables > 1)
     return true;
 
-  return all_of(Constraints, [](auto &R) {
+  return all_of(View, [](auto &R) {
     if (R.empty())
       return true;
     if (R[0].Id == 0)
@@ -158,10 +160,10 @@ SmallVector<std::string> ConstraintSystem::getVarNamesList() const {
 
 void ConstraintSystem::dump() const {
 #ifndef NDEBUG
-  if (Constraints.empty())
+  if (View.empty())
     return;
   SmallVector<std::string> Names = getVarNamesList();
-  for (const auto &Row : Constraints) {
+  for (const auto &Row : View) {
     SmallVector<std::string, 16> Parts;
     for (const Entry &E : Row) {
       if (E.Id >= NumVariables)
diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
index c31173879af1e..ff13302abc553 100644
--- a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
@@ -40,7 +40,6 @@
 #include "llvm/Transforms/Utils/Cloning.h"
 #include "llvm/Transforms/Utils/ValueMapper.h"
 
-#include <cmath>
 #include <optional>
 #include <string>
 
@@ -57,6 +56,10 @@ static cl::opt<unsigned>
     MaxRows("constraint-elimination-max-rows", cl::init(500), cl::Hidden,
             cl::desc("Maximum number of rows to keep in constraint system"));
 
+static cl::opt<unsigned> MaxColumns(
+    "constraint-elimination-max-cols", cl::init(16), cl::Hidden,
+    cl::desc("Maximum number of columns to keep in constraint system"));
+
 static cl::opt<bool> DumpReproducers(
     "constraint-elimination-dump-reproducers", cl::init(false), cl::Hidden,
     cl::desc("Dump IR to reproduce successful transformations."));
@@ -274,7 +277,8 @@ class ConstraintInfo {
 
 public:
   ConstraintInfo(const DataLayout &DL, ArrayRef<Value *> FunctionArgs)
-      : UnsignedCS(FunctionArgs), SignedCS(FunctionArgs), DL(DL) {
+      : UnsignedCS(FunctionArgs, MaxColumns),
+        SignedCS(FunctionArgs, MaxColumns), DL(DL) {
     auto &Value2Index = getValue2Index(false);
     // Add Arg > -1 constraints to unsigned system for all function arguments.
     for (Value *Arg : FunctionArgs) {
@@ -894,7 +898,7 @@ void ConstraintInfo::transferToOtherSystem(
 
 static void dumpConstraint(ArrayRef<int64_t> C,
                            const DenseMap<Value *, unsigned> &Value2Index) {
-  ConstraintSystem CS(Value2Index);
+  ConstraintSystem CS(Value2Index, C.size());
   CS.addVariableRowFill(C);
   CS.dump();
 }

>From bab070cc4e350d07aed3225679e8ace30fe52aa7 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Tue, 16 Jul 2024 19:54:11 +0100
Subject: [PATCH 03/10] ConstraintElimination: use dry-run to upper-bound cols

---
 llvm/include/llvm/Analysis/ConstraintSystem.h |  11 +-
 .../Scalar/ConstraintElimination.cpp          | 131 ++++++++++++++++--
 2 files changed, 118 insertions(+), 24 deletions(-)

diff --git a/llvm/include/llvm/Analysis/ConstraintSystem.h b/llvm/include/llvm/Analysis/ConstraintSystem.h
index 6014b2cb64247..68df2b615f1f5 100644
--- a/llvm/include/llvm/Analysis/ConstraintSystem.h
+++ b/llvm/include/llvm/Analysis/ConstraintSystem.h
@@ -71,11 +71,7 @@ class ConstraintSystem {
   ConstraintSystem() : Constraints(6), View(Constraints) {}
 
   // This constructor is used by ConstraintElimination, inside ConstraintInfo.
-  // Unfortunately, due to calls to addFact, that adds local variables, it is
-  // impossible to know how many local variables there are in advance.
-  // ConstraintElimination has a fixed upper-bound on the number of columns,
-  // configurable as a cl::opt, so use that number, and don't add the constraint
-  // if it exceeds that number.
+  // ConstraintElimination upper-bounds the number of columns using a dry run.
   ConstraintSystem(ArrayRef<Value *> FunctionArgs, size_t NCols)
       : Constraints(NCols), View(Constraints) {
     NumVariables += FunctionArgs.size();
@@ -127,11 +123,6 @@ class ConstraintSystem {
     if (all_of(ArrayRef(R).drop_front(1), [](int64_t C) { return C == 0; }))
       return false;
 
-    // There is no correctness issue if we don't add a constraint, for whatever
-    // reason.
-    if (R.size() > Constraints.getNumCols())
-      return false;
-
     NumVariables = std::max(R.size(), NumVariables);
     return addVariableRow(R);
   }
diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
index ff13302abc553..14b0683517bf8 100644
--- a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
@@ -56,10 +56,6 @@ static cl::opt<unsigned>
     MaxRows("constraint-elimination-max-rows", cl::init(500), cl::Hidden,
             cl::desc("Maximum number of rows to keep in constraint system"));
 
-static cl::opt<unsigned> MaxColumns(
-    "constraint-elimination-max-cols", cl::init(16), cl::Hidden,
-    cl::desc("Maximum number of columns to keep in constraint system"));
-
 static cl::opt<bool> DumpReproducers(
     "constraint-elimination-dump-reproducers", cl::init(false), cl::Hidden,
     cl::desc("Dump IR to reproduce successful transformations."));
@@ -276,7 +272,8 @@ class ConstraintInfo {
   const DataLayout &DL;
 
 public:
-  ConstraintInfo(const DataLayout &DL, ArrayRef<Value *> FunctionArgs)
+  ConstraintInfo(const DataLayout &DL, ArrayRef<Value *> FunctionArgs,
+                 unsigned MaxColumns)
       : UnsignedCS(FunctionArgs, MaxColumns),
         SignedCS(FunctionArgs, MaxColumns), DL(DL) {
     auto &Value2Index = getValue2Index(false);
@@ -1675,21 +1672,68 @@ tryToSimplifyOverflowMath(IntrinsicInst *II, ConstraintInfo &Info,
   return Changed;
 }
 
-static bool eliminateConstraints(Function &F, DominatorTree &DT, LoopInfo &LI,
-                                 ScalarEvolution &SE,
-                                 OptimizationRemarkEmitter &ORE) {
-  bool Changed = false;
+/// Calls Info.getConstraint, discarding the result, and only collecting the
+/// number of new variables introduced by the constraint.
+static unsigned getNumNewVars(CmpInst::Predicate Pred, Value *A, Value *B,
+                              const ConstraintInfo &Info) {
+  auto GetIncrVal = [&Info](CmpInst::Predicate Pred, Value *A, Value *B) {
+    SmallVector<Value *> NewVars;
+    Info.getConstraint(Pred, A, B, NewVars);
+    return NewVars.size();
+  };
+
+  unsigned NumNewVars = GetIncrVal(Pred, A, B);
+
+  // What follows is an inlined dry-run of transferToOtherSystem, which
+  // upper-bounds NumNewVars conservatively.
+  if (!A->getType()->isIntegerTy())
+    return NumNewVars;
+
+  switch (Pred) {
+  default:
+    break;
+  case CmpInst::ICMP_ULT:
+  case CmpInst::ICMP_ULE:
+    NumNewVars +=
+        GetIncrVal(CmpInst::ICMP_SGE, A, ConstantInt::get(B->getType(), 0));
+    NumNewVars += GetIncrVal(CmpInst::getSignedPredicate(Pred), A, B);
+    break;
+  case CmpInst::ICMP_UGE:
+  case CmpInst::ICMP_UGT:
+    //  If A is a signed positive constant, then B >=s 0 and A >s (or >=s)
+    //  B.
+    NumNewVars +=
+        GetIncrVal(CmpInst::ICMP_SGE, B, ConstantInt::get(B->getType(), 0));
+    NumNewVars += GetIncrVal(CmpInst::getSignedPredicate(Pred), A, B);
+    break;
+  case CmpInst::ICMP_SLT:
+    NumNewVars += GetIncrVal(CmpInst::ICMP_ULT, A, B);
+    break;
+  case CmpInst::ICMP_SGT: {
+    NumNewVars +=
+        GetIncrVal(CmpInst::ICMP_UGE, A, ConstantInt::get(B->getType(), 0));
+    NumNewVars += GetIncrVal(CmpInst::ICMP_UGT, A, B);
+    break;
+  }
+  case CmpInst::ICMP_SGE:
+    NumNewVars += GetIncrVal(CmpInst::ICMP_UGE, A, B);
+    break;
+  }
+  return NumNewVars;
+}
+
+/// Performs a dry run of the transform, computing a conservative estimate of
+/// the total number of columns we need in the underlying storage.
+static std::pair<State, unsigned> dryRun(Function &F, DominatorTree &DT,
+                                         LoopInfo &LI, ScalarEvolution &SE) {
   DT.updateDFSNumbers();
   SmallVector<Value *> FunctionArgs;
   for (Value &Arg : F.args())
     FunctionArgs.push_back(&Arg);
-  ConstraintInfo Info(F.getDataLayout(), FunctionArgs);
   State S(DT, LI, SE);
-  std::unique_ptr<Module> ReproducerModule(
-      DumpReproducers ? new Module(F.getName(), F.getContext()) : nullptr);
+  unsigned MaxColumns = FunctionArgs.size() + 1;
+  ConstraintInfo Info(F.getDataLayout(), FunctionArgs, MaxColumns);
 
-  // First, collect conditions implied by branches and blocks with their
-  // Dominator DFS in and out numbers.
   for (BasicBlock &BB : F) {
     if (!DT.getNode(&BB))
       continue;
@@ -1729,6 +1773,65 @@ static bool eliminateConstraints(Function &F, DominatorTree &DT, LoopInfo &LI,
     return A.NumIn < B.NumIn;
   });
 
+  for (const FactOrCheck &CB : S.WorkList) {
+    ICmpInst::Predicate Pred;
+    Value *A, *B;
+    if (CB.isCheck()) {
+      if (auto *Cmp = dyn_cast<ICmpInst>(CB.getInstructionToSimplify())) {
+        if (!checkCondition(Cmp->getPredicate(), Cmp->getOperand(0),
+                            Cmp->getOperand(1), Cmp, Info) &&
+            match(CB.getContextInst(), m_LogicalOp(m_Value(), m_Value()))) {
+          match(Cmp, m_ICmp(Pred, m_Value(A), m_Value(B)));
+          MaxColumns += getNumNewVars(Pred, A, B, Info);
+        }
+      }
+      continue;
+    }
+    if (!CB.isConditionFact()) {
+      Value *X;
+      if (match(CB.Inst, m_Intrinsic<Intrinsic::abs>(m_Value(X)))) {
+        if (cast<ConstantInt>(CB.Inst->getOperand(1))->isOne())
+          MaxColumns +=
+              getNumNewVars(CmpInst::ICMP_SGE, CB.Inst,
+                            ConstantInt::get(CB.Inst->getType(), 0), Info);
+        MaxColumns += getNumNewVars(CmpInst::ICMP_SGE, CB.Inst, X, Info);
+        continue;
+      }
+
+      if (auto *MinMax = dyn_cast<MinMaxIntrinsic>(CB.Inst)) {
+        Pred = ICmpInst::getNonStrictPredicate(MinMax->getPredicate());
+        MaxColumns += getNumNewVars(Pred, MinMax, MinMax->getLHS(), Info);
+        MaxColumns += getNumNewVars(Pred, MinMax, MinMax->getRHS(), Info);
+        continue;
+      }
+    }
+
+    if (CB.isConditionFact()) {
+      Pred = CB.Cond.Pred;
+      A = CB.Cond.Op0;
+      B = CB.Cond.Op1;
+    } else {
+      match(CB.Inst, m_Intrinsic<Intrinsic::assume>(
+                         m_ICmp(Pred, m_Value(A), m_Value(B))));
+    }
+    MaxColumns += getNumNewVars(Pred, A, B, Info);
+  }
+  return {S, MaxColumns};
+}
+
+static bool eliminateConstraints(Function &F, DominatorTree &DT, LoopInfo &LI,
+                                 ScalarEvolution &SE,
+                                 OptimizationRemarkEmitter &ORE) {
+  bool Changed = false;
+  DT.updateDFSNumbers();
+  SmallVector<Value *> FunctionArgs;
+  for (Value &Arg : F.args())
+    FunctionArgs.push_back(&Arg);
+  auto [S, MaxColumns] = dryRun(F, DT, LI, SE);
+  ConstraintInfo Info(F.getDataLayout(), FunctionArgs, MaxColumns);
+  std::unique_ptr<Module> ReproducerModule(
+      DumpReproducers ? new Module(F.getName(), F.getContext()) : nullptr);
+
   SmallVector<Instruction *> ToRemove;
 
   // Finally, process ordered worklist and eliminate implied conditions.

>From 96835765fd5e9c485364d015a7a567bddfd7fad5 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Wed, 17 Jul 2024 01:01:39 +0100
Subject: [PATCH 04/10] ConstraintElimination: fix thinko in estimation

---
 .../Scalar/ConstraintElimination.cpp          | 52 +++++++++++++------
 1 file changed, 37 insertions(+), 15 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
index 14b0683517bf8..b401ca009bfcb 100644
--- a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
@@ -56,6 +56,10 @@ static cl::opt<unsigned>
     MaxRows("constraint-elimination-max-rows", cl::init(500), cl::Hidden,
             cl::desc("Maximum number of rows to keep in constraint system"));
 
+static cl::opt<unsigned> MaxColumns(
+    "constraint-elimination-max-cols", cl::init(64), cl::Hidden,
+    cl::desc("Maximum number of columns to keep in constraint system"));
+
 static cl::opt<bool> DumpReproducers(
     "constraint-elimination-dump-reproducers", cl::init(false), cl::Hidden,
     cl::desc("Dump IR to reproduce successful transformations."));
@@ -1675,10 +1679,18 @@ tryToSimplifyOverflowMath(IntrinsicInst *II, ConstraintInfo &Info,
 /// Calls Info.getConstraint, discarding the result, and only collecting the
 /// number of new variables introduced by the constraint.
 static unsigned getNumNewVars(CmpInst::Predicate Pred, Value *A, Value *B,
-                              const ConstraintInfo &Info) {
-  auto GetIncrVal = [&Info](CmpInst::Predicate Pred, Value *A, Value *B) {
+                              const ConstraintInfo &Info,
+                              SmallPtrSet<Value *, 16> &SeenVars) {
+  auto GetIncrVal = [&Info, &SeenVars](CmpInst::Predicate Pred, Value *A,
+                                       Value *B) {
     SmallVector<Value *> NewVars;
     Info.getConstraint(Pred, A, B, NewVars);
+    NewVars.erase(std::remove_if(NewVars.begin(), NewVars.end(),
+                                 [&SeenVars](Value *NewVar) {
+                                   return SeenVars.contains(NewVar);
+                                 }),
+                  NewVars.end());
+    SeenVars.insert(NewVars.begin(), NewVars.end());
     return NewVars.size();
   };
 
@@ -1731,8 +1743,9 @@ static std::pair<State, unsigned> dryRun(Function &F, DominatorTree &DT,
   for (Value &Arg : F.args())
     FunctionArgs.push_back(&Arg);
   State S(DT, LI, SE);
-  unsigned MaxColumns = FunctionArgs.size() + 1;
-  ConstraintInfo Info(F.getDataLayout(), FunctionArgs, MaxColumns);
+  unsigned EstimatedColumns = FunctionArgs.size() + 1;
+  ConstraintInfo Info(F.getDataLayout(), FunctionArgs, EstimatedColumns);
+  SmallPtrSet<Value *, 16> SeenVars;
 
   for (BasicBlock &BB : F) {
     if (!DT.getNode(&BB))
@@ -1782,7 +1795,7 @@ static std::pair<State, unsigned> dryRun(Function &F, DominatorTree &DT,
                             Cmp->getOperand(1), Cmp, Info) &&
             match(CB.getContextInst(), m_LogicalOp(m_Value(), m_Value()))) {
           match(Cmp, m_ICmp(Pred, m_Value(A), m_Value(B)));
-          MaxColumns += getNumNewVars(Pred, A, B, Info);
+          EstimatedColumns += getNumNewVars(Pred, A, B, Info, SeenVars);
         }
       }
       continue;
@@ -1791,17 +1804,20 @@ static std::pair<State, unsigned> dryRun(Function &F, DominatorTree &DT,
       Value *X;
       if (match(CB.Inst, m_Intrinsic<Intrinsic::abs>(m_Value(X)))) {
         if (cast<ConstantInt>(CB.Inst->getOperand(1))->isOne())
-          MaxColumns +=
-              getNumNewVars(CmpInst::ICMP_SGE, CB.Inst,
-                            ConstantInt::get(CB.Inst->getType(), 0), Info);
-        MaxColumns += getNumNewVars(CmpInst::ICMP_SGE, CB.Inst, X, Info);
+          EstimatedColumns += getNumNewVars(
+              CmpInst::ICMP_SGE, CB.Inst,
+              ConstantInt::get(CB.Inst->getType(), 0), Info, SeenVars);
+        EstimatedColumns +=
+            getNumNewVars(CmpInst::ICMP_SGE, CB.Inst, X, Info, SeenVars);
         continue;
       }
 
       if (auto *MinMax = dyn_cast<MinMaxIntrinsic>(CB.Inst)) {
         Pred = ICmpInst::getNonStrictPredicate(MinMax->getPredicate());
-        MaxColumns += getNumNewVars(Pred, MinMax, MinMax->getLHS(), Info);
-        MaxColumns += getNumNewVars(Pred, MinMax, MinMax->getRHS(), Info);
+        EstimatedColumns +=
+            getNumNewVars(Pred, MinMax, MinMax->getLHS(), Info, SeenVars);
+        EstimatedColumns +=
+            getNumNewVars(Pred, MinMax, MinMax->getRHS(), Info, SeenVars);
         continue;
       }
     }
@@ -1814,9 +1830,9 @@ static std::pair<State, unsigned> dryRun(Function &F, DominatorTree &DT,
       match(CB.Inst, m_Intrinsic<Intrinsic::assume>(
                          m_ICmp(Pred, m_Value(A), m_Value(B))));
     }
-    MaxColumns += getNumNewVars(Pred, A, B, Info);
+    EstimatedColumns += getNumNewVars(Pred, A, B, Info, SeenVars);
   }
-  return {S, MaxColumns};
+  return {S, EstimatedColumns};
 }
 
 static bool eliminateConstraints(Function &F, DominatorTree &DT, LoopInfo &LI,
@@ -1827,8 +1843,14 @@ static bool eliminateConstraints(Function &F, DominatorTree &DT, LoopInfo &LI,
   SmallVector<Value *> FunctionArgs;
   for (Value &Arg : F.args())
     FunctionArgs.push_back(&Arg);
-  auto [S, MaxColumns] = dryRun(F, DT, LI, SE);
-  ConstraintInfo Info(F.getDataLayout(), FunctionArgs, MaxColumns);
+  auto [S, EstimatedColumns] = dryRun(F, DT, LI, SE);
+
+  // If the estimate of the number of columns exceeds MaxRows / 8, it is likely
+  // that the constraint system will be too large to solve.
+  if (EstimatedColumns > MaxColumns)
+    return false;
+
+  ConstraintInfo Info(F.getDataLayout(), FunctionArgs, EstimatedColumns);
   std::unique_ptr<Module> ReproducerModule(
       DumpReproducers ? new Module(F.getName(), F.getContext()) : nullptr);
 

>From 9163f74afe46f9744dfd22d483e709272f3b7e77 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Wed, 17 Jul 2024 17:08:33 +0100
Subject: [PATCH 05/10] ConstraintElimination: a more thorough dry-run

---
 llvm/include/llvm/Analysis/ConstraintSystem.h |   6 +
 .../Scalar/ConstraintElimination.cpp          | 109 +++++++++---------
 2 files changed, 60 insertions(+), 55 deletions(-)

diff --git a/llvm/include/llvm/Analysis/ConstraintSystem.h b/llvm/include/llvm/Analysis/ConstraintSystem.h
index 68df2b615f1f5..5de60b9bc80d8 100644
--- a/llvm/include/llvm/Analysis/ConstraintSystem.h
+++ b/llvm/include/llvm/Analysis/ConstraintSystem.h
@@ -123,6 +123,12 @@ class ConstraintSystem {
     if (all_of(ArrayRef(R).drop_front(1), [](int64_t C) { return C == 0; }))
       return false;
 
+    unsigned NewRowSize = count_if(R, [](int64_t C) { return C != 0; });
+
+    // There is no correctness issue if we don't add a constraint.
+    if (NewRowSize > Constraints.getNumCols())
+      return false;
+
     NumVariables = std::max(R.size(), NumVariables);
     return addVariableRow(R);
   }
diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
index b401ca009bfcb..6120926a509cd 100644
--- a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
@@ -1676,62 +1676,59 @@ tryToSimplifyOverflowMath(IntrinsicInst *II, ConstraintInfo &Info,
   return Changed;
 }
 
-/// Calls Info.getConstraint, discarding the result, and only collecting the
-/// number of new variables introduced by the constraint.
-static unsigned getNumNewVars(CmpInst::Predicate Pred, Value *A, Value *B,
-                              const ConstraintInfo &Info,
-                              SmallPtrSet<Value *, 16> &SeenVars) {
-  auto GetIncrVal = [&Info, &SeenVars](CmpInst::Predicate Pred, Value *A,
-                                       Value *B) {
+/// Performs a dry run of AddFact, computing a conservative estimate of the
+/// number of new variables introduced.
+static void dryRunAddFact(CmpInst::Predicate Pred, Value *A, Value *B,
+                          const ConstraintInfo &Info,
+                          unsigned &EstimatedColumns) {
+  auto UpdateEstimate = [&Info, &EstimatedColumns](CmpInst::Predicate Pred,
+                                                   Value *A, Value *B) {
     SmallVector<Value *> NewVars;
-    Info.getConstraint(Pred, A, B, NewVars);
-    NewVars.erase(std::remove_if(NewVars.begin(), NewVars.end(),
-                                 [&SeenVars](Value *NewVar) {
-                                   return SeenVars.contains(NewVar);
-                                 }),
-                  NewVars.end());
-    SeenVars.insert(NewVars.begin(), NewVars.end());
-    return NewVars.size();
+    auto R = Info.getConstraint(Pred, A, B, NewVars);
+
+    // We offset it by 1 due to logic in addFact.
+    unsigned NewEstimate = R.size() + 1;
+
+    // Adjust for any ExtraInfo.
+    for (auto Row : R.ExtraInfo)
+      NewEstimate = std::max(NewEstimate, static_cast<unsigned>(Row.size()));
+
+    EstimatedColumns = std::max(EstimatedColumns, NewEstimate);
   };
 
-  unsigned NumNewVars = GetIncrVal(Pred, A, B);
+  UpdateEstimate(Pred, A, B);
 
   // What follows is an inlined dry-run of transferToOtherSystem, which
   // upper-bounds NumNewVars conservatively.
   if (!A->getType()->isIntegerTy())
-    return NumNewVars;
+    return;
 
   switch (Pred) {
   default:
     break;
   case CmpInst::ICMP_ULT:
   case CmpInst::ICMP_ULE:
-    NumNewVars +=
-        GetIncrVal(CmpInst::ICMP_SGE, A, ConstantInt::get(B->getType(), 0));
-    NumNewVars += GetIncrVal(CmpInst::getSignedPredicate(Pred), A, B);
+    UpdateEstimate(CmpInst::ICMP_SGE, A, ConstantInt::get(B->getType(), 0));
+    UpdateEstimate(CmpInst::getSignedPredicate(Pred), A, B);
     break;
   case CmpInst::ICMP_UGE:
   case CmpInst::ICMP_UGT:
     //  If A is a signed positive constant, then B >=s 0 and A >s (or >=s)
     //  B.
-    NumNewVars +=
-        GetIncrVal(CmpInst::ICMP_SGE, B, ConstantInt::get(B->getType(), 0));
-    NumNewVars += GetIncrVal(CmpInst::getSignedPredicate(Pred), A, B);
+    UpdateEstimate(CmpInst::ICMP_SGE, B, ConstantInt::get(B->getType(), 0));
+    UpdateEstimate(CmpInst::getSignedPredicate(Pred), A, B);
     break;
   case CmpInst::ICMP_SLT:
-    NumNewVars += GetIncrVal(CmpInst::ICMP_ULT, A, B);
+    UpdateEstimate(CmpInst::ICMP_ULT, A, B);
     break;
-  case CmpInst::ICMP_SGT: {
-    NumNewVars +=
-        GetIncrVal(CmpInst::ICMP_UGE, A, ConstantInt::get(B->getType(), 0));
-    NumNewVars += GetIncrVal(CmpInst::ICMP_UGT, A, B);
+  case CmpInst::ICMP_SGT:
+    UpdateEstimate(CmpInst::ICMP_UGE, A, ConstantInt::get(B->getType(), 0));
+    UpdateEstimate(CmpInst::ICMP_UGT, A, B);
     break;
-  }
   case CmpInst::ICMP_SGE:
-    NumNewVars += GetIncrVal(CmpInst::ICMP_UGE, A, B);
+    UpdateEstimate(CmpInst::ICMP_UGE, A, B);
     break;
   }
-  return NumNewVars;
 }
 
 /// Performs a dry run of the transform, computing a conservative estimate of
@@ -1745,7 +1742,6 @@ static std::pair<State, unsigned> dryRun(Function &F, DominatorTree &DT,
   State S(DT, LI, SE);
   unsigned EstimatedColumns = FunctionArgs.size() + 1;
   ConstraintInfo Info(F.getDataLayout(), FunctionArgs, EstimatedColumns);
-  SmallPtrSet<Value *, 16> SeenVars;
 
   for (BasicBlock &BB : F) {
     if (!DT.getNode(&BB))
@@ -1790,12 +1786,19 @@ static std::pair<State, unsigned> dryRun(Function &F, DominatorTree &DT,
     ICmpInst::Predicate Pred;
     Value *A, *B;
     if (CB.isCheck()) {
-      if (auto *Cmp = dyn_cast<ICmpInst>(CB.getInstructionToSimplify())) {
-        if (!checkCondition(Cmp->getPredicate(), Cmp->getOperand(0),
-                            Cmp->getOperand(1), Cmp, Info) &&
-            match(CB.getContextInst(), m_LogicalOp(m_Value(), m_Value()))) {
-          match(Cmp, m_ICmp(Pred, m_Value(A), m_Value(B)));
-          EstimatedColumns += getNumNewVars(Pred, A, B, Info, SeenVars);
+      // What follows is a dry-run of checkOrAndOpImpliedByOther, without
+      // assuming that instructions have been simplified, as they would have
+      // during the course of normal operation.
+      auto *ContextInst = CB.getContextInst();
+      if (auto *Cmp =
+              dyn_cast_or_null<ICmpInst>(CB.getInstructionToSimplify())) {
+        unsigned OtherOpIdx = ContextInst->getOperand(0) == Cmp ? 1 : 0;
+        if (match(ContextInst, m_LogicalOp()) &&
+            match(ContextInst->getOperand(OtherOpIdx),
+                  m_ICmp(Pred, m_Value(A), m_Value(B)))) {
+          if (match(ContextInst, m_LogicalOr()))
+            Pred = CmpInst::getInversePredicate(Pred);
+          dryRunAddFact(Pred, A, B, Info, EstimatedColumns);
         }
       }
       continue;
@@ -1804,20 +1807,17 @@ static std::pair<State, unsigned> dryRun(Function &F, DominatorTree &DT,
       Value *X;
       if (match(CB.Inst, m_Intrinsic<Intrinsic::abs>(m_Value(X)))) {
         if (cast<ConstantInt>(CB.Inst->getOperand(1))->isOne())
-          EstimatedColumns += getNumNewVars(
-              CmpInst::ICMP_SGE, CB.Inst,
-              ConstantInt::get(CB.Inst->getType(), 0), Info, SeenVars);
-        EstimatedColumns +=
-            getNumNewVars(CmpInst::ICMP_SGE, CB.Inst, X, Info, SeenVars);
+          dryRunAddFact(CmpInst::ICMP_SGE, CB.Inst,
+                        ConstantInt::get(CB.Inst->getType(), 0), Info,
+                        EstimatedColumns);
+        dryRunAddFact(CmpInst::ICMP_SGE, CB.Inst, X, Info, EstimatedColumns);
         continue;
       }
 
       if (auto *MinMax = dyn_cast<MinMaxIntrinsic>(CB.Inst)) {
         Pred = ICmpInst::getNonStrictPredicate(MinMax->getPredicate());
-        EstimatedColumns +=
-            getNumNewVars(Pred, MinMax, MinMax->getLHS(), Info, SeenVars);
-        EstimatedColumns +=
-            getNumNewVars(Pred, MinMax, MinMax->getRHS(), Info, SeenVars);
+        dryRunAddFact(Pred, MinMax, MinMax->getLHS(), Info, EstimatedColumns);
+        dryRunAddFact(Pred, MinMax, MinMax->getRHS(), Info, EstimatedColumns);
         continue;
       }
     }
@@ -1827,10 +1827,12 @@ static std::pair<State, unsigned> dryRun(Function &F, DominatorTree &DT,
       A = CB.Cond.Op0;
       B = CB.Cond.Op1;
     } else {
-      match(CB.Inst, m_Intrinsic<Intrinsic::assume>(
-                         m_ICmp(Pred, m_Value(A), m_Value(B))));
+      bool Matched = match(CB.Inst, m_Intrinsic<Intrinsic::assume>(
+                                        m_ICmp(Pred, m_Value(A), m_Value(B))));
+      (void)Matched;
+      assert(Matched && "Must have an assume intrinsic with a icmp operand");
     }
-    EstimatedColumns += getNumNewVars(Pred, A, B, Info, SeenVars);
+    dryRunAddFact(Pred, A, B, Info, EstimatedColumns);
   }
   return {S, EstimatedColumns};
 }
@@ -1844,11 +1846,8 @@ static bool eliminateConstraints(Function &F, DominatorTree &DT, LoopInfo &LI,
   for (Value &Arg : F.args())
     FunctionArgs.push_back(&Arg);
   auto [S, EstimatedColumns] = dryRun(F, DT, LI, SE);
-
-  // If the estimate of the number of columns exceeds MaxRows / 8, it is likely
-  // that the constraint system will be too large to solve.
-  if (EstimatedColumns > MaxColumns)
-    return false;
+  EstimatedColumns =
+      std::min(EstimatedColumns, static_cast<unsigned>(MaxColumns));
 
   ConstraintInfo Info(F.getDataLayout(), FunctionArgs, EstimatedColumns);
   std::unique_ptr<Module> ReproducerModule(

>From 966ccec46ae54eb78ff08b9051377f70ae3de15c Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Wed, 17 Jul 2024 17:26:10 +0100
Subject: [PATCH 06/10] ConstraintElimination: address review

---
 .../Transforms/Scalar/ConstraintElimination.cpp    | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
index 6120926a509cd..a468b52956521 100644
--- a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
@@ -1834,7 +1834,7 @@ static std::pair<State, unsigned> dryRun(Function &F, DominatorTree &DT,
     }
     dryRunAddFact(Pred, A, B, Info, EstimatedColumns);
   }
-  return {S, EstimatedColumns};
+  return {S, std::min(EstimatedColumns, static_cast<unsigned>(MaxColumns))};
 }
 
 static bool eliminateConstraints(Function &F, DominatorTree &DT, LoopInfo &LI,
@@ -1845,9 +1845,7 @@ static bool eliminateConstraints(Function &F, DominatorTree &DT, LoopInfo &LI,
   SmallVector<Value *> FunctionArgs;
   for (Value &Arg : F.args())
     FunctionArgs.push_back(&Arg);
-  auto [S, EstimatedColumns] = dryRun(F, DT, LI, SE);
-  EstimatedColumns =
-      std::min(EstimatedColumns, static_cast<unsigned>(MaxColumns));
+  const auto &[S, EstimatedColumns] = dryRun(F, DT, LI, SE);
 
   ConstraintInfo Info(F.getDataLayout(), FunctionArgs, EstimatedColumns);
   std::unique_ptr<Module> ReproducerModule(
@@ -1858,7 +1856,7 @@ static bool eliminateConstraints(Function &F, DominatorTree &DT, LoopInfo &LI,
   // Finally, process ordered worklist and eliminate implied conditions.
   SmallVector<StackEntry, 16> DFSInStack;
   SmallVector<ReproducerEntry> ReproducerCondStack;
-  for (FactOrCheck &CB : S.WorkList) {
+  for (const FactOrCheck &CB : S.WorkList) {
     // First, pop entries from the stack that are out-of-scope for CB. Remove
     // the corresponding entry from the constraint system.
     while (!DFSInStack.empty()) {
@@ -1895,9 +1893,9 @@ static bool eliminateConstraints(Function &F, DominatorTree &DT, LoopInfo &LI,
             ReproducerModule.get(), ReproducerCondStack, S.DT, ToRemove);
         if (!Simplified &&
             match(CB.getContextInst(), m_LogicalOp(m_Value(), m_Value()))) {
-          Simplified =
-              checkOrAndOpImpliedByOther(CB, Info, ReproducerModule.get(),
-                                         ReproducerCondStack, DFSInStack);
+          Simplified = checkOrAndOpImpliedByOther(
+              const_cast<FactOrCheck &>(CB), Info, ReproducerModule.get(),
+              ReproducerCondStack, DFSInStack);
         }
         Changed |= Simplified;
       } else if (auto *MinMax = dyn_cast<MinMaxIntrinsic>(Inst)) {

>From 0ea02f38e1c9ed06cf2dbeea6e8d6a763f6fb5e4 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Wed, 17 Jul 2024 18:57:34 +0100
Subject: [PATCH 07/10] ConstraintElimination: improve upper-bound

---
 .../Scalar/ConstraintElimination.cpp          | 46 +++++++++++--------
 1 file changed, 28 insertions(+), 18 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
index a468b52956521..86bc71f3fcc0d 100644
--- a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
@@ -40,6 +40,7 @@
 #include "llvm/Transforms/Utils/Cloning.h"
 #include "llvm/Transforms/Utils/ValueMapper.h"
 
+#include <cstdint>
 #include <optional>
 #include <string>
 
@@ -269,7 +270,6 @@ struct ConstraintTy {
 /// based on signed-ness, certain conditions can be transferred between the two
 /// systems.
 class ConstraintInfo {
-
   ConstraintSystem UnsignedCS;
   ConstraintSystem SignedCS;
 
@@ -308,6 +308,7 @@ class ConstraintInfo {
   void popLastNVariables(bool Signed, unsigned N) {
     getCS(Signed).popLastNVariables(N);
   }
+  const DataLayout &getDataLayout() const { return DL; }
 
   bool doesHold(CmpInst::Predicate Pred, Value *A, Value *B) const;
 
@@ -1687,19 +1688,22 @@ static void dryRunAddFact(CmpInst::Predicate Pred, Value *A, Value *B,
     auto R = Info.getConstraint(Pred, A, B, NewVars);
 
     // We offset it by 1 due to logic in addFact.
-    unsigned NewEstimate = R.size() + 1;
-
-    // Adjust for any ExtraInfo.
-    for (auto Row : R.ExtraInfo)
-      NewEstimate = std::max(NewEstimate, static_cast<unsigned>(Row.size()));
+    unsigned NewEstimate =
+        count_if(R.Coefficients, [](int64_t C) { return C != 0; }) + 1;
 
     EstimatedColumns = std::max(EstimatedColumns, NewEstimate);
   };
 
   UpdateEstimate(Pred, A, B);
 
-  // What follows is an inlined dry-run of transferToOtherSystem, which
-  // upper-bounds NumNewVars conservatively.
+  // What follows is a dry-run of transferToOtherSystem.
+  auto IsKnownNonNegative = [&Info](Value *V) {
+    return Info.doesHold(CmpInst::ICMP_SGE, V,
+                         ConstantInt::get(V->getType(), 0)) ||
+           isKnownNonNegative(V, Info.getDataLayout(),
+                              /*Depth=*/MaxAnalysisRecursionDepth - 1);
+  };
+
   if (!A->getType()->isIntegerTy())
     return;
 
@@ -1708,25 +1712,31 @@ static void dryRunAddFact(CmpInst::Predicate Pred, Value *A, Value *B,
     break;
   case CmpInst::ICMP_ULT:
   case CmpInst::ICMP_ULE:
-    UpdateEstimate(CmpInst::ICMP_SGE, A, ConstantInt::get(B->getType(), 0));
-    UpdateEstimate(CmpInst::getSignedPredicate(Pred), A, B);
+    if (IsKnownNonNegative(B)) {
+      UpdateEstimate(CmpInst::ICMP_SGE, A, ConstantInt::get(B->getType(), 0));
+      UpdateEstimate(CmpInst::getSignedPredicate(Pred), A, B);
+    }
     break;
   case CmpInst::ICMP_UGE:
   case CmpInst::ICMP_UGT:
-    //  If A is a signed positive constant, then B >=s 0 and A >s (or >=s)
-    //  B.
-    UpdateEstimate(CmpInst::ICMP_SGE, B, ConstantInt::get(B->getType(), 0));
-    UpdateEstimate(CmpInst::getSignedPredicate(Pred), A, B);
+    if (IsKnownNonNegative(A)) {
+      UpdateEstimate(CmpInst::ICMP_SGE, B, ConstantInt::get(B->getType(), 0));
+      UpdateEstimate(CmpInst::getSignedPredicate(Pred), A, B);
+    }
     break;
   case CmpInst::ICMP_SLT:
-    UpdateEstimate(CmpInst::ICMP_ULT, A, B);
+    if (IsKnownNonNegative(A))
+      UpdateEstimate(CmpInst::ICMP_ULT, A, B);
     break;
   case CmpInst::ICMP_SGT:
-    UpdateEstimate(CmpInst::ICMP_UGE, A, ConstantInt::get(B->getType(), 0));
-    UpdateEstimate(CmpInst::ICMP_UGT, A, B);
+    if (Info.doesHold(CmpInst::ICMP_SGE, B, ConstantInt::get(B->getType(), -1)))
+      UpdateEstimate(CmpInst::ICMP_UGE, A, ConstantInt::get(B->getType(), 0));
+    if (IsKnownNonNegative(B))
+      UpdateEstimate(CmpInst::ICMP_UGT, A, B);
     break;
   case CmpInst::ICMP_SGE:
-    UpdateEstimate(CmpInst::ICMP_UGE, A, B);
+    if (IsKnownNonNegative(B))
+      UpdateEstimate(CmpInst::ICMP_UGE, A, B);
     break;
   }
 }

>From ce1c2634f29741e002eae5a594fce03adbead474 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Wed, 17 Jul 2024 19:40:16 +0100
Subject: [PATCH 08/10] ConstraintElimination: don't const_cast CB

---
 llvm/lib/Transforms/Scalar/ConstraintElimination.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
index 86bc71f3fcc0d..40719dad6cd97 100644
--- a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
@@ -1497,7 +1497,7 @@ removeEntryFromStack(const StackEntry &E, ConstraintInfo &Info,
 /// Check if either the first condition of an AND or OR is implied by the
 /// (negated in case of OR) second condition or vice versa.
 static bool checkOrAndOpImpliedByOther(
-    FactOrCheck &CB, ConstraintInfo &Info, Module *ReproducerModule,
+    const FactOrCheck &CB, ConstraintInfo &Info, Module *ReproducerModule,
     SmallVectorImpl<ReproducerEntry> &ReproducerCondStack,
     SmallVectorImpl<StackEntry> &DFSInStack) {
 
@@ -1903,9 +1903,9 @@ static bool eliminateConstraints(Function &F, DominatorTree &DT, LoopInfo &LI,
             ReproducerModule.get(), ReproducerCondStack, S.DT, ToRemove);
         if (!Simplified &&
             match(CB.getContextInst(), m_LogicalOp(m_Value(), m_Value()))) {
-          Simplified = checkOrAndOpImpliedByOther(
-              const_cast<FactOrCheck &>(CB), Info, ReproducerModule.get(),
-              ReproducerCondStack, DFSInStack);
+          Simplified =
+              checkOrAndOpImpliedByOther(CB, Info, ReproducerModule.get(),
+                                         ReproducerCondStack, DFSInStack);
         }
         Changed |= Simplified;
       } else if (auto *MinMax = dyn_cast<MinMaxIntrinsic>(Inst)) {

>From d326af84207cc5fd402dbae685c4e19d90df2fda Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Wed, 17 Jul 2024 20:27:54 +0100
Subject: [PATCH 09/10] ConstraintSystem: fix nit

---
 llvm/include/llvm/Analysis/ConstraintSystem.h | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/llvm/include/llvm/Analysis/ConstraintSystem.h b/llvm/include/llvm/Analysis/ConstraintSystem.h
index 5de60b9bc80d8..2133b9e6b67b1 100644
--- a/llvm/include/llvm/Analysis/ConstraintSystem.h
+++ b/llvm/include/llvm/Analysis/ConstraintSystem.h
@@ -105,6 +105,11 @@ class ConstraintSystem {
         continue;
       NewRow.emplace_back(C, Idx);
     }
+
+    // There is no correctness issue if we don't add a constraint.
+    if (NewRow.size() > Constraints.getNumCols())
+      return false;
+
     if (View.empty())
       NumVariables = R.size();
 
@@ -123,12 +128,6 @@ class ConstraintSystem {
     if (all_of(ArrayRef(R).drop_front(1), [](int64_t C) { return C == 0; }))
       return false;
 
-    unsigned NewRowSize = count_if(R, [](int64_t C) { return C != 0; });
-
-    // There is no correctness issue if we don't add a constraint.
-    if (NewRowSize > Constraints.getNumCols())
-      return false;
-
     NumVariables = std::max(R.size(), NumVariables);
     return addVariableRow(R);
   }

>From 1af2cbd27957a8a2a10469290e21a90193a8c5c2 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Wed, 17 Jul 2024 21:57:00 +0100
Subject: [PATCH 10/10] ConstraintElimination: cut-off max-cols

---
 llvm/lib/Transforms/Scalar/ConstraintElimination.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
index 40719dad6cd97..a8234a9160691 100644
--- a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
@@ -1844,7 +1844,7 @@ static std::pair<State, unsigned> dryRun(Function &F, DominatorTree &DT,
     }
     dryRunAddFact(Pred, A, B, Info, EstimatedColumns);
   }
-  return {S, std::min(EstimatedColumns, static_cast<unsigned>(MaxColumns))};
+  return {S, EstimatedColumns};
 }
 
 static bool eliminateConstraints(Function &F, DominatorTree &DT, LoopInfo &LI,
@@ -1856,6 +1856,8 @@ static bool eliminateConstraints(Function &F, DominatorTree &DT, LoopInfo &LI,
   for (Value &Arg : F.args())
     FunctionArgs.push_back(&Arg);
   const auto &[S, EstimatedColumns] = dryRun(F, DT, LI, SE);
+  if (EstimatedColumns > MaxColumns)
+    return false;
 
   ConstraintInfo Info(F.getDataLayout(), FunctionArgs, EstimatedColumns);
   std::unique_ptr<Module> ReproducerModule(



More information about the llvm-commits mailing list