[llvm] 36a3f8f - [TTI][TLI][AArch64] Support scalable immediates with isLegalAddImmediate (#84173)

Wed Mar 20 03:28:49 PDT 2024

Author: Graham Hunter
Date: 2024-03-20T10:28:46Z
New Revision: 36a3f8f6471ba04b7ebf32a3393708b8fb32aecf

URL: https://github.com/llvm/llvm-project/commit/36a3f8f6471ba04b7ebf32a3393708b8fb32aecf
DIFF: https://github.com/llvm/llvm-project/commit/36a3f8f6471ba04b7ebf32a3393708b8fb32aecf.diff

LOG: [TTI][TLI][AArch64] Support scalable immediates with isLegalAddImmediate (#84173)

Adds a new hook, isLegalAddScalableImmediate, alongside isLegalAddImmediate,
to query whether a scalable immediate (one multiplied by vscale) is legal.

Extends the AArch64 implementation to match immediates based on what addvl and inc[h|w|d] support.

Added: 
    llvm/unittests/Target/AArch64/Immediates.cpp

Modified: 
    llvm/include/llvm/Analysis/TargetTransformInfo.h
    llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
    llvm/include/llvm/CodeGen/BasicTTIImpl.h
    llvm/include/llvm/CodeGen/TargetLowering.h
    llvm/lib/Analysis/TargetTransformInfo.cpp
    llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
    llvm/lib/Target/AArch64/AArch64ISelLowering.h
    llvm/unittests/Target/AArch64/CMakeLists.txt

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 3a57b7bdb2d43b..bad0a77b0f2da2 100644

--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -696,6 +696,12 @@ class TargetTransformInfo {
   /// immediate without having to materialize the immediate into a register.
   bool isLegalAddImmediate(int64_t Imm) const;
 
+  /// Return true if adding the specified scalable immediate is legal, that is
+  /// the target has add instructions which can add a register with the
+  /// immediate (multiplied by vscale) without having to materialize the
+  /// immediate into a register.
+  bool isLegalAddScalableImmediate(int64_t Imm) const;
+
   /// Return true if the specified immediate is legal icmp immediate,
   /// that is the target has icmp instructions which can compare a register
   /// against the immediate without having to materialize the immediate into a
@@ -1842,6 +1848,7 @@ class TargetTransformInfo::Concept {
       std::function<void(Instruction *, unsigned, APInt, APInt &)>
           SimplifyAndSetOp) = 0;
   virtual bool isLegalAddImmediate(int64_t Imm) = 0;
+  virtual bool isLegalAddScalableImmediate(int64_t Imm) = 0;
   virtual bool isLegalICmpImmediate(int64_t Imm) = 0;
   virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
                                      int64_t BaseOffset, bool HasBaseReg,
@@ -2303,6 +2310,9 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
   bool isLegalAddImmediate(int64_t Imm) override {
     return Impl.isLegalAddImmediate(Imm);
   }
+  bool isLegalAddScalableImmediate(int64_t Imm) override {
+    return Impl.isLegalAddScalableImmediate(Imm);
+  }
   bool isLegalICmpImmediate(int64_t Imm) override {
     return Impl.isLegalICmpImmediate(Imm);
   }

diff  --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 07eeceeeaa22a8..7c47d3c2338a87 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -216,6 +216,8 @@ class TargetTransformInfoImplBase {
 
   bool isLegalAddImmediate(int64_t Imm) const { return false; }
 
+  bool isLegalAddScalableImmediate(int64_t Imm) const { return false; }
+
   bool isLegalICmpImmediate(int64_t Imm) const { return false; }
 
   bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,

diff  --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 721900038ddd57..92fa726c31df14 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -328,6 +328,10 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
     return getTLI()->isLegalAddImmediate(imm);
   }
 
+  bool isLegalAddScalableImmediate(int64_t Imm) {
+    return getTLI()->isLegalAddScalableImmediate(Imm);
+  }
+
   bool isLegalICmpImmediate(int64_t imm) {
     return getTLI()->isLegalICmpImmediate(imm);
   }

diff  --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 4753d8e8a51257..59fad88f91b1d1 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -2772,6 +2772,12 @@ class TargetLoweringBase {
     return true;
   }
 
+  /// Return true if adding the specified scalable immediate is legal, that is
+  /// the target has add instructions which can add a register with the
+  /// immediate (multiplied by vscale) without having to materialize the
+  /// immediate into a register.
+  virtual bool isLegalAddScalableImmediate(int64_t) const { return false; }
+
   /// Return true if the specified immediate is legal for the value input of a
   /// store instruction.
   virtual bool isLegalStoreImmediate(int64_t Value) const {

diff  --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 414da85209c15f..5f933b4587843c 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -396,6 +396,10 @@ bool TargetTransformInfo::isLegalAddImmediate(int64_t Imm) const {
   return TTIImpl->isLegalAddImmediate(Imm);
 }
 
+bool TargetTransformInfo::isLegalAddScalableImmediate(int64_t Imm) const {
+  return TTIImpl->isLegalAddScalableImmediate(Imm);
+}
+
 bool TargetTransformInfo::isLegalICmpImmediate(int64_t Imm) const {
   return TTIImpl->isLegalICmpImmediate(Imm);
 }

diff  --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 512e35fad8fead..7fab274ab957c8 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -16616,6 +16616,38 @@ bool AArch64TargetLowering::isLegalAddImmediate(int64_t Immed) const {
   return IsLegal;
 }
 
+bool AArch64TargetLowering::isLegalAddScalableImmediate(int64_t Imm) const {
+  // We will only emit addvl/inc* instructions for SVE2
+  if (!Subtarget->hasSVE2())
+    return false;
+
+  // addvl's immediates are in terms of the number of bytes in a register.
+  // Since there are 16 in the base supported size (128bits), we need to
+  // divide the immediate by that much to give us a useful immediate to
+  // multiply by vscale. We can't have a remainder as a result of this.
+  if (Imm % 16 == 0)
+    return isInt<6>(Imm / 16);
+
+  // Inc[b|h|w|d] instructions take a pattern and a positive immediate
+  // multiplier in the range [1,16] (so no 2's complement check); assume a
+  // pattern of 'all'. Incb is a subset of addvl, so only h|w|d are checked
+  // here, with dec[h|w|d] covering subtractions. Use std::abs rather than
+  // std::labs: 'long' is 32 bits on LLP64 hosts and would truncate Imm.
+  // FIXME: Can we make use of other patterns to cover other immediates?
+
+  // inch|dech
+  if (Imm % 8 == 0)
+    return std::abs(Imm / 8) <= 16;
+  // incw|decw
+  if (Imm % 4 == 0)
+    return std::abs(Imm / 4) <= 16;
+  // incd|decd
+  if (Imm % 2 == 0)
+    return std::abs(Imm / 2) <= 16;
+
+  return false;
+}
+
 // Return false to prevent folding
 // (mul (add x, c1), c2) -> (add (mul x, c2), c2*c1) in DAGCombine,
 // if the folding leads to worse code.

diff  --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 89016cbf56e39e..3465f3be887543 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -689,6 +689,7 @@ class AArch64TargetLowering : public TargetLowering {
                                        StoreInst *SI) const override;
 
   bool isLegalAddImmediate(int64_t) const override;
+  bool isLegalAddScalableImmediate(int64_t) const override;
   bool isLegalICmpImmediate(int64_t) const override;
 
   bool isMulAddWithConstProfitable(SDValue AddNode,

diff  --git a/llvm/unittests/Target/AArch64/CMakeLists.txt b/llvm/unittests/Target/AArch64/CMakeLists.txt
index dacd919ba1e33b..64ab991ac479a4 100644
--- a/llvm/unittests/Target/AArch64/CMakeLists.txt
+++ b/llvm/unittests/Target/AArch64/CMakeLists.txt
@@ -29,6 +29,7 @@ add_llvm_target_unittest(AArch64Tests
   MatrixRegisterAliasing.cpp
   SMEAttributesTest.cpp
   AArch64SVESchedPseudoTest.cpp
+  Immediates.cpp
   )
 
 set_property(TARGET AArch64Tests PROPERTY FOLDER "Tests/UnitTests/TargetTests")

diff  --git a/llvm/unittests/Target/AArch64/Immediates.cpp b/llvm/unittests/Target/AArch64/Immediates.cpp
new file mode 100644
index 00000000000000..a4551b083b4407
--- /dev/null
+++ b/llvm/unittests/Target/AArch64/Immediates.cpp
@@ -0,0 +1,106 @@
+#include "AArch64Subtarget.h"
+#include "AArch64TargetMachine.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/TargetSelect.h"
+
+#include "gtest/gtest.h"
+#include <initializer_list>
+#include <memory>
+
+using namespace llvm;
+
+namespace {
+
+struct TestCase {
+  int64_t Imm;
+  bool Result;
+};
+
+const std::initializer_list<TestCase> Tests = {
+    // ScalableImm, Result
+    // No change, easily 'supported'
+    {0, true},
+
+    // addvl increments by whole registers, range [-32,31]
+    // +(16 * vscale), one register's worth
+    {16, true},
+    // -(32 * 16 * vscale)
+    {-512, true},
+    // -(33 * 16 * vscale)
+    {-528, false},
+    // +(31 * 16 * vscale)
+    {496, true},
+    // +(32 * 16 * vscale)
+    {512, false},
+
+    // inc[h|w|d] increments by the number of 16/32/64bit elements in a
+    // register. mult_imm is in the range [1,16]
+    // +(mult_imm * num_elts * vscale)
+    // +(1 * 8 * vscale), 16 bit
+    {8, true},
+    // +(15 * 8 * vscale), 16 bit
+    {120, true},
+    // +(1 * 4 * vscale), 32 bit
+    {4, true},
+    // +(7 * 4 * vscale), 32 bit
+    {28, true},
+    // +(1 * 2 * vscale), 64 bit
+    {2, true},
+    // +(13 * 2 * vscale), 64 bit
+    {26, true},
+    // +(17 * 8 * vscale), 16 bit, out of range.
+    {136, false},
+    // +(19 * 2 * vscale), 64 bit, out of range.
+    {38, false},
+    // +(21 * 4 * vscale), 32 bit, out of range.
+    {84, false},
+
+    // dec[h|w|d] -- Same as above, but negative.
+    // -(mult_imm * num_elts * vscale)
+    // -(1 * 8 * vscale), 16 bit
+    {-8, true},
+    // -(15 * 8 * vscale), 16 bit
+    {-120, true},
+    // -(1 * 4 * vscale), 32 bit
+    {-4, true},
+    // -(7 * 4 * vscale), 32 bit
+    {-28, true},
+    // -(1 * 2 * vscale), 64 bit
+    {-2, true},
+    // -(13 * 2 * vscale), 64 bit
+    {-26, true},
+    // -(17 * 8 * vscale), 16 bit, out of range.
+    {-136, false},
+    // -(19 * 2 * vscale), 64 bit, out of range.
+    {-38, false},
+    // -(21 * 4 * vscale), 32 bit, out of range.
+    {-84, false},
+
+    // Invalid; not divisible by the above powers of 2.
+    {5, false},
+};
+} // namespace
+
+TEST(Immediates, Immediates) {
+  // Register the AArch64 target so it can be looked up by triple below.
+  LLVMInitializeAArch64TargetInfo();
+  LLVMInitializeAArch64Target();
+  LLVMInitializeAArch64TargetMC();
+  std::string Error;
+  auto TT = Triple::normalize("aarch64");
+  const Target *T = TargetRegistry::lookupTarget(TT, Error);
+  ASSERT_NE(T, nullptr) << Error;
+  std::unique_ptr<TargetMachine> TM(T->createTargetMachine(
+      TT, "generic", "+sve2", TargetOptions(), std::nullopt, std::nullopt,
+      CodeGenOptLevel::Default));
+  AArch64Subtarget ST(TM->getTargetTriple(), TM->getTargetCPU(),
+                      TM->getTargetCPU(), TM->getTargetFeatureString(), *TM,
+                      true);
+
+  auto *TLI = ST.getTargetLowering();
+  // Keep going after a mismatch and report which immediate failed.
+  for (const auto &Test : Tests)
+    EXPECT_EQ(TLI->isLegalAddScalableImmediate(Test.Imm), Test.Result)
+        << "Imm: " << Test.Imm;
+}