[llvm-branch-commits] [llvm] c915974 - [LV] Reland "Update logic for calculating register usage due to invariants"

Tom Stellard via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Fri Mar 3 23:56:52 PST 2023


Author: sgokhale
Date: 2023-03-03T23:56:29-08:00
New Revision: c915974bd3b209ca11ce94856088d72517d3866f

URL: https://github.com/llvm/llvm-project/commit/c915974bd3b209ca11ce94856088d72517d3866f
DIFF: https://github.com/llvm/llvm-project/commit/c915974bd3b209ca11ce94856088d72517d3866f.diff

LOG: [LV] Reland "Update logic for calculating register usage due to invariants"

Previously, while calculating register usage due to invariants, it was assumed that invariant would always be part of widening
instructions. This resulted in calculating vector register types for vectors which cant be legalized(check the newly added test for more details).

An invariant might not always need a vector register. For e.g., invariant might just be used for iteration check.

This patch checks if the invariant is part of any widening instruction and considers register usage accordingly. Fixes issue 60493

Differential Revision: https://reviews.llvm.org/D143422

(cherry picked from commit 4f9a5447c633083e99fd5cb448116fa8e5b1f976)

Added: 
    llvm/test/Transforms/LoopVectorize/AArch64/reg-usage.ll

Modified: 
    llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
    llvm/test/Transforms/LoopVectorize/PowerPC/reg-usage.ll
    llvm/test/Transforms/LoopVectorize/RISCV/reg-usage.ll
    llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
    llvm/test/Transforms/LoopVectorize/X86/reg-usage-debug.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index a28099d8ba7d5..148f2c545b411 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5953,7 +5953,7 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<ElementCount> VFs) {
   // Saves the list of values that are used in the loop but are defined outside
   // the loop (not including non-instruction values such as arguments and
   // constants).
-  SmallPtrSet<Value *, 8> LoopInvariants;
+  SmallPtrSet<Instruction *, 8> LoopInvariants;
 
   for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO())) {
     for (Instruction &I : BB->instructionsWithoutDebug()) {
@@ -6079,11 +6079,16 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<ElementCount> VFs) {
     for (auto *Inst : LoopInvariants) {
       // FIXME: The target might use more than one register for the type
       // even in the scalar case.
-      unsigned Usage =
-          VFs[i].isScalar() ? 1 : GetRegUsage(Inst->getType(), VFs[i]);
+      bool IsScalar = all_of(Inst->users(), [&](User *U) {
+        auto *I = cast<Instruction>(U);
+        return TheLoop != LI->getLoopFor(I->getParent()) ||
+               isScalarAfterVectorization(I, VFs[i]);
+      });
+
+      ElementCount VF = IsScalar ? ElementCount::getFixed(1) : VFs[i];
       unsigned ClassID =
-          TTI.getRegisterClassForType(VFs[i].isVector(), Inst->getType());
-      Invariant[ClassID] += Usage;
+          TTI.getRegisterClassForType(VF.isVector(), Inst->getType());
+      Invariant[ClassID] += GetRegUsage(Inst->getType(), VF);
     }
 
     LLVM_DEBUG({

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/reg-usage.ll b/llvm/test/Transforms/LoopVectorize/AArch64/reg-usage.ll
new file mode 100644
index 0000000000000..d8bc2cca2dc05
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/reg-usage.ll
@@ -0,0 +1,41 @@
+; REQUIRES: asserts
+
+; RUN: opt -mtriple arm64-linux -passes=loop-vectorize -mattr=+sve -debug-only=loop-vectorize -disable-output <%s 2>&1 | FileCheck %s
+
+; Invariant register usage calculation should take into account if the
+; invariant would be used in widened instructions. Only in such cases, a vector
+; register would be required for holding the invariant. For all other cases
+; such as below(where usage of %0 in loop doesnt require vector register), a
+; general purpose register suffices.
+; Check that below test doesn't crash while calculating register usage for
+; invariant %0
+
+ at string = internal unnamed_addr constant [5 x i8] c"abcd\00", align 1
+define void @get_invariant_reg_usage(ptr %z) {
+; CHECK: LV: Checking a loop in 'get_invariant_reg_usage'
+; CHECK: LV(REG): VF = vscale x 16
+; CHECK: LV(REG): Found max usage: 1 item
+; CHECK: LV(REG): RegisterClass: Generic::ScalarRC, 3 registers
+; CHECK: LV(REG): Found invariant usage: 2 item
+; CHECK: LV(REG): RegisterClass: Generic::VectorRC, 8 registers
+; CHECK: LV(REG): RegisterClass: Generic::ScalarRC, 2 registers
+
+L.entry:
+  %0 = load i128, ptr %z, align 16
+  %1 = icmp slt i128 %0, 1
+  %a = getelementptr i8, ptr %z, i64 1
+  br i1 %1, label %return, label %loopbody
+
+loopbody:                  ;preds = %L.entry, %loopbody
+  %b = phi ptr [ %2, %loopbody ], [ @string, %L.entry ]
+  %len_input = phi i128 [ %len, %loopbody ], [ %0, %L.entry ]
+  %len = add nsw i128 %len_input, -1
+  %2 = getelementptr i8, ptr %b, i64 1
+  %3 = load i8, ptr %b, align 1
+  store i8 %3, ptr %a, align 4
+  %.not = icmp eq i128 %len, 0
+  br i1 %.not, label %return, label %loopbody
+
+return:                    ;preds = %loopexit, %L.entry
+  ret void
+}

diff  --git a/llvm/test/Transforms/LoopVectorize/PowerPC/reg-usage.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/reg-usage.ll
index b63f2cfa8758f..f9d512e74e1b4 100644
--- a/llvm/test/Transforms/LoopVectorize/PowerPC/reg-usage.ll
+++ b/llvm/test/Transforms/LoopVectorize/PowerPC/reg-usage.ll
@@ -175,7 +175,7 @@ define void @double_(ptr nocapture %A, i32 %n) nounwind uwtable ssp {
 ;CHECK-PWR8-NEXT: LV(REG): RegisterClass: PPC::GPRRC, 2 registers
 ;CHECK-PWR8-NEXT: LV(REG): RegisterClass: PPC::VSXRC, 5 registers
 ;CHECK-PWR8: LV(REG): Found invariant usage: 1 item
-;CHECK-PWR8-NEXT: LV(REG): RegisterClass: PPC::VSXRC, 1 registers
+;CHECK-PWR8-NEXT: LV(REG): RegisterClass: PPC::GPRRC, 1 registers
 
 ;CHECK-PWR9: LV(REG): VF = 1
 ;CHECK-PWR9: LV(REG): Found max usage: 2 item

diff  --git a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage.ll b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage.ll
index fddc21ebff6d5..5e3de92bca885 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage.ll
@@ -31,22 +31,22 @@ define void @add(ptr noalias nocapture readonly %src1, ptr noalias nocapture rea
 ; CHECK-LMUL1-NEXT:  LV(REG): RegisterClass: RISCV::GPRRC, 2 registers
 ; CHECK-LMUL1-NEXT:  LV(REG): RegisterClass: RISCV::VRRC, 2 registers
 ; CHECK-LMUL1-NEXT:  LV(REG): Found invariant usage: 1 item
-; CHECK-LMUL1-NEXT:  LV(REG): RegisterClass: RISCV::VRRC, 2 registers
+; CHECK-LMUL1-NEXT:  LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
 ; CHECK-LMUL2:       LV(REG): Found max usage: 2 item
 ; CHECK-LMUL2-NEXT:  LV(REG): RegisterClass: RISCV::GPRRC, 2 registers
 ; CHECK-LMUL2-NEXT:  LV(REG): RegisterClass: RISCV::VRRC, 4 registers
 ; CHECK-LMUL2-NEXT:  LV(REG): Found invariant usage: 1 item
-; CHECK-LMUL2-NEXT:  LV(REG): RegisterClass: RISCV::VRRC, 4 registers
+; CHECK-LMUL2-NEXT:  LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
 ; CHECK-LMUL4:       LV(REG): Found max usage: 2 item
 ; CHECK-LMUL4-NEXT:  LV(REG): RegisterClass: RISCV::GPRRC, 2 registers
 ; CHECK-LMUL4-NEXT:  LV(REG): RegisterClass: RISCV::VRRC, 8 registers
 ; CHECK-LMUL4-NEXT:  LV(REG): Found invariant usage: 1 item
-; CHECK-LMUL4-NEXT:  LV(REG): RegisterClass: RISCV::VRRC, 8 registers
+; CHECK-LMUL4-NEXT:  LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
 ; CHECK-LMUL8:       LV(REG): Found max usage: 2 item
 ; CHECK-LMUL8-NEXT:  LV(REG): RegisterClass: RISCV::GPRRC, 2 registers
 ; CHECK-LMUL8-NEXT:  LV(REG): RegisterClass: RISCV::VRRC, 16 registers
 ; CHECK-LMUL8-NEXT:  LV(REG): Found invariant usage: 1 item
-; CHECK-LMUL8-NEXT:  LV(REG): RegisterClass: RISCV::VRRC, 16 registers
+; CHECK-LMUL8-NEXT:  LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
 
 entry:
   %conv = zext i32 %size to i64

diff  --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
index 1c57e5817861e..372e119099e79 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
@@ -102,7 +102,7 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
 ; CHECK-NEXT:  LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
 ; CHECK-NEXT:  LV(REG): RegisterClass: RISCV::VRRC, 2 registers
 ; CHECK-NEXT:  LV(REG): Found invariant usage: 1 item
-; CHECK-NEXT:  LV(REG): RegisterClass: RISCV::VRRC, 4 registers
+; CHECK-NEXT:  LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
 ; CHECK-NEXT:  LV: The target has 31 registers of RISCV::GPRRC register class
 ; CHECK-NEXT:  LV: The target has 32 registers of RISCV::VRRC register class
 ; CHECK-NEXT:  LV: Loop cost is 23
@@ -234,7 +234,7 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
 ; CHECK-NEXT:  LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
 ; CHECK-NEXT:  LV(REG): RegisterClass: RISCV::VRRC, 2 registers
 ; CHECK-NEXT:  LV(REG): Found invariant usage: 1 item
-; CHECK-NEXT:  LV(REG): RegisterClass: RISCV::VRRC, 4 registers
+; CHECK-NEXT:  LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
 ; CHECK-NEXT:  LV: The target has 31 registers of RISCV::GPRRC register class
 ; CHECK-NEXT:  LV: The target has 32 registers of RISCV::VRRC register class
 ; CHECK-NEXT:  LV: Loop cost is 23

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/reg-usage-debug.ll b/llvm/test/Transforms/LoopVectorize/X86/reg-usage-debug.ll
index 7041a6731919b..164188db6ccf9 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/reg-usage-debug.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/reg-usage-debug.ll
@@ -26,7 +26,7 @@ target triple = "x86_64-unknown-linux-gnu"
 ; CHECK-NEXT: LV(REG): RegisterClass: Generic::ScalarRC, 2 registers
 ; CHECK-NEXT: LV(REG): RegisterClass: Generic::VectorRC, 2 registers
 ; CHECK-NEXT: LV(REG): Found invariant usage: 1 item
-; CHECK-NEXT: LV(REG): RegisterClass: Generic::VectorRC, 2 registers
+; CHECK-NEXT: LV(REG): RegisterClass: Generic::ScalarRC, 1 registers
 
 define i32 @test_g(ptr nocapture readonly %a, i32 %n) local_unnamed_addr !dbg !6 {
 entry:
@@ -68,7 +68,7 @@ for.end:                                          ; preds = %for.end.loopexit, %
 ; CHECK-NEXT: LV(REG): RegisterClass: Generic::ScalarRC, 2 registers
 ; CHECK-NEXT: LV(REG): RegisterClass: Generic::VectorRC, 2 registers
 ; CHECK-NEXT: LV(REG): Found invariant usage: 1 item
-; CHECK-NEXT: LV(REG): RegisterClass: Generic::VectorRC, 2 registers
+; CHECK-NEXT: LV(REG): RegisterClass: Generic::ScalarRC, 1 registers
 
 define i32 @test(ptr nocapture readonly %a, i32 %n) local_unnamed_addr {
 entry:


        


More information about the llvm-branch-commits mailing list