[llvm] [LV] Fix MinBWs in WidenIntrinsic case (PR #137005)

Tue Apr 29 01:46:47 PDT 2025

https://github.com/artagnon updated https://github.com/llvm/llvm-project/pull/137005

>From ed6a2635d469b31a166a7d04b05779323432d6f9 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Wed, 23 Apr 2025 16:23:37 +0100
Subject: [PATCH 1/5] [LV] Add test for #87407

This bug is especially difficult to fix: simplifyRecipes() simplifies
the zext of 0 into the constant 0, which removes an instruction and
leaves a stale entry in the MinBWs map. A crash is subsequently observed
in VPlanTransforms::truncateToMinimalBitwidths() due to the mismatch
between the number of recipes processed and the number of entries in
MinBWs. One possible way forward is to get simplifyRecipes() to erase
entries from MinBWs when replacing Instructions with Constants.

Check in the test to ease investigation, while we plan a way to fix the
bug.
---
 llvm/test/Transforms/LoopVectorize/pr87407.ll | 23 +++++++++++++++++++
 1 file changed, 23 insertions(+)
 create mode 100644 llvm/test/Transforms/LoopVectorize/pr87407.ll

diff --git a/llvm/test/Transforms/LoopVectorize/pr87407.ll b/llvm/test/Transforms/LoopVectorize/pr87407.ll
new file mode 100644
index 0000000000000..26afbb7af5422
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/pr87407.ll
@@ -0,0 +1,23 @@
+; REQUIRES: asserts
+; RUN: not --crash opt -passes=loop-vectorize -force-vector-width=4 -disable-output %s
+
+define i8 @pr87407(i8 %x, i64 %y, i64 %n) {
+entry:
+  %zext.x = zext i8 %x to i64
+  br label %loop
+
+loop:
+  %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
+  %max = tail call i64 @llvm.umax.i64(i64 %zext.x, i64 %y)
+  %cmp.max.0 = icmp ne i64 %max, 0
+  %zext.cmp = zext i1 %cmp.max.0 to i64
+  %trunc = trunc i64 %zext.cmp to i32
+  %shl = shl i32 %trunc, 8
+  %res = trunc i32 %shl to i8
+  %iv.next = add i64 %iv, 1
+  %exit.cond = icmp ne i64 %iv.next, %n
+  br i1 %exit.cond, label %loop, label %exit
+
+exit:
+  ret i8 %res
+}

>From 8ea8b88f484916da6c4785f1ff84f630a1494897 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Wed, 23 Apr 2025 18:29:17 +0100
Subject: [PATCH 2/5] [LV] Fix the bug!

---
 llvm/lib/Analysis/VectorUtils.cpp             |  4 +++-
 .../Transforms/Vectorize/VPlanTransforms.cpp  |  3 ++-
 llvm/test/Transforms/LoopVectorize/pr87407.ll | 24 +++++++++++++++++--
 3 files changed, 27 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp
index 6448c372f5d5d..6521a937d3b59 100644
--- a/llvm/lib/Analysis/VectorUtils.cpp
+++ b/llvm/lib/Analysis/VectorUtils.cpp
@@ -882,7 +882,9 @@ llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
 
       // If any of M's operands demand more bits than MinBW then M cannot be
       // performed safely in MinBW.
-      if (any_of(MI->operands(), [&DB, MinBW](Use &U) {
+      auto *Call = dyn_cast<CallBase>(MI);
+      auto Ops = Call ? Call->args() : MI->operands();
+      if (any_of(Ops, [&DB, MinBW](Use &U) {
             auto *CI = dyn_cast<ConstantInt>(U);
             // For constants shift amounts, check if the shift would result in
             // poison.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index f2dc68b2ea8b6..876d554dd00d2 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1615,7 +1615,8 @@ void VPlanTransforms::truncateToMinimalBitwidths(
            vp_depth_first_deep(Plan.getVectorLoopRegion()))) {
     for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
       if (!isa<VPWidenRecipe, VPWidenCastRecipe, VPReplicateRecipe,
-               VPWidenSelectRecipe, VPWidenLoadRecipe>(&R))
+               VPWidenSelectRecipe, VPWidenLoadRecipe, VPWidenIntrinsicRecipe>(
+              &R))
         continue;
 
       VPValue *ResultVPV = R.getVPSingleValue();
diff --git a/llvm/test/Transforms/LoopVectorize/pr87407.ll b/llvm/test/Transforms/LoopVectorize/pr87407.ll
index 26afbb7af5422..2336149f2f4fb 100644
--- a/llvm/test/Transforms/LoopVectorize/pr87407.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr87407.ll
@@ -1,7 +1,27 @@
-; REQUIRES: asserts
-; RUN: not --crash opt -passes=loop-vectorize -force-vector-width=4 -disable-output %s
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes=loop-vectorize -force-vector-width=4 -S %s | FileCheck %s
 
 define i8 @pr87407(i8 %x, i64 %y, i64 %n) {
+; CHECK-LABEL: define i8 @pr87407(
+; CHECK-SAME: i8 [[X:%.*]], i64 [[Y:%.*]], i64 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    [[ZEXT_X:%.*]] = zext i8 [[X]] to i64
+; CHECK-NEXT:    br label %[[LOOP:.*]]
+; CHECK:       [[LOOP]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT:    [[MAX:%.*]] = tail call i64 @llvm.umax.i64(i64 [[ZEXT_X]], i64 [[Y]])
+; CHECK-NEXT:    [[CMP_MAX_0:%.*]] = icmp ne i64 [[MAX]], 0
+; CHECK-NEXT:    [[ZEXT_CMP:%.*]] = zext i1 [[CMP_MAX_0]] to i64
+; CHECK-NEXT:    [[TRUNC:%.*]] = trunc i64 [[ZEXT_CMP]] to i32
+; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[TRUNC]], 8
+; CHECK-NEXT:    [[RES:%.*]] = trunc i32 [[SHL]] to i8
+; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT:    [[EXIT_COND:%.*]] = icmp ne i64 [[IV_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[EXIT_COND]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[RES_LCSSA:%.*]] = phi i8 [ [[RES]], %[[LOOP]] ]
+; CHECK-NEXT:    ret i8 [[RES_LCSSA]]
+;
 entry:
   %zext.x = zext i8 %x to i64
   br label %loop

>From 98388a4fab69ddf22ad310e87ad6e8f5f7150037 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Mon, 28 Apr 2025 14:34:08 +0100
Subject: [PATCH 3/5] [LV] Address review: really fix the bug

---
 llvm/lib/Analysis/VectorUtils.cpp             |  5 +++
 .../Transforms/Vectorize/VPlanTransforms.cpp  |  2 +-
 llvm/test/Transforms/LoopVectorize/pr87407.ll | 35 ++++++++++++++++---
 3 files changed, 37 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp
index 6521a937d3b59..3e4d957dcc5bb 100644
--- a/llvm/lib/Analysis/VectorUtils.cpp
+++ b/llvm/lib/Analysis/VectorUtils.cpp
@@ -827,6 +827,11 @@ llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
     if (isa<PHINode>(I))
       continue;
 
+    // Do not widen the operands of a call, as doing that would cause a
+    // signature mismatch.
+    if (isa<CallBase>(I))
+      continue;
+
     if (DBits[Leader] == ~0ULL)
       // All bits demanded, no point continuing.
       continue;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 876d554dd00d2..942f503ad87dc 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1689,7 +1689,7 @@ void VPlanTransforms::truncateToMinimalBitwidths(
       }
 
       assert(!isa<VPWidenStoreRecipe>(&R) && "stores cannot be narrowed");
-      if (isa<VPWidenLoadRecipe>(&R))
+      if (isa<VPWidenLoadRecipe, VPWidenIntrinsicRecipe>(&R))
         continue;
 
       // Shrink operands by introducing truncates as needed.
diff --git a/llvm/test/Transforms/LoopVectorize/pr87407.ll b/llvm/test/Transforms/LoopVectorize/pr87407.ll
index 2336149f2f4fb..8ee1dfbe2925a 100644
--- a/llvm/test/Transforms/LoopVectorize/pr87407.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr87407.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
 ; RUN: opt -passes=loop-vectorize -force-vector-width=4 -S %s | FileCheck %s
 
 define i8 @pr87407(i8 %x, i64 %y, i64 %n) {
@@ -6,9 +6,36 @@ define i8 @pr87407(i8 %x, i64 %y, i64 %n) {
 ; CHECK-SAME: i8 [[X:%.*]], i64 [[Y:%.*]], i64 [[N:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*]]:
 ; CHECK-NEXT:    [[ZEXT_X:%.*]] = zext i8 [[X]] to i64
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
+; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[Y]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i64> poison, i64 [[ZEXT_X]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT1]], <4 x i64> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP0:%.*]] = call <4 x i64> @llvm.umax.v4i64(<4 x i64> [[BROADCAST_SPLAT2]], <4 x i64> [[BROADCAST_SPLAT]])
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc <4 x i64> [[TMP0]] to <4 x i1>
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne <4 x i1> [[TMP1]], zeroinitializer
+; CHECK-NEXT:    [[TMP3:%.*]] = zext <4 x i1> [[TMP2]] to <4 x i32>
+; CHECK-NEXT:    [[TMP4:%.*]] = shl <4 x i32> [[TMP3]], splat (i32 8)
+; CHECK-NEXT:    [[TMP5:%.*]] = trunc <4 x i32> [[TMP4]] to <4 x i8>
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x i8> [[TMP5]], i32 3
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK:       [[SCALAR_PH]]:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
 ; CHECK-NEXT:    br label %[[LOOP:.*]]
 ; CHECK:       [[LOOP]]:
-; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
 ; CHECK-NEXT:    [[MAX:%.*]] = tail call i64 @llvm.umax.i64(i64 [[ZEXT_X]], i64 [[Y]])
 ; CHECK-NEXT:    [[CMP_MAX_0:%.*]] = icmp ne i64 [[MAX]], 0
 ; CHECK-NEXT:    [[ZEXT_CMP:%.*]] = zext i1 [[CMP_MAX_0]] to i64
@@ -17,9 +44,9 @@ define i8 @pr87407(i8 %x, i64 %y, i64 %n) {
 ; CHECK-NEXT:    [[RES:%.*]] = trunc i32 [[SHL]] to i8
 ; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
 ; CHECK-NEXT:    [[EXIT_COND:%.*]] = icmp ne i64 [[IV_NEXT]], [[N]]
-; CHECK-NEXT:    br i1 [[EXIT_COND]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK-NEXT:    br i1 [[EXIT_COND]], label %[[LOOP]], label %[[EXIT]], !llvm.loop [[LOOP3:![0-9]+]]
 ; CHECK:       [[EXIT]]:
-; CHECK-NEXT:    [[RES_LCSSA:%.*]] = phi i8 [ [[RES]], %[[LOOP]] ]
+; CHECK-NEXT:    [[RES_LCSSA:%.*]] = phi i8 [ [[RES]], %[[LOOP]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT:    ret i8 [[RES_LCSSA]]
 ;
 entry:

>From 713860244de4e0ab169f9cfb5f1cc77bae781cc3 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Mon, 28 Apr 2025 19:46:31 +0100
Subject: [PATCH 4/5] [VectorUtils] Refine comment

---
 llvm/lib/Analysis/VectorUtils.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp
index 3e4d957dcc5bb..4d394ea4d99c0 100644
--- a/llvm/lib/Analysis/VectorUtils.cpp
+++ b/llvm/lib/Analysis/VectorUtils.cpp
@@ -827,7 +827,7 @@ llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
     if (isa<PHINode>(I))
       continue;
 
-    // Do not widen the operands of a call, as doing that would cause a
+    // Don't modify the types of operands of a call, as doing that would cause a
     // signature mismatch.
     if (isa<CallBase>(I))
       continue;

>From 0c5c4bc3f5a4013052ca1bac33d51a2dc2d76dac Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Tue, 29 Apr 2025 09:46:14 +0100
Subject: [PATCH 5/5] [LV] Rename test file

---
 .../LoopVectorize/{pr87407.ll => pr87407-trunc-with-intrinsic.ll} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename llvm/test/Transforms/LoopVectorize/{pr87407.ll => pr87407-trunc-with-intrinsic.ll} (100%)

diff --git a/llvm/test/Transforms/LoopVectorize/pr87407.ll b/llvm/test/Transforms/LoopVectorize/pr87407-trunc-with-intrinsic.ll
similarity index 100%
rename from llvm/test/Transforms/LoopVectorize/pr87407.ll
rename to llvm/test/Transforms/LoopVectorize/pr87407-trunc-with-intrinsic.ll