[llvm] [LV] Fix MinBWs in WidenIntrinsic case (PR #137005)

Fri Apr 25 02:16:37 PDT 2025

https://github.com/artagnon updated https://github.com/llvm/llvm-project/pull/137005

>From b9cffa35f6e1cde2f426498eb6dcb7a38ee6a031 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Wed, 23 Apr 2025 16:23:37 +0100
Subject: [PATCH 1/2] [LV] Add test for #87407

This bug is especially difficult to fix: simplifyRecipes() simplifies
the zext of 0 into the constant 0, removing an instruction and leaving
a stale entry in the MinBWs map. A crash is subsequently observed in
VPlanTransforms::truncateToMinimalBitwidths(), due to a mismatch between
the number of recipes processed and the number of entries in MinBWs. One
possible way forward is to have simplifyRecipes() erase entries from
MinBWs when it replaces Instructions with Constants.

Check in the test to ease investigation, while we plan a way to fix the
bug.
---
 llvm/test/Transforms/LoopVectorize/pr87407.ll | 23 +++++++++++++++++++
 1 file changed, 23 insertions(+)
 create mode 100644 llvm/test/Transforms/LoopVectorize/pr87407.ll

diff --git a/llvm/test/Transforms/LoopVectorize/pr87407.ll b/llvm/test/Transforms/LoopVectorize/pr87407.ll
new file mode 100644
index 0000000000000..00fabdbd0881d
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/pr87407.ll
@@ -0,0 +1,23 @@
+; REQUIRES: asserts
+; RUN: not --crash opt -passes=loop-vectorize -disable-output %s
+
+define i8 @pr87407(i8 %x, i64 %y, i64 %n) {
+entry:
+  %zext.x = zext i8 %x to i64
+  br label %loop
+
+loop:
+  %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
+  %max = tail call i64 @llvm.umax.i64(i64 %zext.x, i64 %y)
+  %cmp.max.0 = icmp ne i64 %max, 0
+  %zext.cmp = zext i1 %cmp.max.0 to i64
+  %trunc = trunc i64 %zext.cmp to i32
+  %shl = shl i32 %trunc, 8
+  %res = trunc i32 %shl to i8
+  %iv.next = add i64 %iv, 1
+  %exit.cond = icmp ne i64 %iv.next, %n
+  br i1 %exit.cond, label %loop, label %exit
+
+exit:
+  ret i8 %res
+}

>From 285659808e3e677f5e70c4b387e4b856d2e867a8 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Wed, 23 Apr 2025 18:29:17 +0100
Subject: [PATCH 2/2] [LV] Fix MinBWs in WidenIntrinsic case

---
 llvm/lib/Analysis/VectorUtils.cpp             |  4 +++-
 .../Transforms/Vectorize/VPlanTransforms.cpp  |  3 ++-
 llvm/test/Transforms/LoopVectorize/pr87407.ll | 24 +++++++++++++++++--
 3 files changed, 27 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp
index 6448c372f5d5d..6521a937d3b59 100644
--- a/llvm/lib/Analysis/VectorUtils.cpp
+++ b/llvm/lib/Analysis/VectorUtils.cpp
@@ -882,7 +882,9 @@ llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
 
       // If any of M's operands demand more bits than MinBW then M cannot be
       // performed safely in MinBW.
-      if (any_of(MI->operands(), [&DB, MinBW](Use &U) {
+      auto *Call = dyn_cast<CallBase>(MI);
+      auto Ops = Call ? Call->args() : MI->operands();
+      if (any_of(Ops, [&DB, MinBW](Use &U) {
             auto *CI = dyn_cast<ConstantInt>(U);
             // For constants shift amounts, check if the shift would result in
             // poison.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index f2dc68b2ea8b6..876d554dd00d2 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1615,7 +1615,8 @@ void VPlanTransforms::truncateToMinimalBitwidths(
            vp_depth_first_deep(Plan.getVectorLoopRegion()))) {
     for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
       if (!isa<VPWidenRecipe, VPWidenCastRecipe, VPReplicateRecipe,
-               VPWidenSelectRecipe, VPWidenLoadRecipe>(&R))
+               VPWidenSelectRecipe, VPWidenLoadRecipe, VPWidenIntrinsicRecipe>(
+              &R))
         continue;
 
       VPValue *ResultVPV = R.getVPSingleValue();
diff --git a/llvm/test/Transforms/LoopVectorize/pr87407.ll b/llvm/test/Transforms/LoopVectorize/pr87407.ll
index 00fabdbd0881d..592ea04b7b4fd 100644
--- a/llvm/test/Transforms/LoopVectorize/pr87407.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr87407.ll
@@ -1,7 +1,27 @@
-; REQUIRES: asserts
-; RUN: not --crash opt -passes=loop-vectorize -disable-output %s
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes=loop-vectorize -S %s | FileCheck %s
 
 define i8 @pr87407(i8 %x, i64 %y, i64 %n) {
+; CHECK-LABEL: define i8 @pr87407(
+; CHECK-SAME: i8 [[X:%.*]], i64 [[Y:%.*]], i64 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    [[ZEXT_X:%.*]] = zext i8 [[X]] to i64
+; CHECK-NEXT:    br label %[[LOOP:.*]]
+; CHECK:       [[LOOP]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT:    [[MAX:%.*]] = tail call i64 @llvm.umax.i64(i64 [[ZEXT_X]], i64 [[Y]])
+; CHECK-NEXT:    [[CMP_MAX_0:%.*]] = icmp ne i64 [[MAX]], 0
+; CHECK-NEXT:    [[ZEXT_CMP:%.*]] = zext i1 [[CMP_MAX_0]] to i64
+; CHECK-NEXT:    [[TRUNC:%.*]] = trunc i64 [[ZEXT_CMP]] to i32
+; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[TRUNC]], 8
+; CHECK-NEXT:    [[RES:%.*]] = trunc i32 [[SHL]] to i8
+; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT:    [[EXIT_COND:%.*]] = icmp ne i64 [[IV_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[EXIT_COND]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[RES_LCSSA:%.*]] = phi i8 [ [[RES]], %[[LOOP]] ]
+; CHECK-NEXT:    ret i8 [[RES_LCSSA]]
+;
 entry:
   %zext.x = zext i8 %x to i64
   br label %loop