[llvm] [LV] Fix stale entry in MinBWs in tryToWiden (PR #136858)

Wed Apr 23 05:50:40 PDT 2025

https://github.com/artagnon created https://github.com/llvm/llvm-project/pull/136858

tryToWiden may replace a live-in Instruction operand with a Constant obtained from SCEV, but it does not erase that Instruction from the MinBWs map. The stale entry later leads to a crash in VPlanTransforms::truncateToMinimalBitwidths. Fix this by erasing the stale entry at the point where the Instruction is replaced.

Fixes #125278.

>From f63389f08549a7acf520f76a553cb914bb4698c2 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Wed, 23 Apr 2025 13:38:52 +0100
Subject: [PATCH 1/2] [LV] Pre-commit test for #125278

---
 .../test/Transforms/LoopVectorize/pr125278.ll | 21 +++++++++++++++++++
 1 file changed, 21 insertions(+)
 create mode 100644 llvm/test/Transforms/LoopVectorize/pr125278.ll

diff --git a/llvm/test/Transforms/LoopVectorize/pr125278.ll b/llvm/test/Transforms/LoopVectorize/pr125278.ll
new file mode 100644
index 0000000000000..61c7a924b441f
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/pr125278.ll
@@ -0,0 +1,21 @@
+; REQUIRES: asserts
+; RUN: not --crash opt -passes=loop-vectorize -disable-output %s
+
+define void @pr125278(ptr %dst, i64 %n) {
+entry:
+  %true.ext = zext i1 true to i32
+  br label %cond
+
+cond:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %cond ], [ %iv.next, %loop ]
+  %false.ext = zext i1 false to i32
+  %xor = xor i32 %false.ext, %true.ext
+  %xor.trunc = trunc i32 %xor to i8
+  store i8 %xor.trunc, ptr %dst, align 1
+  %iv.next = add i64 %iv, 1
+  %cmp = icmp ult i64 %iv.next, %n
+  br i1 %cmp, label %loop, label %cond
+}

>From 55fa72f54eafec16044371da38dfe2c6718834af Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Wed, 23 Apr 2025 13:42:00 +0100
Subject: [PATCH 2/2] [LV] Fix stale entry in MinBWs in tryToWiden

tryToWiden attempts to replace an Instruction with a Constant from SCEV,
but forgets to erase the Instruction from the MinBWs map, leading to a
crash in VPlanTransforms::truncateToMinimalBitwidths. Fix this by
erasing the stale entry.

Fixes #125278.
---
 .../Transforms/Vectorize/LoopVectorize.cpp    | 13 ++++++-----
 .../test/Transforms/LoopVectorize/pr125278.ll | 23 +++++++++++++++++--
 2 files changed, 28 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 32c3435ccb38d..1051b4c7bb17a 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1047,9 +1047,7 @@ class LoopVectorizationCostModel {
   /// \returns The smallest bitwidth each instruction can be represented with.
   /// The vector equivalents of these instructions should be truncated to this
   /// type.
-  const MapVector<Instruction *, uint64_t> &getMinimalBitwidths() const {
-    return MinBWs;
-  }
+  MapVector<Instruction *, uint64_t> &getMinimalBitwidths() { return MinBWs; }
 
   /// \returns True if it is more profitable to scalarize instruction \p I for
   /// vectorization factor \p VF.
@@ -8868,12 +8866,15 @@ VPWidenRecipe *VPRecipeBuilder::tryToWiden(Instruction *I,
       auto GetConstantViaSCEV = [this, &SE](VPValue *Op) {
         if (!Op->isLiveIn())
           return Op;
-        Value *V = Op->getUnderlyingValue();
-        if (isa<Constant>(V) || !SE.isSCEVable(V->getType()))
+        Instruction *I = dyn_cast<Instruction>(Op->getLiveInIRValue());
+        if (!I || !SE.isSCEVable(I->getType()))
           return Op;
-        auto *C = dyn_cast<SCEVConstant>(SE.getSCEV(V));
+        auto *C = dyn_cast<SCEVConstant>(SE.getSCEV(I));
         if (!C)
           return Op;
+        // If we're going to replace an instruction with a constant, erase any
+        // stale entry in MinBWs.
+        CM.getMinimalBitwidths().erase(I);
         return Plan.getOrAddLiveIn(C->getValue());
       };
       // For Mul, the legacy cost model checks both operands.
diff --git a/llvm/test/Transforms/LoopVectorize/pr125278.ll b/llvm/test/Transforms/LoopVectorize/pr125278.ll
index 61c7a924b441f..ec899a3e1f5c2 100644
--- a/llvm/test/Transforms/LoopVectorize/pr125278.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr125278.ll
@@ -1,7 +1,26 @@
-; REQUIRES: asserts
-; RUN: not --crash opt -passes=loop-vectorize -disable-output %s
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes=loop-vectorize -S %s | FileCheck %s
 
 define void @pr125278(ptr %dst, i64 %n) {
+; CHECK-LABEL: define void @pr125278(
+; CHECK-SAME: ptr [[DST:%.*]], i64 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TRUE_EXT:%.*]] = zext i1 true to i32
+; CHECK-NEXT:    br label %[[COND:.*]]
+; CHECK:       [[COND_LOOPEXIT:.*]]:
+; CHECK-NEXT:    br label %[[COND]]
+; CHECK:       [[COND]]:
+; CHECK-NEXT:    br label %[[LOOP:.*]]
+; CHECK:       [[LOOP]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, %[[COND]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[FALSE_EXT:%.*]] = zext i1 false to i32
+; CHECK-NEXT:    [[XOR:%.*]] = xor i32 [[FALSE_EXT]], [[TRUE_EXT]]
+; CHECK-NEXT:    [[XOR_TRUNC:%.*]] = trunc i32 [[XOR]] to i8
+; CHECK-NEXT:    store i8 [[XOR_TRUNC]], ptr [[DST]], align 1
+; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IV_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[LOOP]], label %[[COND_LOOPEXIT]]
+;
 entry:
   %true.ext = zext i1 true to i32
   br label %cond