[llvm] VectorCombine: fix logical error after m_Trunc match (PR #91201)

Mon May 6 10:40:57 PDT 2024

https://github.com/artagnon updated https://github.com/llvm/llvm-project/pull/91201

>From ccbdff0590a0730d7143b930d5ab915107113dfa Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <r at artagnon.com>
Date: Mon, 6 May 2024 13:43:01 +0100
Subject: [PATCH 1/3] VectorCombine: add test for crash #88796

---
 llvm/test/Transforms/VectorCombine/pr88796.ll | 11 +++++++++++
 1 file changed, 11 insertions(+)
 create mode 100644 llvm/test/Transforms/VectorCombine/pr88796.ll

diff --git a/llvm/test/Transforms/VectorCombine/pr88796.ll b/llvm/test/Transforms/VectorCombine/pr88796.ll
new file mode 100644
index 00000000000000..d5cd52e11d39d7
--- /dev/null
+++ b/llvm/test/Transforms/VectorCombine/pr88796.ll
@@ -0,0 +1,11 @@
+; REQUIRES: asserts
+; RUN: not --crash opt -passes=vector-combine -disable-output %s
+
+define i32 @test() {
+entry:
+  %0 = tail call i16 @llvm.vector.reduce.and.nxv8i16(<vscale x 8 x i16> trunc (<vscale x 8 x i32> shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 268435456, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer) to <vscale x 8 x i16>))
+  ret i32 0
+}
+
+declare i16 @llvm.vector.reduce.and.nxv8i16(<vscale x 8 x i16>)
+

>From c950394a37f66e10d1009c1ce3e3cba536c03894 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <r at artagnon.com>
Date: Mon, 6 May 2024 13:46:09 +0100
Subject: [PATCH 2/3] VectorCombine: fix logical error after m_Trunc match

The matcher m_Trunc() matches an Operator with a given Opcode, which
could be either an Instruction or a ConstantExpr.
VectorCombine::foldTruncFromReductions() incorrectly assumes that the
matched pattern is always an Instruction and unconditionally casts it
to CastInst, which asserts when the trunc is a ConstantExpr. Fix this.

Fixes #88796.
---
 llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 3 +--
 llvm/test/Transforms/VectorCombine/pr88796.ll   | 9 +++++++--
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index bbb70134870ab6..50a8209b465168 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -1961,7 +1961,6 @@ bool VectorCombine::foldTruncFromReductions(Instruction &I) {
   if (!match(ReductionSrc, m_OneUse(m_Trunc(m_Value(TruncSrc)))))
     return false;
 
-  auto *Trunc = cast<CastInst>(ReductionSrc);
   auto *TruncSrcTy = cast<VectorType>(TruncSrc->getType());
   auto *ReductionSrcTy = cast<VectorType>(ReductionSrc->getType());
   Type *ResultTy = I.getType();
@@ -1969,7 +1968,7 @@ bool VectorCombine::foldTruncFromReductions(Instruction &I) {
   TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
   InstructionCost OldCost =
       TTI.getCastInstrCost(Instruction::Trunc, ReductionSrcTy, TruncSrcTy,
-                           TTI::CastContextHint::None, CostKind, Trunc) +
+                           TTI::CastContextHint::None, CostKind) +
       TTI.getArithmeticReductionCost(ReductionOpc, ReductionSrcTy, std::nullopt,
                                      CostKind);
   InstructionCost NewCost =
diff --git a/llvm/test/Transforms/VectorCombine/pr88796.ll b/llvm/test/Transforms/VectorCombine/pr88796.ll
index d5cd52e11d39d7..4f26f5dcbb928d 100644
--- a/llvm/test/Transforms/VectorCombine/pr88796.ll
+++ b/llvm/test/Transforms/VectorCombine/pr88796.ll
@@ -1,7 +1,12 @@
-; REQUIRES: asserts
-; RUN: not --crash opt -passes=vector-combine -disable-output %s
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -passes=vector-combine -S %s | FileCheck %s
 
 define i32 @test() {
+; CHECK-LABEL: define i32 @test() {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = tail call i16 @llvm.vector.reduce.and.nxv8i16(<vscale x 8 x i16> trunc (<vscale x 8 x i32> shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 268435456, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer) to <vscale x 8 x i16>))
+; CHECK-NEXT:    ret i32 0
+;
 entry:
   %0 = tail call i16 @llvm.vector.reduce.and.nxv8i16(<vscale x 8 x i16> trunc (<vscale x 8 x i32> shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 268435456, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer) to <vscale x 8 x i16>))
   ret i32 0

>From b5d63a5fdb469568f578fa1f2b2d380e742eb0b1 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <r at artagnon.com>
Date: Mon, 6 May 2024 18:35:30 +0100
Subject: [PATCH 3/3] VectorCombine: fix costs

---
 llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 50a8209b465168..8573a8adf53b3a 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -1966,11 +1966,12 @@ bool VectorCombine::foldTruncFromReductions(Instruction &I) {
   Type *ResultTy = I.getType();
 
   TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
-  InstructionCost OldCost =
-      TTI.getCastInstrCost(Instruction::Trunc, ReductionSrcTy, TruncSrcTy,
-                           TTI::CastContextHint::None, CostKind) +
-      TTI.getArithmeticReductionCost(ReductionOpc, ReductionSrcTy, std::nullopt,
-                                     CostKind);
+  InstructionCost OldCost = TTI.getArithmeticReductionCost(
+      ReductionOpc, ReductionSrcTy, std::nullopt, CostKind);
+  if (auto *Trunc = dyn_cast<CastInst>(ReductionSrc))
+    OldCost +=
+        TTI.getCastInstrCost(Instruction::Trunc, ReductionSrcTy, TruncSrcTy,
+                             TTI::CastContextHint::None, CostKind, Trunc);
   InstructionCost NewCost =
       TTI.getArithmeticReductionCost(ReductionOpc, TruncSrcTy, std::nullopt,
                                      CostKind) +