[llvm] [SLP] Fallback to sext when widening vectorized operands (PR #106726)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 30 06:19:42 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Yingwei Zheng (dtcxzyw)
<details>
<summary>Changes</summary>
See the following code:
https://github.com/llvm/llvm-project/blob/8586d0330e36b22496f9ba5ed116bc1aac5a1f28/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp#L13446-L13473
When widening a vectorized operand (i.e., `BWSz > SrcBWSz`), we should use an ext inst if both `SrcIt` and `It` are invalid.
https://github.com/llvm/llvm-project/blob/8586d0330e36b22496f9ba5ed116bc1aac5a1f28/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp#L16181-L16192
IIRC we can fall back to sext by assuming that some scalar results cannot be legally represented as zero-extended.
---
Full diff: https://github.com/llvm/llvm-project/pull/106726.diff
2 Files Affected:
- (modified) llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp (+7-2)
- (added) llvm/test/Transforms/SLPVectorizer/X86/pr100667.ll (+48)
``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index edb2567fa057b3..22a778507981ac 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -9796,7 +9796,8 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
unsigned Opcode = ShuffleOrOp;
unsigned VecOpcode = Opcode;
if (!ScalarTy->isFPOrFPVectorTy() && !SrcScalarTy->isFPOrFPVectorTy() &&
- (SrcIt != MinBWs.end() || It != MinBWs.end())) {
+ (SrcIt != MinBWs.end() || It != MinBWs.end() ||
+ SrcScalarTy != VL0->getOperand(0)->getType()->getScalarType())) {
// Check if the values are candidates to demote.
unsigned SrcBWSz = DL->getTypeSizeInBits(SrcScalarTy->getScalarType());
if (SrcIt != MinBWs.end()) {
@@ -9818,6 +9819,8 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
assert(BWSz > SrcBWSz && "Invalid cast!");
VecOpcode =
SrcIt->second.second ? Instruction::SExt : Instruction::ZExt;
+ } else {
+ VecOpcode = Instruction::SExt;
}
} else if (VecOpcode == Instruction::SIToFP && SrcIt != MinBWs.end() &&
!SrcIt->second.second) {
@@ -13466,6 +13469,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
assert(BWSz > SrcBWSz && "Invalid cast!");
VecOpcode =
SrcIt->second.second ? Instruction::SExt : Instruction::ZExt;
+ } else {
+ VecOpcode = Instruction::SExt;
}
} else if (VecOpcode == Instruction::SIToFP && SrcIt != MinBWs.end() &&
!SrcIt->second.second) {
@@ -16166,7 +16171,7 @@ void BoUpSLP::computeMinimumValueSizes() {
});
}
- // If the maximum bit width we compute is less than the with of the roots'
+ // If the maximum bit width we compute is less than the width of the roots'
// type, we can proceed with the narrowing. Otherwise, do nothing.
if (MaxBitWidth == 0 ||
MaxBitWidth >=
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr100667.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr100667.ll
new file mode 100644
index 00000000000000..d6447bafa1e421
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/pr100667.ll
@@ -0,0 +1,48 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=slp-vectorizer < %s | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Make sure that we don't crash when widening a vectorized operand.
+define void @main(ptr %p) {
+; CHECK-LABEL: define void @main(
+; CHECK-SAME: ptr [[P:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 16
+; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr [[GEP1]], align 16
+; CHECK-NEXT: ret void
+;
+entry:
+ %conv548.2.i.13 = zext i32 0 to i64
+ %and551.2.i.13 = and i64 0, %conv548.2.i.13
+ %conv548.3.i.13 = zext i32 0 to i64
+ %and551.3.i.13 = and i64 0, %conv548.3.i.13
+ %0 = trunc i64 %and551.2.i.13 to i32
+ %conv54.2.i.14 = and i32 %0, 0
+ %conv548.2.i.14 = zext i32 %conv54.2.i.14 to i64
+ %and551.2.i.14 = and i64 %and551.2.i.13, %conv548.2.i.14
+ %1 = trunc i64 %and551.3.i.13 to i32
+ %conv54.3.i.14 = and i32 %1, 0
+ %conv548.3.i.14 = zext i32 %conv54.3.i.14 to i64
+ %and551.3.i.14 = and i64 %and551.3.i.13, %conv548.3.i.14
+ %and551.2.i.15 = and i64 %and551.2.i.14, 0
+ %and551.3.i.15 = and i64 %and551.3.i.14, 0
+ %and551.2.i.16 = and i64 %and551.2.i.15, 0
+ %and551.3.i.16 = and i64 %and551.3.i.15, 0
+ %and551.2.i.17 = and i64 %and551.2.i.16, 0
+ %and551.3.i.17 = and i64 %and551.3.i.16, 0
+ %and551.2.i.18 = and i64 %and551.2.i.17, 0
+ %and551.3.i.18 = and i64 %and551.3.i.17, 0
+ %and551.2.i.19 = and i64 %and551.2.i.18, 0
+ %and551.3.i.19 = and i64 %and551.3.i.18, 0
+ %and551.2.i.20 = and i64 %and551.2.i.19, 0
+ %and551.3.i.20 = and i64 %and551.3.i.19, 0
+ %and551.2.i.21 = and i64 %and551.2.i.20, 0
+ %and551.3.i.21 = and i64 %and551.3.i.20, 0
+ %gep1 = getelementptr inbounds i8, ptr %p, i64 16
+ %gep2 = getelementptr inbounds i8, ptr %p, i64 24
+ store i64 %and551.2.i.21, ptr %gep1, align 16
+ store i64 %and551.3.i.21, ptr %gep2, align 8
+ ret void
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/106726
More information about the llvm-commits
mailing list