[llvm] [SLP] Fallback to sext when widening vectorized operands (PR #106726)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 30 06:19:42 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Yingwei Zheng (dtcxzyw)
<details>
<summary>Changes</summary>
See the following code:
https://github.com/llvm/llvm-project/blob/8586d0330e36b22496f9ba5ed116bc1aac5a1f28/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp#L13446-L13473
When widening a vectorized operand (i.e., `BWSz > SrcBWSz`), we should use an ext inst if both `SrcIt` and `It` are invalid.
https://github.com/llvm/llvm-project/blob/8586d0330e36b22496f9ba5ed116bc1aac5a1f28/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp#L16181-L16192
IIRC we can fall back to sext by assuming that some scalar results cannot be legally represented as zero-extended.
---
Full diff: https://github.com/llvm/llvm-project/pull/106726.diff
2 Files Affected:
- (modified) llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp (+7-2)
- (added) llvm/test/Transforms/SLPVectorizer/X86/pr100667.ll (+48)
``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index edb2567fa057b3..22a778507981ac 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -9796,7 +9796,8 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
unsigned Opcode = ShuffleOrOp;
unsigned VecOpcode = Opcode;
if (!ScalarTy->isFPOrFPVectorTy() && !SrcScalarTy->isFPOrFPVectorTy() &&
- (SrcIt != MinBWs.end() || It != MinBWs.end())) {
+ (SrcIt != MinBWs.end() || It != MinBWs.end() ||
+ SrcScalarTy != VL0->getOperand(0)->getType()->getScalarType())) {
// Check if the values are candidates to demote.
unsigned SrcBWSz = DL->getTypeSizeInBits(SrcScalarTy->getScalarType());
if (SrcIt != MinBWs.end()) {
@@ -9818,6 +9819,8 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
assert(BWSz > SrcBWSz && "Invalid cast!");
VecOpcode =
SrcIt->second.second ? Instruction::SExt : Instruction::ZExt;
+ } else {
+ VecOpcode = Instruction::SExt;
}
} else if (VecOpcode == Instruction::SIToFP && SrcIt != MinBWs.end() &&
!SrcIt->second.second) {
@@ -13466,6 +13469,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
assert(BWSz > SrcBWSz && "Invalid cast!");
VecOpcode =
SrcIt->second.second ? Instruction::SExt : Instruction::ZExt;
+ } else {
+ VecOpcode = Instruction::SExt;
}
} else if (VecOpcode == Instruction::SIToFP && SrcIt != MinBWs.end() &&
!SrcIt->second.second) {
@@ -16166,7 +16171,7 @@ void BoUpSLP::computeMinimumValueSizes() {
});
}
- // If the maximum bit width we compute is less than the with of the roots'
+ // If the maximum bit width we compute is less than the width of the roots'
// type, we can proceed with the narrowing. Otherwise, do nothing.
if (MaxBitWidth == 0 ||
MaxBitWidth >=
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr100667.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr100667.ll
new file mode 100644
index 00000000000000..d6447bafa1e421
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/pr100667.ll
@@ -0,0 +1,48 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=slp-vectorizer < %s | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Make sure that we don't crash when widening a vectorized operand.
+define void @main(ptr %p) {
+; CHECK-LABEL: define void @main(
+; CHECK-SAME: ptr [[P:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 16
+; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr [[GEP1]], align 16
+; CHECK-NEXT: ret void
+;
+entry:
+ %conv548.2.i.13 = zext i32 0 to i64
+ %and551.2.i.13 = and i64 0, %conv548.2.i.13
+ %conv548.3.i.13 = zext i32 0 to i64
+ %and551.3.i.13 = and i64 0, %conv548.3.i.13
+ %0 = trunc i64 %and551.2.i.13 to i32
+ %conv54.2.i.14 = and i32 %0, 0
+ %conv548.2.i.14 = zext i32 %conv54.2.i.14 to i64
+ %and551.2.i.14 = and i64 %and551.2.i.13, %conv548.2.i.14
+ %1 = trunc i64 %and551.3.i.13 to i32
+ %conv54.3.i.14 = and i32 %1, 0
+ %conv548.3.i.14 = zext i32 %conv54.3.i.14 to i64
+ %and551.3.i.14 = and i64 %and551.3.i.13, %conv548.3.i.14
+ %and551.2.i.15 = and i64 %and551.2.i.14, 0
+ %and551.3.i.15 = and i64 %and551.3.i.14, 0
+ %and551.2.i.16 = and i64 %and551.2.i.15, 0
+ %and551.3.i.16 = and i64 %and551.3.i.15, 0
+ %and551.2.i.17 = and i64 %and551.2.i.16, 0
+ %and551.3.i.17 = and i64 %and551.3.i.16, 0
+ %and551.2.i.18 = and i64 %and551.2.i.17, 0
+ %and551.3.i.18 = and i64 %and551.3.i.17, 0
+ %and551.2.i.19 = and i64 %and551.2.i.18, 0
+ %and551.3.i.19 = and i64 %and551.3.i.18, 0
+ %and551.2.i.20 = and i64 %and551.2.i.19, 0
+ %and551.3.i.20 = and i64 %and551.3.i.19, 0
+ %and551.2.i.21 = and i64 %and551.2.i.20, 0
+ %and551.3.i.21 = and i64 %and551.3.i.20, 0
+ %gep1 = getelementptr inbounds i8, ptr %p, i64 16
+ %gep2 = getelementptr inbounds i8, ptr %p, i64 24
+ store i64 %and551.2.i.21, ptr %gep1, align 16
+ store i64 %and551.3.i.21, ptr %gep2, align 8
+ ret void
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/106726
More information about the llvm-commits
mailing list