[PATCH] D101477: [SLP]Fix the crash on cost calculation if non-compatible vectors shuffled.

Alexey Bataev via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Fri Apr 30 09:52:24 PDT 2021


This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
ABataev marked an inline comment as done.
Closed by commit rGa3fd82c28987: [SLP]Fix the crash on cost calculation if non-compatible vectors shuffled. (authored by ABataev).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D101477/new/

https://reviews.llvm.org/D101477

Files:
  llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
  llvm/test/Transforms/SLPVectorizer/AMDGPU/crash_extract_subvector_cost.ll


Index: llvm/test/Transforms/SLPVectorizer/AMDGPU/crash_extract_subvector_cost.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/SLPVectorizer/AMDGPU/crash_extract_subvector_cost.ll
@@ -0,0 +1,35 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -slp-vectorizer %s | FileCheck %s
+
+define <2 x i16> @uadd_sat_v9i16_combine_vi16(<9 x i16> %arg0, <9 x i16> %arg1) {
+; CHECK-LABEL: @uadd_sat_v9i16_combine_vi16(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[ARG0_1:%.*]] = extractelement <9 x i16> undef, i64 7
+; CHECK-NEXT:    [[ARG0_2:%.*]] = extractelement <9 x i16> [[ARG0:%.*]], i64 8
+; CHECK-NEXT:    [[ARG1_1:%.*]] = extractelement <9 x i16> [[ARG1:%.*]], i64 7
+; CHECK-NEXT:    [[ARG1_2:%.*]] = extractelement <9 x i16> [[ARG1]], i64 8
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i16> poison, i16 [[ARG0_1]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i16> [[TMP0]], i16 [[ARG0_2]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x i16> poison, i16 [[ARG1_1]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x i16> [[TMP2]], i16 [[ARG1_2]], i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP1]], <2 x i16> [[TMP3]])
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x i16> [[TMP4]], i32 0
+; CHECK-NEXT:    [[INS_1:%.*]] = insertelement <2 x i16> undef, i16 [[TMP5]], i64 0
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x i16> [[TMP4]], i32 1
+; CHECK-NEXT:    [[INS_2:%.*]] = insertelement <2 x i16> [[INS_1]], i16 [[TMP6]], i64 1
+; CHECK-NEXT:    ret <2 x i16> [[INS_2]]
+;
+bb:
+  %arg0.1 = extractelement <9 x i16> undef, i64 7
+  %arg0.2 = extractelement <9 x i16> %arg0, i64 8
+  %arg1.1 = extractelement <9 x i16> %arg1, i64 7
+  %arg1.2 = extractelement <9 x i16> %arg1, i64 8
+  %add.1 = call i16 @llvm.uadd.sat.i16(i16 %arg0.1, i16 %arg1.1)
+  %add.2 = call i16 @llvm.uadd.sat.i16(i16 %arg0.2, i16 %arg1.2)
+  %ins.1 = insertelement <2 x i16> undef, i16 %add.1, i64 0
+  %ins.2 = insertelement <2 x i16> %ins.1, i16 %add.2, i64 1
+  ret <2 x i16> %ins.2
+}
+
+declare i16 @llvm.uadd.sat.i16(i16, i16) #0
+attributes #0 = { nounwind readnone speculatable willreturn }
Index: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -3589,13 +3589,27 @@
     for (const auto &Data : ExtractVectorsTys) {
       auto *EEVTy = cast<FixedVectorType>(Data.first->getType());
       unsigned NumElts = VecTy->getNumElements();
-      if (TTIRef.getNumberOfParts(EEVTy) > TTIRef.getNumberOfParts(VecTy))
-        Cost += TTIRef.getShuffleCost(TargetTransformInfo::SK_ExtractSubvector,
-                                      EEVTy, None,
-                                      (Data.second / NumElts) * NumElts, VecTy);
-      else
+      if (TTIRef.getNumberOfParts(EEVTy) > TTIRef.getNumberOfParts(VecTy)) {
+        unsigned Idx = (Data.second / NumElts) * NumElts;
+        unsigned EENumElts = EEVTy->getNumElements();
+        if (Idx + NumElts <= EENumElts) {
+          Cost +=
+              TTIRef.getShuffleCost(TargetTransformInfo::SK_ExtractSubvector,
+                                    EEVTy, None, Idx, VecTy);
+        } else {
+          // Need to round up the subvector type vectorization factor to avoid a
+          // crash in cost model functions. Make SubVT so that Idx + VF of SubVT
+          // <= EENumElts.
+          auto *SubVT =
+              FixedVectorType::get(VecTy->getElementType(), EENumElts - Idx);
+          Cost +=
+              TTIRef.getShuffleCost(TargetTransformInfo::SK_ExtractSubvector,
+                                    EEVTy, None, Idx, SubVT);
+        }
+      } else {
         Cost += TTIRef.getShuffleCost(TargetTransformInfo::SK_InsertSubvector,
                                       VecTy, None, 0, EEVTy);
+      }
     }
   };
   if (E->State == TreeEntry::NeedToGather) {


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D101477.341948.patch
Type: text/x-patch
Size: 4142 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20210430/808abb38/attachment.bin>


More information about the llvm-commits mailing list