[llvm] fbf0276 - [SLP] Reorder reuses mask, if it is not empty, for subvector operands

Alexey Bataev via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 14 14:14:00 PDT 2025


Author: Alexey Bataev
Date: 2025-03-14T14:11:09-07:00
New Revision: fbf0276b6a7a7a4508c373cf87fc349569652659

URL: https://github.com/llvm/llvm-project/commit/fbf0276b6a7a7a4508c373cf87fc349569652659
DIFF: https://github.com/llvm/llvm-project/commit/fbf0276b6a7a7a4508c373cf87fc349569652659.diff

LOG: [SLP] Reorder reuses mask, if it is not empty, for subvector operands

If a subvector operand has a non-empty reuse mask, the mask must be
reordered instead of the scalars, to prevent a compiler crash caused by
a mask/scalars size mismatch.

Fixes #131360

Added: 
    llvm/test/Transforms/SLPVectorizer/X86/reorder-reused-subvector.ll

Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 708dda9bd7f8d..d450336cbc3ce 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -6665,6 +6665,8 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
           // Clear ordering of the operand.
           if (!OpTE.ReorderIndices.empty()) {
             OpTE.ReorderIndices.clear();
+          } else if (!OpTE.ReuseShuffleIndices.empty()) {
+            reorderReuses(OpTE.ReuseShuffleIndices, Mask);
           } else {
             assert(OpTE.isGather() && "Expected only gather/buildvector node.");
             reorderScalars(OpTE.Scalars, Mask);

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/reorder-reused-subvector.ll b/llvm/test/Transforms/SLPVectorizer/X86/reorder-reused-subvector.ll
new file mode 100644
index 0000000000000..af9d808f45fa1
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reorder-reused-subvector.ll
@@ -0,0 +1,153 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -slp-threshold=-100 < %s | FileCheck %s
+
+define void @test(i32 %j.6, i32 %m.4, i8 %v.5, ptr %a, i1 %tobool14.not) {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: i32 [[J_6:%.*]], i32 [[M_4:%.*]], i8 [[V_5:%.*]], ptr [[A:%.*]], i1 [[TOBOOL14_NOT:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i8> <i8 0, i8 poison>, i8 [[V_5]], i32 1
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i32> <i32 poison, i32 poison, i32 1, i32 poison>, i32 [[M_4]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[J_6]], i32 3
+; CHECK-NEXT:    br label %[[N:.*]]
+; CHECK:       [[ENTRY_O_CRIT_EDGE:.*]]:
+; CHECK-NEXT:    br label %[[O:.*]]
+; CHECK:       [[N]]:
+; CHECK-NEXT:    [[J_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[J_6]], %[[IF_END18:.*]] ]
+; CHECK-NEXT:    [[L_0:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[L_4:%.*]], %[[IF_END18]] ]
+; CHECK-NEXT:    [[TMP3:%.*]] = phi <4 x i32> [ zeroinitializer, %[[ENTRY]] ], [ [[TMP37:%.*]], %[[IF_END18]] ]
+; CHECK-NEXT:    store i32 [[J_0]], ptr [[A]], align 4
+; CHECK-NEXT:    [[CMP_NOT_NOT:%.*]] = icmp eq i8 [[L_0]], 0
+; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 1, i32 poison, i32 3>
+; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> <i32 0, i32 poison, i32 1, i32 0, i32 0, i32 poison, i32 0, i32 poison>, <8 x i32> <i32 8, i32 poison, i32 10, i32 11, i32 12, i32 1, i32 14, i32 3>
+; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <8 x i32> [[TMP5]], <8 x i32> [[TMP4]], <8 x i32> <i32 0, i32 10, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    br i1 [[CMP_NOT_NOT]], label %[[O_SINK_SPLIT:.*]], label %[[P:.*]]
+; CHECK:       [[P]]:
+; CHECK-NEXT:    [[TMP7:%.*]] = phi <2 x i8> [ <i8 1, i8 0>, %[[IF_END18]] ], [ [[TMP0]], %[[N]] ]
+; CHECK-NEXT:    [[TMP8:%.*]] = phi <8 x i32> [ [[TMP35:%.*]], %[[IF_END18]] ], [ [[TMP6]], %[[N]] ]
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <8 x i32> [[TMP8]], i32 7
+; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> poison, <2 x i32> <i32 5, i32 6>
+; CHECK-NEXT:    [[TMP11:%.*]] = icmp eq <2 x i32> [[TMP10]], zeroinitializer
+; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <2 x i1> [[TMP11]], i32 0
+; CHECK-NEXT:    [[SPEC_SELECT:%.*]] = select i1 [[TMP12]], i32 [[TMP9]], i32 0
+; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <2 x i8> [[TMP7]], i32 0
+; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x i8> [[TMP7]], i32 1
+; CHECK-NEXT:    [[TMP15:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> poison, <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT:    [[TMP16:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> poison, <4 x i32> <i32 poison, i32 1, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP17:%.*]] = shufflevector <8 x i32> <i32 1, i32 0, i32 poison, i32 poison, i32 undef, i32 undef, i32 undef, i32 undef>, <8 x i32> [[TMP8]], <4 x i32> <i32 0, i32 1, i32 9, i32 poison>
+; CHECK-NEXT:    [[TMP18:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[SPEC_SELECT]], i32 3
+; CHECK-NEXT:    [[TMP19:%.*]] = extractelement <8 x i32> [[TMP8]], i32 2
+; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <8 x i32> [[TMP8]], i32 4
+; CHECK-NEXT:    [[TMP21:%.*]] = extractelement <2 x i1> [[TMP11]], i32 1
+; CHECK-NEXT:    br i1 [[TMP21]], label %[[O_SINK_SPLIT]], label %[[Q:.*]]
+; CHECK:       [[O_SINK_SPLIT]]:
+; CHECK-NEXT:    [[SPEC_SELECT_SINK:%.*]] = phi i32 [ [[J_0]], %[[N]] ], [ [[SPEC_SELECT]], %[[P]] ]
+; CHECK-NEXT:    [[I_3_PH:%.*]] = phi i32 [ 0, %[[N]] ], [ [[TMP19]], %[[P]] ]
+; CHECK-NEXT:    [[L_3_PH:%.*]] = phi i8 [ 0, %[[N]] ], [ [[TMP13]], %[[P]] ]
+; CHECK-NEXT:    [[TMP22:%.*]] = phi <4 x i32> [ [[TMP3]], %[[N]] ], [ [[TMP18]], %[[P]] ]
+; CHECK-NEXT:    [[CONV10:%.*]] = zext i32 [[SPEC_SELECT_SINK]] to i64
+; CHECK-NEXT:    [[CALL11:%.*]] = tail call i32 (ptr, ...) @printf(ptr null, i64 [[CONV10]])
+; CHECK-NEXT:    br label %[[O]]
+; CHECK:       [[O]]:
+; CHECK-NEXT:    [[TMP23:%.*]] = phi <4 x i32> [ poison, %[[ENTRY_O_CRIT_EDGE]] ], [ [[TMP22]], %[[O_SINK_SPLIT]] ]
+; CHECK-NEXT:    [[SUB:%.*]] = sub i32 0, [[J_6]]
+; CHECK-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], [[I_3_PH]]
+; CHECK-NEXT:    [[SUB13:%.*]] = sub i32 0, [[DIV]]
+; CHECK-NEXT:    [[TMP24:%.*]] = insertelement <4 x i32> poison, i32 [[SUB]], i32 0
+; CHECK-NEXT:    [[TMP25:%.*]] = insertelement <4 x i32> [[TMP24]], i32 [[SUB13]], i32 1
+; CHECK-NEXT:    [[TMP26:%.*]] = shufflevector <4 x i32> [[TMP25]], <4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+; CHECK-NEXT:    [[TMP27:%.*]] = shufflevector <4 x i32> [[TMP26]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP28:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP27]], <4 x i32> [[TMP23]], i64 4)
+; CHECK-NEXT:    [[TMP29:%.*]] = shufflevector <8 x i32> [[TMP28]], <8 x i32> poison, <2 x i32> <i32 poison, i32 6>
+; CHECK-NEXT:    [[TMP30:%.*]] = shufflevector <2 x i32> [[TMP29]], <2 x i32> <i32 0, i32 poison>, <2 x i32> <i32 2, i32 1>
+; CHECK-NEXT:    br i1 [[TOBOOL14_NOT]], label %[[IF_END18]], label %[[Q]]
+; CHECK:       [[Q]]:
+; CHECK-NEXT:    [[U_4:%.*]] = phi i32 [ [[TMP20]], %[[P]] ], [ 0, %[[O]] ]
+; CHECK-NEXT:    [[V_44:%.*]] = phi i8 [ [[TMP14]], %[[P]] ], [ 0, %[[O]] ]
+; CHECK-NEXT:    [[TMP31:%.*]] = phi <2 x i32> [ [[TMP15]], %[[P]] ], [ [[TMP30]], %[[O]] ]
+; CHECK-NEXT:    [[TMP32:%.*]] = shufflevector <2 x i32> [[TMP31]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+; CHECK-NEXT:    [[CONV17:%.*]] = sext i8 [[V_44]] to i32
+; CHECK-NEXT:    [[REM:%.*]] = mul i32 [[U_4]], [[CONV17]]
+; CHECK-NEXT:    [[TMP33:%.*]] = insertelement <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 poison, i32 0, i32 0>, i32 [[REM]], i32 5
+; CHECK-NEXT:    [[TMP34:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP33]], <4 x i32> [[TMP32]], i64 0)
+; CHECK-NEXT:    br label %[[IF_END18]]
+; CHECK:       [[IF_END18]]:
+; CHECK-NEXT:    [[L_4]] = phi i8 [ 0, %[[Q]] ], [ [[L_3_PH]], %[[O]] ]
+; CHECK-NEXT:    [[TMP35]] = phi <8 x i32> [ [[TMP34]], %[[Q]] ], [ [[TMP28]], %[[O]] ]
+; CHECK-NEXT:    [[TMP36:%.*]] = shufflevector <8 x i32> [[TMP35]], <8 x i32> poison, <4 x i32> <i32 4, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP37]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP36]], <4 x i32> <i32 4, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    br i1 [[TOBOOL14_NOT]], label %[[N]], label %[[P]]
+;
+entry:
+  br label %n
+
+entry.o_crit_edge:
+  br label %o
+
+n:
+  %i.0 = phi i32 [ 0, %entry ], [ 1, %if.end18 ]
+  %j.0 = phi i32 [ 0, %entry ], [ %j.6, %if.end18 ]
+  %m.0 = phi i32 [ 0, %entry ], [ %m.4, %if.end18 ]
+  %l.0 = phi i8 [ 0, %entry ], [ %l.4, %if.end18 ]
+  %u.0 = phi i32 [ 0, %entry ], [ %u.5, %if.end18 ]
+  store i32 %j.0, ptr %a, align 4
+  %cmp.not.not = icmp eq i8 %l.0, 0
+  br i1 %cmp.not.not, label %o.sink.split, label %p
+
+p:
+  %0 = phi i32 [ %r.3, %if.end18 ], [ 0, %n ]
+  %i.1 = phi i32 [ %s.3, %if.end18 ], [ 1, %n ]
+  %j.1 = phi i32 [ %j.65, %if.end18 ], [ %j.0, %n ]
+  %k.1 = phi i32 [ %t.3, %if.end18 ], [ %i.0, %n ]
+  %m.1 = phi i32 [ %m.46, %if.end18 ], [ %m.0, %n ]
+  %l.1 = phi i8 [ 1, %if.end18 ], [ 0, %n ]
+  %s.1 = phi i32 [ %s.3, %if.end18 ], [ 0, %n ]
+  %t.1 = phi i32 [ %t.3, %if.end18 ], [ 0, %n ]
+  %u.12 = phi i32 [ %u.5, %if.end18 ], [ 0, %n ]
+  %v.1 = phi i8 [ 0, %if.end18 ], [ %v.5, %n ]
+  %tobool4.not = icmp eq i32 %m.1, 0
+  %spec.select = select i1 %tobool4.not, i32 %j.1, i32 0
+  %tobool7.not = icmp eq i32 %0, 0
+  br i1 %tobool7.not, label %o.sink.split, label %q
+
+o.sink.split:
+  %spec.select.sink = phi i32 [ %j.0, %n ], [ %spec.select, %p ]
+  %i.3.ph = phi i32 [ 0, %n ], [ %i.1, %p ]
+  %k.3.ph = phi i32 [ %i.0, %n ], [ %k.1, %p ]
+  %m.3.ph = phi i32 [ %m.0, %n ], [ 0, %p ]
+  %l.3.ph = phi i8 [ 0, %n ], [ %l.1, %p ]
+  %u.3.ph = phi i32 [ %u.0, %n ], [ 1, %p ]
+  %conv10 = zext i32 %spec.select.sink to i64
+  %call11 = tail call i32 (ptr, ...) @printf(ptr null, i64 %conv10)
+  br label %o
+
+o:
+  %j.4 = phi i32 [ 0, %entry.o_crit_edge ], [ %spec.select.sink, %o.sink.split ]
+  %k.3 = phi i32 [ 0, %entry.o_crit_edge ], [ %k.3.ph, %o.sink.split ]
+  %m.3 = phi i32 [ 0, %entry.o_crit_edge ], [ %m.3.ph, %o.sink.split ]
+  %u.3 = phi i32 [ 0, %entry.o_crit_edge ], [ %u.3.ph, %o.sink.split ]
+  %sub = sub i32 0, %j.6
+  %div = sdiv i32 %sub, %i.3.ph
+  %sub13 = sub i32 0, %div
+  br i1 %tobool14.not, label %if.end18, label %q
+
+q:
+  %s.2 = phi i32 [ %s.1, %p ], [ 0, %o ]
+  %t.2 = phi i32 [ %t.1, %p ], [ %k.3, %o ]
+  %u.4 = phi i32 [ %u.12, %p ], [ 0, %o ]
+  %v.44 = phi i8 [ %v.1, %p ], [ 0, %o ]
+  %conv17 = sext i8 %v.44 to i32
+  %rem = mul i32 %u.4, %conv17
+  br label %if.end18
+
+if.end18:
+  %j.65 = phi i32 [ 0, %q ], [ %j.4, %o ]
+  %m.46 = phi i32 [ %rem, %q ], [ %m.3, %o ]
+  %l.4 = phi i8 [ 0, %q ], [ %l.3.ph, %o ]
+  %r.3 = phi i32 [ 0, %q ], [ %k.3, %o ]
+  %s.3 = phi i32 [ %s.2, %q ], [ %sub, %o ]
+  %t.3 = phi i32 [ %t.2, %q ], [ %sub13, %o ]
+  %u.5 = phi i32 [ 0, %q ], [ %u.3, %o ]
+  br i1 %tobool14.not, label %n, label %p
+
+}
+declare i32 @printf(ptr, ...)


        


More information about the llvm-commits mailing list