[llvm] [SLP][REVEC] Fix ShuffleVector does not consider alternate instruction. (PR #134599)

Han-Kuan Chen via llvm-commits llvm-commits at lists.llvm.org
Mon Apr 7 01:52:21 PDT 2025


https://github.com/HanKuanChen created https://github.com/llvm/llvm-project/pull/134599

None

>From 5dd43e7ba07297f9bbd3b3b2a28d90214d3ee075 Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Mon, 7 Apr 2025 00:13:59 -0700
Subject: [PATCH 1/3] add another check

---
 .../SLPVectorizer/revec-shufflevector.ll      | 140 +++++++++++++++---
 1 file changed, 117 insertions(+), 23 deletions(-)

diff --git a/llvm/test/Transforms/SLPVectorizer/revec-shufflevector.ll b/llvm/test/Transforms/SLPVectorizer/revec-shufflevector.ll
index f11a0a9c024a2..ae97df8802b8e 100644
--- a/llvm/test/Transforms/SLPVectorizer/revec-shufflevector.ll
+++ b/llvm/test/Transforms/SLPVectorizer/revec-shufflevector.ll
@@ -1,14 +1,28 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -passes=slp-vectorizer,instcombine -S -slp-revec -slp-max-reg-size=1024 -slp-threshold=-100 %s | FileCheck %s
+; RUN: opt -passes=slp-vectorizer -S -slp-revec -slp-max-reg-size=1024 -slp-threshold=-100 %s | FileCheck %s
+; RUN: opt -passes=slp-vectorizer,instcombine -S -slp-revec -slp-max-reg-size=1024 -slp-threshold=-100 %s | FileCheck %s --check-prefix COMBINE
 
 define void @test1(ptr %in, ptr %out) {
 ; CHECK-LABEL: @test1(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i32>, ptr [[IN:%.*]], align 1
-; CHECK-NEXT:    [[TMP1:%.*]] = zext <8 x i32> [[TMP0]] to <8 x i64>
-; CHECK-NEXT:    store <8 x i64> [[TMP1]], ptr [[OUT:%.*]], align 8
+; CHECK-NEXT:    [[OUT:%.*]] = getelementptr inbounds i64, ptr [[OUT1:%.*]], i64 0
+; CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v8i32(<16 x i32> poison, <8 x i32> [[TMP0]], i64 0)
+; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <16 x i32> [[TMP2]], <16 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <16 x i32> [[TMP2]], <16 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    [[TMP5:%.*]] = zext <8 x i32> [[TMP4]] to <8 x i64>
+; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <8 x i64> [[TMP5]], <8 x i64> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i64> [[TMP5]], <8 x i64> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    store <8 x i64> [[TMP1]], ptr [[OUT]], align 8
 ; CHECK-NEXT:    ret void
 ;
+; COMBINE-LABEL: @test1(
+; COMBINE-NEXT:  entry:
+; COMBINE-NEXT:    [[TMP0:%.*]] = load <8 x i32>, ptr [[IN:%.*]], align 1
+; COMBINE-NEXT:    [[TMP1:%.*]] = zext <8 x i32> [[TMP0]] to <8 x i64>
+; COMBINE-NEXT:    store <8 x i64> [[TMP1]], ptr [[OUT:%.*]], align 8
+; COMBINE-NEXT:    ret void
+;
 entry:
   %0 = load <8 x i32>, ptr %in, align 1
   %1 = shufflevector <8 x i32> %0, <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -34,11 +48,24 @@ define void @test2(ptr %in, ptr %out) {
 ; CHECK-LABEL: @test2(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i32>, ptr [[IN:%.*]], align 1
-; CHECK-NEXT:    [[TMP1:%.*]] = zext <8 x i32> [[TMP0]] to <8 x i64>
+; CHECK-NEXT:    [[OUT:%.*]] = getelementptr inbounds i64, ptr [[OUT1:%.*]], i64 0
+; CHECK-NEXT:    [[TMP5:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v8i32(<16 x i32> poison, <8 x i32> [[TMP0]], i64 0)
+; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <16 x i32> [[TMP5]], <16 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <16 x i32> [[TMP5]], <16 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    [[TMP1:%.*]] = zext <8 x i32> [[TMP4]] to <8 x i64>
+; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
 ; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> poison, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:    store <8 x i64> [[TMP2]], ptr [[OUT:%.*]], align 8
+; CHECK-NEXT:    store <8 x i64> [[TMP2]], ptr [[OUT]], align 8
 ; CHECK-NEXT:    ret void
 ;
+; COMBINE-LABEL: @test2(
+; COMBINE-NEXT:  entry:
+; COMBINE-NEXT:    [[TMP0:%.*]] = load <8 x i32>, ptr [[IN:%.*]], align 1
+; COMBINE-NEXT:    [[TMP1:%.*]] = zext <8 x i32> [[TMP0]] to <8 x i64>
+; COMBINE-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> poison, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 4, i32 5, i32 6, i32 7>
+; COMBINE-NEXT:    store <8 x i64> [[TMP2]], ptr [[OUT:%.*]], align 8
+; COMBINE-NEXT:    ret void
+;
 entry:
   %0 = load <8 x i32>, ptr %in, align 1
   %1 = shufflevector <8 x i32> %0, <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -63,10 +90,19 @@ entry:
 define void @test3(<16 x i32> %0, ptr %out) {
 ; CHECK-LABEL: @test3(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i32> [[TMP0:%.*]], <16 x i32> poison, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    store <16 x i32> [[TMP1]], ptr [[OUT:%.*]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = call <64 x i32> @llvm.vector.insert.v64i32.v16i32(<64 x i32> poison, <16 x i32> [[TMP0:%.*]], i64 0)
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <64 x i32> [[TMP3]], <64 x i32> poison, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <64 x i32> [[TMP3]], <64 x i32> poison, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    [[OUT:%.*]] = getelementptr inbounds i32, ptr [[OUT1:%.*]], i64 0
+; CHECK-NEXT:    store <16 x i32> [[TMP1]], ptr [[OUT]], align 4
 ; CHECK-NEXT:    ret void
 ;
+; COMBINE-LABEL: @test3(
+; COMBINE-NEXT:  entry:
+; COMBINE-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i32> [[TMP0:%.*]], <16 x i32> poison, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
+; COMBINE-NEXT:    store <16 x i32> [[TMP1]], ptr [[OUT:%.*]], align 4
+; COMBINE-NEXT:    ret void
+;
 entry:
   %1 = shufflevector <16 x i32> %0, <16 x i32> poison, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
   %2 = shufflevector <16 x i32> %0, <16 x i32> poison, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
@@ -87,10 +123,21 @@ define void @test4(ptr %in, ptr %out) {
 ; CHECK-LABEL: @test4(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i32>, ptr [[IN:%.*]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:    store <16 x i32> [[TMP1]], ptr [[OUT:%.*]], align 4
+; CHECK-NEXT:    [[OUT:%.*]] = getelementptr inbounds i32, ptr [[OUT1:%.*]], i64 0
+; CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v8i32(<16 x i32> poison, <8 x i32> [[TMP0]], i64 0)
+; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <16 x i32> [[TMP2]], <16 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <16 x i32> [[TMP2]], <16 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    store <16 x i32> [[TMP1]], ptr [[OUT]], align 4
 ; CHECK-NEXT:    ret void
 ;
+; COMBINE-LABEL: @test4(
+; COMBINE-NEXT:  entry:
+; COMBINE-NEXT:    [[TMP0:%.*]] = load <8 x i32>, ptr [[IN:%.*]], align 4
+; COMBINE-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; COMBINE-NEXT:    store <16 x i32> [[TMP1]], ptr [[OUT:%.*]], align 4
+; COMBINE-NEXT:    ret void
+;
 entry:
   %0 = load <8 x i32>, ptr %in, align 4
   %1 = shufflevector <8 x i32> %0, <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -109,9 +156,18 @@ entry:
 define void @test5(ptr %out) {
 ; CHECK-LABEL: @test5(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    store <8 x i32> zeroinitializer, ptr [[OUT:%.*]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v8i32(<16 x i32> poison, <8 x i32> zeroinitializer, i64 0)
+; CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v8i32(<16 x i32> [[TMP0]], <8 x i32> zeroinitializer, i64 8)
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i32> [[TMP1]], <16 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 0
+; CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[TMP3]], align 4
 ; CHECK-NEXT:    ret void
 ;
+; COMBINE-LABEL: @test5(
+; COMBINE-NEXT:  entry:
+; COMBINE-NEXT:    store <8 x i32> zeroinitializer, ptr [[OUT:%.*]], align 4
+; COMBINE-NEXT:    ret void
+;
 entry:
   %0 = shufflevector <8 x i32> zeroinitializer, <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   %1 = shufflevector <8 x i32> zeroinitializer, <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
@@ -125,33 +181,71 @@ entry:
 define void @test6(ptr %in0, ptr %in1, ptr %in2) {
 ; CHECK-LABEL: @test6(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds nuw i8, ptr [[IN0:%.*]], i64 32
+; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds i8, ptr [[IN0:%.*]], i64 32
 ; CHECK-NEXT:    [[LOAD2:%.*]] = load <4 x float>, ptr [[GEP1]], align 16
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x float>, ptr [[IN0]], align 16
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <32 x i8>, ptr [[IN1:%.*]], align 1
-; CHECK-NEXT:    [[TMP2:%.*]] = uitofp <32 x i8> [[TMP1]] to <32 x float>
+; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <32 x i8> [[TMP1]], <32 x i8> poison, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <32 x i8> [[TMP1]], <32 x i8> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+; CHECK-NEXT:    [[TMP9:%.*]] = zext <32 x i8> [[TMP8]] to <32 x i16>
+; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <32 x i16> [[TMP9]], <32 x i16> poison, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <32 x i16> [[TMP9]], <32 x i16> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+; CHECK-NEXT:    [[TMP2:%.*]] = uitofp <32 x i16> [[TMP10]] to <32 x float>
 ; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <8 x float> [[TMP0]], <8 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP15:%.*]] = shufflevector <4 x float> [[LOAD2]], <4 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP16:%.*]] = shufflevector <16 x float> [[TMP14]], <16 x float> [[TMP15]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP16:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v4f32(<16 x float> [[TMP14]], <4 x float> [[LOAD2]], i64 8)
 ; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <16 x float> [[TMP16]], <16 x float> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 ; CHECK-NEXT:    [[TMP4:%.*]] = fmul <32 x float> [[TMP3]], [[TMP2]]
-; CHECK-NEXT:    [[GEP10:%.*]] = getelementptr inbounds nuw i8, ptr [[IN1]], i64 32
-; CHECK-NEXT:    [[GEP11:%.*]] = getelementptr inbounds nuw i8, ptr [[IN2:%.*]], i64 128
+; CHECK-NEXT:    [[GEP10:%.*]] = getelementptr inbounds i8, ptr [[IN1]], i64 32
+; CHECK-NEXT:    [[GEP11:%.*]] = getelementptr inbounds i8, ptr [[IN2:%.*]], i64 128
 ; CHECK-NEXT:    [[TMP17:%.*]] = load <8 x float>, ptr [[IN0]], align 16
 ; CHECK-NEXT:    store <32 x float> [[TMP4]], ptr [[IN2]], align 16
 ; CHECK-NEXT:    [[LOAD5:%.*]] = load <16 x i8>, ptr [[GEP10]], align 1
-; CHECK-NEXT:    [[TMP6:%.*]] = uitofp <16 x i8> [[LOAD5]] to <16 x float>
-; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <4 x float> [[LOAD2]], <4 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <8 x float> [[TMP17]], <8 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <16 x float> [[TMP7]], <16 x float> [[TMP8]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <8 x float> [[TMP17]], <8 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <16 x float> [[TMP9]], <16 x float> [[TMP10]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP19:%.*]] = call <32 x i8> @llvm.vector.insert.v32i8.v16i8(<32 x i8> poison, <16 x i8> [[LOAD5]], i64 0)
+; CHECK-NEXT:    [[TMP24:%.*]] = shufflevector <32 x i8> [[TMP19]], <32 x i8> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT:    [[TMP15:%.*]] = shufflevector <32 x i8> [[TMP19]], <32 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT:    [[TMP25:%.*]] = zext <16 x i8> [[TMP15]] to <16 x i16>
+; CHECK-NEXT:    [[TMP26:%.*]] = shufflevector <16 x i16> [[TMP25]], <16 x i16> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT:    [[TMP18:%.*]] = shufflevector <16 x i16> [[TMP25]], <16 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT:    [[TMP6:%.*]] = uitofp <16 x i16> [[TMP18]] to <16 x float>
+; CHECK-NEXT:    [[TMP20:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v4f32(<16 x float> poison, <4 x float> [[LOAD2]], i64 0)
+; CHECK-NEXT:    [[TMP21:%.*]] = call <4 x float> @llvm.vector.extract.v4f32.v8f32(<8 x float> [[TMP17]], i64 0)
+; CHECK-NEXT:    [[TMP22:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v4f32(<16 x float> [[TMP20]], <4 x float> [[TMP21]], i64 4)
+; CHECK-NEXT:    [[TMP23:%.*]] = call <4 x float> @llvm.vector.extract.v4f32.v8f32(<8 x float> [[TMP17]], i64 4)
+; CHECK-NEXT:    [[TMP11:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v4f32(<16 x float> [[TMP22]], <4 x float> [[TMP23]], i64 8)
 ; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <16 x float> [[TMP11]], <16 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 0, i32 1, i32 2, i32 3>
 ; CHECK-NEXT:    [[TMP13:%.*]] = fmul <16 x float> [[TMP12]], [[TMP6]]
 ; CHECK-NEXT:    store <16 x float> [[TMP13]], ptr [[GEP11]], align 16
 ; CHECK-NEXT:    ret void
 ;
+; COMBINE-LABEL: @test6(
+; COMBINE-NEXT:  entry:
+; COMBINE-NEXT:    [[GEP1:%.*]] = getelementptr inbounds nuw i8, ptr [[IN0:%.*]], i64 32
+; COMBINE-NEXT:    [[LOAD2:%.*]] = load <4 x float>, ptr [[GEP1]], align 16
+; COMBINE-NEXT:    [[TMP0:%.*]] = load <8 x float>, ptr [[IN0]], align 16
+; COMBINE-NEXT:    [[TMP1:%.*]] = load <32 x i8>, ptr [[IN1:%.*]], align 1
+; COMBINE-NEXT:    [[TMP2:%.*]] = uitofp <32 x i8> [[TMP1]] to <32 x float>
+; COMBINE-NEXT:    [[TMP3:%.*]] = shufflevector <8 x float> [[TMP0]], <8 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; COMBINE-NEXT:    [[TMP4:%.*]] = shufflevector <4 x float> [[LOAD2]], <4 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; COMBINE-NEXT:    [[TMP5:%.*]] = shufflevector <16 x float> [[TMP3]], <16 x float> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 poison, i32 poison, i32 poison, i32 poison>
+; COMBINE-NEXT:    [[TMP6:%.*]] = shufflevector <16 x float> [[TMP5]], <16 x float> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; COMBINE-NEXT:    [[TMP7:%.*]] = fmul <32 x float> [[TMP6]], [[TMP2]]
+; COMBINE-NEXT:    [[GEP10:%.*]] = getelementptr inbounds nuw i8, ptr [[IN1]], i64 32
+; COMBINE-NEXT:    [[GEP11:%.*]] = getelementptr inbounds nuw i8, ptr [[IN2:%.*]], i64 128
+; COMBINE-NEXT:    [[TMP8:%.*]] = load <8 x float>, ptr [[IN0]], align 16
+; COMBINE-NEXT:    store <32 x float> [[TMP7]], ptr [[IN2]], align 16
+; COMBINE-NEXT:    [[LOAD5:%.*]] = load <16 x i8>, ptr [[GEP10]], align 1
+; COMBINE-NEXT:    [[TMP9:%.*]] = uitofp <16 x i8> [[LOAD5]] to <16 x float>
+; COMBINE-NEXT:    [[TMP10:%.*]] = shufflevector <4 x float> [[LOAD2]], <4 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; COMBINE-NEXT:    [[TMP11:%.*]] = shufflevector <8 x float> [[TMP8]], <8 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; COMBINE-NEXT:    [[TMP12:%.*]] = shufflevector <16 x float> [[TMP10]], <16 x float> [[TMP11]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; COMBINE-NEXT:    [[TMP13:%.*]] = shufflevector <8 x float> [[TMP8]], <8 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; COMBINE-NEXT:    [[TMP14:%.*]] = shufflevector <4 x float> [[TMP13]], <4 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; COMBINE-NEXT:    [[TMP15:%.*]] = shufflevector <16 x float> [[TMP12]], <16 x float> [[TMP14]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 poison, i32 poison, i32 poison, i32 poison>
+; COMBINE-NEXT:    [[TMP16:%.*]] = shufflevector <16 x float> [[TMP15]], <16 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 0, i32 1, i32 2, i32 3>
+; COMBINE-NEXT:    [[TMP17:%.*]] = fmul <16 x float> [[TMP16]], [[TMP9]]
+; COMBINE-NEXT:    store <16 x float> [[TMP17]], ptr [[GEP11]], align 16
+; COMBINE-NEXT:    ret void
+;
 entry:
   %gep0 = getelementptr inbounds i8, ptr %in0, i64 16
   %gep1 = getelementptr inbounds i8, ptr %in0, i64 32

>From f5042f16224a06d033b237a94ae5c510e85cfa90 Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Wed, 2 Apr 2025 06:31:32 -0700
Subject: [PATCH 2/3] [SLP][REVEC] Pre-commit test.

---
 .../SLPVectorizer/revec-shufflevector.ll      | 26 +++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/llvm/test/Transforms/SLPVectorizer/revec-shufflevector.ll b/llvm/test/Transforms/SLPVectorizer/revec-shufflevector.ll
index ae97df8802b8e..a10e529e58842 100644
--- a/llvm/test/Transforms/SLPVectorizer/revec-shufflevector.ll
+++ b/llvm/test/Transforms/SLPVectorizer/revec-shufflevector.ll
@@ -330,3 +330,29 @@ entry:
   store <4 x float> %fmul11, ptr %gep14, align 16
   ret void
 }
+
+define i32 @test7() {
+entry:
+  %0 = getelementptr i8, ptr null, i64 16
+  %1 = getelementptr i8, ptr null, i64 32
+  %2 = getelementptr i8, ptr null, i64 48
+  %3 = fadd <8 x float> zeroinitializer, zeroinitializer
+  %4 = fsub <8 x float> zeroinitializer, zeroinitializer
+  %5 = shufflevector <8 x float> %3, <8 x float> zeroinitializer, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %6 = shufflevector <8 x float> %3, <8 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %7 = shufflevector <8 x float> %4, <8 x float> zeroinitializer, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %8 = shufflevector <8 x float> %4, <8 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %9 = fsub <4 x float> zeroinitializer, %5
+  %10 = fsub <4 x float> zeroinitializer, %6
+  %11 = fadd <4 x float> zeroinitializer, %7
+  %12 = fadd <4 x float> zeroinitializer, %8
+  %13 = fadd <4 x float> zeroinitializer, %9
+  %14 = fadd <4 x float> zeroinitializer, %10
+  %15 = fadd <4 x float> zeroinitializer, %11
+  %16 = fadd <4 x float> zeroinitializer, %12
+  store <4 x float> %13, ptr %2, align 16
+  store <4 x float> %14, ptr %1, align 16
+  store <4 x float> %15, ptr %0, align 16
+  store <4 x float> %16, ptr null, align 16
+  ret i32 0
+}

>From 0f38d513216ae8b391af102275576cdf5b41c117 Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Mon, 7 Apr 2025 00:09:55 -0700
Subject: [PATCH 3/3] [SLP][REVEC] Fix ShuffleVector does not consider
 alternate instruction.

---
 .../Transforms/Vectorize/SLPVectorizer.cpp    |  5 ++--
 .../SLPVectorizer/revec-shufflevector.ll      | 24 +++++++++++++++++++
 2 files changed, 26 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 31c684e16f051..6bcb75bfe6e70 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -17741,13 +17741,12 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
         Value *Src = vectorizeOperand(E, 0);
         SmallVector<int> ThisMask(calculateShufflevectorMask(E->Scalars));
         if (auto *SVSrc = dyn_cast<ShuffleVectorInst>(Src)) {
-          assert(isa<PoisonValue>(SVSrc->getOperand(1)) &&
-                 "Not supported shufflevector usage.");
           SmallVector<int> NewMask(ThisMask.size());
           transform(ThisMask, NewMask.begin(), [&SVSrc](int Mask) {
             return SVSrc->getShuffleMask()[Mask];
           });
-          V = Builder.CreateShuffleVector(SVSrc->getOperand(0), NewMask);
+          V = Builder.CreateShuffleVector(SVSrc->getOperand(0),
+                                          SVSrc->getOperand(1), NewMask);
         } else {
           V = Builder.CreateShuffleVector(Src, ThisMask);
         }
diff --git a/llvm/test/Transforms/SLPVectorizer/revec-shufflevector.ll b/llvm/test/Transforms/SLPVectorizer/revec-shufflevector.ll
index a10e529e58842..b85c78ec8d2d0 100644
--- a/llvm/test/Transforms/SLPVectorizer/revec-shufflevector.ll
+++ b/llvm/test/Transforms/SLPVectorizer/revec-shufflevector.ll
@@ -332,6 +332,30 @@ entry:
 }
 
 define i32 @test7() {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> poison, <8 x float> zeroinitializer, i64 0)
+; CHECK-NEXT:    [[TMP1:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> [[TMP0]], <8 x float> zeroinitializer, i64 8)
+; CHECK-NEXT:    [[TMP2:%.*]] = fsub <16 x float> [[TMP1]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = fadd <16 x float> [[TMP1]], [[TMP1]]
+; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <16 x float> [[TMP2]], <16 x float> [[TMP3]], <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <16 x float> [[TMP2]], <16 x float> [[TMP3]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+; CHECK-NEXT:    [[TMP6:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v4f32(<16 x float> poison, <4 x float> zeroinitializer, i64 0)
+; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v4f32(<16 x float> [[TMP6]], <4 x float> zeroinitializer, i64 4)
+; CHECK-NEXT:    [[TMP8:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v4f32(<16 x float> [[TMP7]], <4 x float> zeroinitializer, i64 8)
+; CHECK-NEXT:    [[TMP9:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v4f32(<16 x float> [[TMP8]], <4 x float> zeroinitializer, i64 12)
+; CHECK-NEXT:    [[TMP10:%.*]] = fadd <16 x float> [[TMP9]], [[TMP5]]
+; CHECK-NEXT:    [[TMP11:%.*]] = fsub <16 x float> [[TMP9]], [[TMP5]]
+; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <16 x float> [[TMP10]], <16 x float> [[TMP11]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+; CHECK-NEXT:    [[TMP13:%.*]] = fadd <16 x float> [[TMP9]], [[TMP12]]
+; CHECK-NEXT:    store <16 x float> [[TMP13]], ptr null, align 16
+; CHECK-NEXT:    ret i32 0
+;
+; COMBINE-LABEL: @test7(
+; COMBINE-NEXT:  entry:
+; COMBINE-NEXT:    store <16 x float> poison, ptr null, align 16
+; COMBINE-NEXT:    ret i32 0
+;
 entry:
   %0 = getelementptr i8, ptr null, i64 16
   %1 = getelementptr i8, ptr null, i64 32



More information about the llvm-commits mailing list