[llvm] [SLP] Sort PHIs by ExtractElements when relevant (PR #131229)

via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 13 15:10:04 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-transforms

Author: Jeffrey Byrnes (jrbyrnes)

<details>
<summary>Changes</summary>

Considering the PHIs in order of the element extracted can lead to better shuffles.

---
Full diff: https://github.com/llvm/llvm-project/pull/131229.diff


4 Files Affected:

- (modified) llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp (+34-1) 
- (added) llvm/test/Transforms/SLPVectorizer/AMDGPU/extract-ordering.ll (+142) 
- (modified) llvm/test/Transforms/SLPVectorizer/AMDGPU/phi-result-use-order.ll (+7-7) 
- (modified) llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll (+4-4) 


``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index a9f61d7a9798a..46677db052897 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -22687,8 +22687,41 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
           if (NodeI1 != NodeI2)
             return NodeI1->getDFSNumIn() < NodeI2->getDFSNumIn();
           InstructionsState S = getSameOpcode({I1, I2}, *TLI);
-          if (S && !S.isAltShuffle())
+          if (S && !S.isAltShuffle()) {
+            if (!isa<ExtractElementInst>(I1) || !isa<ExtractElementInst>(I2))
+              continue;
+
+            auto E1 = cast<ExtractElementInst>(I1);
+            auto E2 = cast<ExtractElementInst>(I2);
+            // Sort on ExtractElementInsts primarily by vector operands. Prefer
+            // program order of the vector operands
+            if (E1->getVectorOperand() != E2->getVectorOperand()) {
+              Instruction *V1 = dyn_cast<Instruction>(E1->getVectorOperand());
+              Instruction *V2 = dyn_cast<Instruction>(E2->getVectorOperand());
+              if (!V1 || !V2)
+                continue;
+              if (V1->getParent() != V2->getParent())
+                continue;
+              return V1->comesBefore(V2);
+            }
+            // If we have the same vector operand, try to sort by constant index
+            auto Id1 = E1->getIndexOperand();
+            auto Id2 = E2->getIndexOperand();
+            // Bring constants to the top
+            if (isa<ConstantInt>(Id1) && !isa<ConstantInt>(Id2))
+              return true;
+            if (!isa<ConstantInt>(Id1) && isa<ConstantInt>(Id2))
+              return false;
+            if (isa<ConstantInt>(Id1) && isa<ConstantInt>(Id2)) {
+              auto C1 = cast<ConstantInt>(Id1);
+              auto C2 = cast<ConstantInt>(Id2);
+              // First elements first
+              return C1->getValue().getZExtValue() <
+                     C2->getValue().getZExtValue();
+            }
+
             continue;
+          }
           return I1->getOpcode() < I2->getOpcode();
         }
         if (I1)
diff --git a/llvm/test/Transforms/SLPVectorizer/AMDGPU/extract-ordering.ll b/llvm/test/Transforms/SLPVectorizer/AMDGPU/extract-ordering.ll
new file mode 100644
index 0000000000000..c585a7f08ad0c
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/AMDGPU/extract-ordering.ll
@@ -0,0 +1,142 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes=slp-vectorizer -S  | FileCheck %s --check-prefix=GFX9
+
+define protected amdgpu_kernel void @myfun(i32 %in, ptr addrspace(1) %aptr1, ptr addrspace(1) %bptr1, ptr addrspace(1) %aptr2, ptr addrspace(1) %bptr2)  {
+; GFX9-LABEL: define protected amdgpu_kernel void @myfun(
+; GFX9-SAME: i32 [[IN:%.*]], ptr addrspace(1) [[APTR1:%.*]], ptr addrspace(1) [[BPTR1:%.*]], ptr addrspace(1) [[APTR2:%.*]], ptr addrspace(1) [[BPTR2:%.*]]) #[[ATTR0:[0-9]+]] {
+; GFX9-NEXT:  [[ENTRY:.*]]:
+; GFX9-NEXT:    [[VEC1:%.*]] = load <8 x i16>, ptr addrspace(1) [[APTR1]], align 16
+; GFX9-NEXT:    [[BVEC1:%.*]] = load <8 x i16>, ptr addrspace(1) [[BPTR1]], align 16
+; GFX9-NEXT:    [[TMP0:%.*]] = shufflevector <8 x i16> [[VEC1]], <8 x i16> poison, <2 x i32> <i32 0, i32 1>
+; GFX9-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i16> [[VEC1]], <8 x i16> poison, <2 x i32> <i32 2, i32 3>
+; GFX9-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i16> [[VEC1]], <8 x i16> poison, <2 x i32> <i32 4, i32 5>
+; GFX9-NEXT:    [[TMP3:%.*]] = shufflevector <8 x i16> [[VEC1]], <8 x i16> poison, <2 x i32> <i32 6, i32 7>
+; GFX9-NEXT:    [[TMP4:%.*]] = shufflevector <8 x i16> [[BVEC1]], <8 x i16> poison, <2 x i32> <i32 0, i32 1>
+; GFX9-NEXT:    [[TMP5:%.*]] = shufflevector <8 x i16> [[BVEC1]], <8 x i16> poison, <2 x i32> <i32 2, i32 3>
+; GFX9-NEXT:    [[TMP6:%.*]] = shufflevector <8 x i16> [[BVEC1]], <8 x i16> poison, <2 x i32> <i32 4, i32 5>
+; GFX9-NEXT:    [[TMP7:%.*]] = shufflevector <8 x i16> [[BVEC1]], <8 x i16> poison, <2 x i32> <i32 6, i32 7>
+; GFX9-NEXT:    br label %[[DO_BODY:.*]]
+; GFX9:       [[DO_BODY]]:
+; GFX9-NEXT:    [[ADD:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[NEWADD:%.*]], %[[DO_BODY]] ]
+; GFX9-NEXT:    [[TMP8:%.*]] = phi <2 x i16> [ [[TMP0]], %[[ENTRY]] ], [ [[TMP30:%.*]], %[[DO_BODY]] ]
+; GFX9-NEXT:    [[TMP9:%.*]] = phi <2 x i16> [ [[TMP1]], %[[ENTRY]] ], [ [[TMP31:%.*]], %[[DO_BODY]] ]
+; GFX9-NEXT:    [[TMP10:%.*]] = phi <2 x i16> [ [[TMP2]], %[[ENTRY]] ], [ [[TMP32:%.*]], %[[DO_BODY]] ]
+; GFX9-NEXT:    [[TMP11:%.*]] = phi <2 x i16> [ [[TMP3]], %[[ENTRY]] ], [ [[TMP33:%.*]], %[[DO_BODY]] ]
+; GFX9-NEXT:    [[TMP12:%.*]] = phi <2 x i16> [ [[TMP4]], %[[ENTRY]] ], [ [[TMP34:%.*]], %[[DO_BODY]] ]
+; GFX9-NEXT:    [[TMP13:%.*]] = phi <2 x i16> [ [[TMP5]], %[[ENTRY]] ], [ [[TMP35:%.*]], %[[DO_BODY]] ]
+; GFX9-NEXT:    [[TMP14:%.*]] = phi <2 x i16> [ [[TMP6]], %[[ENTRY]] ], [ [[TMP36:%.*]], %[[DO_BODY]] ]
+; GFX9-NEXT:    [[TMP15:%.*]] = phi <2 x i16> [ [[TMP7]], %[[ENTRY]] ], [ [[TMP37:%.*]], %[[DO_BODY]] ]
+; GFX9-NEXT:    [[TMP16:%.*]] = shufflevector <2 x i16> [[TMP8]], <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; GFX9-NEXT:    [[TMP17:%.*]] = shufflevector <2 x i16> [[TMP9]], <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; GFX9-NEXT:    [[TMP18:%.*]] = shufflevector <2 x i16> [[TMP8]], <2 x i16> [[TMP9]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
+; GFX9-NEXT:    [[TMP19:%.*]] = shufflevector <2 x i16> [[TMP10]], <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; GFX9-NEXT:    [[TMP20:%.*]] = shufflevector <8 x i16> [[TMP18]], <8 x i16> [[TMP19]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 poison, i32 poison>
+; GFX9-NEXT:    [[TMP21:%.*]] = shufflevector <2 x i16> [[TMP11]], <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; GFX9-NEXT:    [[TMP22:%.*]] = shufflevector <8 x i16> [[TMP20]], <8 x i16> [[TMP21]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
+; GFX9-NEXT:    [[TMP23:%.*]] = shufflevector <2 x i16> [[TMP12]], <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; GFX9-NEXT:    [[TMP24:%.*]] = shufflevector <2 x i16> [[TMP13]], <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; GFX9-NEXT:    [[TMP25:%.*]] = shufflevector <2 x i16> [[TMP12]], <2 x i16> [[TMP13]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
+; GFX9-NEXT:    [[TMP26:%.*]] = shufflevector <2 x i16> [[TMP14]], <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; GFX9-NEXT:    [[TMP27:%.*]] = shufflevector <8 x i16> [[TMP25]], <8 x i16> [[TMP26]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 poison, i32 poison>
+; GFX9-NEXT:    [[TMP28:%.*]] = shufflevector <2 x i16> [[TMP15]], <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; GFX9-NEXT:    [[TMP29:%.*]] = shufflevector <8 x i16> [[TMP27]], <8 x i16> [[TMP28]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
+; GFX9-NEXT:    [[RES:%.*]] = add <8 x i16> [[TMP22]], [[TMP29]]
+; GFX9-NEXT:    [[VEC2:%.*]] = load <8 x i16>, ptr addrspace(1) [[APTR2]], align 16
+; GFX9-NEXT:    [[BVEC2:%.*]] = load <8 x i16>, ptr addrspace(1) [[BPTR2]], align 16
+; GFX9-NEXT:    [[NEWADD]] = add i32 [[ADD]], 1
+; GFX9-NEXT:    [[COND:%.*]] = icmp sgt i32 [[NEWADD]], [[IN]]
+; GFX9-NEXT:    [[TMP30]] = shufflevector <8 x i16> [[VEC2]], <8 x i16> poison, <2 x i32> <i32 0, i32 1>
+; GFX9-NEXT:    [[TMP31]] = shufflevector <8 x i16> [[VEC2]], <8 x i16> poison, <2 x i32> <i32 2, i32 3>
+; GFX9-NEXT:    [[TMP32]] = shufflevector <8 x i16> [[VEC2]], <8 x i16> poison, <2 x i32> <i32 4, i32 5>
+; GFX9-NEXT:    [[TMP33]] = shufflevector <8 x i16> [[VEC2]], <8 x i16> poison, <2 x i32> <i32 6, i32 7>
+; GFX9-NEXT:    [[TMP34]] = shufflevector <8 x i16> [[BVEC2]], <8 x i16> poison, <2 x i32> <i32 0, i32 1>
+; GFX9-NEXT:    [[TMP35]] = shufflevector <8 x i16> [[BVEC2]], <8 x i16> poison, <2 x i32> <i32 2, i32 3>
+; GFX9-NEXT:    [[TMP36]] = shufflevector <8 x i16> [[BVEC2]], <8 x i16> poison, <2 x i32> <i32 4, i32 5>
+; GFX9-NEXT:    [[TMP37]] = shufflevector <8 x i16> [[BVEC2]], <8 x i16> poison, <2 x i32> <i32 6, i32 7>
+; GFX9-NEXT:    br i1 [[COND]], label %[[DO_BODY]], label %[[END:.*]]
+; GFX9:       [[END]]:
+; GFX9-NEXT:    ret void
+;
+entry:
+  %vec1 = load <8 x i16>, ptr addrspace(1) %aptr1
+  %el0 = extractelement <8 x i16> %vec1, i64 0
+  %el1 = extractelement <8 x i16> %vec1, i64 1
+  %el2 = extractelement <8 x i16> %vec1, i64 2
+  %el3 = extractelement <8 x i16> %vec1, i64 3
+  %el4 = extractelement <8 x i16> %vec1, i64 4
+  %el5 = extractelement <8 x i16> %vec1, i64 5
+  %el6 = extractelement <8 x i16> %vec1, i64 6
+  %el7 = extractelement <8 x i16> %vec1, i64 7
+  %bvec1 = load <8 x i16>, ptr addrspace(1) %bptr1
+  %bel0 = extractelement <8 x i16> %bvec1, i64 0
+  %bel1 = extractelement <8 x i16> %bvec1, i64 1
+  %bel2 = extractelement <8 x i16> %bvec1, i64 2
+  %bel3 = extractelement <8 x i16> %bvec1, i64 3
+  %bel4 = extractelement <8 x i16> %bvec1, i64 4
+  %bel5 = extractelement <8 x i16> %bvec1, i64 5
+  %bel6 = extractelement <8 x i16> %bvec1, i64 6
+  %bel7 = extractelement <8 x i16> %bvec1, i64 7
+  br label %do.body
+
+do.body:
+  %a_thread_buf1 = phi i16 [%el1, %entry], [%newel1, %do.body]
+  %a_thread_buf2 = phi i16 [%el2, %entry], [%newel2, %do.body]
+  %a_thread_buf3 = phi i16 [%el3, %entry], [%newel3, %do.body]
+  %a_thread_buf4 = phi i16 [%el4, %entry], [%newel4, %do.body]
+  %a_thread_buf5 = phi i16 [%el5, %entry], [%newel5, %do.body]
+  %a_thread_buf6 = phi i16 [%el6, %entry], [%newel6, %do.body]
+  %a_thread_buf7 = phi i16 [%el7, %entry], [%newel7, %do.body]
+  %b_thread_buf1 = phi i16 [%bel1, %entry], [%bnewel1, %do.body]
+  %b_thread_buf2 = phi i16 [%bel2, %entry], [%bnewel2, %do.body]
+  %b_thread_buf3 = phi i16 [%bel3, %entry], [%bnewel3, %do.body]
+  %b_thread_buf4 = phi i16 [%bel4, %entry], [%bnewel4, %do.body]
+  %b_thread_buf5 = phi i16 [%bel5, %entry], [%bnewel5, %do.body]
+  %b_thread_buf6 = phi i16 [%bel6, %entry], [%bnewel6, %do.body]
+  %b_thread_buf7 = phi i16 [%bel7, %entry], [%bnewel7, %do.body]
+  %add = phi i32 [0, %entry], [%newadd, %do.body]
+  %a_thread_buf0 = phi i16 [%el0, %entry], [%newel0, %do.body]
+  %b_thread_buf0 = phi i16 [%bel0, %entry], [%bnewel0, %do.body]
+  %a_thread_vec0 = insertelement <8 x i16> poison, i16 %a_thread_buf0, i64 0
+  %a_thread_vec1 = insertelement <8 x i16> %a_thread_vec0, i16 %a_thread_buf1, i64 1
+  %a_thread_vec2 = insertelement <8 x i16> %a_thread_vec1, i16 %a_thread_buf2, i64 2
+  %a_thread_vec3 = insertelement <8 x i16> %a_thread_vec2, i16 %a_thread_buf3, i64 3
+  %a_thread_vec4 = insertelement <8 x i16> %a_thread_vec3, i16 %a_thread_buf4, i64 4
+  %a_thread_vec5 = insertelement <8 x i16> %a_thread_vec4, i16 %a_thread_buf5, i64 5
+  %a_thread_vec6 = insertelement <8 x i16> %a_thread_vec5, i16 %a_thread_buf6, i64 6
+  %a_thread_vec7 = insertelement <8 x i16> %a_thread_vec6, i16 %a_thread_buf7, i64 7
+  %b_thread_vec0 = insertelement <8 x i16> poison, i16 %b_thread_buf0, i64 0
+  %b_thread_vec1 = insertelement <8 x i16> %b_thread_vec0, i16 %b_thread_buf1, i64 1
+  %b_thread_vec2 = insertelement <8 x i16> %b_thread_vec1, i16 %b_thread_buf2, i64 2
+  %b_thread_vec3 = insertelement <8 x i16> %b_thread_vec2, i16 %b_thread_buf3, i64 3
+  %b_thread_vec4 = insertelement <8 x i16> %b_thread_vec3, i16 %b_thread_buf4, i64 4
+  %b_thread_vec5 = insertelement <8 x i16> %b_thread_vec4, i16 %b_thread_buf5, i64 5
+  %b_thread_vec6 = insertelement <8 x i16> %b_thread_vec5, i16 %b_thread_buf6, i64 6
+  %b_thread_vec7 = insertelement <8 x i16> %b_thread_vec6, i16 %b_thread_buf7, i64 7
+  %res = add <8 x i16> %a_thread_vec7, %b_thread_vec7
+  %vec2 = load <8 x i16>, ptr addrspace(1) %aptr2
+  %newel0 = extractelement <8 x i16> %vec2, i64 0
+  %newel1 = extractelement <8 x i16> %vec2, i64 1
+  %newel2 = extractelement <8 x i16> %vec2, i64 2
+  %newel3 = extractelement <8 x i16> %vec2, i64 3
+  %newel4 = extractelement <8 x i16> %vec2, i64 4
+  %newel5 = extractelement <8 x i16> %vec2, i64 5
+  %newel6 = extractelement <8 x i16> %vec2, i64 6
+  %newel7 = extractelement <8 x i16> %vec2, i64 7
+  %bvec2 = load <8 x i16>, ptr addrspace(1) %bptr2
+  %bnewel0 = extractelement <8 x i16> %bvec2, i64 0
+  %bnewel1 = extractelement <8 x i16> %bvec2, i64 1
+  %bnewel2 = extractelement <8 x i16> %bvec2, i64 2
+  %bnewel3 = extractelement <8 x i16> %bvec2, i64 3
+  %bnewel4 = extractelement <8 x i16> %bvec2, i64 4
+  %bnewel5 = extractelement <8 x i16> %bvec2, i64 5
+  %bnewel6 = extractelement <8 x i16> %bvec2, i64 6
+  %bnewel7 = extractelement <8 x i16> %bvec2, i64 7
+  %newadd = add i32 %add, 1
+  %cond = icmp sgt i32 %newadd, %in
+  br i1 %cond, label %do.body, label %end
+
+end:
+  ret void
+}
+
+
diff --git a/llvm/test/Transforms/SLPVectorizer/AMDGPU/phi-result-use-order.ll b/llvm/test/Transforms/SLPVectorizer/AMDGPU/phi-result-use-order.ll
index 3b63c1e35610f..a3a4ab948519f 100644
--- a/llvm/test/Transforms/SLPVectorizer/AMDGPU/phi-result-use-order.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AMDGPU/phi-result-use-order.ll
@@ -49,19 +49,19 @@ bb1:
 define <4 x half> @phis_reverse(i1 %cmp1, <4 x half> %in1, <4 x half> %in2)  {
 ; CHECK-LABEL: @phis_reverse(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <4 x half> [[IN1:%.*]], <4 x half> poison, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x half> [[IN1]], <4 x half> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x half> [[IN1:%.*]], <4 x half> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x half> [[IN1]], <4 x half> poison, <2 x i32> <i32 2, i32 3>
 ; CHECK-NEXT:    br i1 [[CMP1:%.*]], label [[BB1:%.*]], label [[BB0:%.*]]
 ; CHECK:       bb0:
-; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x half> [[IN2:%.*]], <4 x half> poison, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x half> [[IN2]], <4 x half> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x half> [[IN2:%.*]], <4 x half> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <4 x half> [[IN2]], <4 x half> poison, <2 x i32> <i32 2, i32 3>
 ; CHECK-NEXT:    br label [[BB1]]
 ; CHECK:       bb1:
-; CHECK-NEXT:    [[TMP4:%.*]] = phi <2 x half> [ [[TMP0]], [[ENTRY:%.*]] ], [ [[TMP2]], [[BB0]] ]
-; CHECK-NEXT:    [[TMP5:%.*]] = phi <2 x half> [ [[TMP1]], [[ENTRY]] ], [ [[TMP3]], [[BB0]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = phi <2 x half> [ [[TMP1]], [[ENTRY:%.*]] ], [ [[TMP3]], [[BB0]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = phi <2 x half> [ [[TMP2]], [[ENTRY]] ], [ [[TMP9]], [[BB0]] ]
 ; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x half> [[TMP5]], <2 x half> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
 ; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <2 x half> [[TMP4]], <2 x half> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <4 x half> [[TMP6]], <4 x half> [[TMP7]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <2 x half> [[TMP5]], <2 x half> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; CHECK-NEXT:    ret <4 x half> [[TMP8]]
 ;
 entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll
index dd7a21198ac1f..651f565412830 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll
@@ -141,7 +141,7 @@ define ptr @test4() {
 ; POWEROF2-NEXT:    [[TMP1:%.*]] = fadd <8 x float> zeroinitializer, zeroinitializer
 ; POWEROF2-NEXT:    [[TMP2:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <2 x i32> <i32 1, i32 2>
 ; POWEROF2-NEXT:    [[TMP3:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <2 x i32> <i32 5, i32 6>
-; POWEROF2-NEXT:    [[TMP4:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <2 x i32> <i32 4, i32 0>
+; POWEROF2-NEXT:    [[TMP4:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <2 x i32> <i32 0, i32 4>
 ; POWEROF2-NEXT:    [[TMP5:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> poison, <2 x float> [[TMP2]], i64 0)
 ; POWEROF2-NEXT:    [[TMP6:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> [[TMP5]], <2 x float> [[TMP3]], i64 2)
 ; POWEROF2-NEXT:    br label [[TMP8:%.*]]
@@ -156,10 +156,10 @@ define ptr @test4() {
 ; POWEROF2-NEXT:    [[TMP13:%.*]] = fmul <2 x float> [[TMP12]], zeroinitializer
 ; POWEROF2-NEXT:    [[TMP14:%.*]] = call <2 x float> @llvm.vector.extract.v2f32.v4f32(<4 x float> [[TMP10]], i64 2)
 ; POWEROF2-NEXT:    [[TMP15:%.*]] = fmul <2 x float> zeroinitializer, [[TMP14]]
-; POWEROF2-NEXT:    [[TMP16:%.*]] = extractelement <2 x float> [[TMP9]], i32 1
-; POWEROF2-NEXT:    [[TMP17:%.*]] = fmul float 0.000000e+00, [[TMP16]]
 ; POWEROF2-NEXT:    [[TMP18:%.*]] = extractelement <2 x float> [[TMP9]], i32 0
-; POWEROF2-NEXT:    [[TMP19:%.*]] = fmul float [[TMP18]], 0.000000e+00
+; POWEROF2-NEXT:    [[TMP17:%.*]] = fmul float 0.000000e+00, [[TMP18]]
+; POWEROF2-NEXT:    [[TMP30:%.*]] = extractelement <2 x float> [[TMP9]], i32 1
+; POWEROF2-NEXT:    [[TMP19:%.*]] = fmul float [[TMP30]], 0.000000e+00
 ; POWEROF2-NEXT:    [[TMP20:%.*]] = extractelement <2 x float> [[TMP13]], i32 0
 ; POWEROF2-NEXT:    [[TMP21:%.*]] = fadd reassoc nsz float [[TMP20]], [[TMP17]]
 ; POWEROF2-NEXT:    [[TMP22:%.*]] = extractelement <2 x float> [[TMP15]], i32 0

``````````

</details>


https://github.com/llvm/llvm-project/pull/131229


More information about the llvm-commits mailing list