[llvm] [SLP] Sort PHIs by ExtractElements when relevant (PR #131229)
Jeffrey Byrnes via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 17 10:57:26 PDT 2025
https://github.com/jrbyrnes updated https://github.com/llvm/llvm-project/pull/131229
>From b450a7c2fb6284d572c43fee3e99a8d81c3d5899 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Thu, 13 Mar 2025 10:16:19 -0700
Subject: [PATCH 1/4] [SLP] Sort PHIs by ExtractElements when relevant
Change-Id: I62668ebc355c7746ec5ef69249573fe87e1343cc
---
.../Transforms/Vectorize/SLPVectorizer.cpp | 35 ++++++++++++++-
.../SLPVectorizer/AMDGPU/extract-ordering.ll | 44 ++++++++-----------
.../AMDGPU/phi-result-use-order.ll | 14 +++---
.../Transforms/SLPVectorizer/RISCV/revec.ll | 8 ++--
4 files changed, 63 insertions(+), 38 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index a2200f283168d..95a27b5a088bd 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -22690,8 +22690,41 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
if (NodeI1 != NodeI2)
return NodeI1->getDFSNumIn() < NodeI2->getDFSNumIn();
InstructionsState S = getSameOpcode({I1, I2}, *TLI);
- if (S && !S.isAltShuffle())
+ if (S && !S.isAltShuffle()) {
+ if (!isa<ExtractElementInst>(I1) || !isa<ExtractElementInst>(I2))
+ continue;
+
+ auto E1 = cast<ExtractElementInst>(I1);
+ auto E2 = cast<ExtractElementInst>(I2);
+ // Sort on ExtractElementInsts primarily by vector operands. Prefer
+ // program order of the vector operands
+ if (E1->getVectorOperand() != E2->getVectorOperand()) {
+ Instruction *V1 = dyn_cast<Instruction>(E1->getVectorOperand());
+ Instruction *V2 = dyn_cast<Instruction>(E2->getVectorOperand());
+ if (!V1 || !V2)
+ continue;
+ if (V1->getParent() != V2->getParent())
+ continue;
+ return V1->comesBefore(V2);
+ }
+ // If we have the same vector operand, try to sort by constant index
+ auto Id1 = E1->getIndexOperand();
+ auto Id2 = E2->getIndexOperand();
+ // Bring constants to the top
+ if (isa<ConstantInt>(Id1) && !isa<ConstantInt>(Id2))
+ return true;
+ if (!isa<ConstantInt>(Id1) && isa<ConstantInt>(Id2))
+ return false;
+ if (isa<ConstantInt>(Id1) && isa<ConstantInt>(Id2)) {
+ auto C1 = cast<ConstantInt>(Id1);
+ auto C2 = cast<ConstantInt>(Id2);
+ // First elements first
+ return C1->getValue().getZExtValue() <
+ C2->getValue().getZExtValue();
+ }
+
continue;
+ }
return I1->getOpcode() < I2->getOpcode();
}
if (I1)
diff --git a/llvm/test/Transforms/SLPVectorizer/AMDGPU/extract-ordering.ll b/llvm/test/Transforms/SLPVectorizer/AMDGPU/extract-ordering.ll
index 67ef5ba3732f4..c585a7f08ad0c 100644
--- a/llvm/test/Transforms/SLPVectorizer/AMDGPU/extract-ordering.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AMDGPU/extract-ordering.ll
@@ -6,59 +6,51 @@ define protected amdgpu_kernel void @myfun(i32 %in, ptr addrspace(1) %aptr1, ptr
; GFX9-SAME: i32 [[IN:%.*]], ptr addrspace(1) [[APTR1:%.*]], ptr addrspace(1) [[BPTR1:%.*]], ptr addrspace(1) [[APTR2:%.*]], ptr addrspace(1) [[BPTR2:%.*]]) #[[ATTR0:[0-9]+]] {
; GFX9-NEXT: [[ENTRY:.*]]:
; GFX9-NEXT: [[VEC1:%.*]] = load <8 x i16>, ptr addrspace(1) [[APTR1]], align 16
-; GFX9-NEXT: [[EL0:%.*]] = extractelement <8 x i16> [[VEC1]], i64 0
-; GFX9-NEXT: [[EL3:%.*]] = extractelement <8 x i16> [[VEC1]], i64 3
; GFX9-NEXT: [[BVEC1:%.*]] = load <8 x i16>, ptr addrspace(1) [[BPTR1]], align 16
-; GFX9-NEXT: [[BEL0:%.*]] = extractelement <8 x i16> [[BVEC1]], i64 0
-; GFX9-NEXT: [[BEL3:%.*]] = extractelement <8 x i16> [[BVEC1]], i64 3
-; GFX9-NEXT: [[TMP0:%.*]] = shufflevector <8 x i16> [[VEC1]], <8 x i16> poison, <2 x i32> <i32 1, i32 2>
+; GFX9-NEXT: [[TMP0:%.*]] = shufflevector <8 x i16> [[VEC1]], <8 x i16> poison, <2 x i32> <i32 0, i32 1>
+; GFX9-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[VEC1]], <8 x i16> poison, <2 x i32> <i32 2, i32 3>
; GFX9-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[VEC1]], <8 x i16> poison, <2 x i32> <i32 4, i32 5>
; GFX9-NEXT: [[TMP3:%.*]] = shufflevector <8 x i16> [[VEC1]], <8 x i16> poison, <2 x i32> <i32 6, i32 7>
-; GFX9-NEXT: [[TMP4:%.*]] = shufflevector <8 x i16> [[BVEC1]], <8 x i16> poison, <2 x i32> <i32 1, i32 2>
+; GFX9-NEXT: [[TMP4:%.*]] = shufflevector <8 x i16> [[BVEC1]], <8 x i16> poison, <2 x i32> <i32 0, i32 1>
+; GFX9-NEXT: [[TMP5:%.*]] = shufflevector <8 x i16> [[BVEC1]], <8 x i16> poison, <2 x i32> <i32 2, i32 3>
; GFX9-NEXT: [[TMP6:%.*]] = shufflevector <8 x i16> [[BVEC1]], <8 x i16> poison, <2 x i32> <i32 4, i32 5>
; GFX9-NEXT: [[TMP7:%.*]] = shufflevector <8 x i16> [[BVEC1]], <8 x i16> poison, <2 x i32> <i32 6, i32 7>
; GFX9-NEXT: br label %[[DO_BODY:.*]]
; GFX9: [[DO_BODY]]:
-; GFX9-NEXT: [[A_THREAD_BUF3:%.*]] = phi i16 [ [[EL3]], %[[ENTRY]] ], [ [[NEWEL3:%.*]], %[[DO_BODY]] ]
-; GFX9-NEXT: [[B_THREAD_BUF3:%.*]] = phi i16 [ [[BEL3]], %[[ENTRY]] ], [ [[BNEWEL3:%.*]], %[[DO_BODY]] ]
; GFX9-NEXT: [[ADD:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[NEWADD:%.*]], %[[DO_BODY]] ]
-; GFX9-NEXT: [[A_THREAD_BUF0:%.*]] = phi i16 [ [[EL0]], %[[ENTRY]] ], [ [[NEWEL0:%.*]], %[[DO_BODY]] ]
-; GFX9-NEXT: [[B_THREAD_BUF0:%.*]] = phi i16 [ [[BEL0]], %[[ENTRY]] ], [ [[BNEWEL0:%.*]], %[[DO_BODY]] ]
-; GFX9-NEXT: [[TMP9:%.*]] = phi <2 x i16> [ [[TMP0]], %[[ENTRY]] ], [ [[TMP25:%.*]], %[[DO_BODY]] ]
+; GFX9-NEXT: [[TMP8:%.*]] = phi <2 x i16> [ [[TMP0]], %[[ENTRY]] ], [ [[TMP30:%.*]], %[[DO_BODY]] ]
+; GFX9-NEXT: [[TMP9:%.*]] = phi <2 x i16> [ [[TMP1]], %[[ENTRY]] ], [ [[TMP31:%.*]], %[[DO_BODY]] ]
; GFX9-NEXT: [[TMP10:%.*]] = phi <2 x i16> [ [[TMP2]], %[[ENTRY]] ], [ [[TMP32:%.*]], %[[DO_BODY]] ]
; GFX9-NEXT: [[TMP11:%.*]] = phi <2 x i16> [ [[TMP3]], %[[ENTRY]] ], [ [[TMP33:%.*]], %[[DO_BODY]] ]
-; GFX9-NEXT: [[TMP13:%.*]] = phi <2 x i16> [ [[TMP4]], %[[ENTRY]] ], [ [[TMP30:%.*]], %[[DO_BODY]] ]
+; GFX9-NEXT: [[TMP12:%.*]] = phi <2 x i16> [ [[TMP4]], %[[ENTRY]] ], [ [[TMP34:%.*]], %[[DO_BODY]] ]
+; GFX9-NEXT: [[TMP13:%.*]] = phi <2 x i16> [ [[TMP5]], %[[ENTRY]] ], [ [[TMP35:%.*]], %[[DO_BODY]] ]
; GFX9-NEXT: [[TMP14:%.*]] = phi <2 x i16> [ [[TMP6]], %[[ENTRY]] ], [ [[TMP36:%.*]], %[[DO_BODY]] ]
; GFX9-NEXT: [[TMP15:%.*]] = phi <2 x i16> [ [[TMP7]], %[[ENTRY]] ], [ [[TMP37:%.*]], %[[DO_BODY]] ]
-; GFX9-NEXT: [[A_THREAD_VEC0:%.*]] = insertelement <8 x i16> poison, i16 [[A_THREAD_BUF0]], i64 0
+; GFX9-NEXT: [[TMP16:%.*]] = shufflevector <2 x i16> [[TMP8]], <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; GFX9-NEXT: [[TMP17:%.*]] = shufflevector <2 x i16> [[TMP9]], <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; GFX9-NEXT: [[TMP16:%.*]] = shufflevector <8 x i16> [[A_THREAD_VEC0]], <8 x i16> [[TMP17]], <8 x i32> <i32 0, i32 8, i32 9, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; GFX9-NEXT: [[A_THREAD_VEC3:%.*]] = insertelement <8 x i16> [[TMP16]], i16 [[A_THREAD_BUF3]], i64 3
+; GFX9-NEXT: [[TMP18:%.*]] = shufflevector <2 x i16> [[TMP8]], <2 x i16> [[TMP9]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
; GFX9-NEXT: [[TMP19:%.*]] = shufflevector <2 x i16> [[TMP10]], <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; GFX9-NEXT: [[TMP20:%.*]] = shufflevector <8 x i16> [[A_THREAD_VEC3]], <8 x i16> [[TMP19]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
+; GFX9-NEXT: [[TMP20:%.*]] = shufflevector <8 x i16> [[TMP18]], <8 x i16> [[TMP19]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 poison, i32 poison>
; GFX9-NEXT: [[TMP21:%.*]] = shufflevector <2 x i16> [[TMP11]], <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; GFX9-NEXT: [[TMP22:%.*]] = shufflevector <8 x i16> [[TMP20]], <8 x i16> [[TMP21]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
-; GFX9-NEXT: [[B_THREAD_VEC0:%.*]] = insertelement <8 x i16> poison, i16 [[B_THREAD_BUF0]], i64 0
+; GFX9-NEXT: [[TMP23:%.*]] = shufflevector <2 x i16> [[TMP12]], <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; GFX9-NEXT: [[TMP24:%.*]] = shufflevector <2 x i16> [[TMP13]], <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; GFX9-NEXT: [[TMP23:%.*]] = shufflevector <8 x i16> [[B_THREAD_VEC0]], <8 x i16> [[TMP24]], <8 x i32> <i32 0, i32 8, i32 9, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; GFX9-NEXT: [[B_THREAD_VEC3:%.*]] = insertelement <8 x i16> [[TMP23]], i16 [[B_THREAD_BUF3]], i64 3
+; GFX9-NEXT: [[TMP25:%.*]] = shufflevector <2 x i16> [[TMP12]], <2 x i16> [[TMP13]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
; GFX9-NEXT: [[TMP26:%.*]] = shufflevector <2 x i16> [[TMP14]], <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; GFX9-NEXT: [[TMP27:%.*]] = shufflevector <8 x i16> [[B_THREAD_VEC3]], <8 x i16> [[TMP26]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
+; GFX9-NEXT: [[TMP27:%.*]] = shufflevector <8 x i16> [[TMP25]], <8 x i16> [[TMP26]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 poison, i32 poison>
; GFX9-NEXT: [[TMP28:%.*]] = shufflevector <2 x i16> [[TMP15]], <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; GFX9-NEXT: [[TMP29:%.*]] = shufflevector <8 x i16> [[TMP27]], <8 x i16> [[TMP28]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
; GFX9-NEXT: [[RES:%.*]] = add <8 x i16> [[TMP22]], [[TMP29]]
; GFX9-NEXT: [[VEC2:%.*]] = load <8 x i16>, ptr addrspace(1) [[APTR2]], align 16
-; GFX9-NEXT: [[NEWEL0]] = extractelement <8 x i16> [[VEC2]], i64 0
-; GFX9-NEXT: [[NEWEL3]] = extractelement <8 x i16> [[VEC2]], i64 3
; GFX9-NEXT: [[BVEC2:%.*]] = load <8 x i16>, ptr addrspace(1) [[BPTR2]], align 16
-; GFX9-NEXT: [[BNEWEL0]] = extractelement <8 x i16> [[BVEC2]], i64 0
-; GFX9-NEXT: [[BNEWEL3]] = extractelement <8 x i16> [[BVEC2]], i64 3
; GFX9-NEXT: [[NEWADD]] = add i32 [[ADD]], 1
; GFX9-NEXT: [[COND:%.*]] = icmp sgt i32 [[NEWADD]], [[IN]]
-; GFX9-NEXT: [[TMP25]] = shufflevector <8 x i16> [[VEC2]], <8 x i16> poison, <2 x i32> <i32 1, i32 2>
+; GFX9-NEXT: [[TMP30]] = shufflevector <8 x i16> [[VEC2]], <8 x i16> poison, <2 x i32> <i32 0, i32 1>
+; GFX9-NEXT: [[TMP31]] = shufflevector <8 x i16> [[VEC2]], <8 x i16> poison, <2 x i32> <i32 2, i32 3>
; GFX9-NEXT: [[TMP32]] = shufflevector <8 x i16> [[VEC2]], <8 x i16> poison, <2 x i32> <i32 4, i32 5>
; GFX9-NEXT: [[TMP33]] = shufflevector <8 x i16> [[VEC2]], <8 x i16> poison, <2 x i32> <i32 6, i32 7>
-; GFX9-NEXT: [[TMP30]] = shufflevector <8 x i16> [[BVEC2]], <8 x i16> poison, <2 x i32> <i32 1, i32 2>
+; GFX9-NEXT: [[TMP34]] = shufflevector <8 x i16> [[BVEC2]], <8 x i16> poison, <2 x i32> <i32 0, i32 1>
+; GFX9-NEXT: [[TMP35]] = shufflevector <8 x i16> [[BVEC2]], <8 x i16> poison, <2 x i32> <i32 2, i32 3>
; GFX9-NEXT: [[TMP36]] = shufflevector <8 x i16> [[BVEC2]], <8 x i16> poison, <2 x i32> <i32 4, i32 5>
; GFX9-NEXT: [[TMP37]] = shufflevector <8 x i16> [[BVEC2]], <8 x i16> poison, <2 x i32> <i32 6, i32 7>
; GFX9-NEXT: br i1 [[COND]], label %[[DO_BODY]], label %[[END:.*]]
diff --git a/llvm/test/Transforms/SLPVectorizer/AMDGPU/phi-result-use-order.ll b/llvm/test/Transforms/SLPVectorizer/AMDGPU/phi-result-use-order.ll
index 3b63c1e35610f..a3a4ab948519f 100644
--- a/llvm/test/Transforms/SLPVectorizer/AMDGPU/phi-result-use-order.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AMDGPU/phi-result-use-order.ll
@@ -49,19 +49,19 @@ bb1:
define <4 x half> @phis_reverse(i1 %cmp1, <4 x half> %in1, <4 x half> %in2) {
; CHECK-LABEL: @phis_reverse(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x half> [[IN1:%.*]], <4 x half> poison, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x half> [[IN1]], <4 x half> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x half> [[IN1:%.*]], <4 x half> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x half> [[IN1]], <4 x half> poison, <2 x i32> <i32 2, i32 3>
; CHECK-NEXT: br i1 [[CMP1:%.*]], label [[BB1:%.*]], label [[BB0:%.*]]
; CHECK: bb0:
-; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x half> [[IN2:%.*]], <4 x half> poison, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x half> [[IN2]], <4 x half> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x half> [[IN2:%.*]], <4 x half> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x half> [[IN2]], <4 x half> poison, <2 x i32> <i32 2, i32 3>
; CHECK-NEXT: br label [[BB1]]
; CHECK: bb1:
-; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x half> [ [[TMP0]], [[ENTRY:%.*]] ], [ [[TMP2]], [[BB0]] ]
-; CHECK-NEXT: [[TMP5:%.*]] = phi <2 x half> [ [[TMP1]], [[ENTRY]] ], [ [[TMP3]], [[BB0]] ]
+; CHECK-NEXT: [[TMP5:%.*]] = phi <2 x half> [ [[TMP1]], [[ENTRY:%.*]] ], [ [[TMP3]], [[BB0]] ]
+; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x half> [ [[TMP2]], [[ENTRY]] ], [ [[TMP9]], [[BB0]] ]
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x half> [[TMP5]], <2 x half> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x half> [[TMP4]], <2 x half> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x half> [[TMP6]], <4 x half> [[TMP7]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x half> [[TMP5]], <2 x half> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: ret <4 x half> [[TMP8]]
;
entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll
index dd7a21198ac1f..651f565412830 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll
@@ -141,7 +141,7 @@ define ptr @test4() {
; POWEROF2-NEXT: [[TMP1:%.*]] = fadd <8 x float> zeroinitializer, zeroinitializer
; POWEROF2-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <2 x i32> <i32 1, i32 2>
; POWEROF2-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <2 x i32> <i32 5, i32 6>
-; POWEROF2-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <2 x i32> <i32 4, i32 0>
+; POWEROF2-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <2 x i32> <i32 0, i32 4>
; POWEROF2-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> poison, <2 x float> [[TMP2]], i64 0)
; POWEROF2-NEXT: [[TMP6:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> [[TMP5]], <2 x float> [[TMP3]], i64 2)
; POWEROF2-NEXT: br label [[TMP8:%.*]]
@@ -156,10 +156,10 @@ define ptr @test4() {
; POWEROF2-NEXT: [[TMP13:%.*]] = fmul <2 x float> [[TMP12]], zeroinitializer
; POWEROF2-NEXT: [[TMP14:%.*]] = call <2 x float> @llvm.vector.extract.v2f32.v4f32(<4 x float> [[TMP10]], i64 2)
; POWEROF2-NEXT: [[TMP15:%.*]] = fmul <2 x float> zeroinitializer, [[TMP14]]
-; POWEROF2-NEXT: [[TMP16:%.*]] = extractelement <2 x float> [[TMP9]], i32 1
-; POWEROF2-NEXT: [[TMP17:%.*]] = fmul float 0.000000e+00, [[TMP16]]
; POWEROF2-NEXT: [[TMP18:%.*]] = extractelement <2 x float> [[TMP9]], i32 0
-; POWEROF2-NEXT: [[TMP19:%.*]] = fmul float [[TMP18]], 0.000000e+00
+; POWEROF2-NEXT: [[TMP17:%.*]] = fmul float 0.000000e+00, [[TMP18]]
+; POWEROF2-NEXT: [[TMP30:%.*]] = extractelement <2 x float> [[TMP9]], i32 1
+; POWEROF2-NEXT: [[TMP19:%.*]] = fmul float [[TMP30]], 0.000000e+00
; POWEROF2-NEXT: [[TMP20:%.*]] = extractelement <2 x float> [[TMP13]], i32 0
; POWEROF2-NEXT: [[TMP21:%.*]] = fadd reassoc nsz float [[TMP20]], [[TMP17]]
; POWEROF2-NEXT: [[TMP22:%.*]] = extractelement <2 x float> [[TMP15]], i32 0
>From 0b5d6c0757f621390bb787e488515d011a2ee8a6 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Mon, 17 Mar 2025 09:46:51 -0700
Subject: [PATCH 2/4] Review comments
Change-Id: I5bb81e7e3177f3cf196e682786a3de966a7a0014
---
.../Transforms/Vectorize/SLPVectorizer.cpp | 37 +++++++++----------
1 file changed, 18 insertions(+), 19 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 95a27b5a088bd..ea5541d118be1 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -795,7 +795,7 @@ isFixedVectorShuffle(ArrayRef<Value *> VL, SmallVectorImpl<int> &Mask,
}
/// \returns True if Extract{Value,Element} instruction extracts element Idx.
-static std::optional<unsigned> getExtractIndex(Instruction *E) {
+static std::optional<unsigned> getExtractIndex(const Instruction *E) {
unsigned Opcode = E->getOpcode();
assert((Opcode == Instruction::ExtractElement ||
Opcode == Instruction::ExtractValue) &&
@@ -22691,37 +22691,36 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
return NodeI1->getDFSNumIn() < NodeI2->getDFSNumIn();
InstructionsState S = getSameOpcode({I1, I2}, *TLI);
if (S && !S.isAltShuffle()) {
- if (!isa<ExtractElementInst>(I1) || !isa<ExtractElementInst>(I2))
+ const auto *E1 = dyn_cast<ExtractElementInst>(I1);
+ const auto *E2 = dyn_cast<ExtractElementInst>(I2);
+ if (!E1 || !E2)
continue;
- auto E1 = cast<ExtractElementInst>(I1);
- auto E2 = cast<ExtractElementInst>(I2);
// Sort on ExtractElementInsts primarily by vector operands. Prefer
- // program order of the vector operands
+ // program order of the vector operands.
if (E1->getVectorOperand() != E2->getVectorOperand()) {
- Instruction *V1 = dyn_cast<Instruction>(E1->getVectorOperand());
- Instruction *V2 = dyn_cast<Instruction>(E2->getVectorOperand());
+ const Instruction *V1 =
+ dyn_cast<Instruction>(E1->getVectorOperand());
+ const Instruction *V2 =
+ dyn_cast<Instruction>(E2->getVectorOperand());
if (!V1 || !V2)
continue;
if (V1->getParent() != V2->getParent())
continue;
return V1->comesBefore(V2);
}
- // If we have the same vector operand, try to sort by constant index
- auto Id1 = E1->getIndexOperand();
- auto Id2 = E2->getIndexOperand();
+ // If we have the same vector operand, try to sort by constant
+ // index.
+ std::optional<unsigned> Id1 = getExtractIndex(E1);
+ std::optional<unsigned> Id2 = getExtractIndex(E2);
// Bring constants to the top
- if (isa<ConstantInt>(Id1) && !isa<ConstantInt>(Id2))
+ if (Id1 && !Id2)
return true;
- if (!isa<ConstantInt>(Id1) && isa<ConstantInt>(Id2))
+ if (!Id1 && Id2)
return false;
- if (isa<ConstantInt>(Id1) && isa<ConstantInt>(Id2)) {
- auto C1 = cast<ConstantInt>(Id1);
- auto C2 = cast<ConstantInt>(Id2);
- // First elements first
- return C1->getValue().getZExtValue() <
- C2->getValue().getZExtValue();
- }
+ // First elements come first.
+ if (Id1 && Id2)
+ return *Id1 < *Id2;
continue;
}
>From a8f8f423633662a25b8db79030b1fc3ce191f329 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Mon, 17 Mar 2025 10:39:57 -0700
Subject: [PATCH 3/4] Review comments 2
Change-Id: I8b15558af0c8b23ff8c769d1a01323a87068f6ef
---
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index ea5541d118be1..9ac6b988ff574 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -22699,10 +22699,8 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
// Sort on ExtractElementInsts primarily by vector operands. Prefer
// program order of the vector operands.
if (E1->getVectorOperand() != E2->getVectorOperand()) {
- const Instruction *V1 =
- dyn_cast<Instruction>(E1->getVectorOperand());
- const Instruction *V2 =
- dyn_cast<Instruction>(E2->getVectorOperand());
+ const auto *V1 = dyn_cast<Instruction>(E1->getVectorOperand());
+ const auto *V2 = dyn_cast<Instruction>(E2->getVectorOperand());
if (!V1 || !V2)
continue;
if (V1->getParent() != V2->getParent())
>From fe2623c9a5a998191f4db4f8d1a20ddc8f39daa8 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Mon, 17 Mar 2025 10:54:25 -0700
Subject: [PATCH 4/4] Review Comments 3
Change-Id: I666a35830bc431b763474dcbb2d7ecae58768a45
---
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 9ac6b988ff574..b73229d23e1fc 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -22698,9 +22698,9 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
// Sort on ExtractElementInsts primarily by vector operands. Prefer
// program order of the vector operands.
- if (E1->getVectorOperand() != E2->getVectorOperand()) {
- const auto *V1 = dyn_cast<Instruction>(E1->getVectorOperand());
- const auto *V2 = dyn_cast<Instruction>(E2->getVectorOperand());
+ const auto *V1 = dyn_cast<Instruction>(E1->getVectorOperand());
+ const auto *V2 = dyn_cast<Instruction>(E2->getVectorOperand());
+ if (V1 != V2) {
if (!V1 || !V2)
continue;
if (V1->getParent() != V2->getParent())
More information about the llvm-commits
mailing list