[llvm] [VectorCombine] Combine BinOp with extract/insert to vector BinOp (PR #115213)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 7 07:57:52 PST 2024
https://github.com/ParkHanbum updated https://github.com/llvm/llvm-project/pull/115213
>From 600538fb3daa16209350fadbdf20171596365afb Mon Sep 17 00:00:00 2001
From: hanbeom <kese111 at gmail.com>
Date: Tue, 29 Oct 2024 04:17:30 +0900
Subject: [PATCH 1/6] [VectorCombine] Combine BinOp with extract/insert to
vector BinOp
insert (DstVec, (extract (binop), ExtIdx), InsIdx)
--> shuffle (DstVec, (binop), Mask)
This commit folds an insertelement whose scalar operand is an extractelement
of a vector BinaryOperator into a shufflevector of the destination vector and
that vector BinaryOperator.
---
.../Transforms/Vectorize/VectorCombine.cpp | 48 ++++++++++
.../X86/extract-binop-inseltpoison.ll | 87 ++++++++++++-------
.../VectorCombine/X86/extract-binop.ll | 87 ++++++++++++-------
.../VectorCombine/X86/load-inseltpoison.ll | 39 ++++++---
.../test/Transforms/VectorCombine/X86/load.ll | 39 ++++++---
5 files changed, 216 insertions(+), 84 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 58145c7e3c5913..0ccd535303686d 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -106,6 +106,7 @@ class VectorCombine {
Instruction &I);
bool foldExtractExtract(Instruction &I);
bool foldInsExtFNeg(Instruction &I);
+ bool foldInsExtOfBinOpShuffle(Instruction &I);
bool foldBitcastShuffle(Instruction &I);
bool scalarizeBinopOrCmp(Instruction &I);
bool scalarizeVPIntrinsic(Instruction &I);
@@ -2678,6 +2679,52 @@ bool VectorCombine::shrinkType(llvm::Instruction &I) {
return true;
}
+/// insert (DstVec, (extract (binop), ExtIdx), InsIdx) -->
+/// shuffl (DstVec, (binop), Mask)
+bool VectorCombine::foldInsExtOfBinOpShuffle(Instruction &I) {
+ Value *DstVec;
+ BinaryOperator *BO;
+ uint64_t ExtIdx, InsIdx;
+ if (!match(&I, m_InsertElt(
+ m_Value(DstVec),
+ m_OneUse(m_ExtractElt(m_BinOp(BO), m_ConstantInt(ExtIdx))),
+ m_ConstantInt(InsIdx))))
+ return false;
+
+ if (!isSafeToSpeculativelyExecute(BO))
+ return false;
+
+ auto *VecTy = cast<FixedVectorType>(I.getType());
+ if (BO->getType() != VecTy)
+ return false;
+
+ unsigned NumElts = VecTy->getNumElements();
+ if (ExtIdx >= NumElts)
+ return false;
+
+ SmallVector<int> Mask(NumElts);
+ std::iota(Mask.begin(), Mask.end(), 0);
+ Mask[InsIdx] = ExtIdx + NumElts;
+ // Cost
+ ExtractElementInst *Ext;
+ if ((Ext = dyn_cast<ExtractElementInst>(I.getOperand(0))) == nullptr)
+ Ext = dyn_cast<ExtractElementInst>(I.getOperand(1));
+
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
+ InstructionCost OldCost =
+ TTI.getVectorInstrCost(*Ext, VecTy, CostKind, ExtIdx);
+ InstructionCost NewCost =
+ TTI.getShuffleCost(TargetTransformInfo::SK_Select, VecTy, Mask);
+
+ if (OldCost < NewCost)
+ return false;
+
+ Value *Shuf = Builder.CreateShuffleVector(DstVec, BO, Mask);
+ replaceValue(I, *Shuf);
+
+ return true;
+}
+
/// This is the entry point for all transforms. Pass manager differences are
/// handled in the callers of this function.
bool VectorCombine::run() {
@@ -2734,6 +2781,7 @@ bool VectorCombine::run() {
switch (Opcode) {
case Instruction::InsertElement:
MadeChange |= foldInsExtFNeg(I);
+ MadeChange |= foldInsExtOfBinOpShuffle(I);
break;
case Instruction::ShuffleVector:
MadeChange |= foldShuffleOfBinops(I);
diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-binop-inseltpoison.ll b/llvm/test/Transforms/VectorCombine/X86/extract-binop-inseltpoison.ll
index 3d69f15fc5f249..e5880c93a9020f 100644
--- a/llvm/test/Transforms/VectorCombine/X86/extract-binop-inseltpoison.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/extract-binop-inseltpoison.ll
@@ -417,12 +417,18 @@ define float @ext14_ext15_fmul_v16f32(<16 x float> %x) {
}
define <4 x float> @ins_bo_ext_ext(<4 x float> %a, <4 x float> %b) {
-; CHECK-LABEL: @ins_bo_ext_ext(
-; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 2>
-; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[SHIFT]], [[A]]
-; CHECK-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i64 3
-; CHECK-NEXT: [[V3:%.*]] = insertelement <4 x float> [[B:%.*]], float [[A23]], i32 3
-; CHECK-NEXT: ret <4 x float> [[V3]]
+; SSE-LABEL: @ins_bo_ext_ext(
+; SSE-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 2>
+; SSE-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[SHIFT]], [[A]]
+; SSE-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i64 3
+; SSE-NEXT: [[V3:%.*]] = insertelement <4 x float> [[B:%.*]], float [[A23]], i32 3
+; SSE-NEXT: ret <4 x float> [[V3]]
+;
+; AVX-LABEL: @ins_bo_ext_ext(
+; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 2>
+; AVX-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[SHIFT]], [[A]]
+; AVX-NEXT: [[V3:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> [[TMP1]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; AVX-NEXT: ret <4 x float> [[V3]]
;
%a2 = extractelement <4 x float> %a, i32 2
%a3 = extractelement <4 x float> %a, i32 3
@@ -435,13 +441,21 @@ define <4 x float> @ins_bo_ext_ext(<4 x float> %a, <4 x float> %b) {
; but it is likely that extracting from index 3 is the better option.
define <4 x float> @ins_bo_ext_ext_uses(<4 x float> %a, <4 x float> %b) {
-; CHECK-LABEL: @ins_bo_ext_ext_uses(
-; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
-; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
-; CHECK-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
-; CHECK-NEXT: call void @use_f32(float [[A23]])
-; CHECK-NEXT: [[V3:%.*]] = insertelement <4 x float> [[B:%.*]], float [[A23]], i32 3
-; CHECK-NEXT: ret <4 x float> [[V3]]
+; SSE-LABEL: @ins_bo_ext_ext_uses(
+; SSE-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
+; SSE-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
+; SSE-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
+; SSE-NEXT: call void @use_f32(float [[A23]])
+; SSE-NEXT: [[V3:%.*]] = insertelement <4 x float> [[B:%.*]], float [[A23]], i32 3
+; SSE-NEXT: ret <4 x float> [[V3]]
+;
+; AVX-LABEL: @ins_bo_ext_ext_uses(
+; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
+; AVX-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
+; AVX-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
+; AVX-NEXT: call void @use_f32(float [[A23]])
+; AVX-NEXT: [[V3:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> [[TMP1]], <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; AVX-NEXT: ret <4 x float> [[V3]]
;
%a2 = extractelement <4 x float> %a, i32 2
%a3 = extractelement <4 x float> %a, i32 3
@@ -452,22 +466,37 @@ define <4 x float> @ins_bo_ext_ext_uses(<4 x float> %a, <4 x float> %b) {
}
define <4 x float> @PR34724(<4 x float> %a, <4 x float> %b) {
-; CHECK-LABEL: @PR34724(
-; CHECK-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
-; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
-; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
-; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
-; CHECK-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
-; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[B]], [[SHIFT1]]
-; CHECK-NEXT: [[B01:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
-; CHECK-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 2>
-; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[SHIFT2]], [[B]]
-; CHECK-NEXT: [[B23:%.*]] = extractelement <4 x float> [[TMP3]], i64 3
-; CHECK-NEXT: [[V1:%.*]] = insertelement <4 x float> poison, float [[A23]], i32 1
-; CHECK-NEXT: [[V2:%.*]] = insertelement <4 x float> [[V1]], float [[B01]], i32 2
-; CHECK-NEXT: [[V3:%.*]] = insertelement <4 x float> [[V2]], float [[B23]], i32 3
-; CHECK-NEXT: ret <4 x float> [[V3]]
+; SSE-LABEL: @PR34724(
+; SSE-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
+; SSE-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
+; SSE-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
+; SSE-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
+; SSE-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
+; SSE-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
+; SSE-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[B]], [[SHIFT1]]
+; SSE-NEXT: [[B01:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
+; SSE-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 2>
+; SSE-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[SHIFT2]], [[B]]
+; SSE-NEXT: [[B23:%.*]] = extractelement <4 x float> [[TMP3]], i64 3
+; SSE-NEXT: [[V1:%.*]] = insertelement <4 x float> poison, float [[A23]], i32 1
+; SSE-NEXT: [[V2:%.*]] = insertelement <4 x float> [[V1]], float [[B01]], i32 2
+; SSE-NEXT: [[V3:%.*]] = insertelement <4 x float> [[V2]], float [[B23]], i32 3
+; SSE-NEXT: ret <4 x float> [[V3]]
+;
+; AVX-LABEL: @PR34724(
+; AVX-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
+; AVX-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
+; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
+; AVX-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
+; AVX-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
+; AVX-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[B]], [[SHIFT1]]
+; AVX-NEXT: [[B01:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
+; AVX-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 2>
+; AVX-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[SHIFT2]], [[B]]
+; AVX-NEXT: [[V1:%.*]] = shufflevector <4 x float> poison, <4 x float> [[TMP1]], <4 x i32> <i32 0, i32 6, i32 2, i32 3>
+; AVX-NEXT: [[V2:%.*]] = insertelement <4 x float> [[V1]], float [[B01]], i32 2
+; AVX-NEXT: [[V3:%.*]] = shufflevector <4 x float> [[V2]], <4 x float> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; AVX-NEXT: ret <4 x float> [[V3]]
;
%a0 = extractelement <4 x float> %a, i32 0
%a1 = extractelement <4 x float> %a, i32 1
diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-binop.ll b/llvm/test/Transforms/VectorCombine/X86/extract-binop.ll
index 52f7cd859a1ab1..49a636c1f804d0 100644
--- a/llvm/test/Transforms/VectorCombine/X86/extract-binop.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/extract-binop.ll
@@ -417,12 +417,18 @@ define float @ext14_ext15_fmul_v16f32(<16 x float> %x) {
}
define <4 x float> @ins_bo_ext_ext(<4 x float> %a, <4 x float> %b) {
-; CHECK-LABEL: @ins_bo_ext_ext(
-; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 2>
-; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[SHIFT]], [[A]]
-; CHECK-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i64 3
-; CHECK-NEXT: [[V3:%.*]] = insertelement <4 x float> [[B:%.*]], float [[A23]], i32 3
-; CHECK-NEXT: ret <4 x float> [[V3]]
+; SSE-LABEL: @ins_bo_ext_ext(
+; SSE-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 2>
+; SSE-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[SHIFT]], [[A]]
+; SSE-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i64 3
+; SSE-NEXT: [[V3:%.*]] = insertelement <4 x float> [[B:%.*]], float [[A23]], i32 3
+; SSE-NEXT: ret <4 x float> [[V3]]
+;
+; AVX-LABEL: @ins_bo_ext_ext(
+; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 2>
+; AVX-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[SHIFT]], [[A]]
+; AVX-NEXT: [[V3:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> [[TMP1]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; AVX-NEXT: ret <4 x float> [[V3]]
;
%a2 = extractelement <4 x float> %a, i32 2
%a3 = extractelement <4 x float> %a, i32 3
@@ -435,13 +441,21 @@ define <4 x float> @ins_bo_ext_ext(<4 x float> %a, <4 x float> %b) {
; but it is likely that extracting from index 3 is the better option.
define <4 x float> @ins_bo_ext_ext_uses(<4 x float> %a, <4 x float> %b) {
-; CHECK-LABEL: @ins_bo_ext_ext_uses(
-; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
-; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
-; CHECK-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
-; CHECK-NEXT: call void @use_f32(float [[A23]])
-; CHECK-NEXT: [[V3:%.*]] = insertelement <4 x float> [[B:%.*]], float [[A23]], i32 3
-; CHECK-NEXT: ret <4 x float> [[V3]]
+; SSE-LABEL: @ins_bo_ext_ext_uses(
+; SSE-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
+; SSE-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
+; SSE-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
+; SSE-NEXT: call void @use_f32(float [[A23]])
+; SSE-NEXT: [[V3:%.*]] = insertelement <4 x float> [[B:%.*]], float [[A23]], i32 3
+; SSE-NEXT: ret <4 x float> [[V3]]
+;
+; AVX-LABEL: @ins_bo_ext_ext_uses(
+; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
+; AVX-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
+; AVX-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
+; AVX-NEXT: call void @use_f32(float [[A23]])
+; AVX-NEXT: [[V3:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> [[TMP1]], <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; AVX-NEXT: ret <4 x float> [[V3]]
;
%a2 = extractelement <4 x float> %a, i32 2
%a3 = extractelement <4 x float> %a, i32 3
@@ -452,22 +466,37 @@ define <4 x float> @ins_bo_ext_ext_uses(<4 x float> %a, <4 x float> %b) {
}
define <4 x float> @PR34724(<4 x float> %a, <4 x float> %b) {
-; CHECK-LABEL: @PR34724(
-; CHECK-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
-; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
-; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
-; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
-; CHECK-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
-; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[B]], [[SHIFT1]]
-; CHECK-NEXT: [[B01:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
-; CHECK-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 2>
-; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[SHIFT2]], [[B]]
-; CHECK-NEXT: [[B23:%.*]] = extractelement <4 x float> [[TMP3]], i64 3
-; CHECK-NEXT: [[V1:%.*]] = insertelement <4 x float> undef, float [[A23]], i32 1
-; CHECK-NEXT: [[V2:%.*]] = insertelement <4 x float> [[V1]], float [[B01]], i32 2
-; CHECK-NEXT: [[V3:%.*]] = insertelement <4 x float> [[V2]], float [[B23]], i32 3
-; CHECK-NEXT: ret <4 x float> [[V3]]
+; SSE-LABEL: @PR34724(
+; SSE-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
+; SSE-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
+; SSE-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
+; SSE-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
+; SSE-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
+; SSE-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
+; SSE-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[B]], [[SHIFT1]]
+; SSE-NEXT: [[B01:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
+; SSE-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 2>
+; SSE-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[SHIFT2]], [[B]]
+; SSE-NEXT: [[B23:%.*]] = extractelement <4 x float> [[TMP3]], i64 3
+; SSE-NEXT: [[V1:%.*]] = insertelement <4 x float> undef, float [[A23]], i32 1
+; SSE-NEXT: [[V2:%.*]] = insertelement <4 x float> [[V1]], float [[B01]], i32 2
+; SSE-NEXT: [[V3:%.*]] = insertelement <4 x float> [[V2]], float [[B23]], i32 3
+; SSE-NEXT: ret <4 x float> [[V3]]
+;
+; AVX-LABEL: @PR34724(
+; AVX-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
+; AVX-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
+; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
+; AVX-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
+; AVX-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
+; AVX-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[B]], [[SHIFT1]]
+; AVX-NEXT: [[B01:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
+; AVX-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 2>
+; AVX-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[SHIFT2]], [[B]]
+; AVX-NEXT: [[V1:%.*]] = shufflevector <4 x float> undef, <4 x float> [[TMP1]], <4 x i32> <i32 0, i32 6, i32 2, i32 3>
+; AVX-NEXT: [[V2:%.*]] = insertelement <4 x float> [[V1]], float [[B01]], i32 2
+; AVX-NEXT: [[V3:%.*]] = shufflevector <4 x float> [[V2]], <4 x float> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; AVX-NEXT: ret <4 x float> [[V3]]
;
%a0 = extractelement <4 x float> %a, i32 0
%a1 = extractelement <4 x float> %a, i32 1
diff --git a/llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll b/llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll
index c4aba63568e2ff..e99e21641531ab 100644
--- a/llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll
@@ -537,19 +537,32 @@ define <2 x float> @load_f32_insert_v2f32_asan(ptr align 16 dereferenceable(16)
declare ptr @getscaleptr()
define void @PR47558_multiple_use_load(ptr nocapture nonnull %resultptr, ptr nocapture nonnull readonly %opptr) nofree nosync {
-; CHECK-LABEL: @PR47558_multiple_use_load(
-; CHECK-NEXT: [[SCALEPTR:%.*]] = tail call nonnull align 16 dereferenceable(64) ptr @getscaleptr()
-; CHECK-NEXT: [[OP:%.*]] = load <2 x float>, ptr [[OPPTR:%.*]], align 4
-; CHECK-NEXT: [[SCALE:%.*]] = load float, ptr [[SCALEPTR]], align 16
-; CHECK-NEXT: [[T1:%.*]] = insertelement <2 x float> poison, float [[SCALE]], i32 0
-; CHECK-NEXT: [[T2:%.*]] = insertelement <2 x float> [[T1]], float [[SCALE]], i32 1
-; CHECK-NEXT: [[T3:%.*]] = fmul <2 x float> [[OP]], [[T2]]
-; CHECK-NEXT: [[T4:%.*]] = extractelement <2 x float> [[T3]], i32 0
-; CHECK-NEXT: [[RESULT0:%.*]] = insertelement <2 x float> poison, float [[T4]], i32 0
-; CHECK-NEXT: [[T5:%.*]] = extractelement <2 x float> [[T3]], i32 1
-; CHECK-NEXT: [[RESULT1:%.*]] = insertelement <2 x float> [[RESULT0]], float [[T5]], i32 1
-; CHECK-NEXT: store <2 x float> [[RESULT1]], ptr [[RESULTPTR:%.*]], align 8
-; CHECK-NEXT: ret void
+; SSE2-LABEL: @PR47558_multiple_use_load(
+; SSE2-NEXT: [[SCALEPTR:%.*]] = tail call nonnull align 16 dereferenceable(64) ptr @getscaleptr()
+; SSE2-NEXT: [[OP:%.*]] = load <2 x float>, ptr [[OPPTR:%.*]], align 4
+; SSE2-NEXT: [[SCALE:%.*]] = load float, ptr [[SCALEPTR]], align 16
+; SSE2-NEXT: [[T1:%.*]] = insertelement <2 x float> poison, float [[SCALE]], i32 0
+; SSE2-NEXT: [[T2:%.*]] = insertelement <2 x float> [[T1]], float [[SCALE]], i32 1
+; SSE2-NEXT: [[T3:%.*]] = fmul <2 x float> [[OP]], [[T2]]
+; SSE2-NEXT: [[T4:%.*]] = extractelement <2 x float> [[T3]], i32 0
+; SSE2-NEXT: [[RESULT0:%.*]] = insertelement <2 x float> poison, float [[T4]], i32 0
+; SSE2-NEXT: [[T5:%.*]] = extractelement <2 x float> [[T3]], i32 1
+; SSE2-NEXT: [[RESULT1:%.*]] = insertelement <2 x float> [[RESULT0]], float [[T5]], i32 1
+; SSE2-NEXT: store <2 x float> [[RESULT1]], ptr [[RESULTPTR:%.*]], align 8
+; SSE2-NEXT: ret void
+;
+; AVX2-LABEL: @PR47558_multiple_use_load(
+; AVX2-NEXT: [[SCALEPTR:%.*]] = tail call nonnull align 16 dereferenceable(64) ptr @getscaleptr()
+; AVX2-NEXT: [[OP:%.*]] = load <2 x float>, ptr [[OPPTR:%.*]], align 4
+; AVX2-NEXT: [[SCALE:%.*]] = load float, ptr [[SCALEPTR]], align 16
+; AVX2-NEXT: [[T1:%.*]] = insertelement <2 x float> poison, float [[SCALE]], i32 0
+; AVX2-NEXT: [[T2:%.*]] = insertelement <2 x float> [[T1]], float [[SCALE]], i32 1
+; AVX2-NEXT: [[T3:%.*]] = fmul <2 x float> [[OP]], [[T2]]
+; AVX2-NEXT: [[T4:%.*]] = extractelement <2 x float> [[T3]], i32 0
+; AVX2-NEXT: [[RESULT0:%.*]] = insertelement <2 x float> poison, float [[T4]], i32 0
+; AVX2-NEXT: [[RESULT1:%.*]] = shufflevector <2 x float> [[RESULT0]], <2 x float> [[T3]], <2 x i32> <i32 0, i32 3>
+; AVX2-NEXT: store <2 x float> [[RESULT1]], ptr [[RESULTPTR:%.*]], align 8
+; AVX2-NEXT: ret void
;
%scaleptr = tail call nonnull align 16 dereferenceable(64) ptr @getscaleptr()
%op = load <2 x float>, ptr %opptr, align 4
diff --git a/llvm/test/Transforms/VectorCombine/X86/load.ll b/llvm/test/Transforms/VectorCombine/X86/load.ll
index d5c19b35838d70..cf3ec41c2935e3 100644
--- a/llvm/test/Transforms/VectorCombine/X86/load.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/load.ll
@@ -520,19 +520,32 @@ define <2 x float> @load_f32_insert_v2f32_asan(ptr align 16 dereferenceable(16)
declare ptr @getscaleptr()
define void @PR47558_multiple_use_load(ptr nocapture nonnull %resultptr, ptr nocapture nonnull readonly %opptr) {
-; CHECK-LABEL: @PR47558_multiple_use_load(
-; CHECK-NEXT: [[SCALEPTR:%.*]] = tail call nonnull align 16 dereferenceable(64) ptr @getscaleptr()
-; CHECK-NEXT: [[OP:%.*]] = load <2 x float>, ptr [[OPPTR:%.*]], align 4
-; CHECK-NEXT: [[SCALE:%.*]] = load float, ptr [[SCALEPTR]], align 16
-; CHECK-NEXT: [[T1:%.*]] = insertelement <2 x float> undef, float [[SCALE]], i32 0
-; CHECK-NEXT: [[T2:%.*]] = insertelement <2 x float> [[T1]], float [[SCALE]], i32 1
-; CHECK-NEXT: [[T3:%.*]] = fmul <2 x float> [[OP]], [[T2]]
-; CHECK-NEXT: [[T4:%.*]] = extractelement <2 x float> [[T3]], i32 0
-; CHECK-NEXT: [[RESULT0:%.*]] = insertelement <2 x float> undef, float [[T4]], i32 0
-; CHECK-NEXT: [[T5:%.*]] = extractelement <2 x float> [[T3]], i32 1
-; CHECK-NEXT: [[RESULT1:%.*]] = insertelement <2 x float> [[RESULT0]], float [[T5]], i32 1
-; CHECK-NEXT: store <2 x float> [[RESULT1]], ptr [[RESULTPTR:%.*]], align 8
-; CHECK-NEXT: ret void
+; SSE2-LABEL: @PR47558_multiple_use_load(
+; SSE2-NEXT: [[SCALEPTR:%.*]] = tail call nonnull align 16 dereferenceable(64) ptr @getscaleptr()
+; SSE2-NEXT: [[OP:%.*]] = load <2 x float>, ptr [[OPPTR:%.*]], align 4
+; SSE2-NEXT: [[SCALE:%.*]] = load float, ptr [[SCALEPTR]], align 16
+; SSE2-NEXT: [[T1:%.*]] = insertelement <2 x float> undef, float [[SCALE]], i32 0
+; SSE2-NEXT: [[T2:%.*]] = insertelement <2 x float> [[T1]], float [[SCALE]], i32 1
+; SSE2-NEXT: [[T3:%.*]] = fmul <2 x float> [[OP]], [[T2]]
+; SSE2-NEXT: [[T4:%.*]] = extractelement <2 x float> [[T3]], i32 0
+; SSE2-NEXT: [[RESULT0:%.*]] = insertelement <2 x float> undef, float [[T4]], i32 0
+; SSE2-NEXT: [[T5:%.*]] = extractelement <2 x float> [[T3]], i32 1
+; SSE2-NEXT: [[RESULT1:%.*]] = insertelement <2 x float> [[RESULT0]], float [[T5]], i32 1
+; SSE2-NEXT: store <2 x float> [[RESULT1]], ptr [[RESULTPTR:%.*]], align 8
+; SSE2-NEXT: ret void
+;
+; AVX2-LABEL: @PR47558_multiple_use_load(
+; AVX2-NEXT: [[SCALEPTR:%.*]] = tail call nonnull align 16 dereferenceable(64) ptr @getscaleptr()
+; AVX2-NEXT: [[OP:%.*]] = load <2 x float>, ptr [[OPPTR:%.*]], align 4
+; AVX2-NEXT: [[SCALE:%.*]] = load float, ptr [[SCALEPTR]], align 16
+; AVX2-NEXT: [[T1:%.*]] = insertelement <2 x float> undef, float [[SCALE]], i32 0
+; AVX2-NEXT: [[T2:%.*]] = insertelement <2 x float> [[T1]], float [[SCALE]], i32 1
+; AVX2-NEXT: [[T3:%.*]] = fmul <2 x float> [[OP]], [[T2]]
+; AVX2-NEXT: [[T4:%.*]] = extractelement <2 x float> [[T3]], i32 0
+; AVX2-NEXT: [[RESULT0:%.*]] = insertelement <2 x float> undef, float [[T4]], i32 0
+; AVX2-NEXT: [[RESULT1:%.*]] = shufflevector <2 x float> [[RESULT0]], <2 x float> [[T3]], <2 x i32> <i32 0, i32 3>
+; AVX2-NEXT: store <2 x float> [[RESULT1]], ptr [[RESULTPTR:%.*]], align 8
+; AVX2-NEXT: ret void
;
%scaleptr = tail call nonnull align 16 dereferenceable(64) ptr @getscaleptr()
%op = load <2 x float>, ptr %opptr, align 4
>From 2e05d30725dd8ead96b13db438abfa514bdc165f Mon Sep 17 00:00:00 2001
From: hanbeom <kese111 at gmail.com>
Date: Fri, 8 Nov 2024 00:26:54 +0900
Subject: [PATCH 2/6] fix comment
---
llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 0ccd535303686d..d2f781f3dd867c 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -2680,7 +2680,7 @@ bool VectorCombine::shrinkType(llvm::Instruction &I) {
}
/// insert (DstVec, (extract (binop), ExtIdx), InsIdx) -->
-/// shuffl (DstVec, (binop), Mask)
+/// shuffle (DstVec, (binop), Mask)
bool VectorCombine::foldInsExtOfBinOpShuffle(Instruction &I) {
Value *DstVec;
BinaryOperator *BO;
>From d77e2f45f7ffa508608f3b2007d25847433520a2 Mon Sep 17 00:00:00 2001
From: hanbeom <kese111 at gmail.com>
Date: Fri, 8 Nov 2024 00:28:00 +0900
Subject: [PATCH 3/6] check whether current instruction is vector or not
---
llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index d2f781f3dd867c..56f181825c2c94 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -2694,8 +2694,8 @@ bool VectorCombine::foldInsExtOfBinOpShuffle(Instruction &I) {
if (!isSafeToSpeculativelyExecute(BO))
return false;
- auto *VecTy = cast<FixedVectorType>(I.getType());
- if (BO->getType() != VecTy)
+ auto *VecTy = dyn_cast<FixedVectorType>(I.getType());
+ if (!VecTy || BO->getType() != VecTy)
return false;
unsigned NumElts = VecTy->getNumElements();
>From 5d5ac39493c92c3aa64719dedf5e397f45d76f10 Mon Sep 17 00:00:00 2001
From: hanbeom <kese111 at gmail.com>
Date: Fri, 8 Nov 2024 00:30:38 +0900
Subject: [PATCH 4/6] use isa to get ExtractElts from current Instruction
---
llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 56f181825c2c94..f1da2192db90b1 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -2707,8 +2707,9 @@ bool VectorCombine::foldInsExtOfBinOpShuffle(Instruction &I) {
Mask[InsIdx] = ExtIdx + NumElts;
// Cost
ExtractElementInst *Ext;
- if ((Ext = dyn_cast<ExtractElementInst>(I.getOperand(0))) == nullptr)
- Ext = dyn_cast<ExtractElementInst>(I.getOperand(1));
+ Ext = isa<ExtractElementInst>(I.getOperand(0))
+ ? cast<ExtractElementInst>(I.getOperand(0))
+ : cast<ExtractElementInst>(I.getOperand(1));
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
InstructionCost OldCost =
>From 713566d08a9589fb3cfcce43f02e7dc095258c2a Mon Sep 17 00:00:00 2001
From: hanbeom <kese111 at gmail.com>
Date: Fri, 8 Nov 2024 00:48:04 +0900
Subject: [PATCH 5/6] use SmallVector with initial value
---
llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index f1da2192db90b1..35315a7e39ad28 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -2702,7 +2702,7 @@ bool VectorCombine::foldInsExtOfBinOpShuffle(Instruction &I) {
if (ExtIdx >= NumElts)
return false;
- SmallVector<int> Mask(NumElts);
+ SmallVector<int> Mask(NumElts, 0);
std::iota(Mask.begin(), Mask.end(), 0);
Mask[InsIdx] = ExtIdx + NumElts;
// Cost
>From 5afe48eacde0a58e9008481922026982dfef7dd5 Mon Sep 17 00:00:00 2001
From: hanbeom <kese111 at gmail.com>
Date: Fri, 8 Nov 2024 00:49:05 +0900
Subject: [PATCH 6/6] fix mis-updated tests
---
.../X86/extract-binop-inseltpoison.ll | 22 ++++++-------------
.../VectorCombine/X86/extract-binop.ll | 22 ++++++-------------
2 files changed, 14 insertions(+), 30 deletions(-)
diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-binop-inseltpoison.ll b/llvm/test/Transforms/VectorCombine/X86/extract-binop-inseltpoison.ll
index e5880c93a9020f..ed9029e14717ea 100644
--- a/llvm/test/Transforms/VectorCombine/X86/extract-binop-inseltpoison.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/extract-binop-inseltpoison.ll
@@ -441,21 +441,13 @@ define <4 x float> @ins_bo_ext_ext(<4 x float> %a, <4 x float> %b) {
; but it is likely that extracting from index 3 is the better option.
define <4 x float> @ins_bo_ext_ext_uses(<4 x float> %a, <4 x float> %b) {
-; SSE-LABEL: @ins_bo_ext_ext_uses(
-; SSE-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
-; SSE-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
-; SSE-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
-; SSE-NEXT: call void @use_f32(float [[A23]])
-; SSE-NEXT: [[V3:%.*]] = insertelement <4 x float> [[B:%.*]], float [[A23]], i32 3
-; SSE-NEXT: ret <4 x float> [[V3]]
-;
-; AVX-LABEL: @ins_bo_ext_ext_uses(
-; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
-; AVX-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
-; AVX-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
-; AVX-NEXT: call void @use_f32(float [[A23]])
-; AVX-NEXT: [[V3:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> [[TMP1]], <4 x i32> <i32 0, i32 1, i32 2, i32 6>
-; AVX-NEXT: ret <4 x float> [[V3]]
+; CHECK-LABEL: @ins_bo_ext_ext_uses(
+; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
+; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
+; CHECK-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
+; CHECK-NEXT: call void @use_f32(float [[A23]])
+; CHECK-NEXT: [[V3:%.*]] = insertelement <4 x float> [[B:%.*]], float [[A23]], i32 3
+; CHECK-NEXT: ret <4 x float> [[V3]]
;
%a2 = extractelement <4 x float> %a, i32 2
%a3 = extractelement <4 x float> %a, i32 3
diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-binop.ll b/llvm/test/Transforms/VectorCombine/X86/extract-binop.ll
index 49a636c1f804d0..1d3177e5d83f38 100644
--- a/llvm/test/Transforms/VectorCombine/X86/extract-binop.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/extract-binop.ll
@@ -441,21 +441,13 @@ define <4 x float> @ins_bo_ext_ext(<4 x float> %a, <4 x float> %b) {
; but it is likely that extracting from index 3 is the better option.
define <4 x float> @ins_bo_ext_ext_uses(<4 x float> %a, <4 x float> %b) {
-; SSE-LABEL: @ins_bo_ext_ext_uses(
-; SSE-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
-; SSE-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
-; SSE-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
-; SSE-NEXT: call void @use_f32(float [[A23]])
-; SSE-NEXT: [[V3:%.*]] = insertelement <4 x float> [[B:%.*]], float [[A23]], i32 3
-; SSE-NEXT: ret <4 x float> [[V3]]
-;
-; AVX-LABEL: @ins_bo_ext_ext_uses(
-; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
-; AVX-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
-; AVX-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
-; AVX-NEXT: call void @use_f32(float [[A23]])
-; AVX-NEXT: [[V3:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> [[TMP1]], <4 x i32> <i32 0, i32 1, i32 2, i32 6>
-; AVX-NEXT: ret <4 x float> [[V3]]
+; CHECK-LABEL: @ins_bo_ext_ext_uses(
+; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
+; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
+; CHECK-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
+; CHECK-NEXT: call void @use_f32(float [[A23]])
+; CHECK-NEXT: [[V3:%.*]] = insertelement <4 x float> [[B:%.*]], float [[A23]], i32 3
+; CHECK-NEXT: ret <4 x float> [[V3]]
;
%a2 = extractelement <4 x float> %a, i32 2
%a3 = extractelement <4 x float> %a, i32 3
More information about the llvm-commits
mailing list