[llvm] [VectorCombine] isExtractExtractCheap - specify the extract/insert shuffle mask to improve shuffle costs (PR #114780)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 4 06:51:37 PST 2024
https://github.com/RKSimon updated https://github.com/llvm/llvm-project/pull/114780
>From eb1c3ac0d48d104f7dc27a5bc32ba52e087bdae9 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Mon, 4 Nov 2024 12:01:23 +0000
Subject: [PATCH 1/2] [VectorCombine] isExtractExtractCheap - specify the
extract/insert shuffle mask to improve shuffle costs
This shuffle mask is so focused, the cost model is very likely to be able to determine a specific (lower) cost
---
.../Transforms/Vectorize/VectorCombine.cpp | 14 +++-
.../Transforms/PhaseOrdering/X86/pr50392.ll | 29 +++-----
.../load-extractelement-scalarization.ll | 72 +++++++++----------
.../X86/extract-binop-inseltpoison.ll | 16 +++--
.../VectorCombine/X86/extract-binop.ll | 16 +++--
5 files changed, 80 insertions(+), 67 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 025234c54956b2..4318ed3c4173f8 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -447,6 +447,8 @@ bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
// TODO: Evaluate whether that always results in lowest cost. Alternatively,
// check the cost of creating a broadcast shuffle and shuffling both
// operands to element 0.
+ unsigned BestExtIndex = Extract0Cost > Extract1Cost ? Ext0Index : Ext1Index;
+ unsigned BestInsIndex = Extract0Cost > Extract1Cost ? Ext1Index : Ext0Index;
InstructionCost CheapExtractCost = std::min(Extract0Cost, Extract1Cost);
// Extra uses of the extracts mean that we include those costs in the
@@ -482,8 +484,16 @@ bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
// ShufMask = { poison, poison, 0, poison }
// TODO: The cost model has an option for a "broadcast" shuffle
// (splat-from-element-0), but no option for a more general splat.
- NewCost +=
- TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, VecTy);
+ if (auto *FixedVecTy = dyn_cast<FixedVectorType>(VecTy)) {
+ SmallVector<int> ShuffleMask;
+ ShuffleMask.append(FixedVecTy->getNumElements(), PoisonMaskElem);
+ ShuffleMask[BestInsIndex] = BestExtIndex;
+ NewCost += TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
+ VecTy, ShuffleMask);
+ } else {
+ NewCost +=
+ TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, VecTy);
+ }
}
// Aggressively form a vector op if the cost is equal because the transform
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/pr50392.ll b/llvm/test/Transforms/PhaseOrdering/X86/pr50392.ll
index d0568f3b961fde..82e92be7c21bcc 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/pr50392.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/pr50392.ll
@@ -30,24 +30,14 @@ define <4 x double> @PR50392(<4 x double> %a, <4 x double> %b) {
; SSE4-NEXT: [[SHUFFLE:%.*]] = insertelement <4 x double> [[TMP3]], double [[ADD12]], i64 3
; SSE4-NEXT: ret <4 x double> [[SHUFFLE]]
;
-; AVX1-LABEL: @PR50392(
-; AVX1-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> <i32 0, i32 poison, i32 4, i32 poison>
-; AVX1-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 1, i32 poison, i32 5, i32 poison>
-; AVX1-NEXT: [[TMP4:%.*]] = fadd <4 x double> [[TMP1]], [[TMP2]]
-; AVX1-NEXT: [[VECEXT10:%.*]] = extractelement <4 x double> [[B]], i64 2
-; AVX1-NEXT: [[VECEXT11:%.*]] = extractelement <4 x double> [[B]], i64 3
-; AVX1-NEXT: [[ADD12:%.*]] = fadd double [[VECEXT10]], [[VECEXT11]]
-; AVX1-NEXT: [[SHUFFLE:%.*]] = insertelement <4 x double> [[TMP4]], double [[ADD12]], i64 3
-; AVX1-NEXT: ret <4 x double> [[SHUFFLE]]
-;
-; AVX2-LABEL: @PR50392(
-; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> <i32 0, i32 poison, i32 4, i32 poison>
-; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 1, i32 poison, i32 5, i32 poison>
-; AVX2-NEXT: [[TMP4:%.*]] = fadd <4 x double> [[TMP1]], [[TMP2]]
-; AVX2-NEXT: [[SHIFT:%.*]] = shufflevector <4 x double> [[B]], <4 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
-; AVX2-NEXT: [[TMP5:%.*]] = fadd <4 x double> [[B]], [[SHIFT]]
-; AVX2-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x double> [[TMP4]], <4 x double> [[TMP5]], <4 x i32> <i32 0, i32 poison, i32 2, i32 6>
-; AVX2-NEXT: ret <4 x double> [[SHUFFLE]]
+; AVX-LABEL: @PR50392(
+; AVX-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> <i32 0, i32 poison, i32 4, i32 poison>
+; AVX-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 1, i32 poison, i32 5, i32 poison>
+; AVX-NEXT: [[TMP3:%.*]] = fadd <4 x double> [[TMP1]], [[TMP2]]
+; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <4 x double> [[B]], <4 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
+; AVX-NEXT: [[TMP4:%.*]] = fadd <4 x double> [[B]], [[SHIFT]]
+; AVX-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> [[TMP4]], <4 x i32> <i32 0, i32 poison, i32 2, i32 6>
+; AVX-NEXT: ret <4 x double> [[SHUFFLE]]
;
%vecext = extractelement <4 x double> %a, i32 0
%vecext1 = extractelement <4 x double> %a, i32 1
@@ -69,5 +59,6 @@ define <4 x double> @PR50392(<4 x double> %a, <4 x double> %b) {
ret <4 x double> %shuffle
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; AVX: {{.*}}
+; AVX1: {{.*}}
+; AVX2: {{.*}}
; SSE: {{.*}}
diff --git a/llvm/test/Transforms/VectorCombine/AArch64/load-extractelement-scalarization.ll b/llvm/test/Transforms/VectorCombine/AArch64/load-extractelement-scalarization.ll
index 613c71cb769b0c..551d6d1cabd41e 100644
--- a/llvm/test/Transforms/VectorCombine/AArch64/load-extractelement-scalarization.ll
+++ b/llvm/test/Transforms/VectorCombine/AArch64/load-extractelement-scalarization.ll
@@ -541,18 +541,18 @@ define i32 @load_extract_clobber_store_between(ptr %x, ptr %y) {
define i32 @load_extract_clobber_store_between_limit(ptr %x, ptr %y, <8 x i32> %z) {
; CHECK-LABEL: @load_extract_clobber_store_between_limit(
; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16
-; CHECK-NEXT: [[Z_0:%.*]] = extractelement <8 x i32> [[Z:%.*]], i32 0
-; CHECK-NEXT: [[Z_1:%.*]] = extractelement <8 x i32> [[Z]], i32 1
-; CHECK-NEXT: [[ADD_0:%.*]] = add i32 [[Z_0]], [[Z_1]]
-; CHECK-NEXT: [[Z_2:%.*]] = extractelement <8 x i32> [[Z]], i32 2
-; CHECK-NEXT: [[ADD_1:%.*]] = add i32 [[ADD_0]], [[Z_2]]
-; CHECK-NEXT: [[Z_3:%.*]] = extractelement <8 x i32> [[Z]], i32 3
-; CHECK-NEXT: [[ADD_2:%.*]] = add i32 [[ADD_1]], [[Z_3]]
-; CHECK-NEXT: [[Z_4:%.*]] = extractelement <8 x i32> [[Z]], i32 4
-; CHECK-NEXT: [[ADD_3:%.*]] = add i32 [[ADD_2]], [[Z_4]]
+; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <8 x i32> [[Z1:%.*]], <8 x i32> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP1:%.*]] = add <8 x i32> [[Z1]], [[SHIFT]]
+; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <8 x i32> [[Z1]], <8 x i32> poison, <8 x i32> <i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP2:%.*]] = add <8 x i32> [[TMP1]], [[SHIFT1]]
+; CHECK-NEXT: [[SHIFT2:%.*]] = shufflevector <8 x i32> [[Z1]], <8 x i32> poison, <8 x i32> <i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i32> [[TMP2]], [[SHIFT2]]
+; CHECK-NEXT: [[SHIFT3:%.*]] = shufflevector <8 x i32> [[Z1]], <8 x i32> poison, <8 x i32> <i32 4, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[Z:%.*]] = add <8 x i32> [[TMP3]], [[SHIFT3]]
+; CHECK-NEXT: [[Z_0:%.*]] = extractelement <8 x i32> [[Z]], i32 0
; CHECK-NEXT: store i8 0, ptr [[Y:%.*]], align 1
; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i32> [[LV]], i32 2
-; CHECK-NEXT: [[ADD_4:%.*]] = add i32 [[ADD_3]], [[R]]
+; CHECK-NEXT: [[ADD_4:%.*]] = add i32 [[Z_0]], [[R]]
; CHECK-NEXT: ret i32 [[ADD_4]]
;
%lv = load <4 x i32>, ptr %x
@@ -573,35 +573,35 @@ define i32 @load_extract_clobber_store_between_limit(ptr %x, ptr %y, <8 x i32> %
define i32 @load_extract_clobber_store_after_limit(ptr %x, ptr %y, <8 x i32> %z) {
; LIMIT-DEFAULT-LABEL: @load_extract_clobber_store_after_limit(
-; LIMIT-DEFAULT-NEXT: [[Z_0:%.*]] = extractelement <8 x i32> [[Z:%.*]], i32 0
-; LIMIT-DEFAULT-NEXT: [[Z_1:%.*]] = extractelement <8 x i32> [[Z]], i32 1
-; LIMIT-DEFAULT-NEXT: [[ADD_0:%.*]] = add i32 [[Z_0]], [[Z_1]]
-; LIMIT-DEFAULT-NEXT: [[Z_2:%.*]] = extractelement <8 x i32> [[Z]], i32 2
-; LIMIT-DEFAULT-NEXT: [[ADD_1:%.*]] = add i32 [[ADD_0]], [[Z_2]]
-; LIMIT-DEFAULT-NEXT: [[Z_3:%.*]] = extractelement <8 x i32> [[Z]], i32 3
-; LIMIT-DEFAULT-NEXT: [[ADD_2:%.*]] = add i32 [[ADD_1]], [[Z_3]]
-; LIMIT-DEFAULT-NEXT: [[Z_4:%.*]] = extractelement <8 x i32> [[Z]], i32 4
-; LIMIT-DEFAULT-NEXT: [[ADD_3:%.*]] = add i32 [[ADD_2]], [[Z_4]]
+; LIMIT-DEFAULT-NEXT: [[SHIFT:%.*]] = shufflevector <8 x i32> [[Z1:%.*]], <8 x i32> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; LIMIT-DEFAULT-NEXT: [[TMP4:%.*]] = add <8 x i32> [[Z1]], [[SHIFT]]
+; LIMIT-DEFAULT-NEXT: [[SHIFT1:%.*]] = shufflevector <8 x i32> [[Z1]], <8 x i32> poison, <8 x i32> <i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; LIMIT-DEFAULT-NEXT: [[TMP2:%.*]] = add <8 x i32> [[TMP4]], [[SHIFT1]]
+; LIMIT-DEFAULT-NEXT: [[SHIFT2:%.*]] = shufflevector <8 x i32> [[Z1]], <8 x i32> poison, <8 x i32> <i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; LIMIT-DEFAULT-NEXT: [[TMP3:%.*]] = add <8 x i32> [[TMP2]], [[SHIFT2]]
+; LIMIT-DEFAULT-NEXT: [[SHIFT3:%.*]] = shufflevector <8 x i32> [[Z1]], <8 x i32> poison, <8 x i32> <i32 4, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; LIMIT-DEFAULT-NEXT: [[Z:%.*]] = add <8 x i32> [[TMP3]], [[SHIFT3]]
+; LIMIT-DEFAULT-NEXT: [[Z_0:%.*]] = extractelement <8 x i32> [[Z]], i32 0
; LIMIT-DEFAULT-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr [[X:%.*]], i32 0, i32 2
; LIMIT-DEFAULT-NEXT: [[R:%.*]] = load i32, ptr [[TMP1]], align 8
; LIMIT-DEFAULT-NEXT: store i8 0, ptr [[Y:%.*]], align 1
-; LIMIT-DEFAULT-NEXT: [[ADD_4:%.*]] = add i32 [[ADD_3]], [[R]]
+; LIMIT-DEFAULT-NEXT: [[ADD_4:%.*]] = add i32 [[Z_0]], [[R]]
; LIMIT-DEFAULT-NEXT: ret i32 [[ADD_4]]
;
; LIMIT2-LABEL: @load_extract_clobber_store_after_limit(
; LIMIT2-NEXT: [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16
-; LIMIT2-NEXT: [[Z_0:%.*]] = extractelement <8 x i32> [[Z:%.*]], i32 0
-; LIMIT2-NEXT: [[Z_1:%.*]] = extractelement <8 x i32> [[Z]], i32 1
-; LIMIT2-NEXT: [[ADD_0:%.*]] = add i32 [[Z_0]], [[Z_1]]
-; LIMIT2-NEXT: [[Z_2:%.*]] = extractelement <8 x i32> [[Z]], i32 2
-; LIMIT2-NEXT: [[ADD_1:%.*]] = add i32 [[ADD_0]], [[Z_2]]
-; LIMIT2-NEXT: [[Z_3:%.*]] = extractelement <8 x i32> [[Z]], i32 3
-; LIMIT2-NEXT: [[ADD_2:%.*]] = add i32 [[ADD_1]], [[Z_3]]
-; LIMIT2-NEXT: [[Z_4:%.*]] = extractelement <8 x i32> [[Z]], i32 4
-; LIMIT2-NEXT: [[ADD_3:%.*]] = add i32 [[ADD_2]], [[Z_4]]
+; LIMIT2-NEXT: [[SHIFT:%.*]] = shufflevector <8 x i32> [[Z1:%.*]], <8 x i32> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; LIMIT2-NEXT: [[TMP1:%.*]] = add <8 x i32> [[Z1]], [[SHIFT]]
+; LIMIT2-NEXT: [[SHIFT1:%.*]] = shufflevector <8 x i32> [[Z1]], <8 x i32> poison, <8 x i32> <i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; LIMIT2-NEXT: [[TMP2:%.*]] = add <8 x i32> [[TMP1]], [[SHIFT1]]
+; LIMIT2-NEXT: [[SHIFT2:%.*]] = shufflevector <8 x i32> [[Z1]], <8 x i32> poison, <8 x i32> <i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; LIMIT2-NEXT: [[TMP3:%.*]] = add <8 x i32> [[TMP2]], [[SHIFT2]]
+; LIMIT2-NEXT: [[SHIFT3:%.*]] = shufflevector <8 x i32> [[Z1]], <8 x i32> poison, <8 x i32> <i32 4, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; LIMIT2-NEXT: [[Z:%.*]] = add <8 x i32> [[TMP3]], [[SHIFT3]]
+; LIMIT2-NEXT: [[Z_0:%.*]] = extractelement <8 x i32> [[Z]], i32 0
; LIMIT2-NEXT: [[R:%.*]] = extractelement <4 x i32> [[LV]], i32 2
; LIMIT2-NEXT: store i8 0, ptr [[Y:%.*]], align 1
-; LIMIT2-NEXT: [[ADD_4:%.*]] = add i32 [[ADD_3]], [[R]]
+; LIMIT2-NEXT: [[ADD_4:%.*]] = add i32 [[Z_0]], [[R]]
; LIMIT2-NEXT: ret i32 [[ADD_4]]
;
%lv = load <4 x i32>, ptr %x
@@ -671,9 +671,9 @@ define i1 @load_with_non_power_of_2_element_type_2(ptr %x) {
define i32 @load_multiple_extracts_with_constant_idx(ptr %x) {
; CHECK-LABEL: @load_multiple_extracts_with_constant_idx(
; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16
-; CHECK-NEXT: [[E_0:%.*]] = extractelement <4 x i32> [[LV]], i32 0
-; CHECK-NEXT: [[E_1:%.*]] = extractelement <4 x i32> [[LV]], i32 1
-; CHECK-NEXT: [[RES:%.*]] = add i32 [[E_0]], [[E_1]]
+; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[LV]], <4 x i32> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[LV]], [[SHIFT]]
+; CHECK-NEXT: [[RES:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0
; CHECK-NEXT: ret i32 [[RES]]
;
%lv = load <4 x i32>, ptr %x
@@ -688,9 +688,9 @@ define i32 @load_multiple_extracts_with_constant_idx(ptr %x) {
define i32 @load_multiple_extracts_with_constant_idx_profitable(ptr %x) {
; CHECK-LABEL: @load_multiple_extracts_with_constant_idx_profitable(
; CHECK-NEXT: [[LV:%.*]] = load <8 x i32>, ptr [[X:%.*]], align 16
-; CHECK-NEXT: [[E_0:%.*]] = extractelement <8 x i32> [[LV]], i32 0
-; CHECK-NEXT: [[E_1:%.*]] = extractelement <8 x i32> [[LV]], i32 6
-; CHECK-NEXT: [[RES:%.*]] = add i32 [[E_0]], [[E_1]]
+; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <8 x i32> [[LV]], <8 x i32> poison, <8 x i32> <i32 6, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP1:%.*]] = add <8 x i32> [[LV]], [[SHIFT]]
+; CHECK-NEXT: [[RES:%.*]] = extractelement <8 x i32> [[TMP1]], i32 0
; CHECK-NEXT: ret i32 [[RES]]
;
%lv = load <8 x i32>, ptr %x, align 16
diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-binop-inseltpoison.ll b/llvm/test/Transforms/VectorCombine/X86/extract-binop-inseltpoison.ll
index 3d69f15fc5f249..effd846e7e6336 100644
--- a/llvm/test/Transforms/VectorCombine/X86/extract-binop-inseltpoison.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/extract-binop-inseltpoison.ll
@@ -404,11 +404,17 @@ define float @ext0_ext8_fmul_v16f32(<16 x float> %x) {
}
define float @ext14_ext15_fmul_v16f32(<16 x float> %x) {
-; CHECK-LABEL: @ext14_ext15_fmul_v16f32(
-; CHECK-NEXT: [[E0:%.*]] = extractelement <16 x float> [[X:%.*]], i32 14
-; CHECK-NEXT: [[E1:%.*]] = extractelement <16 x float> [[X]], i32 15
-; CHECK-NEXT: [[R:%.*]] = fadd float [[E0]], [[E1]]
-; CHECK-NEXT: ret float [[R]]
+; SSE-LABEL: @ext14_ext15_fmul_v16f32(
+; SSE-NEXT: [[E0:%.*]] = extractelement <16 x float> [[X:%.*]], i32 14
+; SSE-NEXT: [[E1:%.*]] = extractelement <16 x float> [[X]], i32 15
+; SSE-NEXT: [[R:%.*]] = fadd float [[E0]], [[E1]]
+; SSE-NEXT: ret float [[R]]
+;
+; AVX-LABEL: @ext14_ext15_fmul_v16f32(
+; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <16 x float> [[X:%.*]], <16 x float> poison, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 15, i32 poison>
+; AVX-NEXT: [[TMP1:%.*]] = fadd <16 x float> [[X]], [[SHIFT]]
+; AVX-NEXT: [[R:%.*]] = extractelement <16 x float> [[TMP1]], i32 14
+; AVX-NEXT: ret float [[R]]
;
%e0 = extractelement <16 x float> %x, i32 14
%e1 = extractelement <16 x float> %x, i32 15
diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-binop.ll b/llvm/test/Transforms/VectorCombine/X86/extract-binop.ll
index 52f7cd859a1ab1..7481ae0de4ad84 100644
--- a/llvm/test/Transforms/VectorCombine/X86/extract-binop.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/extract-binop.ll
@@ -404,11 +404,17 @@ define float @ext0_ext8_fmul_v16f32(<16 x float> %x) {
}
define float @ext14_ext15_fmul_v16f32(<16 x float> %x) {
-; CHECK-LABEL: @ext14_ext15_fmul_v16f32(
-; CHECK-NEXT: [[E0:%.*]] = extractelement <16 x float> [[X:%.*]], i32 14
-; CHECK-NEXT: [[E1:%.*]] = extractelement <16 x float> [[X]], i32 15
-; CHECK-NEXT: [[R:%.*]] = fadd float [[E0]], [[E1]]
-; CHECK-NEXT: ret float [[R]]
+; SSE-LABEL: @ext14_ext15_fmul_v16f32(
+; SSE-NEXT: [[E0:%.*]] = extractelement <16 x float> [[X:%.*]], i32 14
+; SSE-NEXT: [[E1:%.*]] = extractelement <16 x float> [[X]], i32 15
+; SSE-NEXT: [[R:%.*]] = fadd float [[E0]], [[E1]]
+; SSE-NEXT: ret float [[R]]
+;
+; AVX-LABEL: @ext14_ext15_fmul_v16f32(
+; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <16 x float> [[X:%.*]], <16 x float> poison, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 15, i32 poison>
+; AVX-NEXT: [[TMP1:%.*]] = fadd <16 x float> [[X]], [[SHIFT]]
+; AVX-NEXT: [[R:%.*]] = extractelement <16 x float> [[TMP1]], i32 14
+; AVX-NEXT: ret float [[R]]
;
%e0 = extractelement <16 x float> %x, i32 14
%e1 = extractelement <16 x float> %x, i32 15
>From 0369be88933c450a7fe4d057a3002ee4fd035a32 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Mon, 4 Nov 2024 14:51:20 +0000
Subject: [PATCH 2/2] Cleanup SmallVector initialization
---
llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 4318ed3c4173f8..5ce8cae4cd80b3 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -485,8 +485,8 @@ bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
// TODO: The cost model has an option for a "broadcast" shuffle
// (splat-from-element-0), but no option for a more general splat.
if (auto *FixedVecTy = dyn_cast<FixedVectorType>(VecTy)) {
- SmallVector<int> ShuffleMask;
- ShuffleMask.append(FixedVecTy->getNumElements(), PoisonMaskElem);
+ SmallVector<int> ShuffleMask(FixedVecTy->getNumElements(),
+ PoisonMaskElem);
ShuffleMask[BestInsIndex] = BestExtIndex;
NewCost += TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
VecTy, ShuffleMask);
More information about the llvm-commits
mailing list