[llvm] [VectorCombine][X86] Use updated getVectorInstrCost hook (PR #137823)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 29 07:57:30 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-vectorizers
Author: Luke Lau (lukel97)
<details>
<summary>Changes</summary>
This addresses a TODO where previously scalarizeBinopOrCmp conservatively bailed if one of the operands was a load.
getVectorInstrCost was updated in https://reviews.llvm.org/D140498 to also take the values involved, so we can pass in the scalar value to be inserted, which should return an accurate cost for a gather.
We want to remove this restriction on RISC-V, since scalarization there is always profitable whether or not the scalar is a load.
On X86 this seems to prevent scalarization on SSE where the index is 0, because the cost of an insertion into undef goes from 12 -> 1 with the value passed into it. Is this correct? Or is there a way to fix this in X86TTIImpl::getVectorInstrCost? cc @alexey-bataev
---
Patch is 30.80 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/137823.diff
7 Files Affected:
- (modified) llvm/lib/Transforms/Vectorize/VectorCombine.cpp (+8-12)
- (modified) llvm/test/Transforms/VectorCombine/X86/insert-binop-inseltpoison.ll (+23-10)
- (modified) llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant-inseltpoison.ll (+58-28)
- (modified) llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant.ll (+58-28)
- (modified) llvm/test/Transforms/VectorCombine/X86/insert-binop.ll (+23-10)
- (modified) llvm/test/Transforms/VectorCombine/X86/scalarize-cmp-inseltpoison.ll (+41-19)
- (modified) llvm/test/Transforms/VectorCombine/X86/scalarize-cmp.ll (+41-19)
``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 04c084ffdda97..f046a7d305d51 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -1066,14 +1066,6 @@ bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
VecTy1->getElementCount().getKnownMinValue() <= Index1)
return false;
- // Bail for single insertion if it is a load.
- // TODO: Handle this once getVectorInstrCost can cost for load/stores.
- auto *I0 = dyn_cast_or_null<Instruction>(V0);
- auto *I1 = dyn_cast_or_null<Instruction>(V1);
- if ((IsConst0 && I1 && I1->mayReadFromMemory()) ||
- (IsConst1 && I0 && I0->mayReadFromMemory()))
- return false;
-
uint64_t Index = IsConst0 ? Index1 : Index0;
Type *ScalarTy = IsConst0 ? V1->getType() : V0->getType();
Type *VecTy = I.getType();
@@ -1100,11 +1092,15 @@ bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
// both sequences.
InstructionCost InsertCost = TTI.getVectorInstrCost(
Instruction::InsertElement, VecTy, CostKind, Index);
- InstructionCost OldCost =
- (IsConst0 ? 0 : InsertCost) + (IsConst1 ? 0 : InsertCost) + VectorOpCost;
+ InstructionCost InsertCostV0 = TTI.getVectorInstrCost(
+ Instruction::InsertElement, VecTy, CostKind, Index, VecC0, V0);
+ InstructionCost InsertCostV1 = TTI.getVectorInstrCost(
+ Instruction::InsertElement, VecTy, CostKind, Index, VecC1, V1);
+ InstructionCost OldCost = (IsConst0 ? 0 : InsertCostV0) +
+ (IsConst1 ? 0 : InsertCostV1) + VectorOpCost;
InstructionCost NewCost = ScalarOpCost + InsertCost +
- (IsConst0 ? 0 : !Ins0->hasOneUse() * InsertCost) +
- (IsConst1 ? 0 : !Ins1->hasOneUse() * InsertCost);
+ (IsConst0 ? 0 : !Ins0->hasOneUse() * InsertCostV0) +
+ (IsConst1 ? 0 : !Ins1->hasOneUse() * InsertCostV1);
// We want to scalarize unless the vector variant actually has lower cost.
if (OldCost < NewCost || !NewCost.isValid())
diff --git a/llvm/test/Transforms/VectorCombine/X86/insert-binop-inseltpoison.ll b/llvm/test/Transforms/VectorCombine/X86/insert-binop-inseltpoison.ll
index c1100780254c1..76440c7047059 100644
--- a/llvm/test/Transforms/VectorCombine/X86/insert-binop-inseltpoison.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/insert-binop-inseltpoison.ll
@@ -8,10 +8,16 @@ declare void @usef(<4 x float>)
; Eliminating an insert is profitable.
define <16 x i8> @ins0_ins0_add(i8 %x, i8 %y) {
-; CHECK-LABEL: @ins0_ins0_add(
-; CHECK-NEXT: [[R_SCALAR:%.*]] = add i8 [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT: [[R:%.*]] = insertelement <16 x i8> poison, i8 [[R_SCALAR]], i64 0
-; CHECK-NEXT: ret <16 x i8> [[R]]
+; SSE-LABEL: @ins0_ins0_add(
+; SSE-NEXT: [[I0:%.*]] = insertelement <16 x i8> poison, i8 [[X:%.*]], i32 0
+; SSE-NEXT: [[I1:%.*]] = insertelement <16 x i8> poison, i8 [[Y:%.*]], i32 0
+; SSE-NEXT: [[R:%.*]] = add <16 x i8> [[I0]], [[I1]]
+; SSE-NEXT: ret <16 x i8> [[R]]
+;
+; AVX-LABEL: @ins0_ins0_add(
+; AVX-NEXT: [[R_SCALAR:%.*]] = add i8 [[X:%.*]], [[Y:%.*]]
+; AVX-NEXT: [[R:%.*]] = insertelement <16 x i8> poison, i8 [[R_SCALAR]], i64 0
+; AVX-NEXT: ret <16 x i8> [[R]]
;
%i0 = insertelement <16 x i8> poison, i8 %x, i32 0
%i1 = insertelement <16 x i8> poison, i8 %y, i32 0
@@ -155,12 +161,19 @@ define <2 x i64> @ins1_ins1_urem(i64 %x, i64 %y) {
; Extra use is accounted for in cost calculation.
define <4 x i32> @ins0_ins0_xor(i32 %x, i32 %y) {
-; CHECK-LABEL: @ins0_ins0_xor(
-; CHECK-NEXT: [[I0:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i32 0
-; CHECK-NEXT: call void @use(<4 x i32> [[I0]])
-; CHECK-NEXT: [[R_SCALAR:%.*]] = xor i32 [[X]], [[Y:%.*]]
-; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i32> poison, i32 [[R_SCALAR]], i64 0
-; CHECK-NEXT: ret <4 x i32> [[R]]
+; SSE-LABEL: @ins0_ins0_xor(
+; SSE-NEXT: [[I0:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i32 0
+; SSE-NEXT: call void @use(<4 x i32> [[I0]])
+; SSE-NEXT: [[I1:%.*]] = insertelement <4 x i32> poison, i32 [[Y:%.*]], i32 0
+; SSE-NEXT: [[R:%.*]] = xor <4 x i32> [[I0]], [[I1]]
+; SSE-NEXT: ret <4 x i32> [[R]]
+;
+; AVX-LABEL: @ins0_ins0_xor(
+; AVX-NEXT: [[I0:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i32 0
+; AVX-NEXT: call void @use(<4 x i32> [[I0]])
+; AVX-NEXT: [[R_SCALAR:%.*]] = xor i32 [[X]], [[Y:%.*]]
+; AVX-NEXT: [[R:%.*]] = insertelement <4 x i32> poison, i32 [[R_SCALAR]], i64 0
+; AVX-NEXT: ret <4 x i32> [[R]]
;
%i0 = insertelement <4 x i32> poison, i32 %x, i32 0
call void @use(<4 x i32> %i0)
diff --git a/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant-inseltpoison.ll b/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant-inseltpoison.ll
index 05251cb829b2b..751539aa0f431 100644
--- a/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant-inseltpoison.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant-inseltpoison.ll
@@ -3,10 +3,15 @@
; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=AVX2 | FileCheck %s --check-prefixes=CHECK,AVX
define <2 x i64> @add_constant(i64 %x) {
-; CHECK-LABEL: @add_constant(
-; CHECK-NEXT: [[BO_SCALAR:%.*]] = add i64 [[X:%.*]], 42
-; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 0
-; CHECK-NEXT: ret <2 x i64> [[BO]]
+; SSE-LABEL: @add_constant(
+; SSE-NEXT: [[INS:%.*]] = insertelement <2 x i64> poison, i64 [[X:%.*]], i32 0
+; SSE-NEXT: [[BO:%.*]] = add <2 x i64> [[INS]], <i64 42, i64 undef>
+; SSE-NEXT: ret <2 x i64> [[BO]]
+;
+; AVX-LABEL: @add_constant(
+; AVX-NEXT: [[BO_SCALAR:%.*]] = add i64 [[X:%.*]], 42
+; AVX-NEXT: [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 0
+; AVX-NEXT: ret <2 x i64> [[BO]]
;
%ins = insertelement <2 x i64> poison, i64 %x, i32 0
%bo = add <2 x i64> %ins, <i64 42, i64 undef>
@@ -14,10 +19,15 @@ define <2 x i64> @add_constant(i64 %x) {
}
define <2 x i64> @add_constant_not_undef_lane(i64 %x) {
-; CHECK-LABEL: @add_constant_not_undef_lane(
-; CHECK-NEXT: [[BO_SCALAR:%.*]] = add i64 [[X:%.*]], 42
-; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 0
-; CHECK-NEXT: ret <2 x i64> [[BO]]
+; SSE-LABEL: @add_constant_not_undef_lane(
+; SSE-NEXT: [[INS:%.*]] = insertelement <2 x i64> poison, i64 [[X:%.*]], i32 0
+; SSE-NEXT: [[BO:%.*]] = add <2 x i64> [[INS]], <i64 42, i64 -42>
+; SSE-NEXT: ret <2 x i64> [[BO]]
+;
+; AVX-LABEL: @add_constant_not_undef_lane(
+; AVX-NEXT: [[BO_SCALAR:%.*]] = add i64 [[X:%.*]], 42
+; AVX-NEXT: [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 0
+; AVX-NEXT: ret <2 x i64> [[BO]]
;
%ins = insertelement <2 x i64> poison, i64 %x, i32 0
%bo = add <2 x i64> %ins, <i64 42, i64 -42>
@@ -153,8 +163,8 @@ define <2 x i64> @shl_constant_op0_not_undef_lane(i64 %x) {
define <2 x i64> @shl_constant_op0_load(ptr %p) {
; CHECK-LABEL: @shl_constant_op0_load(
; CHECK-NEXT: [[LD:%.*]] = load i64, ptr [[P:%.*]], align 8
-; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> poison, i64 [[LD]], i32 1
-; CHECK-NEXT: [[BO:%.*]] = shl <2 x i64> <i64 undef, i64 2>, [[INS]]
+; CHECK-NEXT: [[BO_SCALAR:%.*]] = shl i64 2, [[LD]]
+; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 1
; CHECK-NEXT: ret <2 x i64> [[BO]]
;
%ld = load i64, ptr %p
@@ -204,8 +214,8 @@ define <2 x i64> @shl_constant_op1_not_undef_lane(i64 %x) {
define <2 x i64> @shl_constant_op1_load(ptr %p) {
; CHECK-LABEL: @shl_constant_op1_load(
; CHECK-NEXT: [[LD:%.*]] = load i64, ptr [[P:%.*]], align 8
-; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> poison, i64 [[LD]], i32 0
-; CHECK-NEXT: [[BO:%.*]] = shl nuw <2 x i64> [[INS]], <i64 5, i64 2>
+; CHECK-NEXT: [[BO_SCALAR:%.*]] = shl nuw i64 [[LD]], 5
+; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 0
; CHECK-NEXT: ret <2 x i64> [[BO]]
;
%ld = load i64, ptr %p
@@ -479,10 +489,15 @@ define <2 x i64> @sdiv_constant_op1_not_undef_lane(i64 %x) {
}
define <2 x i64> @and_constant(i64 %x) {
-; CHECK-LABEL: @and_constant(
-; CHECK-NEXT: [[BO_SCALAR:%.*]] = and i64 [[X:%.*]], 42
-; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 0
-; CHECK-NEXT: ret <2 x i64> [[BO]]
+; SSE-LABEL: @and_constant(
+; SSE-NEXT: [[INS:%.*]] = insertelement <2 x i64> poison, i64 [[X:%.*]], i32 0
+; SSE-NEXT: [[BO:%.*]] = and <2 x i64> [[INS]], <i64 42, i64 undef>
+; SSE-NEXT: ret <2 x i64> [[BO]]
+;
+; AVX-LABEL: @and_constant(
+; AVX-NEXT: [[BO_SCALAR:%.*]] = and i64 [[X:%.*]], 42
+; AVX-NEXT: [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 0
+; AVX-NEXT: ret <2 x i64> [[BO]]
;
%ins = insertelement <2 x i64> poison, i64 %x, i32 0
%bo = and <2 x i64> %ins, <i64 42, i64 undef>
@@ -490,10 +505,15 @@ define <2 x i64> @and_constant(i64 %x) {
}
define <2 x i64> @and_constant_not_undef_lane(i64 %x) {
-; CHECK-LABEL: @and_constant_not_undef_lane(
-; CHECK-NEXT: [[BO_SCALAR:%.*]] = and i64 [[X:%.*]], 42
-; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 0
-; CHECK-NEXT: ret <2 x i64> [[BO]]
+; SSE-LABEL: @and_constant_not_undef_lane(
+; SSE-NEXT: [[INS:%.*]] = insertelement <2 x i64> poison, i64 [[X:%.*]], i32 0
+; SSE-NEXT: [[BO:%.*]] = and <2 x i64> [[INS]], <i64 42, i64 -42>
+; SSE-NEXT: ret <2 x i64> [[BO]]
+;
+; AVX-LABEL: @and_constant_not_undef_lane(
+; AVX-NEXT: [[BO_SCALAR:%.*]] = and i64 [[X:%.*]], 42
+; AVX-NEXT: [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 0
+; AVX-NEXT: ret <2 x i64> [[BO]]
;
%ins = insertelement <2 x i64> poison, i64 %x, i32 0
%bo = and <2 x i64> %ins, <i64 42, i64 -42>
@@ -523,10 +543,15 @@ define <2 x i64> @or_constant_not_undef_lane(i64 %x) {
}
define <2 x i64> @xor_constant(i64 %x) {
-; CHECK-LABEL: @xor_constant(
-; CHECK-NEXT: [[BO_SCALAR:%.*]] = xor i64 [[X:%.*]], 42
-; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 0
-; CHECK-NEXT: ret <2 x i64> [[BO]]
+; SSE-LABEL: @xor_constant(
+; SSE-NEXT: [[INS:%.*]] = insertelement <2 x i64> poison, i64 [[X:%.*]], i32 0
+; SSE-NEXT: [[BO:%.*]] = xor <2 x i64> [[INS]], <i64 42, i64 undef>
+; SSE-NEXT: ret <2 x i64> [[BO]]
+;
+; AVX-LABEL: @xor_constant(
+; AVX-NEXT: [[BO_SCALAR:%.*]] = xor i64 [[X:%.*]], 42
+; AVX-NEXT: [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 0
+; AVX-NEXT: ret <2 x i64> [[BO]]
;
%ins = insertelement <2 x i64> poison, i64 %x, i32 0
%bo = xor <2 x i64> %ins, <i64 42, i64 undef>
@@ -534,10 +559,15 @@ define <2 x i64> @xor_constant(i64 %x) {
}
define <2 x i64> @xor_constant_not_undef_lane(i64 %x) {
-; CHECK-LABEL: @xor_constant_not_undef_lane(
-; CHECK-NEXT: [[BO_SCALAR:%.*]] = xor i64 [[X:%.*]], 42
-; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 0
-; CHECK-NEXT: ret <2 x i64> [[BO]]
+; SSE-LABEL: @xor_constant_not_undef_lane(
+; SSE-NEXT: [[INS:%.*]] = insertelement <2 x i64> poison, i64 [[X:%.*]], i32 0
+; SSE-NEXT: [[BO:%.*]] = xor <2 x i64> [[INS]], <i64 42, i64 -42>
+; SSE-NEXT: ret <2 x i64> [[BO]]
+;
+; AVX-LABEL: @xor_constant_not_undef_lane(
+; AVX-NEXT: [[BO_SCALAR:%.*]] = xor i64 [[X:%.*]], 42
+; AVX-NEXT: [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 0
+; AVX-NEXT: ret <2 x i64> [[BO]]
;
%ins = insertelement <2 x i64> poison, i64 %x, i32 0
%bo = xor <2 x i64> %ins, <i64 42, i64 -42>
diff --git a/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant.ll b/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant.ll
index bbdd76c58b58e..2b4db0583e69c 100644
--- a/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant.ll
@@ -3,10 +3,15 @@
; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=AVX2 | FileCheck %s --check-prefixes=CHECK,AVX
define <2 x i64> @add_constant(i64 %x) {
-; CHECK-LABEL: @add_constant(
-; CHECK-NEXT: [[BO_SCALAR:%.*]] = add i64 [[X:%.*]], 42
-; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0
-; CHECK-NEXT: ret <2 x i64> [[BO]]
+; SSE-LABEL: @add_constant(
+; SSE-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0
+; SSE-NEXT: [[BO:%.*]] = add <2 x i64> [[INS]], <i64 42, i64 undef>
+; SSE-NEXT: ret <2 x i64> [[BO]]
+;
+; AVX-LABEL: @add_constant(
+; AVX-NEXT: [[BO_SCALAR:%.*]] = add i64 [[X:%.*]], 42
+; AVX-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0
+; AVX-NEXT: ret <2 x i64> [[BO]]
;
%ins = insertelement <2 x i64> undef, i64 %x, i32 0
%bo = add <2 x i64> %ins, <i64 42, i64 undef>
@@ -14,10 +19,15 @@ define <2 x i64> @add_constant(i64 %x) {
}
define <2 x i64> @add_constant_not_undef_lane(i64 %x) {
-; CHECK-LABEL: @add_constant_not_undef_lane(
-; CHECK-NEXT: [[BO_SCALAR:%.*]] = add i64 [[X:%.*]], 42
-; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0
-; CHECK-NEXT: ret <2 x i64> [[BO]]
+; SSE-LABEL: @add_constant_not_undef_lane(
+; SSE-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0
+; SSE-NEXT: [[BO:%.*]] = add <2 x i64> [[INS]], <i64 42, i64 -42>
+; SSE-NEXT: ret <2 x i64> [[BO]]
+;
+; AVX-LABEL: @add_constant_not_undef_lane(
+; AVX-NEXT: [[BO_SCALAR:%.*]] = add i64 [[X:%.*]], 42
+; AVX-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0
+; AVX-NEXT: ret <2 x i64> [[BO]]
;
%ins = insertelement <2 x i64> undef, i64 %x, i32 0
%bo = add <2 x i64> %ins, <i64 42, i64 -42>
@@ -153,8 +163,8 @@ define <2 x i64> @shl_constant_op0_not_undef_lane(i64 %x) {
define <2 x i64> @shl_constant_op0_load(ptr %p) {
; CHECK-LABEL: @shl_constant_op0_load(
; CHECK-NEXT: [[LD:%.*]] = load i64, ptr [[P:%.*]], align 8
-; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[LD]], i32 1
-; CHECK-NEXT: [[BO:%.*]] = shl <2 x i64> <i64 undef, i64 2>, [[INS]]
+; CHECK-NEXT: [[BO_SCALAR:%.*]] = shl i64 2, [[LD]]
+; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 1
; CHECK-NEXT: ret <2 x i64> [[BO]]
;
%ld = load i64, ptr %p
@@ -204,8 +214,8 @@ define <2 x i64> @shl_constant_op1_not_undef_lane(i64 %x) {
define <2 x i64> @shl_constant_op1_load(ptr %p) {
; CHECK-LABEL: @shl_constant_op1_load(
; CHECK-NEXT: [[LD:%.*]] = load i64, ptr [[P:%.*]], align 8
-; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[LD]], i32 0
-; CHECK-NEXT: [[BO:%.*]] = shl nuw <2 x i64> [[INS]], <i64 5, i64 2>
+; CHECK-NEXT: [[BO_SCALAR:%.*]] = shl nuw i64 [[LD]], 5
+; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> zeroinitializer, i64 [[BO_SCALAR]], i64 0
; CHECK-NEXT: ret <2 x i64> [[BO]]
;
%ld = load i64, ptr %p
@@ -479,10 +489,15 @@ define <2 x i64> @sdiv_constant_op1_not_undef_lane(i64 %x) {
}
define <2 x i64> @and_constant(i64 %x) {
-; CHECK-LABEL: @and_constant(
-; CHECK-NEXT: [[BO_SCALAR:%.*]] = and i64 [[X:%.*]], 42
-; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> <i64 0, i64 undef>, i64 [[BO_SCALAR]], i64 0
-; CHECK-NEXT: ret <2 x i64> [[BO]]
+; SSE-LABEL: @and_constant(
+; SSE-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0
+; SSE-NEXT: [[BO:%.*]] = and <2 x i64> [[INS]], <i64 42, i64 undef>
+; SSE-NEXT: ret <2 x i64> [[BO]]
+;
+; AVX-LABEL: @and_constant(
+; AVX-NEXT: [[BO_SCALAR:%.*]] = and i64 [[X:%.*]], 42
+; AVX-NEXT: [[BO:%.*]] = insertelement <2 x i64> <i64 0, i64 undef>, i64 [[BO_SCALAR]], i64 0
+; AVX-NEXT: ret <2 x i64> [[BO]]
;
%ins = insertelement <2 x i64> undef, i64 %x, i32 0
%bo = and <2 x i64> %ins, <i64 42, i64 undef>
@@ -490,10 +505,15 @@ define <2 x i64> @and_constant(i64 %x) {
}
define <2 x i64> @and_constant_not_undef_lane(i64 %x) {
-; CHECK-LABEL: @and_constant_not_undef_lane(
-; CHECK-NEXT: [[BO_SCALAR:%.*]] = and i64 [[X:%.*]], 42
-; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> zeroinitializer, i64 [[BO_SCALAR]], i64 0
-; CHECK-NEXT: ret <2 x i64> [[BO]]
+; SSE-LABEL: @and_constant_not_undef_lane(
+; SSE-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0
+; SSE-NEXT: [[BO:%.*]] = and <2 x i64> [[INS]], <i64 42, i64 -42>
+; SSE-NEXT: ret <2 x i64> [[BO]]
+;
+; AVX-LABEL: @and_constant_not_undef_lane(
+; AVX-NEXT: [[BO_SCALAR:%.*]] = and i64 [[X:%.*]], 42
+; AVX-NEXT: [[BO:%.*]] = insertelement <2 x i64> zeroinitializer, i64 [[BO_SCALAR]], i64 0
+; AVX-NEXT: ret <2 x i64> [[BO]]
;
%ins = insertelement <2 x i64> undef, i64 %x, i32 0
%bo = and <2 x i64> %ins, <i64 42, i64 -42>
@@ -523,10 +543,15 @@ define <2 x i64> @or_constant_not_undef_lane(i64 %x) {
}
define <2 x i64> @xor_constant(i64 %x) {
-; CHECK-LABEL: @xor_constant(
-; CHECK-NEXT: [[BO_SCALAR:%.*]] = xor i64 [[X:%.*]], 42
-; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> <i64 undef, i64 0>, i64 [[BO_SCALAR]], i64 0
-; CHECK-NEXT: ret <2 x i64> [[BO]]
+; SSE-LABEL: @xor_constant(
+; SSE-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0
+; SSE-NEXT: [[BO:%.*]] = xor <2 x i64> [[INS]], <i64 42, i64 undef>
+; SSE-NEXT: ret <2 x i64> [[BO]]
+;
+; AVX-LABEL: @xor_constant(
+; AVX-NEXT: [[BO_SCALAR:%.*]] = xor i64 [[X:%.*]], 42
+; AVX-NEXT: [[BO:%.*]] = insertelement <2 x i64> <i64 undef, i64 0>, i64 [[BO_SCALAR]], i64 0
+; AVX-NEXT: ret <2 x i64> [[BO]]
;
%ins = insertelement <2 x i64> undef, i64 %x, i32 0
%bo = xor <2 x i64> %ins, <i64 42, i64 undef>
@@ -534,10 +559,15 @@ define <2 x i64> @xor_constant(i64 %x) {
}
define <2 x i64> @xor_constant_not_undef_lane(i64 %x) {
-; CHECK-LABEL: @xor_constant_not_undef_lane(
-; CHECK-NEXT: [[BO_SCALAR:%.*]] = xor i64 [[X:%.*]], 42
-; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0
-; CHECK-NEXT: ret <2 x i64> [[BO]]
+; SSE-LABEL: @xor_constant_not_undef_lane(
+; SSE-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0
+; SSE-NEXT: [[BO:%.*]] = xor <2 x i64> [[INS]], <i64 42, i64 -42>
+; SSE-NEXT: ret <2 x i64> [[BO]]
+;
+; AVX-LABEL: @xor_constant_not_undef_lane(
+; AVX-NEXT: [[BO_SCALAR:%.*]] = xor i64 [[X:%.*]], 42
+; AVX-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0
+; AVX-NEXT: ret <2 x i64> [[BO]]
;
%ins = insertelement <2 x i64> undef, i64 %x, i32 0
%bo = xor <2 x i64> %ins, <i64 42, i64 -42>
diff --git a/llvm/test/Transforms/VectorCombine/X86/insert-binop.ll b/llvm/test/Transforms/VectorCombine/X86/insert-binop.ll
index cd7e2ad2ca2c6..789ee7b3cdf0d 100644
--- a/llvm/test/Transforms/VectorCombine/X86/insert-binop.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/insert-binop.ll
@@ -8,10 +8,16 @@ declare void @usef(<4 x float>)
; Eliminating an insert is profitable.
define <16 x i8> @ins0_ins0_add(i8 %x, i8 %y) {
-; CHECK-LABEL: @ins0_ins0_add(
-; CHECK-NEXT: [[R_SCALAR:%.*]] = add i8 [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT: [[R:%.*]] = insertelement <16 x i8> undef, i8 [[R_SCALAR]], i64 0
-; CHECK-NEXT: ret <16 x i8> [[R]]
+; SSE-LABEL: @ins0_ins0_add(
+; SSE-NEXT: [[I0:%.*]] = insertelement <16 x i8> undef, i8 [[X:%.*]], i32 0
+; SSE-NEXT: [[I1:%.*]] = insertelement <16 x i8> undef, i8 [[Y:%.*]], i32 0
+; SSE-NEXT: [[R:%.*]] = add <16 x i8> [[I0]], [[I1]]
+; SSE-NEXT: ret <16 x i8> [[R]]
+;
+; AVX-LABEL: @ins0_ins0_add(
+; AVX-NEXT: [[R_SCALAR:%.*]] = add i8 [[X:%.*]], [[Y:%.*]]
+; AVX-NEXT: [[R:%.*]] = insertelement <16 x i8> undef, i8 [[R_SCALAR]], i64 0
+; AVX-NEXT: ret <16 x i8> [[R]]
;
%i0 = insertelement <16 x i8> undef, i8 %x, i32 0
%i1 = insertelement <16 x i8> undef, i8 %y, i32 0
@@ -155,12 +161,19 @@ define <2 x i64> @ins1_ins1_urem(i64 %x, i64 %y) {
; Extra use is accounted for in cost calculation.
define <4 x i32> @ins0_ins0_xor(i32 %x, i32 %y) {
-; CHECK-LABEL: @ins0_ins0_xor(
-; CHECK-NEXT: [[I0:%.*]] = insertelement <4 x i32> undef, i32 [[X:...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/137823
More information about the llvm-commits
mailing list