[llvm] [WIP][VectorCombine] Support nary intrinsics in scalarizeBinOpOrCmp (PR #138406)
Luke Lau via llvm-commits
llvm-commits at lists.llvm.org
Fri May 23 04:58:37 PDT 2025
https://github.com/lukel97 updated https://github.com/llvm/llvm-project/pull/138406
>From 1b9607aeec6b2a5267afb8e1b411a4b067de0cd3 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Tue, 29 Apr 2025 22:27:58 +0800
Subject: [PATCH 1/7] [VectorCombine][X86] Use updated getVectorInstrCost hook
This addresses a TODO: previously, scalarizeBinopOrCmp conservatively bailed out if one of the operands was a load.
getVectorInstrCost was updated in https://reviews.llvm.org/D140498 to take the operand values, so we can pass in the scalar value being inserted, which should give an accurate cost for a gather.
We want to remove this restriction for RISC-V, since the transform is always profitable there whether or not the scalar is a load.
On X86 this seems to prevent scalarization on SSE when the index is 0, because the cost of an insertion into undef drops from 12 to 1 once the value is passed in. Is this correct, or is there a way to fix this in X86TTIImpl::getVectorInstrCost? cc @alexey-bataev
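For context, a minimal sketch of the updated hook usage (the helper name and parameter list here are illustrative, not part of the patch): supplying the operand values to getVectorInstrCost lets the target model the insert more precisely than the index-only overload.

    // Sketch only; the real calls live in VectorCombine::scalarizeBinopOrCmp.
    #include "llvm/Analysis/TargetTransformInfo.h"
    #include "llvm/IR/Instruction.h"

    using namespace llvm;

    static InstructionCost
    costInsert(const TargetTransformInfo &TTI, Type *VecTy, unsigned Index,
               Value *VecBase, Value *Scalar,
               TargetTransformInfo::TargetCostKind CostKind) {
      // With Op0/Op1 provided, a target can e.g. recognise the insertion of a
      // loaded scalar as a cheap scalar-to-vector load rather than a
      // GPR-to-vector transfer, and cost it accordingly.
      return TTI.getVectorInstrCost(Instruction::InsertElement, VecTy,
                                    CostKind, Index, VecBase, Scalar);
    }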
---
.../Transforms/Vectorize/VectorCombine.cpp | 20 ++---
.../X86/insert-binop-inseltpoison.ll | 33 ++++---
...insert-binop-with-constant-inseltpoison.ll | 86 +++++++++++++------
.../X86/insert-binop-with-constant.ll | 86 +++++++++++++------
.../VectorCombine/X86/insert-binop.ll | 33 ++++---
.../X86/scalarize-cmp-inseltpoison.ll | 60 +++++++++----
.../VectorCombine/X86/scalarize-cmp.ll | 60 +++++++++----
7 files changed, 252 insertions(+), 126 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 04c084ffdda97..f046a7d305d51 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -1066,14 +1066,6 @@ bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
VecTy1->getElementCount().getKnownMinValue() <= Index1)
return false;
- // Bail for single insertion if it is a load.
- // TODO: Handle this once getVectorInstrCost can cost for load/stores.
- auto *I0 = dyn_cast_or_null<Instruction>(V0);
- auto *I1 = dyn_cast_or_null<Instruction>(V1);
- if ((IsConst0 && I1 && I1->mayReadFromMemory()) ||
- (IsConst1 && I0 && I0->mayReadFromMemory()))
- return false;
-
uint64_t Index = IsConst0 ? Index1 : Index0;
Type *ScalarTy = IsConst0 ? V1->getType() : V0->getType();
Type *VecTy = I.getType();
@@ -1100,11 +1092,15 @@ bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
// both sequences.
InstructionCost InsertCost = TTI.getVectorInstrCost(
Instruction::InsertElement, VecTy, CostKind, Index);
- InstructionCost OldCost =
- (IsConst0 ? 0 : InsertCost) + (IsConst1 ? 0 : InsertCost) + VectorOpCost;
+ InstructionCost InsertCostV0 = TTI.getVectorInstrCost(
+ Instruction::InsertElement, VecTy, CostKind, Index, VecC0, V0);
+ InstructionCost InsertCostV1 = TTI.getVectorInstrCost(
+ Instruction::InsertElement, VecTy, CostKind, Index, VecC1, V1);
+ InstructionCost OldCost = (IsConst0 ? 0 : InsertCostV0) +
+ (IsConst1 ? 0 : InsertCostV1) + VectorOpCost;
InstructionCost NewCost = ScalarOpCost + InsertCost +
- (IsConst0 ? 0 : !Ins0->hasOneUse() * InsertCost) +
- (IsConst1 ? 0 : !Ins1->hasOneUse() * InsertCost);
+ (IsConst0 ? 0 : !Ins0->hasOneUse() * InsertCostV0) +
+ (IsConst1 ? 0 : !Ins1->hasOneUse() * InsertCostV1);
// We want to scalarize unless the vector variant actually has lower cost.
if (OldCost < NewCost || !NewCost.isValid())
diff --git a/llvm/test/Transforms/VectorCombine/X86/insert-binop-inseltpoison.ll b/llvm/test/Transforms/VectorCombine/X86/insert-binop-inseltpoison.ll
index c1100780254c1..76440c7047059 100644
--- a/llvm/test/Transforms/VectorCombine/X86/insert-binop-inseltpoison.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/insert-binop-inseltpoison.ll
@@ -8,10 +8,16 @@ declare void @usef(<4 x float>)
; Eliminating an insert is profitable.
define <16 x i8> @ins0_ins0_add(i8 %x, i8 %y) {
-; CHECK-LABEL: @ins0_ins0_add(
-; CHECK-NEXT: [[R_SCALAR:%.*]] = add i8 [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT: [[R:%.*]] = insertelement <16 x i8> poison, i8 [[R_SCALAR]], i64 0
-; CHECK-NEXT: ret <16 x i8> [[R]]
+; SSE-LABEL: @ins0_ins0_add(
+; SSE-NEXT: [[I0:%.*]] = insertelement <16 x i8> poison, i8 [[X:%.*]], i32 0
+; SSE-NEXT: [[I1:%.*]] = insertelement <16 x i8> poison, i8 [[Y:%.*]], i32 0
+; SSE-NEXT: [[R:%.*]] = add <16 x i8> [[I0]], [[I1]]
+; SSE-NEXT: ret <16 x i8> [[R]]
+;
+; AVX-LABEL: @ins0_ins0_add(
+; AVX-NEXT: [[R_SCALAR:%.*]] = add i8 [[X:%.*]], [[Y:%.*]]
+; AVX-NEXT: [[R:%.*]] = insertelement <16 x i8> poison, i8 [[R_SCALAR]], i64 0
+; AVX-NEXT: ret <16 x i8> [[R]]
;
%i0 = insertelement <16 x i8> poison, i8 %x, i32 0
%i1 = insertelement <16 x i8> poison, i8 %y, i32 0
@@ -155,12 +161,19 @@ define <2 x i64> @ins1_ins1_urem(i64 %x, i64 %y) {
; Extra use is accounted for in cost calculation.
define <4 x i32> @ins0_ins0_xor(i32 %x, i32 %y) {
-; CHECK-LABEL: @ins0_ins0_xor(
-; CHECK-NEXT: [[I0:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i32 0
-; CHECK-NEXT: call void @use(<4 x i32> [[I0]])
-; CHECK-NEXT: [[R_SCALAR:%.*]] = xor i32 [[X]], [[Y:%.*]]
-; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i32> poison, i32 [[R_SCALAR]], i64 0
-; CHECK-NEXT: ret <4 x i32> [[R]]
+; SSE-LABEL: @ins0_ins0_xor(
+; SSE-NEXT: [[I0:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i32 0
+; SSE-NEXT: call void @use(<4 x i32> [[I0]])
+; SSE-NEXT: [[I1:%.*]] = insertelement <4 x i32> poison, i32 [[Y:%.*]], i32 0
+; SSE-NEXT: [[R:%.*]] = xor <4 x i32> [[I0]], [[I1]]
+; SSE-NEXT: ret <4 x i32> [[R]]
+;
+; AVX-LABEL: @ins0_ins0_xor(
+; AVX-NEXT: [[I0:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i32 0
+; AVX-NEXT: call void @use(<4 x i32> [[I0]])
+; AVX-NEXT: [[R_SCALAR:%.*]] = xor i32 [[X]], [[Y:%.*]]
+; AVX-NEXT: [[R:%.*]] = insertelement <4 x i32> poison, i32 [[R_SCALAR]], i64 0
+; AVX-NEXT: ret <4 x i32> [[R]]
;
%i0 = insertelement <4 x i32> poison, i32 %x, i32 0
call void @use(<4 x i32> %i0)
diff --git a/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant-inseltpoison.ll b/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant-inseltpoison.ll
index 05251cb829b2b..751539aa0f431 100644
--- a/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant-inseltpoison.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant-inseltpoison.ll
@@ -3,10 +3,15 @@
; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=AVX2 | FileCheck %s --check-prefixes=CHECK,AVX
define <2 x i64> @add_constant(i64 %x) {
-; CHECK-LABEL: @add_constant(
-; CHECK-NEXT: [[BO_SCALAR:%.*]] = add i64 [[X:%.*]], 42
-; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 0
-; CHECK-NEXT: ret <2 x i64> [[BO]]
+; SSE-LABEL: @add_constant(
+; SSE-NEXT: [[INS:%.*]] = insertelement <2 x i64> poison, i64 [[X:%.*]], i32 0
+; SSE-NEXT: [[BO:%.*]] = add <2 x i64> [[INS]], <i64 42, i64 undef>
+; SSE-NEXT: ret <2 x i64> [[BO]]
+;
+; AVX-LABEL: @add_constant(
+; AVX-NEXT: [[BO_SCALAR:%.*]] = add i64 [[X:%.*]], 42
+; AVX-NEXT: [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 0
+; AVX-NEXT: ret <2 x i64> [[BO]]
;
%ins = insertelement <2 x i64> poison, i64 %x, i32 0
%bo = add <2 x i64> %ins, <i64 42, i64 undef>
@@ -14,10 +19,15 @@ define <2 x i64> @add_constant(i64 %x) {
}
define <2 x i64> @add_constant_not_undef_lane(i64 %x) {
-; CHECK-LABEL: @add_constant_not_undef_lane(
-; CHECK-NEXT: [[BO_SCALAR:%.*]] = add i64 [[X:%.*]], 42
-; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 0
-; CHECK-NEXT: ret <2 x i64> [[BO]]
+; SSE-LABEL: @add_constant_not_undef_lane(
+; SSE-NEXT: [[INS:%.*]] = insertelement <2 x i64> poison, i64 [[X:%.*]], i32 0
+; SSE-NEXT: [[BO:%.*]] = add <2 x i64> [[INS]], <i64 42, i64 -42>
+; SSE-NEXT: ret <2 x i64> [[BO]]
+;
+; AVX-LABEL: @add_constant_not_undef_lane(
+; AVX-NEXT: [[BO_SCALAR:%.*]] = add i64 [[X:%.*]], 42
+; AVX-NEXT: [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 0
+; AVX-NEXT: ret <2 x i64> [[BO]]
;
%ins = insertelement <2 x i64> poison, i64 %x, i32 0
%bo = add <2 x i64> %ins, <i64 42, i64 -42>
@@ -153,8 +163,8 @@ define <2 x i64> @shl_constant_op0_not_undef_lane(i64 %x) {
define <2 x i64> @shl_constant_op0_load(ptr %p) {
; CHECK-LABEL: @shl_constant_op0_load(
; CHECK-NEXT: [[LD:%.*]] = load i64, ptr [[P:%.*]], align 8
-; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> poison, i64 [[LD]], i32 1
-; CHECK-NEXT: [[BO:%.*]] = shl <2 x i64> <i64 undef, i64 2>, [[INS]]
+; CHECK-NEXT: [[BO_SCALAR:%.*]] = shl i64 2, [[LD]]
+; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 1
; CHECK-NEXT: ret <2 x i64> [[BO]]
;
%ld = load i64, ptr %p
@@ -204,8 +214,8 @@ define <2 x i64> @shl_constant_op1_not_undef_lane(i64 %x) {
define <2 x i64> @shl_constant_op1_load(ptr %p) {
; CHECK-LABEL: @shl_constant_op1_load(
; CHECK-NEXT: [[LD:%.*]] = load i64, ptr [[P:%.*]], align 8
-; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> poison, i64 [[LD]], i32 0
-; CHECK-NEXT: [[BO:%.*]] = shl nuw <2 x i64> [[INS]], <i64 5, i64 2>
+; CHECK-NEXT: [[BO_SCALAR:%.*]] = shl nuw i64 [[LD]], 5
+; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 0
; CHECK-NEXT: ret <2 x i64> [[BO]]
;
%ld = load i64, ptr %p
@@ -479,10 +489,15 @@ define <2 x i64> @sdiv_constant_op1_not_undef_lane(i64 %x) {
}
define <2 x i64> @and_constant(i64 %x) {
-; CHECK-LABEL: @and_constant(
-; CHECK-NEXT: [[BO_SCALAR:%.*]] = and i64 [[X:%.*]], 42
-; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 0
-; CHECK-NEXT: ret <2 x i64> [[BO]]
+; SSE-LABEL: @and_constant(
+; SSE-NEXT: [[INS:%.*]] = insertelement <2 x i64> poison, i64 [[X:%.*]], i32 0
+; SSE-NEXT: [[BO:%.*]] = and <2 x i64> [[INS]], <i64 42, i64 undef>
+; SSE-NEXT: ret <2 x i64> [[BO]]
+;
+; AVX-LABEL: @and_constant(
+; AVX-NEXT: [[BO_SCALAR:%.*]] = and i64 [[X:%.*]], 42
+; AVX-NEXT: [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 0
+; AVX-NEXT: ret <2 x i64> [[BO]]
;
%ins = insertelement <2 x i64> poison, i64 %x, i32 0
%bo = and <2 x i64> %ins, <i64 42, i64 undef>
@@ -490,10 +505,15 @@ define <2 x i64> @and_constant(i64 %x) {
}
define <2 x i64> @and_constant_not_undef_lane(i64 %x) {
-; CHECK-LABEL: @and_constant_not_undef_lane(
-; CHECK-NEXT: [[BO_SCALAR:%.*]] = and i64 [[X:%.*]], 42
-; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 0
-; CHECK-NEXT: ret <2 x i64> [[BO]]
+; SSE-LABEL: @and_constant_not_undef_lane(
+; SSE-NEXT: [[INS:%.*]] = insertelement <2 x i64> poison, i64 [[X:%.*]], i32 0
+; SSE-NEXT: [[BO:%.*]] = and <2 x i64> [[INS]], <i64 42, i64 -42>
+; SSE-NEXT: ret <2 x i64> [[BO]]
+;
+; AVX-LABEL: @and_constant_not_undef_lane(
+; AVX-NEXT: [[BO_SCALAR:%.*]] = and i64 [[X:%.*]], 42
+; AVX-NEXT: [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 0
+; AVX-NEXT: ret <2 x i64> [[BO]]
;
%ins = insertelement <2 x i64> poison, i64 %x, i32 0
%bo = and <2 x i64> %ins, <i64 42, i64 -42>
@@ -523,10 +543,15 @@ define <2 x i64> @or_constant_not_undef_lane(i64 %x) {
}
define <2 x i64> @xor_constant(i64 %x) {
-; CHECK-LABEL: @xor_constant(
-; CHECK-NEXT: [[BO_SCALAR:%.*]] = xor i64 [[X:%.*]], 42
-; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 0
-; CHECK-NEXT: ret <2 x i64> [[BO]]
+; SSE-LABEL: @xor_constant(
+; SSE-NEXT: [[INS:%.*]] = insertelement <2 x i64> poison, i64 [[X:%.*]], i32 0
+; SSE-NEXT: [[BO:%.*]] = xor <2 x i64> [[INS]], <i64 42, i64 undef>
+; SSE-NEXT: ret <2 x i64> [[BO]]
+;
+; AVX-LABEL: @xor_constant(
+; AVX-NEXT: [[BO_SCALAR:%.*]] = xor i64 [[X:%.*]], 42
+; AVX-NEXT: [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 0
+; AVX-NEXT: ret <2 x i64> [[BO]]
;
%ins = insertelement <2 x i64> poison, i64 %x, i32 0
%bo = xor <2 x i64> %ins, <i64 42, i64 undef>
@@ -534,10 +559,15 @@ define <2 x i64> @xor_constant(i64 %x) {
}
define <2 x i64> @xor_constant_not_undef_lane(i64 %x) {
-; CHECK-LABEL: @xor_constant_not_undef_lane(
-; CHECK-NEXT: [[BO_SCALAR:%.*]] = xor i64 [[X:%.*]], 42
-; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 0
-; CHECK-NEXT: ret <2 x i64> [[BO]]
+; SSE-LABEL: @xor_constant_not_undef_lane(
+; SSE-NEXT: [[INS:%.*]] = insertelement <2 x i64> poison, i64 [[X:%.*]], i32 0
+; SSE-NEXT: [[BO:%.*]] = xor <2 x i64> [[INS]], <i64 42, i64 -42>
+; SSE-NEXT: ret <2 x i64> [[BO]]
+;
+; AVX-LABEL: @xor_constant_not_undef_lane(
+; AVX-NEXT: [[BO_SCALAR:%.*]] = xor i64 [[X:%.*]], 42
+; AVX-NEXT: [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 0
+; AVX-NEXT: ret <2 x i64> [[BO]]
;
%ins = insertelement <2 x i64> poison, i64 %x, i32 0
%bo = xor <2 x i64> %ins, <i64 42, i64 -42>
diff --git a/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant.ll b/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant.ll
index bbdd76c58b58e..2b4db0583e69c 100644
--- a/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant.ll
@@ -3,10 +3,15 @@
; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=AVX2 | FileCheck %s --check-prefixes=CHECK,AVX
define <2 x i64> @add_constant(i64 %x) {
-; CHECK-LABEL: @add_constant(
-; CHECK-NEXT: [[BO_SCALAR:%.*]] = add i64 [[X:%.*]], 42
-; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0
-; CHECK-NEXT: ret <2 x i64> [[BO]]
+; SSE-LABEL: @add_constant(
+; SSE-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0
+; SSE-NEXT: [[BO:%.*]] = add <2 x i64> [[INS]], <i64 42, i64 undef>
+; SSE-NEXT: ret <2 x i64> [[BO]]
+;
+; AVX-LABEL: @add_constant(
+; AVX-NEXT: [[BO_SCALAR:%.*]] = add i64 [[X:%.*]], 42
+; AVX-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0
+; AVX-NEXT: ret <2 x i64> [[BO]]
;
%ins = insertelement <2 x i64> undef, i64 %x, i32 0
%bo = add <2 x i64> %ins, <i64 42, i64 undef>
@@ -14,10 +19,15 @@ define <2 x i64> @add_constant(i64 %x) {
}
define <2 x i64> @add_constant_not_undef_lane(i64 %x) {
-; CHECK-LABEL: @add_constant_not_undef_lane(
-; CHECK-NEXT: [[BO_SCALAR:%.*]] = add i64 [[X:%.*]], 42
-; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0
-; CHECK-NEXT: ret <2 x i64> [[BO]]
+; SSE-LABEL: @add_constant_not_undef_lane(
+; SSE-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0
+; SSE-NEXT: [[BO:%.*]] = add <2 x i64> [[INS]], <i64 42, i64 -42>
+; SSE-NEXT: ret <2 x i64> [[BO]]
+;
+; AVX-LABEL: @add_constant_not_undef_lane(
+; AVX-NEXT: [[BO_SCALAR:%.*]] = add i64 [[X:%.*]], 42
+; AVX-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0
+; AVX-NEXT: ret <2 x i64> [[BO]]
;
%ins = insertelement <2 x i64> undef, i64 %x, i32 0
%bo = add <2 x i64> %ins, <i64 42, i64 -42>
@@ -153,8 +163,8 @@ define <2 x i64> @shl_constant_op0_not_undef_lane(i64 %x) {
define <2 x i64> @shl_constant_op0_load(ptr %p) {
; CHECK-LABEL: @shl_constant_op0_load(
; CHECK-NEXT: [[LD:%.*]] = load i64, ptr [[P:%.*]], align 8
-; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[LD]], i32 1
-; CHECK-NEXT: [[BO:%.*]] = shl <2 x i64> <i64 undef, i64 2>, [[INS]]
+; CHECK-NEXT: [[BO_SCALAR:%.*]] = shl i64 2, [[LD]]
+; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 1
; CHECK-NEXT: ret <2 x i64> [[BO]]
;
%ld = load i64, ptr %p
@@ -204,8 +214,8 @@ define <2 x i64> @shl_constant_op1_not_undef_lane(i64 %x) {
define <2 x i64> @shl_constant_op1_load(ptr %p) {
; CHECK-LABEL: @shl_constant_op1_load(
; CHECK-NEXT: [[LD:%.*]] = load i64, ptr [[P:%.*]], align 8
-; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[LD]], i32 0
-; CHECK-NEXT: [[BO:%.*]] = shl nuw <2 x i64> [[INS]], <i64 5, i64 2>
+; CHECK-NEXT: [[BO_SCALAR:%.*]] = shl nuw i64 [[LD]], 5
+; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> zeroinitializer, i64 [[BO_SCALAR]], i64 0
; CHECK-NEXT: ret <2 x i64> [[BO]]
;
%ld = load i64, ptr %p
@@ -479,10 +489,15 @@ define <2 x i64> @sdiv_constant_op1_not_undef_lane(i64 %x) {
}
define <2 x i64> @and_constant(i64 %x) {
-; CHECK-LABEL: @and_constant(
-; CHECK-NEXT: [[BO_SCALAR:%.*]] = and i64 [[X:%.*]], 42
-; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> <i64 0, i64 undef>, i64 [[BO_SCALAR]], i64 0
-; CHECK-NEXT: ret <2 x i64> [[BO]]
+; SSE-LABEL: @and_constant(
+; SSE-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0
+; SSE-NEXT: [[BO:%.*]] = and <2 x i64> [[INS]], <i64 42, i64 undef>
+; SSE-NEXT: ret <2 x i64> [[BO]]
+;
+; AVX-LABEL: @and_constant(
+; AVX-NEXT: [[BO_SCALAR:%.*]] = and i64 [[X:%.*]], 42
+; AVX-NEXT: [[BO:%.*]] = insertelement <2 x i64> <i64 0, i64 undef>, i64 [[BO_SCALAR]], i64 0
+; AVX-NEXT: ret <2 x i64> [[BO]]
;
%ins = insertelement <2 x i64> undef, i64 %x, i32 0
%bo = and <2 x i64> %ins, <i64 42, i64 undef>
@@ -490,10 +505,15 @@ define <2 x i64> @and_constant(i64 %x) {
}
define <2 x i64> @and_constant_not_undef_lane(i64 %x) {
-; CHECK-LABEL: @and_constant_not_undef_lane(
-; CHECK-NEXT: [[BO_SCALAR:%.*]] = and i64 [[X:%.*]], 42
-; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> zeroinitializer, i64 [[BO_SCALAR]], i64 0
-; CHECK-NEXT: ret <2 x i64> [[BO]]
+; SSE-LABEL: @and_constant_not_undef_lane(
+; SSE-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0
+; SSE-NEXT: [[BO:%.*]] = and <2 x i64> [[INS]], <i64 42, i64 -42>
+; SSE-NEXT: ret <2 x i64> [[BO]]
+;
+; AVX-LABEL: @and_constant_not_undef_lane(
+; AVX-NEXT: [[BO_SCALAR:%.*]] = and i64 [[X:%.*]], 42
+; AVX-NEXT: [[BO:%.*]] = insertelement <2 x i64> zeroinitializer, i64 [[BO_SCALAR]], i64 0
+; AVX-NEXT: ret <2 x i64> [[BO]]
;
%ins = insertelement <2 x i64> undef, i64 %x, i32 0
%bo = and <2 x i64> %ins, <i64 42, i64 -42>
@@ -523,10 +543,15 @@ define <2 x i64> @or_constant_not_undef_lane(i64 %x) {
}
define <2 x i64> @xor_constant(i64 %x) {
-; CHECK-LABEL: @xor_constant(
-; CHECK-NEXT: [[BO_SCALAR:%.*]] = xor i64 [[X:%.*]], 42
-; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> <i64 undef, i64 0>, i64 [[BO_SCALAR]], i64 0
-; CHECK-NEXT: ret <2 x i64> [[BO]]
+; SSE-LABEL: @xor_constant(
+; SSE-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0
+; SSE-NEXT: [[BO:%.*]] = xor <2 x i64> [[INS]], <i64 42, i64 undef>
+; SSE-NEXT: ret <2 x i64> [[BO]]
+;
+; AVX-LABEL: @xor_constant(
+; AVX-NEXT: [[BO_SCALAR:%.*]] = xor i64 [[X:%.*]], 42
+; AVX-NEXT: [[BO:%.*]] = insertelement <2 x i64> <i64 undef, i64 0>, i64 [[BO_SCALAR]], i64 0
+; AVX-NEXT: ret <2 x i64> [[BO]]
;
%ins = insertelement <2 x i64> undef, i64 %x, i32 0
%bo = xor <2 x i64> %ins, <i64 42, i64 undef>
@@ -534,10 +559,15 @@ define <2 x i64> @xor_constant(i64 %x) {
}
define <2 x i64> @xor_constant_not_undef_lane(i64 %x) {
-; CHECK-LABEL: @xor_constant_not_undef_lane(
-; CHECK-NEXT: [[BO_SCALAR:%.*]] = xor i64 [[X:%.*]], 42
-; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0
-; CHECK-NEXT: ret <2 x i64> [[BO]]
+; SSE-LABEL: @xor_constant_not_undef_lane(
+; SSE-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0
+; SSE-NEXT: [[BO:%.*]] = xor <2 x i64> [[INS]], <i64 42, i64 -42>
+; SSE-NEXT: ret <2 x i64> [[BO]]
+;
+; AVX-LABEL: @xor_constant_not_undef_lane(
+; AVX-NEXT: [[BO_SCALAR:%.*]] = xor i64 [[X:%.*]], 42
+; AVX-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0
+; AVX-NEXT: ret <2 x i64> [[BO]]
;
%ins = insertelement <2 x i64> undef, i64 %x, i32 0
%bo = xor <2 x i64> %ins, <i64 42, i64 -42>
diff --git a/llvm/test/Transforms/VectorCombine/X86/insert-binop.ll b/llvm/test/Transforms/VectorCombine/X86/insert-binop.ll
index cd7e2ad2ca2c6..789ee7b3cdf0d 100644
--- a/llvm/test/Transforms/VectorCombine/X86/insert-binop.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/insert-binop.ll
@@ -8,10 +8,16 @@ declare void @usef(<4 x float>)
; Eliminating an insert is profitable.
define <16 x i8> @ins0_ins0_add(i8 %x, i8 %y) {
-; CHECK-LABEL: @ins0_ins0_add(
-; CHECK-NEXT: [[R_SCALAR:%.*]] = add i8 [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT: [[R:%.*]] = insertelement <16 x i8> undef, i8 [[R_SCALAR]], i64 0
-; CHECK-NEXT: ret <16 x i8> [[R]]
+; SSE-LABEL: @ins0_ins0_add(
+; SSE-NEXT: [[I0:%.*]] = insertelement <16 x i8> undef, i8 [[X:%.*]], i32 0
+; SSE-NEXT: [[I1:%.*]] = insertelement <16 x i8> undef, i8 [[Y:%.*]], i32 0
+; SSE-NEXT: [[R:%.*]] = add <16 x i8> [[I0]], [[I1]]
+; SSE-NEXT: ret <16 x i8> [[R]]
+;
+; AVX-LABEL: @ins0_ins0_add(
+; AVX-NEXT: [[R_SCALAR:%.*]] = add i8 [[X:%.*]], [[Y:%.*]]
+; AVX-NEXT: [[R:%.*]] = insertelement <16 x i8> undef, i8 [[R_SCALAR]], i64 0
+; AVX-NEXT: ret <16 x i8> [[R]]
;
%i0 = insertelement <16 x i8> undef, i8 %x, i32 0
%i1 = insertelement <16 x i8> undef, i8 %y, i32 0
@@ -155,12 +161,19 @@ define <2 x i64> @ins1_ins1_urem(i64 %x, i64 %y) {
; Extra use is accounted for in cost calculation.
define <4 x i32> @ins0_ins0_xor(i32 %x, i32 %y) {
-; CHECK-LABEL: @ins0_ins0_xor(
-; CHECK-NEXT: [[I0:%.*]] = insertelement <4 x i32> undef, i32 [[X:%.*]], i32 0
-; CHECK-NEXT: call void @use(<4 x i32> [[I0]])
-; CHECK-NEXT: [[R_SCALAR:%.*]] = xor i32 [[X]], [[Y:%.*]]
-; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[R_SCALAR]], i64 0
-; CHECK-NEXT: ret <4 x i32> [[R]]
+; SSE-LABEL: @ins0_ins0_xor(
+; SSE-NEXT: [[I0:%.*]] = insertelement <4 x i32> undef, i32 [[X:%.*]], i32 0
+; SSE-NEXT: call void @use(<4 x i32> [[I0]])
+; SSE-NEXT: [[I1:%.*]] = insertelement <4 x i32> undef, i32 [[Y:%.*]], i32 0
+; SSE-NEXT: [[R:%.*]] = xor <4 x i32> [[I0]], [[I1]]
+; SSE-NEXT: ret <4 x i32> [[R]]
+;
+; AVX-LABEL: @ins0_ins0_xor(
+; AVX-NEXT: [[I0:%.*]] = insertelement <4 x i32> undef, i32 [[X:%.*]], i32 0
+; AVX-NEXT: call void @use(<4 x i32> [[I0]])
+; AVX-NEXT: [[R_SCALAR:%.*]] = xor i32 [[X]], [[Y:%.*]]
+; AVX-NEXT: [[R:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[R_SCALAR]], i64 0
+; AVX-NEXT: ret <4 x i32> [[R]]
;
%i0 = insertelement <4 x i32> undef, i32 %x, i32 0
call void @use(<4 x i32> %i0)
diff --git a/llvm/test/Transforms/VectorCombine/X86/scalarize-cmp-inseltpoison.ll b/llvm/test/Transforms/VectorCombine/X86/scalarize-cmp-inseltpoison.ll
index 14b517d613de4..adef56256bc7d 100644
--- a/llvm/test/Transforms/VectorCombine/X86/scalarize-cmp-inseltpoison.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/scalarize-cmp-inseltpoison.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=sse2 | FileCheck %s
-; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=avx2 | FileCheck %s
+; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=sse2 | FileCheck %s --check-prefixes=CHECK,SSE
+; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=avx2 | FileCheck %s --check-prefixes=CHECK,AVX
declare void @use(<4 x i32>)
declare void @usef(<4 x float>)
@@ -8,10 +8,16 @@ declare void @usef(<4 x float>)
; Eliminating an insert is profitable.
define <16 x i1> @ins0_ins0_i8(i8 %x, i8 %y) {
-; CHECK-LABEL: @ins0_ins0_i8(
-; CHECK-NEXT: [[R_SCALAR:%.*]] = icmp eq i8 [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT: [[R:%.*]] = insertelement <16 x i1> poison, i1 [[R_SCALAR]], i64 0
-; CHECK-NEXT: ret <16 x i1> [[R]]
+; SSE-LABEL: @ins0_ins0_i8(
+; SSE-NEXT: [[I0:%.*]] = insertelement <16 x i8> poison, i8 [[X:%.*]], i32 0
+; SSE-NEXT: [[I1:%.*]] = insertelement <16 x i8> poison, i8 [[Y:%.*]], i32 0
+; SSE-NEXT: [[R:%.*]] = icmp eq <16 x i8> [[I0]], [[I1]]
+; SSE-NEXT: ret <16 x i1> [[R]]
+;
+; AVX-LABEL: @ins0_ins0_i8(
+; AVX-NEXT: [[R_SCALAR:%.*]] = icmp eq i8 [[X:%.*]], [[Y:%.*]]
+; AVX-NEXT: [[R:%.*]] = insertelement <16 x i1> poison, i1 [[R_SCALAR]], i64 0
+; AVX-NEXT: ret <16 x i1> [[R]]
;
%i0 = insertelement <16 x i8> poison, i8 %x, i32 0
%i1 = insertelement <16 x i8> poison, i8 %y, i32 0
@@ -168,11 +174,17 @@ define <2 x i1> @constant_op1_i64_not_undef_lane(i64 %x) {
; negative test - load prevents the transform
define <2 x i1> @constant_op1_i64_load(ptr %p) {
-; CHECK-LABEL: @constant_op1_i64_load(
-; CHECK-NEXT: [[LD:%.*]] = load i64, ptr [[P:%.*]], align 8
-; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> poison, i64 [[LD]], i32 0
-; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i64> [[INS]], <i64 42, i64 -42>
-; CHECK-NEXT: ret <2 x i1> [[R]]
+; SSE-LABEL: @constant_op1_i64_load(
+; SSE-NEXT: [[LD:%.*]] = load i64, ptr [[P:%.*]], align 8
+; SSE-NEXT: [[R_SCALAR:%.*]] = icmp eq i64 [[LD]], 42
+; SSE-NEXT: [[R:%.*]] = insertelement <2 x i1> poison, i1 [[R_SCALAR]], i64 0
+; SSE-NEXT: ret <2 x i1> [[R]]
+;
+; AVX-LABEL: @constant_op1_i64_load(
+; AVX-NEXT: [[LD:%.*]] = load i64, ptr [[P:%.*]], align 8
+; AVX-NEXT: [[INS:%.*]] = insertelement <2 x i64> poison, i64 [[LD]], i32 0
+; AVX-NEXT: [[R:%.*]] = icmp eq <2 x i64> [[INS]], <i64 42, i64 -42>
+; AVX-NEXT: ret <2 x i1> [[R]]
;
%ld = load i64, ptr %p
%ins = insertelement <2 x i64> poison, i64 %ld, i32 0
@@ -236,10 +248,15 @@ define <2 x i1> @constant_op1_f64(double %x) {
}
define <4 x i1> @constant_op1_f32_not_undef_lane(float %x) {
-; CHECK-LABEL: @constant_op1_f32_not_undef_lane(
-; CHECK-NEXT: [[R_SCALAR:%.*]] = fcmp uge float [[X:%.*]], 4.200000e+01
-; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i1> poison, i1 [[R_SCALAR]], i64 0
-; CHECK-NEXT: ret <4 x i1> [[R]]
+; SSE-LABEL: @constant_op1_f32_not_undef_lane(
+; SSE-NEXT: [[INS:%.*]] = insertelement <4 x float> poison, float [[X:%.*]], i32 0
+; SSE-NEXT: [[R:%.*]] = fcmp uge <4 x float> [[INS]], <float 4.200000e+01, float -4.200000e+01, float 0.000000e+00, float 1.000000e+00>
+; SSE-NEXT: ret <4 x i1> [[R]]
+;
+; AVX-LABEL: @constant_op1_f32_not_undef_lane(
+; AVX-NEXT: [[R_SCALAR:%.*]] = fcmp uge float [[X:%.*]], 4.200000e+01
+; AVX-NEXT: [[R:%.*]] = insertelement <4 x i1> poison, i1 [[R_SCALAR]], i64 0
+; AVX-NEXT: ret <4 x i1> [[R]]
;
%ins = insertelement <4 x float> poison, float %x, i32 0
%r = fcmp uge <4 x float> %ins, <float 42.0, float -42.0, float 0.0, float 1.0>
@@ -279,10 +296,15 @@ define <4 x float> @vec_select_use2(<4 x float> %x, <4 x float> %y, float %a) {
}
define <4 x i1> @vector_of_pointers(ptr %t1) {
-; CHECK-LABEL: @vector_of_pointers(
-; CHECK-NEXT: [[T6_SCALAR:%.*]] = icmp ne ptr [[T1:%.*]], null
-; CHECK-NEXT: [[T6:%.*]] = insertelement <4 x i1> poison, i1 [[T6_SCALAR]], i64 0
-; CHECK-NEXT: ret <4 x i1> [[T6]]
+; SSE-LABEL: @vector_of_pointers(
+; SSE-NEXT: [[T5:%.*]] = insertelement <4 x ptr> poison, ptr [[T1:%.*]], i32 0
+; SSE-NEXT: [[T6:%.*]] = icmp ne <4 x ptr> [[T5]], zeroinitializer
+; SSE-NEXT: ret <4 x i1> [[T6]]
+;
+; AVX-LABEL: @vector_of_pointers(
+; AVX-NEXT: [[T6_SCALAR:%.*]] = icmp ne ptr [[T1:%.*]], null
+; AVX-NEXT: [[T6:%.*]] = insertelement <4 x i1> poison, i1 [[T6_SCALAR]], i64 0
+; AVX-NEXT: ret <4 x i1> [[T6]]
;
%t5 = insertelement <4 x ptr> poison, ptr %t1, i32 0
%t6 = icmp ne <4 x ptr> %t5, zeroinitializer
diff --git a/llvm/test/Transforms/VectorCombine/X86/scalarize-cmp.ll b/llvm/test/Transforms/VectorCombine/X86/scalarize-cmp.ll
index edd92c3f1c14c..0c585f20470c7 100644
--- a/llvm/test/Transforms/VectorCombine/X86/scalarize-cmp.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/scalarize-cmp.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=sse2 | FileCheck %s
-; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=avx2 | FileCheck %s
+; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=sse2 | FileCheck %s --check-prefixes=CHECK,SSE
+; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=avx2 | FileCheck %s --check-prefixes=CHECK,AVX
declare void @use(<4 x i32>)
declare void @usef(<4 x float>)
@@ -8,10 +8,16 @@ declare void @usef(<4 x float>)
; Eliminating an insert is profitable.
define <16 x i1> @ins0_ins0_i8(i8 %x, i8 %y) {
-; CHECK-LABEL: @ins0_ins0_i8(
-; CHECK-NEXT: [[R_SCALAR:%.*]] = icmp eq i8 [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT: [[R:%.*]] = insertelement <16 x i1> undef, i1 [[R_SCALAR]], i64 0
-; CHECK-NEXT: ret <16 x i1> [[R]]
+; SSE-LABEL: @ins0_ins0_i8(
+; SSE-NEXT: [[I0:%.*]] = insertelement <16 x i8> undef, i8 [[X:%.*]], i32 0
+; SSE-NEXT: [[I1:%.*]] = insertelement <16 x i8> undef, i8 [[Y:%.*]], i32 0
+; SSE-NEXT: [[R:%.*]] = icmp eq <16 x i8> [[I0]], [[I1]]
+; SSE-NEXT: ret <16 x i1> [[R]]
+;
+; AVX-LABEL: @ins0_ins0_i8(
+; AVX-NEXT: [[R_SCALAR:%.*]] = icmp eq i8 [[X:%.*]], [[Y:%.*]]
+; AVX-NEXT: [[R:%.*]] = insertelement <16 x i1> undef, i1 [[R_SCALAR]], i64 0
+; AVX-NEXT: ret <16 x i1> [[R]]
;
%i0 = insertelement <16 x i8> undef, i8 %x, i32 0
%i1 = insertelement <16 x i8> undef, i8 %y, i32 0
@@ -168,11 +174,17 @@ define <2 x i1> @constant_op1_i64_not_undef_lane(i64 %x) {
; negative test - load prevents the transform
define <2 x i1> @constant_op1_i64_load(ptr %p) {
-; CHECK-LABEL: @constant_op1_i64_load(
-; CHECK-NEXT: [[LD:%.*]] = load i64, ptr [[P:%.*]], align 8
-; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[LD]], i32 0
-; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i64> [[INS]], <i64 42, i64 -42>
-; CHECK-NEXT: ret <2 x i1> [[R]]
+; SSE-LABEL: @constant_op1_i64_load(
+; SSE-NEXT: [[LD:%.*]] = load i64, ptr [[P:%.*]], align 8
+; SSE-NEXT: [[R_SCALAR:%.*]] = icmp eq i64 [[LD]], 42
+; SSE-NEXT: [[R:%.*]] = insertelement <2 x i1> undef, i1 [[R_SCALAR]], i64 0
+; SSE-NEXT: ret <2 x i1> [[R]]
+;
+; AVX-LABEL: @constant_op1_i64_load(
+; AVX-NEXT: [[LD:%.*]] = load i64, ptr [[P:%.*]], align 8
+; AVX-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[LD]], i32 0
+; AVX-NEXT: [[R:%.*]] = icmp eq <2 x i64> [[INS]], <i64 42, i64 -42>
+; AVX-NEXT: ret <2 x i1> [[R]]
;
%ld = load i64, ptr %p
%ins = insertelement <2 x i64> undef, i64 %ld, i32 0
@@ -236,10 +248,15 @@ define <2 x i1> @constant_op1_f64(double %x) {
}
define <4 x i1> @constant_op1_f32_not_undef_lane(float %x) {
-; CHECK-LABEL: @constant_op1_f32_not_undef_lane(
-; CHECK-NEXT: [[R_SCALAR:%.*]] = fcmp uge float [[X:%.*]], 4.200000e+01
-; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i1> splat (i1 true), i1 [[R_SCALAR]], i64 0
-; CHECK-NEXT: ret <4 x i1> [[R]]
+; SSE-LABEL: @constant_op1_f32_not_undef_lane(
+; SSE-NEXT: [[INS:%.*]] = insertelement <4 x float> undef, float [[X:%.*]], i32 0
+; SSE-NEXT: [[R:%.*]] = fcmp uge <4 x float> [[INS]], <float 4.200000e+01, float -4.200000e+01, float 0.000000e+00, float 1.000000e+00>
+; SSE-NEXT: ret <4 x i1> [[R]]
+;
+; AVX-LABEL: @constant_op1_f32_not_undef_lane(
+; AVX-NEXT: [[R_SCALAR:%.*]] = fcmp uge float [[X:%.*]], 4.200000e+01
+; AVX-NEXT: [[R:%.*]] = insertelement <4 x i1> splat (i1 true), i1 [[R_SCALAR]], i64 0
+; AVX-NEXT: ret <4 x i1> [[R]]
;
%ins = insertelement <4 x float> undef, float %x, i32 0
%r = fcmp uge <4 x float> %ins, <float 42.0, float -42.0, float 0.0, float 1.0>
@@ -279,10 +296,15 @@ define <4 x float> @vec_select_use2(<4 x float> %x, <4 x float> %y, float %a) {
}
define <4 x i1> @vector_of_pointers(ptr %t1) {
-; CHECK-LABEL: @vector_of_pointers(
-; CHECK-NEXT: [[T6_SCALAR:%.*]] = icmp ne ptr [[T1:%.*]], null
-; CHECK-NEXT: [[T6:%.*]] = insertelement <4 x i1> undef, i1 [[T6_SCALAR]], i64 0
-; CHECK-NEXT: ret <4 x i1> [[T6]]
+; SSE-LABEL: @vector_of_pointers(
+; SSE-NEXT: [[T5:%.*]] = insertelement <4 x ptr> undef, ptr [[T1:%.*]], i32 0
+; SSE-NEXT: [[T6:%.*]] = icmp ne <4 x ptr> [[T5]], zeroinitializer
+; SSE-NEXT: ret <4 x i1> [[T6]]
+;
+; AVX-LABEL: @vector_of_pointers(
+; AVX-NEXT: [[T6_SCALAR:%.*]] = icmp ne ptr [[T1:%.*]], null
+; AVX-NEXT: [[T6:%.*]] = insertelement <4 x i1> undef, i1 [[T6_SCALAR]], i64 0
+; AVX-NEXT: ret <4 x i1> [[T6]]
;
%t5 = insertelement <4 x ptr> undef, ptr %t1, i32 0
%t6 = icmp ne <4 x ptr> %t5, zeroinitializer
>From 7f011c4067083fa989e61f5623597d3fd828dc4b Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Wed, 30 Apr 2025 18:47:22 +0800
Subject: [PATCH 2/7] Update phaseordering tests
---
.../X86/scalarization-inseltpoison.ll | 26 ++++++++++++-------
.../PhaseOrdering/X86/scalarization.ll | 26 ++++++++++++-------
2 files changed, 34 insertions(+), 18 deletions(-)
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/scalarization-inseltpoison.ll b/llvm/test/Transforms/PhaseOrdering/X86/scalarization-inseltpoison.ll
index d36da8d028c60..6319e977bf35a 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/scalarization-inseltpoison.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/scalarization-inseltpoison.ll
@@ -12,21 +12,29 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
define <4 x i32> @square(<4 x i32> %num, i32 %y, i32 %x, i32 %h, i32 %k, i32 %w, i32 %p, i32 %j, i32 %u) {
; CHECK-LABEL: @square(
; CHECK-NEXT: [[DIV:%.*]] = sdiv i32 [[K:%.*]], 2
+; CHECK-NEXT: [[SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[DIV]], i64 0
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[P:%.*]], 6234
+; CHECK-NEXT: [[SPLATINSERT2:%.*]] = insertelement <4 x i32> poison, i32 [[MUL]], i64 0
; CHECK-NEXT: [[MUL5:%.*]] = mul nsw i32 [[H:%.*]], 75
+; CHECK-NEXT: [[SPLATINSERT6:%.*]] = insertelement <4 x i32> poison, i32 [[MUL5]], i64 0
; CHECK-NEXT: [[DIV9:%.*]] = sdiv i32 [[J:%.*]], 3452
+; CHECK-NEXT: [[SPLATINSERT10:%.*]] = insertelement <4 x i32> poison, i32 [[DIV9]], i64 0
; CHECK-NEXT: [[MUL13:%.*]] = mul nsw i32 [[W:%.*]], 53
+; CHECK-NEXT: [[SPLATINSERT14:%.*]] = insertelement <4 x i32> poison, i32 [[MUL13]], i64 0
; CHECK-NEXT: [[DIV17:%.*]] = sdiv i32 [[X:%.*]], 820
+; CHECK-NEXT: [[SPLATINSERT18:%.*]] = insertelement <4 x i32> poison, i32 [[DIV17]], i64 0
; CHECK-NEXT: [[MUL21:%.*]] = shl nsw i32 [[U:%.*]], 2
-; CHECK-NEXT: [[OP_RDX:%.*]] = add nsw i32 [[DIV17]], 317426
-; CHECK-NEXT: [[OP_RDX9:%.*]] = add nsw i32 [[DIV]], [[DIV9]]
-; CHECK-NEXT: [[OP_RDX10:%.*]] = add i32 [[MUL5]], [[MUL13]]
-; CHECK-NEXT: [[OP_RDX11:%.*]] = add i32 [[MUL]], [[MUL21]]
-; CHECK-NEXT: [[OP_RDX12:%.*]] = add i32 [[OP_RDX]], [[OP_RDX9]]
-; CHECK-NEXT: [[OP_RDX13:%.*]] = add i32 [[OP_RDX10]], [[OP_RDX11]]
-; CHECK-NEXT: [[OP_RDX14:%.*]] = add i32 [[OP_RDX12]], [[OP_RDX13]]
-; CHECK-NEXT: [[OP_RDX15:%.*]] = add i32 [[OP_RDX14]], [[Y:%.*]]
-; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[OP_RDX15]], i64 0
+; CHECK-NEXT: [[SPLATINSERT22:%.*]] = insertelement <4 x i32> poison, i32 [[MUL21]], i64 0
+; CHECK-NEXT: [[SPLATINSERT25:%.*]] = insertelement <4 x i32> poison, i32 [[Y:%.*]], i64 0
+; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i32> [[SPLATINSERT25]], <i32 1, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP10:%.*]] = add <4 x i32> [[TMP9]], [[SPLATINSERT18]]
+; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[TMP10]], [[SPLATINSERT6]]
+; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP3]], [[SPLATINSERT]]
+; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP4]], [[SPLATINSERT14]]
+; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i32> [[TMP5]], [[SPLATINSERT2]]
+; CHECK-NEXT: [[TMP7:%.*]] = add <4 x i32> [[TMP6]], [[SPLATINSERT10]]
+; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i32> [[TMP7]], [[SPLATINSERT22]]
+; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[TMP8]], <i32 317425, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[ADD29:%.*]] = add <4 x i32> [[TMP2]], [[NUM:%.*]]
; CHECK-NEXT: ret <4 x i32> [[ADD29]]
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/scalarization.ll b/llvm/test/Transforms/PhaseOrdering/X86/scalarization.ll
index c3131a41c2b2e..5922b34985815 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/scalarization.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/scalarization.ll
@@ -12,21 +12,29 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
define <4 x i32> @square(<4 x i32> %num, i32 %y, i32 %x, i32 %h, i32 %k, i32 %w, i32 %p, i32 %j, i32 %u) {
; CHECK-LABEL: @square(
; CHECK-NEXT: [[DIV:%.*]] = sdiv i32 [[K:%.*]], 2
+; CHECK-NEXT: [[SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[DIV]], i64 0
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[P:%.*]], 6234
+; CHECK-NEXT: [[SPLATINSERT2:%.*]] = insertelement <4 x i32> poison, i32 [[MUL]], i64 0
; CHECK-NEXT: [[MUL5:%.*]] = mul nsw i32 [[H:%.*]], 75
+; CHECK-NEXT: [[SPLATINSERT6:%.*]] = insertelement <4 x i32> poison, i32 [[MUL5]], i64 0
; CHECK-NEXT: [[DIV9:%.*]] = sdiv i32 [[J:%.*]], 3452
+; CHECK-NEXT: [[SPLATINSERT10:%.*]] = insertelement <4 x i32> poison, i32 [[DIV9]], i64 0
; CHECK-NEXT: [[MUL13:%.*]] = mul nsw i32 [[W:%.*]], 53
+; CHECK-NEXT: [[SPLATINSERT14:%.*]] = insertelement <4 x i32> poison, i32 [[MUL13]], i64 0
; CHECK-NEXT: [[DIV17:%.*]] = sdiv i32 [[X:%.*]], 820
+; CHECK-NEXT: [[SPLATINSERT18:%.*]] = insertelement <4 x i32> poison, i32 [[DIV17]], i64 0
; CHECK-NEXT: [[MUL21:%.*]] = shl nsw i32 [[U:%.*]], 2
-; CHECK-NEXT: [[OP_RDX:%.*]] = add nsw i32 [[DIV17]], 317426
-; CHECK-NEXT: [[OP_RDX9:%.*]] = add nsw i32 [[DIV]], [[DIV9]]
-; CHECK-NEXT: [[OP_RDX10:%.*]] = add i32 [[MUL5]], [[MUL13]]
-; CHECK-NEXT: [[OP_RDX11:%.*]] = add i32 [[MUL]], [[MUL21]]
-; CHECK-NEXT: [[OP_RDX12:%.*]] = add i32 [[OP_RDX]], [[OP_RDX9]]
-; CHECK-NEXT: [[OP_RDX13:%.*]] = add i32 [[OP_RDX10]], [[OP_RDX11]]
-; CHECK-NEXT: [[OP_RDX14:%.*]] = add i32 [[OP_RDX12]], [[OP_RDX13]]
-; CHECK-NEXT: [[OP_RDX15:%.*]] = add i32 [[OP_RDX14]], [[Y:%.*]]
-; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[OP_RDX15]], i64 0
+; CHECK-NEXT: [[SPLATINSERT22:%.*]] = insertelement <4 x i32> poison, i32 [[MUL21]], i64 0
+; CHECK-NEXT: [[SPLATINSERT25:%.*]] = insertelement <4 x i32> poison, i32 [[Y:%.*]], i64 0
+; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i32> [[SPLATINSERT25]], <i32 1, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP10:%.*]] = add <4 x i32> [[TMP9]], [[SPLATINSERT18]]
+; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[TMP10]], [[SPLATINSERT6]]
+; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP3]], [[SPLATINSERT]]
+; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP4]], [[SPLATINSERT14]]
+; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i32> [[TMP5]], [[SPLATINSERT2]]
+; CHECK-NEXT: [[TMP7:%.*]] = add <4 x i32> [[TMP6]], [[SPLATINSERT10]]
+; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i32> [[TMP7]], [[SPLATINSERT22]]
+; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[TMP8]], <i32 317425, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[ADD29:%.*]] = add <4 x i32> [[TMP2]], [[NUM:%.*]]
; CHECK-NEXT: ret <4 x i32> [[ADD29]]
>From b155053804be5d8934842fad4307852327889285 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Wed, 7 May 2025 18:57:27 +0800
Subject: [PATCH 3/7] Plumb through constant folded base vector to avoid x86
regressions
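The gist, consolidated from the two hunks below (a sketch; all names follow the surrounding function): fold the constant base vector up front so the insert-cost query can see the real constant, and only materialize an instruction later if folding fails.

    // VecC0/VecC1, Pred, Opcode, DL and Builder are already in scope in
    // scalarizeBinopOrCmp. Try to fold the constant operands first...
    Value *NewVecC =
        IsCmp ? ConstantFoldCompareInstOperands(Pred, VecC0, VecC1, *DL)
              : ConstantFoldBinaryOpOperands((Instruction::BinaryOps)Opcode,
                                             VecC0, VecC1, *DL);
    // ...and only when folding fails, fall back to emitting the vector op
    // at rewrite time.
    if (!NewVecC)
      NewVecC = IsCmp ? Builder.CreateCmp(Pred, VecC0, VecC1)
                      : Builder.CreateBinOp((Instruction::BinaryOps)Opcode,
                                            VecC0, VecC1);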
---
.../Transforms/Vectorize/VectorCombine.cpp | 23 +++--
.../X86/scalarization-inseltpoison.ll | 26 ++---
.../PhaseOrdering/X86/scalarization.ll | 26 ++---
.../X86/insert-binop-inseltpoison.ll | 33 ++-----
...insert-binop-with-constant-inseltpoison.ll | 78 +++++----------
.../X86/insert-binop-with-constant.ll | 94 +++++++++----------
.../VectorCombine/X86/insert-binop.ll | 14 +--
.../X86/scalarize-cmp-inseltpoison.ll | 40 +++-----
.../VectorCombine/X86/scalarize-cmp.ll | 27 ++----
9 files changed, 138 insertions(+), 223 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 9225d36a0cf61..6b91b5584365f 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -19,6 +19,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/TargetTransformInfo.h"
@@ -1088,20 +1089,25 @@ bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
VectorOpCost = TTI.getArithmeticInstrCost(Opcode, VecTy, CostKind);
}
+ // Fold the vector constants in the original vectors into a new base vector.
+ Value *NewVecC =
+ IsCmp ? ConstantFoldCompareInstOperands(Pred, VecC0, VecC1, *DL)
+ : ConstantFoldBinaryOpOperands((Instruction::BinaryOps)Opcode,
+ VecC0, VecC1, *DL);
+
// Get cost estimate for the insert element. This cost will factor into
// both sequences.
- InstructionCost InsertCost = TTI.getVectorInstrCost(
- Instruction::InsertElement, VecTy, CostKind, Index);
+ InstructionCost InsertCostNewVecC = TTI.getVectorInstrCost(
+ Instruction::InsertElement, VecTy, CostKind, Index, NewVecC);
InstructionCost InsertCostV0 = TTI.getVectorInstrCost(
Instruction::InsertElement, VecTy, CostKind, Index, VecC0, V0);
InstructionCost InsertCostV1 = TTI.getVectorInstrCost(
Instruction::InsertElement, VecTy, CostKind, Index, VecC1, V1);
InstructionCost OldCost = (IsConst0 ? 0 : InsertCostV0) +
(IsConst1 ? 0 : InsertCostV1) + VectorOpCost;
- InstructionCost NewCost = ScalarOpCost + InsertCost +
+ InstructionCost NewCost = ScalarOpCost + InsertCostNewVecC +
(IsConst0 ? 0 : !Ins0->hasOneUse() * InsertCostV0) +
(IsConst1 ? 0 : !Ins1->hasOneUse() * InsertCostV1);
-
// We want to scalarize unless the vector variant actually has lower cost.
if (OldCost < NewCost || !NewCost.isValid())
return false;
@@ -1130,10 +1136,11 @@ bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
if (auto *ScalarInst = dyn_cast<Instruction>(Scalar))
ScalarInst->copyIRFlags(&I);
- // Fold the vector constants in the original vectors into a new base vector.
- Value *NewVecC =
- IsCmp ? Builder.CreateCmp(Pred, VecC0, VecC1)
- : Builder.CreateBinOp((Instruction::BinaryOps)Opcode, VecC0, VecC1);
+ // Create a new base vector in case the constant folding failed.
+ if (!NewVecC)
+ NewVecC = IsCmp ? Builder.CreateCmp(Pred, VecC0, VecC1)
+ : Builder.CreateBinOp((Instruction::BinaryOps)Opcode, VecC0,
+ VecC1);
Value *Insert = Builder.CreateInsertElement(NewVecC, Scalar, Index);
replaceValue(I, *Insert);
return true;
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/scalarization-inseltpoison.ll b/llvm/test/Transforms/PhaseOrdering/X86/scalarization-inseltpoison.ll
index 6319e977bf35a..d36da8d028c60 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/scalarization-inseltpoison.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/scalarization-inseltpoison.ll
@@ -12,29 +12,21 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
define <4 x i32> @square(<4 x i32> %num, i32 %y, i32 %x, i32 %h, i32 %k, i32 %w, i32 %p, i32 %j, i32 %u) {
; CHECK-LABEL: @square(
; CHECK-NEXT: [[DIV:%.*]] = sdiv i32 [[K:%.*]], 2
-; CHECK-NEXT: [[SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[DIV]], i64 0
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[P:%.*]], 6234
-; CHECK-NEXT: [[SPLATINSERT2:%.*]] = insertelement <4 x i32> poison, i32 [[MUL]], i64 0
; CHECK-NEXT: [[MUL5:%.*]] = mul nsw i32 [[H:%.*]], 75
-; CHECK-NEXT: [[SPLATINSERT6:%.*]] = insertelement <4 x i32> poison, i32 [[MUL5]], i64 0
; CHECK-NEXT: [[DIV9:%.*]] = sdiv i32 [[J:%.*]], 3452
-; CHECK-NEXT: [[SPLATINSERT10:%.*]] = insertelement <4 x i32> poison, i32 [[DIV9]], i64 0
; CHECK-NEXT: [[MUL13:%.*]] = mul nsw i32 [[W:%.*]], 53
-; CHECK-NEXT: [[SPLATINSERT14:%.*]] = insertelement <4 x i32> poison, i32 [[MUL13]], i64 0
; CHECK-NEXT: [[DIV17:%.*]] = sdiv i32 [[X:%.*]], 820
-; CHECK-NEXT: [[SPLATINSERT18:%.*]] = insertelement <4 x i32> poison, i32 [[DIV17]], i64 0
; CHECK-NEXT: [[MUL21:%.*]] = shl nsw i32 [[U:%.*]], 2
-; CHECK-NEXT: [[SPLATINSERT22:%.*]] = insertelement <4 x i32> poison, i32 [[MUL21]], i64 0
-; CHECK-NEXT: [[SPLATINSERT25:%.*]] = insertelement <4 x i32> poison, i32 [[Y:%.*]], i64 0
-; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i32> [[SPLATINSERT25]], <i32 1, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP10:%.*]] = add <4 x i32> [[TMP9]], [[SPLATINSERT18]]
-; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[TMP10]], [[SPLATINSERT6]]
-; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP3]], [[SPLATINSERT]]
-; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP4]], [[SPLATINSERT14]]
-; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i32> [[TMP5]], [[SPLATINSERT2]]
-; CHECK-NEXT: [[TMP7:%.*]] = add <4 x i32> [[TMP6]], [[SPLATINSERT10]]
-; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i32> [[TMP7]], [[SPLATINSERT22]]
-; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[TMP8]], <i32 317425, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[OP_RDX:%.*]] = add nsw i32 [[DIV17]], 317426
+; CHECK-NEXT: [[OP_RDX9:%.*]] = add nsw i32 [[DIV]], [[DIV9]]
+; CHECK-NEXT: [[OP_RDX10:%.*]] = add i32 [[MUL5]], [[MUL13]]
+; CHECK-NEXT: [[OP_RDX11:%.*]] = add i32 [[MUL]], [[MUL21]]
+; CHECK-NEXT: [[OP_RDX12:%.*]] = add i32 [[OP_RDX]], [[OP_RDX9]]
+; CHECK-NEXT: [[OP_RDX13:%.*]] = add i32 [[OP_RDX10]], [[OP_RDX11]]
+; CHECK-NEXT: [[OP_RDX14:%.*]] = add i32 [[OP_RDX12]], [[OP_RDX13]]
+; CHECK-NEXT: [[OP_RDX15:%.*]] = add i32 [[OP_RDX14]], [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[OP_RDX15]], i64 0
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[ADD29:%.*]] = add <4 x i32> [[TMP2]], [[NUM:%.*]]
; CHECK-NEXT: ret <4 x i32> [[ADD29]]
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/scalarization.ll b/llvm/test/Transforms/PhaseOrdering/X86/scalarization.ll
index 5922b34985815..c3131a41c2b2e 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/scalarization.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/scalarization.ll
@@ -12,29 +12,21 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
define <4 x i32> @square(<4 x i32> %num, i32 %y, i32 %x, i32 %h, i32 %k, i32 %w, i32 %p, i32 %j, i32 %u) {
; CHECK-LABEL: @square(
; CHECK-NEXT: [[DIV:%.*]] = sdiv i32 [[K:%.*]], 2
-; CHECK-NEXT: [[SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[DIV]], i64 0
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[P:%.*]], 6234
-; CHECK-NEXT: [[SPLATINSERT2:%.*]] = insertelement <4 x i32> poison, i32 [[MUL]], i64 0
; CHECK-NEXT: [[MUL5:%.*]] = mul nsw i32 [[H:%.*]], 75
-; CHECK-NEXT: [[SPLATINSERT6:%.*]] = insertelement <4 x i32> poison, i32 [[MUL5]], i64 0
; CHECK-NEXT: [[DIV9:%.*]] = sdiv i32 [[J:%.*]], 3452
-; CHECK-NEXT: [[SPLATINSERT10:%.*]] = insertelement <4 x i32> poison, i32 [[DIV9]], i64 0
; CHECK-NEXT: [[MUL13:%.*]] = mul nsw i32 [[W:%.*]], 53
-; CHECK-NEXT: [[SPLATINSERT14:%.*]] = insertelement <4 x i32> poison, i32 [[MUL13]], i64 0
; CHECK-NEXT: [[DIV17:%.*]] = sdiv i32 [[X:%.*]], 820
-; CHECK-NEXT: [[SPLATINSERT18:%.*]] = insertelement <4 x i32> poison, i32 [[DIV17]], i64 0
; CHECK-NEXT: [[MUL21:%.*]] = shl nsw i32 [[U:%.*]], 2
-; CHECK-NEXT: [[SPLATINSERT22:%.*]] = insertelement <4 x i32> poison, i32 [[MUL21]], i64 0
-; CHECK-NEXT: [[SPLATINSERT25:%.*]] = insertelement <4 x i32> poison, i32 [[Y:%.*]], i64 0
-; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i32> [[SPLATINSERT25]], <i32 1, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP10:%.*]] = add <4 x i32> [[TMP9]], [[SPLATINSERT18]]
-; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[TMP10]], [[SPLATINSERT6]]
-; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP3]], [[SPLATINSERT]]
-; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP4]], [[SPLATINSERT14]]
-; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i32> [[TMP5]], [[SPLATINSERT2]]
-; CHECK-NEXT: [[TMP7:%.*]] = add <4 x i32> [[TMP6]], [[SPLATINSERT10]]
-; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i32> [[TMP7]], [[SPLATINSERT22]]
-; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[TMP8]], <i32 317425, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[OP_RDX:%.*]] = add nsw i32 [[DIV17]], 317426
+; CHECK-NEXT: [[OP_RDX9:%.*]] = add nsw i32 [[DIV]], [[DIV9]]
+; CHECK-NEXT: [[OP_RDX10:%.*]] = add i32 [[MUL5]], [[MUL13]]
+; CHECK-NEXT: [[OP_RDX11:%.*]] = add i32 [[MUL]], [[MUL21]]
+; CHECK-NEXT: [[OP_RDX12:%.*]] = add i32 [[OP_RDX]], [[OP_RDX9]]
+; CHECK-NEXT: [[OP_RDX13:%.*]] = add i32 [[OP_RDX10]], [[OP_RDX11]]
+; CHECK-NEXT: [[OP_RDX14:%.*]] = add i32 [[OP_RDX12]], [[OP_RDX13]]
+; CHECK-NEXT: [[OP_RDX15:%.*]] = add i32 [[OP_RDX14]], [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[OP_RDX15]], i64 0
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[ADD29:%.*]] = add <4 x i32> [[TMP2]], [[NUM:%.*]]
; CHECK-NEXT: ret <4 x i32> [[ADD29]]
diff --git a/llvm/test/Transforms/VectorCombine/X86/insert-binop-inseltpoison.ll b/llvm/test/Transforms/VectorCombine/X86/insert-binop-inseltpoison.ll
index 76440c7047059..c1100780254c1 100644
--- a/llvm/test/Transforms/VectorCombine/X86/insert-binop-inseltpoison.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/insert-binop-inseltpoison.ll
@@ -8,16 +8,10 @@ declare void @usef(<4 x float>)
; Eliminating an insert is profitable.
define <16 x i8> @ins0_ins0_add(i8 %x, i8 %y) {
-; SSE-LABEL: @ins0_ins0_add(
-; SSE-NEXT: [[I0:%.*]] = insertelement <16 x i8> poison, i8 [[X:%.*]], i32 0
-; SSE-NEXT: [[I1:%.*]] = insertelement <16 x i8> poison, i8 [[Y:%.*]], i32 0
-; SSE-NEXT: [[R:%.*]] = add <16 x i8> [[I0]], [[I1]]
-; SSE-NEXT: ret <16 x i8> [[R]]
-;
-; AVX-LABEL: @ins0_ins0_add(
-; AVX-NEXT: [[R_SCALAR:%.*]] = add i8 [[X:%.*]], [[Y:%.*]]
-; AVX-NEXT: [[R:%.*]] = insertelement <16 x i8> poison, i8 [[R_SCALAR]], i64 0
-; AVX-NEXT: ret <16 x i8> [[R]]
+; CHECK-LABEL: @ins0_ins0_add(
+; CHECK-NEXT: [[R_SCALAR:%.*]] = add i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[R:%.*]] = insertelement <16 x i8> poison, i8 [[R_SCALAR]], i64 0
+; CHECK-NEXT: ret <16 x i8> [[R]]
;
%i0 = insertelement <16 x i8> poison, i8 %x, i32 0
%i1 = insertelement <16 x i8> poison, i8 %y, i32 0
@@ -161,19 +155,12 @@ define <2 x i64> @ins1_ins1_urem(i64 %x, i64 %y) {
; Extra use is accounted for in cost calculation.
define <4 x i32> @ins0_ins0_xor(i32 %x, i32 %y) {
-; SSE-LABEL: @ins0_ins0_xor(
-; SSE-NEXT: [[I0:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i32 0
-; SSE-NEXT: call void @use(<4 x i32> [[I0]])
-; SSE-NEXT: [[I1:%.*]] = insertelement <4 x i32> poison, i32 [[Y:%.*]], i32 0
-; SSE-NEXT: [[R:%.*]] = xor <4 x i32> [[I0]], [[I1]]
-; SSE-NEXT: ret <4 x i32> [[R]]
-;
-; AVX-LABEL: @ins0_ins0_xor(
-; AVX-NEXT: [[I0:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i32 0
-; AVX-NEXT: call void @use(<4 x i32> [[I0]])
-; AVX-NEXT: [[R_SCALAR:%.*]] = xor i32 [[X]], [[Y:%.*]]
-; AVX-NEXT: [[R:%.*]] = insertelement <4 x i32> poison, i32 [[R_SCALAR]], i64 0
-; AVX-NEXT: ret <4 x i32> [[R]]
+; CHECK-LABEL: @ins0_ins0_xor(
+; CHECK-NEXT: [[I0:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i32 0
+; CHECK-NEXT: call void @use(<4 x i32> [[I0]])
+; CHECK-NEXT: [[R_SCALAR:%.*]] = xor i32 [[X]], [[Y:%.*]]
+; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i32> poison, i32 [[R_SCALAR]], i64 0
+; CHECK-NEXT: ret <4 x i32> [[R]]
;
%i0 = insertelement <4 x i32> poison, i32 %x, i32 0
call void @use(<4 x i32> %i0)
diff --git a/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant-inseltpoison.ll b/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant-inseltpoison.ll
index f1ce944d9836b..564c9a795a794 100644
--- a/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant-inseltpoison.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant-inseltpoison.ll
@@ -3,15 +3,10 @@
; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=AVX2 | FileCheck %s --check-prefixes=CHECK,AVX
define <2 x i64> @add_constant(i64 %x) {
-; SSE-LABEL: @add_constant(
-; SSE-NEXT: [[INS:%.*]] = insertelement <2 x i64> poison, i64 [[X:%.*]], i32 0
-; SSE-NEXT: [[BO:%.*]] = add <2 x i64> [[INS]], <i64 42, i64 undef>
-; SSE-NEXT: ret <2 x i64> [[BO]]
-;
-; AVX-LABEL: @add_constant(
-; AVX-NEXT: [[BO_SCALAR:%.*]] = add i64 [[X:%.*]], 42
-; AVX-NEXT: [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 0
-; AVX-NEXT: ret <2 x i64> [[BO]]
+; CHECK-LABEL: @add_constant(
+; CHECK-NEXT: [[BO_SCALAR:%.*]] = add i64 [[X:%.*]], 42
+; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 0
+; CHECK-NEXT: ret <2 x i64> [[BO]]
;
%ins = insertelement <2 x i64> poison, i64 %x, i32 0
%bo = add <2 x i64> %ins, <i64 42, i64 undef>
@@ -19,15 +14,10 @@ define <2 x i64> @add_constant(i64 %x) {
}
define <2 x i64> @add_constant_not_undef_lane(i64 %x) {
-; SSE-LABEL: @add_constant_not_undef_lane(
-; SSE-NEXT: [[INS:%.*]] = insertelement <2 x i64> poison, i64 [[X:%.*]], i32 0
-; SSE-NEXT: [[BO:%.*]] = add <2 x i64> [[INS]], <i64 42, i64 -42>
-; SSE-NEXT: ret <2 x i64> [[BO]]
-;
-; AVX-LABEL: @add_constant_not_undef_lane(
-; AVX-NEXT: [[BO_SCALAR:%.*]] = add i64 [[X:%.*]], 42
-; AVX-NEXT: [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 0
-; AVX-NEXT: ret <2 x i64> [[BO]]
+; CHECK-LABEL: @add_constant_not_undef_lane(
+; CHECK-NEXT: [[BO_SCALAR:%.*]] = add i64 [[X:%.*]], 42
+; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 0
+; CHECK-NEXT: ret <2 x i64> [[BO]]
;
%ins = insertelement <2 x i64> poison, i64 %x, i32 0
%bo = add <2 x i64> %ins, <i64 42, i64 -42>
@@ -489,15 +479,10 @@ define <2 x i64> @sdiv_constant_op1_not_undef_lane(i64 %x) {
}
define <2 x i64> @and_constant(i64 %x) {
-; SSE-LABEL: @and_constant(
-; SSE-NEXT: [[INS:%.*]] = insertelement <2 x i64> poison, i64 [[X:%.*]], i32 0
-; SSE-NEXT: [[BO:%.*]] = and <2 x i64> [[INS]], <i64 42, i64 undef>
-; SSE-NEXT: ret <2 x i64> [[BO]]
-;
-; AVX-LABEL: @and_constant(
-; AVX-NEXT: [[BO_SCALAR:%.*]] = and i64 [[X:%.*]], 42
-; AVX-NEXT: [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 0
-; AVX-NEXT: ret <2 x i64> [[BO]]
+; CHECK-LABEL: @and_constant(
+; CHECK-NEXT: [[BO_SCALAR:%.*]] = and i64 [[X:%.*]], 42
+; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 0
+; CHECK-NEXT: ret <2 x i64> [[BO]]
;
%ins = insertelement <2 x i64> poison, i64 %x, i32 0
%bo = and <2 x i64> %ins, <i64 42, i64 undef>
@@ -505,15 +490,10 @@ define <2 x i64> @and_constant(i64 %x) {
}
define <2 x i64> @and_constant_not_undef_lane(i64 %x) {
-; SSE-LABEL: @and_constant_not_undef_lane(
-; SSE-NEXT: [[INS:%.*]] = insertelement <2 x i64> poison, i64 [[X:%.*]], i32 0
-; SSE-NEXT: [[BO:%.*]] = and <2 x i64> [[INS]], <i64 42, i64 -42>
-; SSE-NEXT: ret <2 x i64> [[BO]]
-;
-; AVX-LABEL: @and_constant_not_undef_lane(
-; AVX-NEXT: [[BO_SCALAR:%.*]] = and i64 [[X:%.*]], 42
-; AVX-NEXT: [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 0
-; AVX-NEXT: ret <2 x i64> [[BO]]
+; CHECK-LABEL: @and_constant_not_undef_lane(
+; CHECK-NEXT: [[BO_SCALAR:%.*]] = and i64 [[X:%.*]], 42
+; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 0
+; CHECK-NEXT: ret <2 x i64> [[BO]]
;
%ins = insertelement <2 x i64> poison, i64 %x, i32 0
%bo = and <2 x i64> %ins, <i64 42, i64 -42>
@@ -543,15 +523,10 @@ define <2 x i64> @or_constant_not_undef_lane(i64 %x) {
}
define <2 x i64> @xor_constant(i64 %x) {
-; SSE-LABEL: @xor_constant(
-; SSE-NEXT: [[INS:%.*]] = insertelement <2 x i64> poison, i64 [[X:%.*]], i32 0
-; SSE-NEXT: [[BO:%.*]] = xor <2 x i64> [[INS]], <i64 42, i64 undef>
-; SSE-NEXT: ret <2 x i64> [[BO]]
-;
-; AVX-LABEL: @xor_constant(
-; AVX-NEXT: [[BO_SCALAR:%.*]] = xor i64 [[X:%.*]], 42
-; AVX-NEXT: [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 0
-; AVX-NEXT: ret <2 x i64> [[BO]]
+; CHECK-LABEL: @xor_constant(
+; CHECK-NEXT: [[BO_SCALAR:%.*]] = xor i64 [[X:%.*]], 42
+; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 0
+; CHECK-NEXT: ret <2 x i64> [[BO]]
;
%ins = insertelement <2 x i64> poison, i64 %x, i32 0
%bo = xor <2 x i64> %ins, <i64 42, i64 undef>
@@ -559,15 +534,10 @@ define <2 x i64> @xor_constant(i64 %x) {
}
define <2 x i64> @xor_constant_not_undef_lane(i64 %x) {
-; SSE-LABEL: @xor_constant_not_undef_lane(
-; SSE-NEXT: [[INS:%.*]] = insertelement <2 x i64> poison, i64 [[X:%.*]], i32 0
-; SSE-NEXT: [[BO:%.*]] = xor <2 x i64> [[INS]], <i64 42, i64 -42>
-; SSE-NEXT: ret <2 x i64> [[BO]]
-;
-; AVX-LABEL: @xor_constant_not_undef_lane(
-; AVX-NEXT: [[BO_SCALAR:%.*]] = xor i64 [[X:%.*]], 42
-; AVX-NEXT: [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 0
-; AVX-NEXT: ret <2 x i64> [[BO]]
+; CHECK-LABEL: @xor_constant_not_undef_lane(
+; CHECK-NEXT: [[BO_SCALAR:%.*]] = xor i64 [[X:%.*]], 42
+; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 0
+; CHECK-NEXT: ret <2 x i64> [[BO]]
;
%ins = insertelement <2 x i64> poison, i64 %x, i32 0
%bo = xor <2 x i64> %ins, <i64 42, i64 -42>
diff --git a/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant.ll b/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant.ll
index 7c3a40393c6c6..cf3bd00527f81 100644
--- a/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant.ll
@@ -3,15 +3,10 @@
; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=AVX2 | FileCheck %s --check-prefixes=CHECK,AVX
define <2 x i64> @add_constant(i64 %x) {
-; SSE-LABEL: @add_constant(
-; SSE-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0
-; SSE-NEXT: [[BO:%.*]] = add <2 x i64> [[INS]], <i64 42, i64 undef>
-; SSE-NEXT: ret <2 x i64> [[BO]]
-;
-; AVX-LABEL: @add_constant(
-; AVX-NEXT: [[BO_SCALAR:%.*]] = add i64 [[X:%.*]], 42
-; AVX-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0
-; AVX-NEXT: ret <2 x i64> [[BO]]
+; CHECK-LABEL: @add_constant(
+; CHECK-NEXT: [[BO_SCALAR:%.*]] = add i64 [[X:%.*]], 42
+; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0
+; CHECK-NEXT: ret <2 x i64> [[BO]]
;
%ins = insertelement <2 x i64> undef, i64 %x, i32 0
%bo = add <2 x i64> %ins, <i64 42, i64 undef>
@@ -19,15 +14,10 @@ define <2 x i64> @add_constant(i64 %x) {
}
define <2 x i64> @add_constant_not_undef_lane(i64 %x) {
-; SSE-LABEL: @add_constant_not_undef_lane(
-; SSE-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0
-; SSE-NEXT: [[BO:%.*]] = add <2 x i64> [[INS]], <i64 42, i64 -42>
-; SSE-NEXT: ret <2 x i64> [[BO]]
-;
-; AVX-LABEL: @add_constant_not_undef_lane(
-; AVX-NEXT: [[BO_SCALAR:%.*]] = add i64 [[X:%.*]], 42
-; AVX-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0
-; AVX-NEXT: ret <2 x i64> [[BO]]
+; CHECK-LABEL: @add_constant_not_undef_lane(
+; CHECK-NEXT: [[BO_SCALAR:%.*]] = add i64 [[X:%.*]], 42
+; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0
+; CHECK-NEXT: ret <2 x i64> [[BO]]
;
%ins = insertelement <2 x i64> undef, i64 %x, i32 0
%bo = add <2 x i64> %ins, <i64 42, i64 -42>
@@ -559,15 +549,10 @@ define <2 x i64> @xor_constant(i64 %x) {
}
define <2 x i64> @xor_constant_not_undef_lane(i64 %x) {
-; SSE-LABEL: @xor_constant_not_undef_lane(
-; SSE-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0
-; SSE-NEXT: [[BO:%.*]] = xor <2 x i64> [[INS]], <i64 42, i64 -42>
-; SSE-NEXT: ret <2 x i64> [[BO]]
-;
-; AVX-LABEL: @xor_constant_not_undef_lane(
-; AVX-NEXT: [[BO_SCALAR:%.*]] = xor i64 [[X:%.*]], 42
-; AVX-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0
-; AVX-NEXT: ret <2 x i64> [[BO]]
+; CHECK-LABEL: @xor_constant_not_undef_lane(
+; CHECK-NEXT: [[BO_SCALAR:%.*]] = xor i64 [[X:%.*]], 42
+; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0
+; CHECK-NEXT: ret <2 x i64> [[BO]]
;
%ins = insertelement <2 x i64> undef, i64 %x, i32 0
%bo = xor <2 x i64> %ins, <i64 42, i64 -42>
@@ -576,8 +561,8 @@ define <2 x i64> @xor_constant_not_undef_lane(i64 %x) {
define <2 x double> @fadd_constant(double %x) {
; CHECK-LABEL: @fadd_constant(
-; CHECK-NEXT: [[BO_SCALAR:%.*]] = fadd double [[X:%.*]], 4.200000e+01
-; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> <double 0x7FF8000000000000, double undef>, double [[BO_SCALAR]], i64 0
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0
+; CHECK-NEXT: [[BO:%.*]] = fadd <2 x double> [[INS]], <double 4.200000e+01, double undef>
; CHECK-NEXT: ret <2 x double> [[BO]]
;
%ins = insertelement <2 x double> undef, double %x, i32 0
@@ -598,8 +583,8 @@ define <2 x double> @fadd_constant_not_undef_lane(double %x) {
define <2 x double> @fsub_constant_op0(double %x) {
; CHECK-LABEL: @fsub_constant_op0(
-; CHECK-NEXT: [[BO_SCALAR:%.*]] = fsub fast double 4.200000e+01, [[X:%.*]]
-; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> <double 0x7FF8000000000000, double undef>, double [[BO_SCALAR]], i64 0
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0
+; CHECK-NEXT: [[BO:%.*]] = fsub fast <2 x double> <double 4.200000e+01, double undef>, [[INS]]
; CHECK-NEXT: ret <2 x double> [[BO]]
;
%ins = insertelement <2 x double> undef, double %x, i32 0
@@ -631,8 +616,8 @@ define <2 x double> @fsub_constant_op1(double %x) {
define <2 x double> @fsub_constant_op1_not_undef_lane(double %x) {
; CHECK-LABEL: @fsub_constant_op1_not_undef_lane(
-; CHECK-NEXT: [[BO_SCALAR:%.*]] = fsub double [[X:%.*]], 4.200000e+01
-; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> splat (double 0x7FF8000000000000), double [[BO_SCALAR]], i64 0
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0
+; CHECK-NEXT: [[BO:%.*]] = fsub <2 x double> [[INS]], <double 4.200000e+01, double -4.200000e+01>
; CHECK-NEXT: ret <2 x double> [[BO]]
;
%ins = insertelement <2 x double> undef, double %x, i32 0
@@ -642,8 +627,8 @@ define <2 x double> @fsub_constant_op1_not_undef_lane(double %x) {
define <2 x double> @fmul_constant(double %x) {
; CHECK-LABEL: @fmul_constant(
-; CHECK-NEXT: [[BO_SCALAR:%.*]] = fmul reassoc double [[X:%.*]], 4.200000e+01
-; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> <double 0x7FF8000000000000, double undef>, double [[BO_SCALAR]], i64 0
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0
+; CHECK-NEXT: [[BO:%.*]] = fmul reassoc <2 x double> [[INS]], <double 4.200000e+01, double undef>
; CHECK-NEXT: ret <2 x double> [[BO]]
;
%ins = insertelement <2 x double> undef, double %x, i32 0
@@ -674,10 +659,15 @@ define <2 x double> @fdiv_constant_op0(double %x) {
}
define <2 x double> @fdiv_constant_op0_not_undef_lane(double %x) {
-; CHECK-LABEL: @fdiv_constant_op0_not_undef_lane(
-; CHECK-NEXT: [[BO_SCALAR:%.*]] = fdiv ninf double 4.200000e+01, [[X:%.*]]
-; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> splat (double 0x7FF8000000000000), double [[BO_SCALAR]], i64 0
-; CHECK-NEXT: ret <2 x double> [[BO]]
+; SSE-LABEL: @fdiv_constant_op0_not_undef_lane(
+; SSE-NEXT: [[BO_SCALAR:%.*]] = fdiv ninf double 4.200000e+01, [[X:%.*]]
+; SSE-NEXT: [[BO:%.*]] = insertelement <2 x double> splat (double 0x7FF8000000000000), double [[BO_SCALAR]], i64 0
+; SSE-NEXT: ret <2 x double> [[BO]]
+;
+; AVX-LABEL: @fdiv_constant_op0_not_undef_lane(
+; AVX-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0
+; AVX-NEXT: [[BO:%.*]] = fdiv ninf <2 x double> <double 4.200000e+01, double -4.200000e+01>, [[INS]]
+; AVX-NEXT: ret <2 x double> [[BO]]
;
%ins = insertelement <2 x double> undef, double %x, i32 0
%bo = fdiv ninf <2 x double> <double 42.0, double -42.0>, %ins
@@ -685,10 +675,15 @@ define <2 x double> @fdiv_constant_op0_not_undef_lane(double %x) {
}
define <2 x double> @fdiv_constant_op1(double %x) {
-; CHECK-LABEL: @fdiv_constant_op1(
-; CHECK-NEXT: [[BO_SCALAR:%.*]] = fdiv double [[X:%.*]], 4.200000e+01
-; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> <double 0x7FF8000000000000, double undef>, double [[BO_SCALAR]], i64 0
-; CHECK-NEXT: ret <2 x double> [[BO]]
+; SSE-LABEL: @fdiv_constant_op1(
+; SSE-NEXT: [[BO_SCALAR:%.*]] = fdiv double [[X:%.*]], 4.200000e+01
+; SSE-NEXT: [[BO:%.*]] = insertelement <2 x double> <double 0x7FF8000000000000, double undef>, double [[BO_SCALAR]], i64 0
+; SSE-NEXT: ret <2 x double> [[BO]]
+;
+; AVX-LABEL: @fdiv_constant_op1(
+; AVX-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0
+; AVX-NEXT: [[BO:%.*]] = fdiv <2 x double> [[INS]], <double 4.200000e+01, double undef>
+; AVX-NEXT: ret <2 x double> [[BO]]
;
%ins = insertelement <2 x double> undef, double %x, i32 0
%bo = fdiv <2 x double> %ins, <double 42.0, double undef>
@@ -696,10 +691,15 @@ define <2 x double> @fdiv_constant_op1(double %x) {
}
define <2 x double> @fdiv_constant_op1_not_undef_lane(double %x) {
-; CHECK-LABEL: @fdiv_constant_op1_not_undef_lane(
-; CHECK-NEXT: [[BO_SCALAR:%.*]] = fdiv double [[X:%.*]], 4.200000e+01
-; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> splat (double 0x7FF8000000000000), double [[BO_SCALAR]], i64 0
-; CHECK-NEXT: ret <2 x double> [[BO]]
+; SSE-LABEL: @fdiv_constant_op1_not_undef_lane(
+; SSE-NEXT: [[BO_SCALAR:%.*]] = fdiv double [[X:%.*]], 4.200000e+01
+; SSE-NEXT: [[BO:%.*]] = insertelement <2 x double> splat (double 0x7FF8000000000000), double [[BO_SCALAR]], i64 0
+; SSE-NEXT: ret <2 x double> [[BO]]
+;
+; AVX-LABEL: @fdiv_constant_op1_not_undef_lane(
+; AVX-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0
+; AVX-NEXT: [[BO:%.*]] = fdiv <2 x double> [[INS]], <double 4.200000e+01, double -4.200000e+01>
+; AVX-NEXT: ret <2 x double> [[BO]]
;
%ins = insertelement <2 x double> undef, double %x, i32 0
%bo = fdiv <2 x double> %ins, <double 42.0, double -42.0>
diff --git a/llvm/test/Transforms/VectorCombine/X86/insert-binop.ll b/llvm/test/Transforms/VectorCombine/X86/insert-binop.ll
index 789ee7b3cdf0d..334ad2e1ed7cc 100644
--- a/llvm/test/Transforms/VectorCombine/X86/insert-binop.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/insert-binop.ll
@@ -8,16 +8,10 @@ declare void @usef(<4 x float>)
; Eliminating an insert is profitable.
define <16 x i8> @ins0_ins0_add(i8 %x, i8 %y) {
-; SSE-LABEL: @ins0_ins0_add(
-; SSE-NEXT: [[I0:%.*]] = insertelement <16 x i8> undef, i8 [[X:%.*]], i32 0
-; SSE-NEXT: [[I1:%.*]] = insertelement <16 x i8> undef, i8 [[Y:%.*]], i32 0
-; SSE-NEXT: [[R:%.*]] = add <16 x i8> [[I0]], [[I1]]
-; SSE-NEXT: ret <16 x i8> [[R]]
-;
-; AVX-LABEL: @ins0_ins0_add(
-; AVX-NEXT: [[R_SCALAR:%.*]] = add i8 [[X:%.*]], [[Y:%.*]]
-; AVX-NEXT: [[R:%.*]] = insertelement <16 x i8> undef, i8 [[R_SCALAR]], i64 0
-; AVX-NEXT: ret <16 x i8> [[R]]
+; CHECK-LABEL: @ins0_ins0_add(
+; CHECK-NEXT: [[R_SCALAR:%.*]] = add i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[R:%.*]] = insertelement <16 x i8> undef, i8 [[R_SCALAR]], i64 0
+; CHECK-NEXT: ret <16 x i8> [[R]]
;
%i0 = insertelement <16 x i8> undef, i8 %x, i32 0
%i1 = insertelement <16 x i8> undef, i8 %y, i32 0
diff --git a/llvm/test/Transforms/VectorCombine/X86/scalarize-cmp-inseltpoison.ll b/llvm/test/Transforms/VectorCombine/X86/scalarize-cmp-inseltpoison.ll
index adef56256bc7d..eac918a46474a 100644
--- a/llvm/test/Transforms/VectorCombine/X86/scalarize-cmp-inseltpoison.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/scalarize-cmp-inseltpoison.ll
@@ -8,16 +8,10 @@ declare void @usef(<4 x float>)
; Eliminating an insert is profitable.
define <16 x i1> @ins0_ins0_i8(i8 %x, i8 %y) {
-; SSE-LABEL: @ins0_ins0_i8(
-; SSE-NEXT: [[I0:%.*]] = insertelement <16 x i8> poison, i8 [[X:%.*]], i32 0
-; SSE-NEXT: [[I1:%.*]] = insertelement <16 x i8> poison, i8 [[Y:%.*]], i32 0
-; SSE-NEXT: [[R:%.*]] = icmp eq <16 x i8> [[I0]], [[I1]]
-; SSE-NEXT: ret <16 x i1> [[R]]
-;
-; AVX-LABEL: @ins0_ins0_i8(
-; AVX-NEXT: [[R_SCALAR:%.*]] = icmp eq i8 [[X:%.*]], [[Y:%.*]]
-; AVX-NEXT: [[R:%.*]] = insertelement <16 x i1> poison, i1 [[R_SCALAR]], i64 0
-; AVX-NEXT: ret <16 x i1> [[R]]
+; CHECK-LABEL: @ins0_ins0_i8(
+; CHECK-NEXT: [[R_SCALAR:%.*]] = icmp eq i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[R:%.*]] = insertelement <16 x i1> poison, i1 [[R_SCALAR]], i64 0
+; CHECK-NEXT: ret <16 x i1> [[R]]
;
%i0 = insertelement <16 x i8> poison, i8 %x, i32 0
%i1 = insertelement <16 x i8> poison, i8 %y, i32 0
@@ -248,15 +242,10 @@ define <2 x i1> @constant_op1_f64(double %x) {
}
define <4 x i1> @constant_op1_f32_not_undef_lane(float %x) {
-; SSE-LABEL: @constant_op1_f32_not_undef_lane(
-; SSE-NEXT: [[INS:%.*]] = insertelement <4 x float> poison, float [[X:%.*]], i32 0
-; SSE-NEXT: [[R:%.*]] = fcmp uge <4 x float> [[INS]], <float 4.200000e+01, float -4.200000e+01, float 0.000000e+00, float 1.000000e+00>
-; SSE-NEXT: ret <4 x i1> [[R]]
-;
-; AVX-LABEL: @constant_op1_f32_not_undef_lane(
-; AVX-NEXT: [[R_SCALAR:%.*]] = fcmp uge float [[X:%.*]], 4.200000e+01
-; AVX-NEXT: [[R:%.*]] = insertelement <4 x i1> poison, i1 [[R_SCALAR]], i64 0
-; AVX-NEXT: ret <4 x i1> [[R]]
+; CHECK-LABEL: @constant_op1_f32_not_undef_lane(
+; CHECK-NEXT: [[R_SCALAR:%.*]] = fcmp uge float [[X:%.*]], 4.200000e+01
+; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i1> poison, i1 [[R_SCALAR]], i64 0
+; CHECK-NEXT: ret <4 x i1> [[R]]
;
%ins = insertelement <4 x float> poison, float %x, i32 0
%r = fcmp uge <4 x float> %ins, <float 42.0, float -42.0, float 0.0, float 1.0>
@@ -296,15 +285,10 @@ define <4 x float> @vec_select_use2(<4 x float> %x, <4 x float> %y, float %a) {
}
define <4 x i1> @vector_of_pointers(ptr %t1) {
-; SSE-LABEL: @vector_of_pointers(
-; SSE-NEXT: [[T5:%.*]] = insertelement <4 x ptr> poison, ptr [[T1:%.*]], i32 0
-; SSE-NEXT: [[T6:%.*]] = icmp ne <4 x ptr> [[T5]], zeroinitializer
-; SSE-NEXT: ret <4 x i1> [[T6]]
-;
-; AVX-LABEL: @vector_of_pointers(
-; AVX-NEXT: [[T6_SCALAR:%.*]] = icmp ne ptr [[T1:%.*]], null
-; AVX-NEXT: [[T6:%.*]] = insertelement <4 x i1> poison, i1 [[T6_SCALAR]], i64 0
-; AVX-NEXT: ret <4 x i1> [[T6]]
+; CHECK-LABEL: @vector_of_pointers(
+; CHECK-NEXT: [[T6_SCALAR:%.*]] = icmp ne ptr [[T1:%.*]], null
+; CHECK-NEXT: [[T6:%.*]] = insertelement <4 x i1> poison, i1 [[T6_SCALAR]], i64 0
+; CHECK-NEXT: ret <4 x i1> [[T6]]
;
%t5 = insertelement <4 x ptr> poison, ptr %t1, i32 0
%t6 = icmp ne <4 x ptr> %t5, zeroinitializer
diff --git a/llvm/test/Transforms/VectorCombine/X86/scalarize-cmp.ll b/llvm/test/Transforms/VectorCombine/X86/scalarize-cmp.ll
index 0c585f20470c7..22c1c57d71e98 100644
--- a/llvm/test/Transforms/VectorCombine/X86/scalarize-cmp.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/scalarize-cmp.ll
@@ -8,16 +8,10 @@ declare void @usef(<4 x float>)
; Eliminating an insert is profitable.
define <16 x i1> @ins0_ins0_i8(i8 %x, i8 %y) {
-; SSE-LABEL: @ins0_ins0_i8(
-; SSE-NEXT: [[I0:%.*]] = insertelement <16 x i8> undef, i8 [[X:%.*]], i32 0
-; SSE-NEXT: [[I1:%.*]] = insertelement <16 x i8> undef, i8 [[Y:%.*]], i32 0
-; SSE-NEXT: [[R:%.*]] = icmp eq <16 x i8> [[I0]], [[I1]]
-; SSE-NEXT: ret <16 x i1> [[R]]
-;
-; AVX-LABEL: @ins0_ins0_i8(
-; AVX-NEXT: [[R_SCALAR:%.*]] = icmp eq i8 [[X:%.*]], [[Y:%.*]]
-; AVX-NEXT: [[R:%.*]] = insertelement <16 x i1> undef, i1 [[R_SCALAR]], i64 0
-; AVX-NEXT: ret <16 x i1> [[R]]
+; CHECK-LABEL: @ins0_ins0_i8(
+; CHECK-NEXT: [[R_SCALAR:%.*]] = icmp eq i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[R:%.*]] = insertelement <16 x i1> undef, i1 [[R_SCALAR]], i64 0
+; CHECK-NEXT: ret <16 x i1> [[R]]
;
%i0 = insertelement <16 x i8> undef, i8 %x, i32 0
%i1 = insertelement <16 x i8> undef, i8 %y, i32 0
@@ -296,15 +290,10 @@ define <4 x float> @vec_select_use2(<4 x float> %x, <4 x float> %y, float %a) {
}
define <4 x i1> @vector_of_pointers(ptr %t1) {
-; SSE-LABEL: @vector_of_pointers(
-; SSE-NEXT: [[T5:%.*]] = insertelement <4 x ptr> undef, ptr [[T1:%.*]], i32 0
-; SSE-NEXT: [[T6:%.*]] = icmp ne <4 x ptr> [[T5]], zeroinitializer
-; SSE-NEXT: ret <4 x i1> [[T6]]
-;
-; AVX-LABEL: @vector_of_pointers(
-; AVX-NEXT: [[T6_SCALAR:%.*]] = icmp ne ptr [[T1:%.*]], null
-; AVX-NEXT: [[T6:%.*]] = insertelement <4 x i1> undef, i1 [[T6_SCALAR]], i64 0
-; AVX-NEXT: ret <4 x i1> [[T6]]
+; CHECK-LABEL: @vector_of_pointers(
+; CHECK-NEXT: [[T6_SCALAR:%.*]] = icmp ne ptr [[T1:%.*]], null
+; CHECK-NEXT: [[T6:%.*]] = insertelement <4 x i1> undef, i1 [[T6_SCALAR]], i64 0
+; CHECK-NEXT: ret <4 x i1> [[T6]]
;
%t5 = insertelement <4 x ptr> undef, ptr %t1, i32 0
%t6 = icmp ne <4 x ptr> %t5, zeroinitializer
From 33afffecb186d66a043a301bb052295696d7231f Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Wed, 7 May 2025 19:01:17 +0800
Subject: [PATCH 4/7] Add RISC-V test case
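Now that the load bail-out is gone, check that a binop whose inserted
operand comes from a load is scalarized on RISC-V. A sketch of the
expected transform (element 0, mirroring the CHECK lines in the test
below):
  %x = load i32, ptr %p
  %ins = insertelement <4 x i32> poison, i32 %x, i32 0
  %v = add <4 x i32> %ins, splat (i32 42)
    -->
  %x = load i32, ptr %p
  %v.scalar = add i32 %x, 42
  %v = insertelement <4 x i32> poison, i32 %v.scalar, i64 0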
---
.../VectorCombine/RISCV/binop-scalarize.ll | 16 ++++++++++++++++
1 file changed, 16 insertions(+)
create mode 100644 llvm/test/Transforms/VectorCombine/RISCV/binop-scalarize.ll
diff --git a/llvm/test/Transforms/VectorCombine/RISCV/binop-scalarize.ll b/llvm/test/Transforms/VectorCombine/RISCV/binop-scalarize.ll
new file mode 100644
index 0000000000000..ec4f6cc7520d1
--- /dev/null
+++ b/llvm/test/Transforms/VectorCombine/RISCV/binop-scalarize.ll
@@ -0,0 +1,16 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -S -p vector-combine -mtriple=riscv64 -mattr=+v | FileCheck %s
+
+define <4 x i32> @add_constant_load(ptr %p) {
+; CHECK-LABEL: define <4 x i32> @add_constant_load(
+; CHECK-SAME: ptr [[P:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[X:%.*]] = load i32, ptr [[P]], align 4
+; CHECK-NEXT: [[V_SCALAR:%.*]] = add i32 [[X]], 42
+; CHECK-NEXT: [[V:%.*]] = insertelement <4 x i32> poison, i32 [[V_SCALAR]], i64 0
+; CHECK-NEXT: ret <4 x i32> [[V]]
+;
+ %x = load i32, ptr %p
+ %ins = insertelement <4 x i32> poison, i32 %x, i32 0
+ %v = add <4 x i32> %ins, splat (i32 42)
+ ret <4 x i32> %v
+}
From 269892afc31d980a3f8d9d3d78bb7222fee16045 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Wed, 7 May 2025 20:22:20 +0800
Subject: [PATCH 5/7] Remove negative test comments
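A load no longer categorically prevents the transform; whether to
scalarize is now a cost-model decision, so the "negative test" comments
are stale.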
---
.../Transforms/VectorCombine/X86/scalarize-cmp-inseltpoison.ll | 2 --
llvm/test/Transforms/VectorCombine/X86/scalarize-cmp.ll | 2 --
2 files changed, 4 deletions(-)
diff --git a/llvm/test/Transforms/VectorCombine/X86/scalarize-cmp-inseltpoison.ll b/llvm/test/Transforms/VectorCombine/X86/scalarize-cmp-inseltpoison.ll
index eac918a46474a..2c0b38a303aa2 100644
--- a/llvm/test/Transforms/VectorCombine/X86/scalarize-cmp-inseltpoison.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/scalarize-cmp-inseltpoison.ll
@@ -165,8 +165,6 @@ define <2 x i1> @constant_op1_i64_not_undef_lane(i64 %x) {
ret <2 x i1> %r
}
-; negative test - load prevents the transform
-
define <2 x i1> @constant_op1_i64_load(ptr %p) {
; SSE-LABEL: @constant_op1_i64_load(
; SSE-NEXT: [[LD:%.*]] = load i64, ptr [[P:%.*]], align 8
diff --git a/llvm/test/Transforms/VectorCombine/X86/scalarize-cmp.ll b/llvm/test/Transforms/VectorCombine/X86/scalarize-cmp.ll
index 22c1c57d71e98..3c511e6ecd550 100644
--- a/llvm/test/Transforms/VectorCombine/X86/scalarize-cmp.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/scalarize-cmp.ll
@@ -165,8 +165,6 @@ define <2 x i1> @constant_op1_i64_not_undef_lane(i64 %x) {
ret <2 x i1> %r
}
-; negative test - load prevents the transform
-
define <2 x i1> @constant_op1_i64_load(ptr %p) {
; SSE-LABEL: @constant_op1_i64_load(
; SSE-NEXT: [[LD:%.*]] = load i64, ptr [[P:%.*]], align 8
From 202eea6c6af8e822b37eb77473307c39107b18f2 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Fri, 23 May 2025 12:54:25 +0100
Subject: [PATCH 6/7] Precommit tests
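Add tests for a unary intrinsic (fabs), a ternary intrinsic (fma) and
the fneg unary operator, each in fixed and scalable variants. The
checks currently show no scalarization taking place; they are updated
by the next patch.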
---
.../VectorCombine/intrinsic-scalarize.ll | 56 +++++++++++++++++++
.../VectorCombine/unary-op-scalarize.ll | 26 +++++++++
2 files changed, 82 insertions(+)
create mode 100644 llvm/test/Transforms/VectorCombine/unary-op-scalarize.ll
diff --git a/llvm/test/Transforms/VectorCombine/intrinsic-scalarize.ll b/llvm/test/Transforms/VectorCombine/intrinsic-scalarize.ll
index e7683d72a052d..2a2e37e0ab54b 100644
--- a/llvm/test/Transforms/VectorCombine/intrinsic-scalarize.ll
+++ b/llvm/test/Transforms/VectorCombine/intrinsic-scalarize.ll
@@ -96,6 +96,62 @@ define <4 x i32> @non_trivially_vectorizable(i32 %x, i32 %y) {
ret <4 x i32> %v
}
+define <4 x float> @fabs_fixed(float %x) {
+; CHECK-LABEL: define <4 x float> @fabs_fixed(
+; CHECK-SAME: float [[X:%.*]]) {
+; CHECK-NEXT: [[X_INSERT:%.*]] = insertelement <4 x float> poison, float [[X]], i32 0
+; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[X_INSERT]])
+; CHECK-NEXT: ret <4 x float> [[V]]
+;
+ %x.insert = insertelement <4 x float> poison, float %x, i32 0
+ %v = call <4 x float> @llvm.fabs(<4 x float> %x.insert)
+ ret <4 x float> %v
+}
+
+define <vscale x 4 x float> @fabs_scalable(float %x) {
+; CHECK-LABEL: define <vscale x 4 x float> @fabs_scalable(
+; CHECK-SAME: float [[X:%.*]]) {
+; CHECK-NEXT: [[X_INSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[X]], i32 0
+; CHECK-NEXT: [[V:%.*]] = call <vscale x 4 x float> @llvm.fabs.nxv4f32(<vscale x 4 x float> [[X_INSERT]])
+; CHECK-NEXT: ret <vscale x 4 x float> [[V]]
+;
+ %x.insert = insertelement <vscale x 4 x float> poison, float %x, i32 0
+ %v = call <vscale x 4 x float> @llvm.fabs(<vscale x 4 x float> %x.insert)
+ ret <vscale x 4 x float> %v
+}
+
+define <4 x float> @fma_fixed(float %x, float %y, float %z) {
+; CHECK-LABEL: define <4 x float> @fma_fixed(
+; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]]) {
+; CHECK-NEXT: [[X_INSERT:%.*]] = insertelement <4 x float> poison, float [[X]], i32 0
+; CHECK-NEXT: [[Y_INSERT:%.*]] = insertelement <4 x float> poison, float [[Y]], i32 0
+; CHECK-NEXT: [[Z_INSERT:%.*]] = insertelement <4 x float> poison, float [[Z]], i32 0
+; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[X_INSERT]], <4 x float> [[Y_INSERT]], <4 x float> [[Z_INSERT]])
+; CHECK-NEXT: ret <4 x float> [[V]]
+;
+ %x.insert = insertelement <4 x float> poison, float %x, i32 0
+ %y.insert = insertelement <4 x float> poison, float %y, i32 0
+ %z.insert = insertelement <4 x float> poison, float %z, i32 0
+ %v = call <4 x float> @llvm.fma(<4 x float> %x.insert, <4 x float> %y.insert, <4 x float> %z.insert)
+ ret <4 x float> %v
+}
+
+define <vscale x 4 x float> @fma_scalable(float %x, float %y, float %z) {
+; CHECK-LABEL: define <vscale x 4 x float> @fma_scalable(
+; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]]) {
+; CHECK-NEXT: [[X_INSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[X]], i32 0
+; CHECK-NEXT: [[Y_INSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[Y]], i32 0
+; CHECK-NEXT: [[Z_INSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[Z]], i32 0
+; CHECK-NEXT: [[V:%.*]] = call <vscale x 4 x float> @llvm.fma.nxv4f32(<vscale x 4 x float> [[X_INSERT]], <vscale x 4 x float> [[Y_INSERT]], <vscale x 4 x float> [[Z_INSERT]])
+; CHECK-NEXT: ret <vscale x 4 x float> [[V]]
+;
+ %x.insert = insertelement <vscale x 4 x float> poison, float %x, i32 0
+ %y.insert = insertelement <vscale x 4 x float> poison, float %y, i32 0
+ %z.insert = insertelement <vscale x 4 x float> poison, float %z, i32 0
+ %v = call <vscale x 4 x float> @llvm.fma(<vscale x 4 x float> %x.insert, <vscale x 4 x float> %y.insert, <vscale x 4 x float> %z.insert)
+ ret <vscale x 4 x float> %v
+}
+
; TODO: We should be able to scalarize this if we preserve the scalar argument.
define <4 x float> @scalar_argument(float %x) {
; CHECK-LABEL: define <4 x float> @scalar_argument(
diff --git a/llvm/test/Transforms/VectorCombine/unary-op-scalarize.ll b/llvm/test/Transforms/VectorCombine/unary-op-scalarize.ll
new file mode 100644
index 0000000000000..fd40b15706afb
--- /dev/null
+++ b/llvm/test/Transforms/VectorCombine/unary-op-scalarize.ll
@@ -0,0 +1,26 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -S -p vector-combine | FileCheck %s
+
+define <4 x float> @fneg_fixed(float %x) {
+; CHECK-LABEL: define <4 x float> @fneg_fixed(
+; CHECK-SAME: float [[X:%.*]]) {
+; CHECK-NEXT: [[X_INSERT:%.*]] = insertelement <4 x float> poison, float [[X]], i32 0
+; CHECK-NEXT: [[V:%.*]] = fneg <4 x float> [[X_INSERT]]
+; CHECK-NEXT: ret <4 x float> [[V]]
+;
+ %x.insert = insertelement <4 x float> poison, float %x, i32 0
+ %v = fneg <4 x float> %x.insert
+ ret <4 x float> %v
+}
+
+define <vscale x 4 x float> @fneg_scalable(float %x) {
+; CHECK-LABEL: define <vscale x 4 x float> @fneg_scalable(
+; CHECK-SAME: float [[X:%.*]]) {
+; CHECK-NEXT: [[X_INSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[X]], i32 0
+; CHECK-NEXT: [[V:%.*]] = fneg <vscale x 4 x float> [[X_INSERT]]
+; CHECK-NEXT: ret <vscale x 4 x float> [[V]]
+;
+ %x.insert = insertelement <vscale x 4 x float> poison, float %x, i32 0
+ %v = fneg <vscale x 4 x float> %x.insert
+ ret <vscale x 4 x float> %v
+}
From 8e12a1fe18dd23ce06de9296b888477cfc4bbf9b Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Fri, 23 May 2025 12:54:59 +0100
Subject: [PATCH 7/7] [VectorCombine] Scalarize nary ops and intrinsics
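Generalize scalarizeBinopOrCmp to scalarizeOpOrCmp: handle unary and
binary operators, compares, and trivially vectorizable intrinsics of
any arity. Each operand must be either a constant vector or a scalar
inserted into a constant vector, and all inserts must share the same
index. The constant operands are folded into a new base vector and the
scalar op is inserted into it, e.g. for fma (a sketch, mirroring the
updated test checks below):
  %v.scalar = call float @llvm.fma.f32(float %x, float %y, float %z)
  %base = call <4 x float> @llvm.fma.v4f32(<4 x float> poison, <4 x float> poison, <4 x float> poison)
  %v = insertelement <4 x float> %base, float %v.scalar, i64 0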
---
.../Transforms/Vectorize/VectorCombine.cpp | 190 +++++++++---------
.../VectorCombine/intrinsic-scalarize.ll | 24 +--
.../VectorCombine/unary-op-scalarize.ll | 8 +-
3 files changed, 114 insertions(+), 108 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 4413284aa3c2a..bf33292544497 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -47,7 +47,7 @@ STATISTIC(NumVecCmp, "Number of vector compares formed");
STATISTIC(NumVecBO, "Number of vector binops formed");
STATISTIC(NumVecCmpBO, "Number of vector compare + binop formed");
STATISTIC(NumShufOfBitcast, "Number of shuffles moved after bitcast");
-STATISTIC(NumScalarBO, "Number of scalar binops formed");
+STATISTIC(NumScalarOps, "Number of scalar unary + binary ops formed");
STATISTIC(NumScalarCmp, "Number of scalar compares formed");
STATISTIC(NumScalarIntrinsic, "Number of scalar intrinsic calls formed");
@@ -114,7 +114,7 @@ class VectorCombine {
bool foldInsExtBinop(Instruction &I);
bool foldInsExtVectorToShuffle(Instruction &I);
bool foldBitcastShuffle(Instruction &I);
- bool scalarizeBinopOrCmp(Instruction &I);
+ bool scalarizeOpOrCmp(Instruction &I);
bool scalarizeVPIntrinsic(Instruction &I);
bool foldExtractedCmps(Instruction &I);
bool foldBinopOfReductions(Instruction &I);
@@ -1018,28 +1018,20 @@ bool VectorCombine::scalarizeVPIntrinsic(Instruction &I) {
return true;
}
-/// Match a vector binop, compare or binop-like intrinsic with at least one
-/// inserted scalar operand and convert to scalar binop/cmp/intrinsic followed
+/// Match a vector op/compare/intrinsic with at least one
+/// inserted scalar operand and convert to scalar op/cmp/intrinsic followed
/// by insertelement.
-bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
- CmpPredicate Pred = CmpInst::BAD_ICMP_PREDICATE;
- Value *Ins0, *Ins1;
- if (!match(&I, m_BinOp(m_Value(Ins0), m_Value(Ins1))) &&
- !match(&I, m_Cmp(Pred, m_Value(Ins0), m_Value(Ins1)))) {
- // TODO: Allow unary and ternary intrinsics
- // TODO: Allow intrinsics with different argument types
- // TODO: Allow intrinsics with scalar arguments
- if (auto *II = dyn_cast<IntrinsicInst>(&I);
- II && II->arg_size() == 2 &&
- isTriviallyVectorizable(II->getIntrinsicID()) &&
- all_of(II->args(),
- [&II](Value *Arg) { return Arg->getType() == II->getType(); })) {
- Ins0 = II->getArgOperand(0);
- Ins1 = II->getArgOperand(1);
- } else {
+bool VectorCombine::scalarizeOpOrCmp(Instruction &I) {
+ if (!isa<UnaryOperator, BinaryOperator, CmpInst, IntrinsicInst>(I))
+ return false;
+
+ // TODO: Allow intrinsics with different argument types
+ // TODO: Allow intrinsics with scalar arguments
+ if (auto *II = dyn_cast<IntrinsicInst>(&I))
+ if (!isTriviallyVectorizable(II->getIntrinsicID()) ||
+ !all_of(II->args(),
+ [&II](Value *Arg) { return Arg->getType() == II->getType(); }))
return false;
- }
- }
// Do not convert the vector condition of a vector select into a scalar
// condition. That may cause problems for codegen because of differences in
@@ -1050,42 +1042,47 @@ bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
if (match(U, m_Select(m_Specific(&I), m_Value(), m_Value())))
return false;
- // Match against one or both scalar values being inserted into constant
- // vectors:
- // vec_op VecC0, (inselt VecC1, V1, Index)
- // vec_op (inselt VecC0, V0, Index), VecC1
- // vec_op (inselt VecC0, V0, Index), (inselt VecC1, V1, Index)
- // TODO: Deal with mismatched index constants and variable indexes?
- Constant *VecC0 = nullptr, *VecC1 = nullptr;
- Value *V0 = nullptr, *V1 = nullptr;
- uint64_t Index0 = 0, Index1 = 0;
- if (!match(Ins0, m_InsertElt(m_Constant(VecC0), m_Value(V0),
- m_ConstantInt(Index0))) &&
- !match(Ins0, m_Constant(VecC0)))
- return false;
- if (!match(Ins1, m_InsertElt(m_Constant(VecC1), m_Value(V1),
- m_ConstantInt(Index1))) &&
- !match(Ins1, m_Constant(VecC1)))
- return false;
-
- bool IsConst0 = !V0;
- bool IsConst1 = !V1;
- if (IsConst0 && IsConst1)
- return false;
- if (!IsConst0 && !IsConst1 && Index0 != Index1)
- return false;
+ // Match constant vectors or scalars being inserted into constant vectors:
+ // vec_op [VecC0 | (inselt VecC0, V0, Index)], ...
+ SmallVector<Constant *> VecCs;
+ SmallVector<Value *> ScalarOps;
+ std::optional<uint64_t> Index;
+
+ auto Ops = isa<IntrinsicInst>(I) ? cast<IntrinsicInst>(I).args()
+ : I.operand_values();
+ for (Value *Op : Ops) {
+ Constant *VecC;
+ Value *V;
+ uint64_t InsIdx = 0;
+ VectorType *OpTy = cast<VectorType>(Op->getType());
+ if (match(Op, m_InsertElt(m_Constant(VecC), m_Value(V),
+ m_ConstantInt(InsIdx)))) {
+ // Bail if any inserts are out of bounds.
+ if (OpTy->getElementCount().getKnownMinValue() <= InsIdx)
+ return false;
+ // All inserts must have the same index.
+ // TODO: Deal with mismatched index constants and variable indexes?
+ if (!Index)
+ Index = InsIdx;
+ else if (InsIdx != *Index)
+ return false;
+ VecCs.push_back(VecC);
+ ScalarOps.push_back(V);
+ } else if (match(Op, m_Constant(VecC))) {
+ VecCs.push_back(VecC);
+ ScalarOps.push_back(nullptr);
+ } else {
+ return false;
+ }
+ }
- auto *VecTy0 = cast<VectorType>(Ins0->getType());
- auto *VecTy1 = cast<VectorType>(Ins1->getType());
- if (VecTy0->getElementCount().getKnownMinValue() <= Index0 ||
- VecTy1->getElementCount().getKnownMinValue() <= Index1)
+ // Bail if all operands are constant.
+ if (!Index.has_value())
return false;
- uint64_t Index = IsConst0 ? Index1 : Index0;
- Type *ScalarTy = IsConst0 ? V1->getType() : V0->getType();
- Type *VecTy = I.getType();
+ VectorType *VecTy = cast<VectorType>(I.getType());
+ Type *ScalarTy = VecTy->getScalarType();
assert(VecTy->isVectorTy() &&
- (IsConst0 || IsConst1 || V0->getType() == V1->getType()) &&
(ScalarTy->isIntegerTy() || ScalarTy->isFloatingPointTy() ||
ScalarTy->isPointerTy()) &&
"Unexpected types for insert element into binop or cmp");
@@ -1098,7 +1095,7 @@ bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
Opcode, ScalarTy, CmpInst::makeCmpResultType(ScalarTy), Pred, CostKind);
VectorOpCost = TTI.getCmpSelInstrCost(
Opcode, VecTy, CmpInst::makeCmpResultType(VecTy), Pred, CostKind);
- } else if (isa<BinaryOperator>(I)) {
+ } else if (isa<UnaryOperator, BinaryOperator>(I)) {
ScalarOpCost = TTI.getArithmeticInstrCost(Opcode, ScalarTy, CostKind);
VectorOpCost = TTI.getArithmeticInstrCost(Opcode, VecTy, CostKind);
} else {
@@ -1115,29 +1112,36 @@ bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
// Fold the vector constants in the original vectors into a new base vector to
// get more accurate cost modelling.
- Value *NewVecC;
- if (isa<CmpInst>(I))
- NewVecC = ConstantFoldCompareInstOperands(Pred, VecC0, VecC1, *DL);
+ Value *NewVecC = nullptr;
+ if (auto *CI = dyn_cast<CmpInst>(&I))
+ NewVecC = ConstantFoldCompareInstOperands(CI->getPredicate(), VecCs[0],
+ VecCs[1], *DL);
+ else if (isa<UnaryOperator>(I))
+ NewVecC = ConstantFoldUnaryOpOperand((Instruction::UnaryOps)Opcode,
+ VecCs[0], *DL);
else if (isa<BinaryOperator>(I))
NewVecC = ConstantFoldBinaryOpOperands((Instruction::BinaryOps)Opcode,
- VecC0, VecC1, *DL);
- else
- NewVecC = ConstantFoldBinaryIntrinsic(
- cast<IntrinsicInst>(I).getIntrinsicID(), VecC0, VecC1, I.getType(), &I);
+ VecCs[0], VecCs[1], *DL);
+ else if (isa<IntrinsicInst>(I) && cast<IntrinsicInst>(I).arg_size() == 2)
+ NewVecC =
+ ConstantFoldBinaryIntrinsic(cast<IntrinsicInst>(I).getIntrinsicID(),
+ VecCs[0], VecCs[1], I.getType(), &I);
// Get cost estimate for the insert element. This cost will factor into
// both sequences.
- InstructionCost InsertCostNewVecC = TTI.getVectorInstrCost(
- Instruction::InsertElement, VecTy, CostKind, Index, NewVecC);
- InstructionCost InsertCostV0 = TTI.getVectorInstrCost(
- Instruction::InsertElement, VecTy, CostKind, Index, VecC0, V0);
- InstructionCost InsertCostV1 = TTI.getVectorInstrCost(
- Instruction::InsertElement, VecTy, CostKind, Index, VecC1, V1);
- InstructionCost OldCost = (IsConst0 ? 0 : InsertCostV0) +
- (IsConst1 ? 0 : InsertCostV1) + VectorOpCost;
- InstructionCost NewCost = ScalarOpCost + InsertCostNewVecC +
- (IsConst0 ? 0 : !Ins0->hasOneUse() * InsertCostV0) +
- (IsConst1 ? 0 : !Ins1->hasOneUse() * InsertCostV1);
+ InstructionCost OldCost = VectorOpCost;
+ InstructionCost NewCost =
+ ScalarOpCost + TTI.getVectorInstrCost(Instruction::InsertElement, VecTy,
+ CostKind, *Index, NewVecC);
+ for (auto [Op, VecC, Scalar] : zip(Ops, VecCs, ScalarOps)) {
+ if (!Scalar)
+ continue;
+ InstructionCost InsertCost = TTI.getVectorInstrCost(
+ Instruction::InsertElement, VecTy, CostKind, *Index, VecC, Scalar);
+ OldCost += InsertCost;
+ NewCost += !Op->hasOneUse() * InsertCost;
+ }
+
// We want to scalarize unless the vector variant actually has lower cost.
if (OldCost < NewCost || !NewCost.isValid())
return false;
@@ -1146,25 +1150,25 @@ bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
// inselt NewVecC, (scalar_op V0, V1), Index
if (isa<CmpInst>(I))
++NumScalarCmp;
- else if (isa<BinaryOperator>(I))
- ++NumScalarBO;
+ else if (isa<UnaryOperator, BinaryOperator>(I))
+ ++NumScalarOps;
else if (isa<IntrinsicInst>(I))
++NumScalarIntrinsic;
// For constant cases, extract the scalar element, this should constant fold.
- if (IsConst0)
- V0 = ConstantExpr::getExtractElement(VecC0, Builder.getInt64(Index));
- if (IsConst1)
- V1 = ConstantExpr::getExtractElement(VecC1, Builder.getInt64(Index));
+ for (auto [OpIdx, Scalar, VecC] : enumerate(ScalarOps, VecCs))
+ if (!Scalar)
+ ScalarOps[OpIdx] = ConstantExpr::getExtractElement(
+ cast<Constant>(VecC), Builder.getInt64(*Index));
Value *Scalar;
- if (isa<CmpInst>(I))
- Scalar = Builder.CreateCmp(Pred, V0, V1);
- else if (isa<BinaryOperator>(I))
- Scalar = Builder.CreateBinOp((Instruction::BinaryOps)Opcode, V0, V1);
+ if (auto *CI = dyn_cast<CmpInst>(&I))
+ Scalar = Builder.CreateCmp(CI->getPredicate(), ScalarOps[0], ScalarOps[1]);
+ else if (isa<UnaryOperator, BinaryOperator>(I))
+ Scalar = Builder.CreateNAryOp(Opcode, ScalarOps);
else
Scalar = Builder.CreateIntrinsic(
- ScalarTy, cast<IntrinsicInst>(I).getIntrinsicID(), {V0, V1});
+ ScalarTy, cast<IntrinsicInst>(I).getIntrinsicID(), ScalarOps);
Scalar->setName(I.getName() + ".scalar");
@@ -1175,16 +1179,18 @@ bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
// Create a new base vector if the constant folding failed.
if (!NewVecC) {
- if (isa<CmpInst>(I))
- NewVecC = Builder.CreateCmp(Pred, VecC0, VecC1);
- else if (isa<BinaryOperator>(I))
- NewVecC =
- Builder.CreateBinOp((Instruction::BinaryOps)Opcode, VecC0, VecC1);
+ SmallVector<Value *> VecCValues;
+ VecCValues.reserve(VecCs.size());
+ append_range(VecCValues, VecCs);
+ if (auto *CI = dyn_cast<CmpInst>(&I))
+ NewVecC = Builder.CreateCmp(CI->getPredicate(), VecCs[0], VecCs[1]);
+ else if (isa<UnaryOperator, BinaryOperator>(I))
+ NewVecC = Builder.CreateNAryOp(Opcode, VecCValues);
else
NewVecC = Builder.CreateIntrinsic(
- VecTy, cast<IntrinsicInst>(I).getIntrinsicID(), {VecC0, VecC1});
+ VecTy, cast<IntrinsicInst>(I).getIntrinsicID(), VecCValues);
}
- Value *Insert = Builder.CreateInsertElement(NewVecC, Scalar, Index);
+ Value *Insert = Builder.CreateInsertElement(NewVecC, Scalar, *Index);
replaceValue(I, *Insert);
return true;
}
@@ -3570,7 +3576,7 @@ bool VectorCombine::run() {
// This transform works with scalable and fixed vectors
// TODO: Identify and allow other scalable transforms
if (IsVectorType) {
- MadeChange |= scalarizeBinopOrCmp(I);
+ MadeChange |= scalarizeOpOrCmp(I);
MadeChange |= scalarizeLoadExtract(I);
MadeChange |= scalarizeVPIntrinsic(I);
MadeChange |= foldInterleaveIntrinsics(I);
diff --git a/llvm/test/Transforms/VectorCombine/intrinsic-scalarize.ll b/llvm/test/Transforms/VectorCombine/intrinsic-scalarize.ll
index 2a2e37e0ab54b..58b7f8de004d0 100644
--- a/llvm/test/Transforms/VectorCombine/intrinsic-scalarize.ll
+++ b/llvm/test/Transforms/VectorCombine/intrinsic-scalarize.ll
@@ -99,8 +99,9 @@ define <4 x i32> @non_trivially_vectorizable(i32 %x, i32 %y) {
define <4 x float> @fabs_fixed(float %x) {
; CHECK-LABEL: define <4 x float> @fabs_fixed(
; CHECK-SAME: float [[X:%.*]]) {
-; CHECK-NEXT: [[X_INSERT:%.*]] = insertelement <4 x float> poison, float [[X]], i32 0
-; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[X_INSERT]])
+; CHECK-NEXT: [[V_SCALAR:%.*]] = call float @llvm.fabs.f32(float [[X]])
+; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> poison)
+; CHECK-NEXT: [[V:%.*]] = insertelement <4 x float> [[TMP1]], float [[V_SCALAR]], i64 0
; CHECK-NEXT: ret <4 x float> [[V]]
;
%x.insert = insertelement <4 x float> poison, float %x, i32 0
@@ -111,8 +112,9 @@ define <4 x float> @fabs_fixed(float %x) {
define <vscale x 4 x float> @fabs_scalable(float %x) {
; CHECK-LABEL: define <vscale x 4 x float> @fabs_scalable(
; CHECK-SAME: float [[X:%.*]]) {
-; CHECK-NEXT: [[X_INSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[X]], i32 0
-; CHECK-NEXT: [[V:%.*]] = call <vscale x 4 x float> @llvm.fabs.nxv4f32(<vscale x 4 x float> [[X_INSERT]])
+; CHECK-NEXT: [[V_SCALAR:%.*]] = call float @llvm.fabs.f32(float [[X]])
+; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x float> @llvm.fabs.nxv4f32(<vscale x 4 x float> poison)
+; CHECK-NEXT: [[V:%.*]] = insertelement <vscale x 4 x float> [[TMP1]], float [[V_SCALAR]], i64 0
; CHECK-NEXT: ret <vscale x 4 x float> [[V]]
;
%x.insert = insertelement <vscale x 4 x float> poison, float %x, i32 0
@@ -123,10 +125,9 @@ define <vscale x 4 x float> @fabs_scalable(float %x) {
define <4 x float> @fma_fixed(float %x, float %y, float %z) {
; CHECK-LABEL: define <4 x float> @fma_fixed(
; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]]) {
-; CHECK-NEXT: [[X_INSERT:%.*]] = insertelement <4 x float> poison, float [[X]], i32 0
-; CHECK-NEXT: [[Y_INSERT:%.*]] = insertelement <4 x float> poison, float [[Y]], i32 0
-; CHECK-NEXT: [[Z_INSERT:%.*]] = insertelement <4 x float> poison, float [[Z]], i32 0
-; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[X_INSERT]], <4 x float> [[Y_INSERT]], <4 x float> [[Z_INSERT]])
+; CHECK-NEXT: [[V_SCALAR:%.*]] = call float @llvm.fma.f32(float [[X]], float [[Y]], float [[Z]])
+; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> poison, <4 x float> poison, <4 x float> poison)
+; CHECK-NEXT: [[V:%.*]] = insertelement <4 x float> [[TMP1]], float [[V_SCALAR]], i64 0
; CHECK-NEXT: ret <4 x float> [[V]]
;
%x.insert = insertelement <4 x float> poison, float %x, i32 0
@@ -139,10 +140,9 @@ define <4 x float> @fma_fixed(float %x, float %y, float %z) {
define <vscale x 4 x float> @fma_scalable(float %x, float %y, float %z) {
; CHECK-LABEL: define <vscale x 4 x float> @fma_scalable(
; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]]) {
-; CHECK-NEXT: [[X_INSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[X]], i32 0
-; CHECK-NEXT: [[Y_INSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[Y]], i32 0
-; CHECK-NEXT: [[Z_INSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[Z]], i32 0
-; CHECK-NEXT: [[V:%.*]] = call <vscale x 4 x float> @llvm.fma.nxv4f32(<vscale x 4 x float> [[X_INSERT]], <vscale x 4 x float> [[Y_INSERT]], <vscale x 4 x float> [[Z_INSERT]])
+; CHECK-NEXT: [[V_SCALAR:%.*]] = call float @llvm.fma.f32(float [[X]], float [[Y]], float [[Z]])
+; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x float> @llvm.fma.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> poison, <vscale x 4 x float> poison)
+; CHECK-NEXT: [[V:%.*]] = insertelement <vscale x 4 x float> [[TMP1]], float [[V_SCALAR]], i64 0
; CHECK-NEXT: ret <vscale x 4 x float> [[V]]
;
%x.insert = insertelement <vscale x 4 x float> poison, float %x, i32 0
diff --git a/llvm/test/Transforms/VectorCombine/unary-op-scalarize.ll b/llvm/test/Transforms/VectorCombine/unary-op-scalarize.ll
index fd40b15706afb..45d53c84c870d 100644
--- a/llvm/test/Transforms/VectorCombine/unary-op-scalarize.ll
+++ b/llvm/test/Transforms/VectorCombine/unary-op-scalarize.ll
@@ -4,8 +4,8 @@
define <4 x float> @fneg_fixed(float %x) {
; CHECK-LABEL: define <4 x float> @fneg_fixed(
; CHECK-SAME: float [[X:%.*]]) {
-; CHECK-NEXT: [[X_INSERT:%.*]] = insertelement <4 x float> poison, float [[X]], i32 0
-; CHECK-NEXT: [[V:%.*]] = fneg <4 x float> [[X_INSERT]]
+; CHECK-NEXT: [[V_SCALAR:%.*]] = fneg float [[X]]
+; CHECK-NEXT: [[V:%.*]] = insertelement <4 x float> poison, float [[V_SCALAR]], i64 0
; CHECK-NEXT: ret <4 x float> [[V]]
;
%x.insert = insertelement <4 x float> poison, float %x, i32 0
@@ -16,8 +16,8 @@ define <4 x float> @fneg_fixed(float %x) {
define <vscale x 4 x float> @fneg_scalable(float %x) {
; CHECK-LABEL: define <vscale x 4 x float> @fneg_scalable(
; CHECK-SAME: float [[X:%.*]]) {
-; CHECK-NEXT: [[X_INSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[X]], i32 0
-; CHECK-NEXT: [[V:%.*]] = fneg <vscale x 4 x float> [[X_INSERT]]
+; CHECK-NEXT: [[V_SCALAR:%.*]] = fneg float [[X]]
+; CHECK-NEXT: [[V:%.*]] = insertelement <vscale x 4 x float> poison, float [[V_SCALAR]], i64 0
; CHECK-NEXT: ret <vscale x 4 x float> [[V]]
;
%x.insert = insertelement <vscale x 4 x float> poison, float %x, i32 0