[llvm] 97f6076 - [VectorCombine][X86] Use updated getVectorInstrCost hook (#137823)
via llvm-commits
llvm-commits at lists.llvm.org
Tue May 27 08:27:31 PDT 2025
Author: Luke Lau
Date: 2025-05-27T16:27:28+01:00
New Revision: 97f6076dedc6ba78ccdb36f53a6953a66abfd395
URL: https://github.com/llvm/llvm-project/commit/97f6076dedc6ba78ccdb36f53a6953a66abfd395
DIFF: https://github.com/llvm/llvm-project/commit/97f6076dedc6ba78ccdb36f53a6953a66abfd395.diff
LOG: [VectorCombine][X86] Use updated getVectorInstrCost hook (#137823)
This addresses a TODO where previously scalarizeBinopOrCmp
conservatively bailed if one of the operands was a load.
getVectorInstrCost was updated to take in values in
https://reviews.llvm.org/D140498 so we can pass in the scalar value to
be inserted, which should return an accurate cost for a gather.
To prevent regressions on x86 this tries to constant fold NewVecC up
front so we can pass it into TTI and get a more accurate cost.
We want to remove this restriction on RISC-V since this is always
profitable whether or not the scalar is a load.
Added:
llvm/test/Transforms/VectorCombine/RISCV/binop-scalarize.ll
Modified:
llvm/lib/Transforms/Vectorize/VectorCombine.cpp
llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant-inseltpoison.ll
llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant.ll
llvm/test/Transforms/VectorCombine/X86/insert-binop.ll
llvm/test/Transforms/VectorCombine/X86/scalarize-cmp-inseltpoison.ll
llvm/test/Transforms/VectorCombine/X86/scalarize-cmp.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index fe1d930f295ce..4413284aa3c2a 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -19,6 +19,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/TargetTransformInfo.h"
@@ -1080,14 +1081,6 @@ bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
VecTy1->getElementCount().getKnownMinValue() <= Index1)
return false;
- // Bail for single insertion if it is a load.
- // TODO: Handle this once getVectorInstrCost can cost for load/stores.
- auto *I0 = dyn_cast_or_null<Instruction>(V0);
- auto *I1 = dyn_cast_or_null<Instruction>(V1);
- if ((IsConst0 && I1 && I1->mayReadFromMemory()) ||
- (IsConst1 && I0 && I0->mayReadFromMemory()))
- return false;
-
uint64_t Index = IsConst0 ? Index1 : Index0;
Type *ScalarTy = IsConst0 ? V1->getType() : V0->getType();
Type *VecTy = I.getType();
@@ -1120,16 +1113,31 @@ bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
VectorOpCost = TTI.getIntrinsicInstrCost(VectorICA, CostKind);
}
+ // Fold the vector constants in the original vectors into a new base vector to
+ // get more accurate cost modelling.
+ Value *NewVecC;
+ if (isa<CmpInst>(I))
+ NewVecC = ConstantFoldCompareInstOperands(Pred, VecC0, VecC1, *DL);
+ else if (isa<BinaryOperator>(I))
+ NewVecC = ConstantFoldBinaryOpOperands((Instruction::BinaryOps)Opcode,
+ VecC0, VecC1, *DL);
+ else
+ NewVecC = ConstantFoldBinaryIntrinsic(
+ cast<IntrinsicInst>(I).getIntrinsicID(), VecC0, VecC1, I.getType(), &I);
+
// Get cost estimate for the insert element. This cost will factor into
// both sequences.
- InstructionCost InsertCost = TTI.getVectorInstrCost(
- Instruction::InsertElement, VecTy, CostKind, Index);
- InstructionCost OldCost =
- (IsConst0 ? 0 : InsertCost) + (IsConst1 ? 0 : InsertCost) + VectorOpCost;
- InstructionCost NewCost = ScalarOpCost + InsertCost +
- (IsConst0 ? 0 : !Ins0->hasOneUse() * InsertCost) +
- (IsConst1 ? 0 : !Ins1->hasOneUse() * InsertCost);
-
+ InstructionCost InsertCostNewVecC = TTI.getVectorInstrCost(
+ Instruction::InsertElement, VecTy, CostKind, Index, NewVecC);
+ InstructionCost InsertCostV0 = TTI.getVectorInstrCost(
+ Instruction::InsertElement, VecTy, CostKind, Index, VecC0, V0);
+ InstructionCost InsertCostV1 = TTI.getVectorInstrCost(
+ Instruction::InsertElement, VecTy, CostKind, Index, VecC1, V1);
+ InstructionCost OldCost = (IsConst0 ? 0 : InsertCostV0) +
+ (IsConst1 ? 0 : InsertCostV1) + VectorOpCost;
+ InstructionCost NewCost = ScalarOpCost + InsertCostNewVecC +
+ (IsConst0 ? 0 : !Ins0->hasOneUse() * InsertCostV0) +
+ (IsConst1 ? 0 : !Ins1->hasOneUse() * InsertCostV1);
// We want to scalarize unless the vector variant actually has lower cost.
if (OldCost < NewCost || !NewCost.isValid())
return false;
@@ -1165,15 +1173,17 @@ bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
if (auto *ScalarInst = dyn_cast<Instruction>(Scalar))
ScalarInst->copyIRFlags(&I);
- // Fold the vector constants in the original vectors into a new base vector.
- Value *NewVecC;
- if (isa<CmpInst>(I))
- NewVecC = Builder.CreateCmp(Pred, VecC0, VecC1);
- else if (isa<BinaryOperator>(I))
- NewVecC = Builder.CreateBinOp((Instruction::BinaryOps)Opcode, VecC0, VecC1);
- else
- NewVecC = Builder.CreateIntrinsic(
- VecTy, cast<IntrinsicInst>(I).getIntrinsicID(), {VecC0, VecC1});
+ // Create a new base vector if the constant folding failed.
+ if (!NewVecC) {
+ if (isa<CmpInst>(I))
+ NewVecC = Builder.CreateCmp(Pred, VecC0, VecC1);
+ else if (isa<BinaryOperator>(I))
+ NewVecC =
+ Builder.CreateBinOp((Instruction::BinaryOps)Opcode, VecC0, VecC1);
+ else
+ NewVecC = Builder.CreateIntrinsic(
+ VecTy, cast<IntrinsicInst>(I).getIntrinsicID(), {VecC0, VecC1});
+ }
Value *Insert = Builder.CreateInsertElement(NewVecC, Scalar, Index);
replaceValue(I, *Insert);
return true;
diff --git a/llvm/test/Transforms/VectorCombine/RISCV/binop-scalarize.ll b/llvm/test/Transforms/VectorCombine/RISCV/binop-scalarize.ll
new file mode 100644
index 0000000000000..ec4f6cc7520d1
--- /dev/null
+++ b/llvm/test/Transforms/VectorCombine/RISCV/binop-scalarize.ll
@@ -0,0 +1,16 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -S -p vector-combine -mtriple=riscv64 -mattr=+v | FileCheck %s
+
+define <4 x i32> @add_constant_load(ptr %p) {
+; CHECK-LABEL: define <4 x i32> @add_constant_load(
+; CHECK-SAME: ptr [[P:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[X:%.*]] = load i32, ptr [[P]], align 4
+; CHECK-NEXT: [[V_SCALAR:%.*]] = add i32 [[X]], 42
+; CHECK-NEXT: [[V:%.*]] = insertelement <4 x i32> poison, i32 [[V_SCALAR]], i64 0
+; CHECK-NEXT: ret <4 x i32> [[V]]
+;
+ %x = load i32, ptr %p
+ %ins = insertelement <4 x i32> poison, i32 %x, i32 0
+ %v = add <4 x i32> %ins, splat (i32 42)
+ ret <4 x i32> %v
+}
diff --git a/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant-inseltpoison.ll b/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant-inseltpoison.ll
index d45d5f4d44ff3..564c9a795a794 100644
--- a/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant-inseltpoison.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant-inseltpoison.ll
@@ -153,8 +153,8 @@ define <2 x i64> @shl_constant_op0_not_undef_lane(i64 %x) {
define <2 x i64> @shl_constant_op0_load(ptr %p) {
; CHECK-LABEL: @shl_constant_op0_load(
; CHECK-NEXT: [[LD:%.*]] = load i64, ptr [[P:%.*]], align 8
-; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> poison, i64 [[LD]], i32 1
-; CHECK-NEXT: [[BO:%.*]] = shl <2 x i64> <i64 undef, i64 2>, [[INS]]
+; CHECK-NEXT: [[BO_SCALAR:%.*]] = shl i64 2, [[LD]]
+; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 1
; CHECK-NEXT: ret <2 x i64> [[BO]]
;
%ld = load i64, ptr %p
@@ -204,8 +204,8 @@ define <2 x i64> @shl_constant_op1_not_undef_lane(i64 %x) {
define <2 x i64> @shl_constant_op1_load(ptr %p) {
; CHECK-LABEL: @shl_constant_op1_load(
; CHECK-NEXT: [[LD:%.*]] = load i64, ptr [[P:%.*]], align 8
-; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> poison, i64 [[LD]], i32 0
-; CHECK-NEXT: [[BO:%.*]] = shl nuw <2 x i64> [[INS]], <i64 5, i64 2>
+; CHECK-NEXT: [[BO_SCALAR:%.*]] = shl nuw i64 [[LD]], 5
+; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 0
; CHECK-NEXT: ret <2 x i64> [[BO]]
;
%ld = load i64, ptr %p
diff --git a/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant.ll b/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant.ll
index 2b5a58ea44de4..cf3bd00527f81 100644
--- a/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant.ll
@@ -153,8 +153,8 @@ define <2 x i64> @shl_constant_op0_not_undef_lane(i64 %x) {
define <2 x i64> @shl_constant_op0_load(ptr %p) {
; CHECK-LABEL: @shl_constant_op0_load(
; CHECK-NEXT: [[LD:%.*]] = load i64, ptr [[P:%.*]], align 8
-; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[LD]], i32 1
-; CHECK-NEXT: [[BO:%.*]] = shl <2 x i64> <i64 undef, i64 2>, [[INS]]
+; CHECK-NEXT: [[BO_SCALAR:%.*]] = shl i64 2, [[LD]]
+; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 1
; CHECK-NEXT: ret <2 x i64> [[BO]]
;
%ld = load i64, ptr %p
@@ -204,8 +204,8 @@ define <2 x i64> @shl_constant_op1_not_undef_lane(i64 %x) {
define <2 x i64> @shl_constant_op1_load(ptr %p) {
; CHECK-LABEL: @shl_constant_op1_load(
; CHECK-NEXT: [[LD:%.*]] = load i64, ptr [[P:%.*]], align 8
-; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[LD]], i32 0
-; CHECK-NEXT: [[BO:%.*]] = shl nuw <2 x i64> [[INS]], <i64 5, i64 2>
+; CHECK-NEXT: [[BO_SCALAR:%.*]] = shl nuw i64 [[LD]], 5
+; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> zeroinitializer, i64 [[BO_SCALAR]], i64 0
; CHECK-NEXT: ret <2 x i64> [[BO]]
;
%ld = load i64, ptr %p
@@ -479,10 +479,15 @@ define <2 x i64> @sdiv_constant_op1_not_undef_lane(i64 %x) {
}
define <2 x i64> @and_constant(i64 %x) {
-; CHECK-LABEL: @and_constant(
-; CHECK-NEXT: [[BO_SCALAR:%.*]] = and i64 [[X:%.*]], 42
-; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> <i64 0, i64 undef>, i64 [[BO_SCALAR]], i64 0
-; CHECK-NEXT: ret <2 x i64> [[BO]]
+; SSE-LABEL: @and_constant(
+; SSE-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0
+; SSE-NEXT: [[BO:%.*]] = and <2 x i64> [[INS]], <i64 42, i64 undef>
+; SSE-NEXT: ret <2 x i64> [[BO]]
+;
+; AVX-LABEL: @and_constant(
+; AVX-NEXT: [[BO_SCALAR:%.*]] = and i64 [[X:%.*]], 42
+; AVX-NEXT: [[BO:%.*]] = insertelement <2 x i64> <i64 0, i64 undef>, i64 [[BO_SCALAR]], i64 0
+; AVX-NEXT: ret <2 x i64> [[BO]]
;
%ins = insertelement <2 x i64> undef, i64 %x, i32 0
%bo = and <2 x i64> %ins, <i64 42, i64 undef>
@@ -490,10 +495,15 @@ define <2 x i64> @and_constant(i64 %x) {
}
define <2 x i64> @and_constant_not_undef_lane(i64 %x) {
-; CHECK-LABEL: @and_constant_not_undef_lane(
-; CHECK-NEXT: [[BO_SCALAR:%.*]] = and i64 [[X:%.*]], 42
-; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> zeroinitializer, i64 [[BO_SCALAR]], i64 0
-; CHECK-NEXT: ret <2 x i64> [[BO]]
+; SSE-LABEL: @and_constant_not_undef_lane(
+; SSE-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0
+; SSE-NEXT: [[BO:%.*]] = and <2 x i64> [[INS]], <i64 42, i64 -42>
+; SSE-NEXT: ret <2 x i64> [[BO]]
+;
+; AVX-LABEL: @and_constant_not_undef_lane(
+; AVX-NEXT: [[BO_SCALAR:%.*]] = and i64 [[X:%.*]], 42
+; AVX-NEXT: [[BO:%.*]] = insertelement <2 x i64> zeroinitializer, i64 [[BO_SCALAR]], i64 0
+; AVX-NEXT: ret <2 x i64> [[BO]]
;
%ins = insertelement <2 x i64> undef, i64 %x, i32 0
%bo = and <2 x i64> %ins, <i64 42, i64 -42>
@@ -523,10 +533,15 @@ define <2 x i64> @or_constant_not_undef_lane(i64 %x) {
}
define <2 x i64> @xor_constant(i64 %x) {
-; CHECK-LABEL: @xor_constant(
-; CHECK-NEXT: [[BO_SCALAR:%.*]] = xor i64 [[X:%.*]], 42
-; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> <i64 undef, i64 0>, i64 [[BO_SCALAR]], i64 0
-; CHECK-NEXT: ret <2 x i64> [[BO]]
+; SSE-LABEL: @xor_constant(
+; SSE-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0
+; SSE-NEXT: [[BO:%.*]] = xor <2 x i64> [[INS]], <i64 42, i64 undef>
+; SSE-NEXT: ret <2 x i64> [[BO]]
+;
+; AVX-LABEL: @xor_constant(
+; AVX-NEXT: [[BO_SCALAR:%.*]] = xor i64 [[X:%.*]], 42
+; AVX-NEXT: [[BO:%.*]] = insertelement <2 x i64> <i64 undef, i64 0>, i64 [[BO_SCALAR]], i64 0
+; AVX-NEXT: ret <2 x i64> [[BO]]
;
%ins = insertelement <2 x i64> undef, i64 %x, i32 0
%bo = xor <2 x i64> %ins, <i64 42, i64 undef>
@@ -546,8 +561,8 @@ define <2 x i64> @xor_constant_not_undef_lane(i64 %x) {
define <2 x double> @fadd_constant(double %x) {
; CHECK-LABEL: @fadd_constant(
-; CHECK-NEXT: [[BO_SCALAR:%.*]] = fadd double [[X:%.*]], 4.200000e+01
-; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> <double 0x7FF8000000000000, double undef>, double [[BO_SCALAR]], i64 0
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0
+; CHECK-NEXT: [[BO:%.*]] = fadd <2 x double> [[INS]], <double 4.200000e+01, double undef>
; CHECK-NEXT: ret <2 x double> [[BO]]
;
%ins = insertelement <2 x double> undef, double %x, i32 0
@@ -568,8 +583,8 @@ define <2 x double> @fadd_constant_not_undef_lane(double %x) {
define <2 x double> @fsub_constant_op0(double %x) {
; CHECK-LABEL: @fsub_constant_op0(
-; CHECK-NEXT: [[BO_SCALAR:%.*]] = fsub fast double 4.200000e+01, [[X:%.*]]
-; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> <double 0x7FF8000000000000, double undef>, double [[BO_SCALAR]], i64 0
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0
+; CHECK-NEXT: [[BO:%.*]] = fsub fast <2 x double> <double 4.200000e+01, double undef>, [[INS]]
; CHECK-NEXT: ret <2 x double> [[BO]]
;
%ins = insertelement <2 x double> undef, double %x, i32 0
@@ -601,8 +616,8 @@ define <2 x double> @fsub_constant_op1(double %x) {
define <2 x double> @fsub_constant_op1_not_undef_lane(double %x) {
; CHECK-LABEL: @fsub_constant_op1_not_undef_lane(
-; CHECK-NEXT: [[BO_SCALAR:%.*]] = fsub double [[X:%.*]], 4.200000e+01
-; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> splat (double 0x7FF8000000000000), double [[BO_SCALAR]], i64 0
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0
+; CHECK-NEXT: [[BO:%.*]] = fsub <2 x double> [[INS]], <double 4.200000e+01, double -4.200000e+01>
; CHECK-NEXT: ret <2 x double> [[BO]]
;
%ins = insertelement <2 x double> undef, double %x, i32 0
@@ -612,8 +627,8 @@ define <2 x double> @fsub_constant_op1_not_undef_lane(double %x) {
define <2 x double> @fmul_constant(double %x) {
; CHECK-LABEL: @fmul_constant(
-; CHECK-NEXT: [[BO_SCALAR:%.*]] = fmul reassoc double [[X:%.*]], 4.200000e+01
-; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> <double 0x7FF8000000000000, double undef>, double [[BO_SCALAR]], i64 0
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0
+; CHECK-NEXT: [[BO:%.*]] = fmul reassoc <2 x double> [[INS]], <double 4.200000e+01, double undef>
; CHECK-NEXT: ret <2 x double> [[BO]]
;
%ins = insertelement <2 x double> undef, double %x, i32 0
@@ -644,10 +659,15 @@ define <2 x double> @fdiv_constant_op0(double %x) {
}
define <2 x double> @fdiv_constant_op0_not_undef_lane(double %x) {
-; CHECK-LABEL: @fdiv_constant_op0_not_undef_lane(
-; CHECK-NEXT: [[BO_SCALAR:%.*]] = fdiv ninf double 4.200000e+01, [[X:%.*]]
-; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> splat (double 0x7FF8000000000000), double [[BO_SCALAR]], i64 0
-; CHECK-NEXT: ret <2 x double> [[BO]]
+; SSE-LABEL: @fdiv_constant_op0_not_undef_lane(
+; SSE-NEXT: [[BO_SCALAR:%.*]] = fdiv ninf double 4.200000e+01, [[X:%.*]]
+; SSE-NEXT: [[BO:%.*]] = insertelement <2 x double> splat (double 0x7FF8000000000000), double [[BO_SCALAR]], i64 0
+; SSE-NEXT: ret <2 x double> [[BO]]
+;
+; AVX-LABEL: @fdiv_constant_op0_not_undef_lane(
+; AVX-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0
+; AVX-NEXT: [[BO:%.*]] = fdiv ninf <2 x double> <double 4.200000e+01, double -4.200000e+01>, [[INS]]
+; AVX-NEXT: ret <2 x double> [[BO]]
;
%ins = insertelement <2 x double> undef, double %x, i32 0
%bo = fdiv ninf <2 x double> <double 42.0, double -42.0>, %ins
@@ -655,10 +675,15 @@ define <2 x double> @fdiv_constant_op0_not_undef_lane(double %x) {
}
define <2 x double> @fdiv_constant_op1(double %x) {
-; CHECK-LABEL: @fdiv_constant_op1(
-; CHECK-NEXT: [[BO_SCALAR:%.*]] = fdiv double [[X:%.*]], 4.200000e+01
-; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> <double 0x7FF8000000000000, double undef>, double [[BO_SCALAR]], i64 0
-; CHECK-NEXT: ret <2 x double> [[BO]]
+; SSE-LABEL: @fdiv_constant_op1(
+; SSE-NEXT: [[BO_SCALAR:%.*]] = fdiv double [[X:%.*]], 4.200000e+01
+; SSE-NEXT: [[BO:%.*]] = insertelement <2 x double> <double 0x7FF8000000000000, double undef>, double [[BO_SCALAR]], i64 0
+; SSE-NEXT: ret <2 x double> [[BO]]
+;
+; AVX-LABEL: @fdiv_constant_op1(
+; AVX-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0
+; AVX-NEXT: [[BO:%.*]] = fdiv <2 x double> [[INS]], <double 4.200000e+01, double undef>
+; AVX-NEXT: ret <2 x double> [[BO]]
;
%ins = insertelement <2 x double> undef, double %x, i32 0
%bo = fdiv <2 x double> %ins, <double 42.0, double undef>
@@ -666,10 +691,15 @@ define <2 x double> @fdiv_constant_op1(double %x) {
}
define <2 x double> @fdiv_constant_op1_not_undef_lane(double %x) {
-; CHECK-LABEL: @fdiv_constant_op1_not_undef_lane(
-; CHECK-NEXT: [[BO_SCALAR:%.*]] = fdiv double [[X:%.*]], 4.200000e+01
-; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> splat (double 0x7FF8000000000000), double [[BO_SCALAR]], i64 0
-; CHECK-NEXT: ret <2 x double> [[BO]]
+; SSE-LABEL: @fdiv_constant_op1_not_undef_lane(
+; SSE-NEXT: [[BO_SCALAR:%.*]] = fdiv double [[X:%.*]], 4.200000e+01
+; SSE-NEXT: [[BO:%.*]] = insertelement <2 x double> splat (double 0x7FF8000000000000), double [[BO_SCALAR]], i64 0
+; SSE-NEXT: ret <2 x double> [[BO]]
+;
+; AVX-LABEL: @fdiv_constant_op1_not_undef_lane(
+; AVX-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0
+; AVX-NEXT: [[BO:%.*]] = fdiv <2 x double> [[INS]], <double 4.200000e+01, double -4.200000e+01>
+; AVX-NEXT: ret <2 x double> [[BO]]
;
%ins = insertelement <2 x double> undef, double %x, i32 0
%bo = fdiv <2 x double> %ins, <double 42.0, double -42.0>
diff --git a/llvm/test/Transforms/VectorCombine/X86/insert-binop.ll b/llvm/test/Transforms/VectorCombine/X86/insert-binop.ll
index cd7e2ad2ca2c6..334ad2e1ed7cc 100644
--- a/llvm/test/Transforms/VectorCombine/X86/insert-binop.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/insert-binop.ll
@@ -155,12 +155,19 @@ define <2 x i64> @ins1_ins1_urem(i64 %x, i64 %y) {
; Extra use is accounted for in cost calculation.
define <4 x i32> @ins0_ins0_xor(i32 %x, i32 %y) {
-; CHECK-LABEL: @ins0_ins0_xor(
-; CHECK-NEXT: [[I0:%.*]] = insertelement <4 x i32> undef, i32 [[X:%.*]], i32 0
-; CHECK-NEXT: call void @use(<4 x i32> [[I0]])
-; CHECK-NEXT: [[R_SCALAR:%.*]] = xor i32 [[X]], [[Y:%.*]]
-; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[R_SCALAR]], i64 0
-; CHECK-NEXT: ret <4 x i32> [[R]]
+; SSE-LABEL: @ins0_ins0_xor(
+; SSE-NEXT: [[I0:%.*]] = insertelement <4 x i32> undef, i32 [[X:%.*]], i32 0
+; SSE-NEXT: call void @use(<4 x i32> [[I0]])
+; SSE-NEXT: [[I1:%.*]] = insertelement <4 x i32> undef, i32 [[Y:%.*]], i32 0
+; SSE-NEXT: [[R:%.*]] = xor <4 x i32> [[I0]], [[I1]]
+; SSE-NEXT: ret <4 x i32> [[R]]
+;
+; AVX-LABEL: @ins0_ins0_xor(
+; AVX-NEXT: [[I0:%.*]] = insertelement <4 x i32> undef, i32 [[X:%.*]], i32 0
+; AVX-NEXT: call void @use(<4 x i32> [[I0]])
+; AVX-NEXT: [[R_SCALAR:%.*]] = xor i32 [[X]], [[Y:%.*]]
+; AVX-NEXT: [[R:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[R_SCALAR]], i64 0
+; AVX-NEXT: ret <4 x i32> [[R]]
;
%i0 = insertelement <4 x i32> undef, i32 %x, i32 0
call void @use(<4 x i32> %i0)
diff --git a/llvm/test/Transforms/VectorCombine/X86/scalarize-cmp-inseltpoison.ll b/llvm/test/Transforms/VectorCombine/X86/scalarize-cmp-inseltpoison.ll
index 14b517d613de4..2c0b38a303aa2 100644
--- a/llvm/test/Transforms/VectorCombine/X86/scalarize-cmp-inseltpoison.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/scalarize-cmp-inseltpoison.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=sse2 | FileCheck %s
-; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=avx2 | FileCheck %s
+; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=sse2 | FileCheck %s --check-prefixes=CHECK,SSE
+; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=avx2 | FileCheck %s --check-prefixes=CHECK,AVX
declare void @use(<4 x i32>)
declare void @usef(<4 x float>)
@@ -165,14 +165,18 @@ define <2 x i1> @constant_op1_i64_not_undef_lane(i64 %x) {
ret <2 x i1> %r
}
-; negative test - load prevents the transform
-
define <2 x i1> @constant_op1_i64_load(ptr %p) {
-; CHECK-LABEL: @constant_op1_i64_load(
-; CHECK-NEXT: [[LD:%.*]] = load i64, ptr [[P:%.*]], align 8
-; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> poison, i64 [[LD]], i32 0
-; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i64> [[INS]], <i64 42, i64 -42>
-; CHECK-NEXT: ret <2 x i1> [[R]]
+; SSE-LABEL: @constant_op1_i64_load(
+; SSE-NEXT: [[LD:%.*]] = load i64, ptr [[P:%.*]], align 8
+; SSE-NEXT: [[R_SCALAR:%.*]] = icmp eq i64 [[LD]], 42
+; SSE-NEXT: [[R:%.*]] = insertelement <2 x i1> poison, i1 [[R_SCALAR]], i64 0
+; SSE-NEXT: ret <2 x i1> [[R]]
+;
+; AVX-LABEL: @constant_op1_i64_load(
+; AVX-NEXT: [[LD:%.*]] = load i64, ptr [[P:%.*]], align 8
+; AVX-NEXT: [[INS:%.*]] = insertelement <2 x i64> poison, i64 [[LD]], i32 0
+; AVX-NEXT: [[R:%.*]] = icmp eq <2 x i64> [[INS]], <i64 42, i64 -42>
+; AVX-NEXT: ret <2 x i1> [[R]]
;
%ld = load i64, ptr %p
%ins = insertelement <2 x i64> poison, i64 %ld, i32 0
diff --git a/llvm/test/Transforms/VectorCombine/X86/scalarize-cmp.ll b/llvm/test/Transforms/VectorCombine/X86/scalarize-cmp.ll
index edd92c3f1c14c..3c511e6ecd550 100644
--- a/llvm/test/Transforms/VectorCombine/X86/scalarize-cmp.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/scalarize-cmp.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=sse2 | FileCheck %s
-; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=avx2 | FileCheck %s
+; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=sse2 | FileCheck %s --check-prefixes=CHECK,SSE
+; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=avx2 | FileCheck %s --check-prefixes=CHECK,AVX
declare void @use(<4 x i32>)
declare void @usef(<4 x float>)
@@ -165,14 +165,18 @@ define <2 x i1> @constant_op1_i64_not_undef_lane(i64 %x) {
ret <2 x i1> %r
}
-; negative test - load prevents the transform
-
define <2 x i1> @constant_op1_i64_load(ptr %p) {
-; CHECK-LABEL: @constant_op1_i64_load(
-; CHECK-NEXT: [[LD:%.*]] = load i64, ptr [[P:%.*]], align 8
-; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[LD]], i32 0
-; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i64> [[INS]], <i64 42, i64 -42>
-; CHECK-NEXT: ret <2 x i1> [[R]]
+; SSE-LABEL: @constant_op1_i64_load(
+; SSE-NEXT: [[LD:%.*]] = load i64, ptr [[P:%.*]], align 8
+; SSE-NEXT: [[R_SCALAR:%.*]] = icmp eq i64 [[LD]], 42
+; SSE-NEXT: [[R:%.*]] = insertelement <2 x i1> undef, i1 [[R_SCALAR]], i64 0
+; SSE-NEXT: ret <2 x i1> [[R]]
+;
+; AVX-LABEL: @constant_op1_i64_load(
+; AVX-NEXT: [[LD:%.*]] = load i64, ptr [[P:%.*]], align 8
+; AVX-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[LD]], i32 0
+; AVX-NEXT: [[R:%.*]] = icmp eq <2 x i64> [[INS]], <i64 42, i64 -42>
+; AVX-NEXT: ret <2 x i1> [[R]]
;
%ld = load i64, ptr %p
%ins = insertelement <2 x i64> undef, i64 %ld, i32 0
@@ -236,10 +240,15 @@ define <2 x i1> @constant_op1_f64(double %x) {
}
define <4 x i1> @constant_op1_f32_not_undef_lane(float %x) {
-; CHECK-LABEL: @constant_op1_f32_not_undef_lane(
-; CHECK-NEXT: [[R_SCALAR:%.*]] = fcmp uge float [[X:%.*]], 4.200000e+01
-; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i1> splat (i1 true), i1 [[R_SCALAR]], i64 0
-; CHECK-NEXT: ret <4 x i1> [[R]]
+; SSE-LABEL: @constant_op1_f32_not_undef_lane(
+; SSE-NEXT: [[INS:%.*]] = insertelement <4 x float> undef, float [[X:%.*]], i32 0
+; SSE-NEXT: [[R:%.*]] = fcmp uge <4 x float> [[INS]], <float 4.200000e+01, float -4.200000e+01, float 0.000000e+00, float 1.000000e+00>
+; SSE-NEXT: ret <4 x i1> [[R]]
+;
+; AVX-LABEL: @constant_op1_f32_not_undef_lane(
+; AVX-NEXT: [[R_SCALAR:%.*]] = fcmp uge float [[X:%.*]], 4.200000e+01
+; AVX-NEXT: [[R:%.*]] = insertelement <4 x i1> splat (i1 true), i1 [[R_SCALAR]], i64 0
+; AVX-NEXT: ret <4 x i1> [[R]]
;
%ins = insertelement <4 x float> undef, float %x, i32 0
%r = fcmp uge <4 x float> %ins, <float 42.0, float -42.0, float 0.0, float 1.0>
More information about the llvm-commits
mailing list