[llvm] [SelectionDAG] Scalarize binary ops of splats before legal types (PR #100749)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 14 06:47:45 PDT 2024
https://github.com/Fros1er updated https://github.com/llvm/llvm-project/pull/100749
>From 3d5223f6061395c1b58a29bbdab13dce4ddfa79f Mon Sep 17 00:00:00 2001
From: Fros1er <34234343+Fros1er at users.noreply.github.com>
Date: Fri, 26 Jul 2024 21:29:12 +0800
Subject: [PATCH 1/4] scalarize binary ops of splats by not checking isTypeLegal
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 15 +-
.../AArch64/dag-combine-concat-vectors.ll | 13 +-
llvm/test/CodeGen/RISCV/rvv/binop-splats.ll | 309 +++++++++---------
.../RISCV/rvv/fixed-vectors-binop-splats.ll | 63 ++--
llvm/test/CodeGen/RISCV/rvv/vadd-sdnode.ll | 13 +-
llvm/test/CodeGen/RISCV/rvv/vand-sdnode.ll | 7 +-
llvm/test/CodeGen/RISCV/rvv/vmul-sdnode.ll | 16 -
llvm/test/CodeGen/RISCV/rvv/vor-sdnode.ll | 7 +-
llvm/test/CodeGen/RISCV/rvv/vsub-sdnode.ll | 11 +-
llvm/test/CodeGen/RISCV/rvv/vxor-sdnode.ll | 7 +-
.../WebAssembly/simd-shift-complex-splats.ll | 25 +-
11 files changed, 221 insertions(+), 265 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 8cc920c16552ed..972be0946a7080 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -26975,7 +26975,7 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
/// If a vector binop is performed on splat values, it may be profitable to
/// extract, scalarize, and insert/splat.
static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG,
- const SDLoc &DL) {
+ const SDLoc &DL, bool LegalTypes) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
unsigned Opcode = N->getOpcode();
@@ -26993,11 +26993,20 @@ static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG,
// TODO: use DAG.isSplatValue instead?
bool IsBothSplatVector = N0.getOpcode() == ISD::SPLAT_VECTOR &&
N1.getOpcode() == ISD::SPLAT_VECTOR;
+
+  // If the binop is legal or custom on EltVT, scalarizing should be
+  // profitable. This check is the same as isOperationLegalOrCustom minus the
+  // isTypeLegal check. We can only do this before type legalization, because
+  // it may produce an illegal `op EltVT` from a legal `op VT (splat EltVT)`,
+  // where EltVT is not a legal type but the splat's vector result type is.
+ auto EltAction = TLI.getOperationAction(Opcode, EltVT);
if (!Src0 || !Src1 || Index0 != Index1 ||
Src0.getValueType().getVectorElementType() != EltVT ||
Src1.getValueType().getVectorElementType() != EltVT ||
!(IsBothSplatVector || TLI.isExtractVecEltCheap(VT, Index0)) ||
- !TLI.isOperationLegalOrCustom(Opcode, EltVT))
+ (LegalTypes && !TLI.isOperationLegalOrCustom(Opcode, EltVT)) ||
+ !(EltAction == TargetLoweringBase::Legal ||
+ EltAction == TargetLoweringBase::Custom))
return SDValue();
SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
@@ -27163,7 +27172,7 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N, const SDLoc &DL) {
}
}
- if (SDValue V = scalarizeBinOpOfSplats(N, DAG, DL))
+ if (SDValue V = scalarizeBinOpOfSplats(N, DAG, DL, LegalTypes))
return V;
return SDValue();
diff --git a/llvm/test/CodeGen/AArch64/dag-combine-concat-vectors.ll b/llvm/test/CodeGen/AArch64/dag-combine-concat-vectors.ll
index 764f148ecd3aab..5a5dee0b53d439 100644
--- a/llvm/test/CodeGen/AArch64/dag-combine-concat-vectors.ll
+++ b/llvm/test/CodeGen/AArch64/dag-combine-concat-vectors.ll
@@ -16,14 +16,13 @@ define fastcc i8 @allocno_reload_assign() {
; CHECK-NEXT: uzp1 p0.h, p0.h, p0.h
; CHECK-NEXT: uzp1 p0.b, p0.b, p0.b
; CHECK-NEXT: mov z0.b, p0/z, #1 // =0x1
-; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: mov z0.b, #0 // =0x0
-; CHECK-NEXT: sbfx x8, x8, #0, #1
; CHECK-NEXT: uunpklo z1.h, z0.b
; CHECK-NEXT: uunpkhi z0.h, z0.b
-; CHECK-NEXT: whilelo p1.b, xzr, x8
-; CHECK-NEXT: not p0.b, p0/z, p1.b
+; CHECK-NEXT: mvn w8, w8
+; CHECK-NEXT: sbfx x8, x8, #0, #1
+; CHECK-NEXT: whilelo p0.b, xzr, x8
; CHECK-NEXT: uunpklo z2.s, z1.h
; CHECK-NEXT: uunpkhi z3.s, z1.h
; CHECK-NEXT: uunpklo z5.s, z0.h
@@ -31,15 +30,15 @@ define fastcc i8 @allocno_reload_assign() {
; CHECK-NEXT: punpklo p1.h, p0.b
; CHECK-NEXT: punpkhi p0.h, p0.b
; CHECK-NEXT: punpklo p2.h, p1.b
+; CHECK-NEXT: punpkhi p3.h, p1.b
; CHECK-NEXT: uunpklo z0.d, z2.s
; CHECK-NEXT: uunpkhi z1.d, z2.s
-; CHECK-NEXT: punpkhi p3.h, p1.b
+; CHECK-NEXT: punpklo p5.h, p0.b
; CHECK-NEXT: uunpklo z2.d, z3.s
; CHECK-NEXT: uunpkhi z3.d, z3.s
-; CHECK-NEXT: punpklo p5.h, p0.b
+; CHECK-NEXT: punpkhi p7.h, p0.b
; CHECK-NEXT: uunpklo z4.d, z5.s
; CHECK-NEXT: uunpkhi z5.d, z5.s
-; CHECK-NEXT: punpkhi p7.h, p0.b
; CHECK-NEXT: uunpklo z6.d, z7.s
; CHECK-NEXT: uunpkhi z7.d, z7.s
; CHECK-NEXT: punpklo p0.h, p2.b
diff --git a/llvm/test/CodeGen/RISCV/rvv/binop-splats.ll b/llvm/test/CodeGen/RISCV/rvv/binop-splats.ll
index 6875925adad834..f26e57b5a0b733 100644
--- a/llvm/test/CodeGen/RISCV/rvv/binop-splats.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/binop-splats.ll
@@ -5,14 +5,11 @@
define <vscale x 1 x i1> @nxv1i1(i1 %x, i1 %y) {
; CHECK-LABEL: nxv1i1:
; CHECK: # %bb.0:
+; CHECK-NEXT: xor a0, a0, a1
; CHECK-NEXT: andi a0, a0, 1
-; CHECK-NEXT: vsetvli a2, zero, e8, mf8, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
; CHECK-NEXT: vmv.v.x v8, a0
-; CHECK-NEXT: vmsne.vi v8, v8, 0
-; CHECK-NEXT: andi a1, a1, 1
-; CHECK-NEXT: vmv.v.x v9, a1
-; CHECK-NEXT: vmsne.vi v9, v9, 0
-; CHECK-NEXT: vmxor.mm v0, v8, v9
+; CHECK-NEXT: vmsne.vi v0, v8, 0
; CHECK-NEXT: ret
%head.x = insertelement <vscale x 1 x i1> poison, i1 %x, i32 0
%splat.x = shufflevector <vscale x 1 x i1> %head.x, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
@@ -25,14 +22,11 @@ define <vscale x 1 x i1> @nxv1i1(i1 %x, i1 %y) {
define <vscale x 2 x i1> @nxv2i1(i1 %x, i1 %y) {
; CHECK-LABEL: nxv2i1:
; CHECK: # %bb.0:
+; CHECK-NEXT: xor a0, a0, a1
; CHECK-NEXT: andi a0, a0, 1
-; CHECK-NEXT: vsetvli a2, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
; CHECK-NEXT: vmv.v.x v8, a0
-; CHECK-NEXT: vmsne.vi v8, v8, 0
-; CHECK-NEXT: andi a1, a1, 1
-; CHECK-NEXT: vmv.v.x v9, a1
-; CHECK-NEXT: vmsne.vi v9, v9, 0
-; CHECK-NEXT: vmxor.mm v0, v8, v9
+; CHECK-NEXT: vmsne.vi v0, v8, 0
; CHECK-NEXT: ret
%head.x = insertelement <vscale x 2 x i1> poison, i1 %x, i32 0
%splat.x = shufflevector <vscale x 2 x i1> %head.x, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
@@ -45,14 +39,11 @@ define <vscale x 2 x i1> @nxv2i1(i1 %x, i1 %y) {
define <vscale x 4 x i1> @nxv4i1(i1 %x, i1 %y) {
; CHECK-LABEL: nxv4i1:
; CHECK: # %bb.0:
+; CHECK-NEXT: xor a0, a0, a1
; CHECK-NEXT: andi a0, a0, 1
-; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
; CHECK-NEXT: vmv.v.x v8, a0
-; CHECK-NEXT: vmsne.vi v8, v8, 0
-; CHECK-NEXT: andi a1, a1, 1
-; CHECK-NEXT: vmv.v.x v9, a1
-; CHECK-NEXT: vmsne.vi v9, v9, 0
-; CHECK-NEXT: vmxor.mm v0, v8, v9
+; CHECK-NEXT: vmsne.vi v0, v8, 0
; CHECK-NEXT: ret
%head.x = insertelement <vscale x 4 x i1> poison, i1 %x, i32 0
%splat.x = shufflevector <vscale x 4 x i1> %head.x, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
@@ -65,14 +56,11 @@ define <vscale x 4 x i1> @nxv4i1(i1 %x, i1 %y) {
define <vscale x 8 x i1> @nxv8i1(i1 %x, i1 %y) {
; CHECK-LABEL: nxv8i1:
; CHECK: # %bb.0:
+; CHECK-NEXT: xor a0, a0, a1
; CHECK-NEXT: andi a0, a0, 1
-; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
; CHECK-NEXT: vmv.v.x v8, a0
-; CHECK-NEXT: vmsne.vi v8, v8, 0
-; CHECK-NEXT: andi a1, a1, 1
-; CHECK-NEXT: vmv.v.x v9, a1
-; CHECK-NEXT: vmsne.vi v9, v9, 0
-; CHECK-NEXT: vmxor.mm v0, v8, v9
+; CHECK-NEXT: vmsne.vi v0, v8, 0
; CHECK-NEXT: ret
%head.x = insertelement <vscale x 8 x i1> poison, i1 %x, i32 0
%splat.x = shufflevector <vscale x 8 x i1> %head.x, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
@@ -85,14 +73,11 @@ define <vscale x 8 x i1> @nxv8i1(i1 %x, i1 %y) {
define <vscale x 16 x i1> @nxv16i1(i1 %x, i1 %y) {
; CHECK-LABEL: nxv16i1:
; CHECK: # %bb.0:
+; CHECK-NEXT: xor a0, a0, a1
; CHECK-NEXT: andi a0, a0, 1
-; CHECK-NEXT: vsetvli a2, zero, e8, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma
; CHECK-NEXT: vmv.v.x v8, a0
-; CHECK-NEXT: vmsne.vi v10, v8, 0
-; CHECK-NEXT: andi a1, a1, 1
-; CHECK-NEXT: vmv.v.x v8, a1
-; CHECK-NEXT: vmsne.vi v11, v8, 0
-; CHECK-NEXT: vmxor.mm v0, v10, v11
+; CHECK-NEXT: vmsne.vi v0, v8, 0
; CHECK-NEXT: ret
%head.x = insertelement <vscale x 16 x i1> poison, i1 %x, i32 0
%splat.x = shufflevector <vscale x 16 x i1> %head.x, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
@@ -105,14 +90,11 @@ define <vscale x 16 x i1> @nxv16i1(i1 %x, i1 %y) {
define <vscale x 32 x i1> @nxv32i1(i1 %x, i1 %y) {
; CHECK-LABEL: nxv32i1:
; CHECK: # %bb.0:
+; CHECK-NEXT: xor a0, a0, a1
; CHECK-NEXT: andi a0, a0, 1
-; CHECK-NEXT: vsetvli a2, zero, e8, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma
; CHECK-NEXT: vmv.v.x v8, a0
-; CHECK-NEXT: vmsne.vi v12, v8, 0
-; CHECK-NEXT: andi a1, a1, 1
-; CHECK-NEXT: vmv.v.x v8, a1
-; CHECK-NEXT: vmsne.vi v13, v8, 0
-; CHECK-NEXT: vmxor.mm v0, v12, v13
+; CHECK-NEXT: vmsne.vi v0, v8, 0
; CHECK-NEXT: ret
%head.x = insertelement <vscale x 32 x i1> poison, i1 %x, i32 0
%splat.x = shufflevector <vscale x 32 x i1> %head.x, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
@@ -125,14 +107,11 @@ define <vscale x 32 x i1> @nxv32i1(i1 %x, i1 %y) {
define <vscale x 64 x i1> @nxv64i1(i1 %x, i1 %y) {
; CHECK-LABEL: nxv64i1:
; CHECK: # %bb.0:
+; CHECK-NEXT: xor a0, a0, a1
; CHECK-NEXT: andi a0, a0, 1
-; CHECK-NEXT: vsetvli a2, zero, e8, m8, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e8, m8, ta, ma
; CHECK-NEXT: vmv.v.x v8, a0
-; CHECK-NEXT: vmsne.vi v16, v8, 0
-; CHECK-NEXT: andi a1, a1, 1
-; CHECK-NEXT: vmv.v.x v8, a1
-; CHECK-NEXT: vmsne.vi v17, v8, 0
-; CHECK-NEXT: vmxor.mm v0, v16, v17
+; CHECK-NEXT: vmsne.vi v0, v8, 0
; CHECK-NEXT: ret
%head.x = insertelement <vscale x 64 x i1> poison, i1 %x, i32 0
%splat.x = shufflevector <vscale x 64 x i1> %head.x, <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer
@@ -145,9 +124,9 @@ define <vscale x 64 x i1> @nxv64i1(i1 %x, i1 %y) {
define <vscale x 1 x i8> @nxv1i8(i8 %x, i8 %y) {
; CHECK-LABEL: nxv1i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e8, mf8, ta, ma
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
; CHECK-NEXT: vmv.v.x v8, a0
-; CHECK-NEXT: vadd.vx v8, v8, a1
; CHECK-NEXT: ret
%head.x = insertelement <vscale x 1 x i8> poison, i8 %x, i32 0
%splat.x = shufflevector <vscale x 1 x i8> %head.x, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
@@ -160,9 +139,9 @@ define <vscale x 1 x i8> @nxv1i8(i8 %x, i8 %y) {
define <vscale x 2 x i8> @nxv2i8(i8 %x, i8 %y) {
; CHECK-LABEL: nxv2i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e8, mf4, ta, ma
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
; CHECK-NEXT: vmv.v.x v8, a0
-; CHECK-NEXT: vadd.vx v8, v8, a1
; CHECK-NEXT: ret
%head.x = insertelement <vscale x 2 x i8> poison, i8 %x, i32 0
%splat.x = shufflevector <vscale x 2 x i8> %head.x, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
@@ -175,9 +154,9 @@ define <vscale x 2 x i8> @nxv2i8(i8 %x, i8 %y) {
define <vscale x 4 x i8> @nxv4i8(i8 %x, i8 %y) {
; CHECK-LABEL: nxv4i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
; CHECK-NEXT: vmv.v.x v8, a0
-; CHECK-NEXT: vadd.vx v8, v8, a1
; CHECK-NEXT: ret
%head.x = insertelement <vscale x 4 x i8> poison, i8 %x, i32 0
%splat.x = shufflevector <vscale x 4 x i8> %head.x, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
@@ -190,9 +169,9 @@ define <vscale x 4 x i8> @nxv4i8(i8 %x, i8 %y) {
define <vscale x 8 x i8> @nxv8i8(i8 %x, i8 %y) {
; CHECK-LABEL: nxv8i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
; CHECK-NEXT: vmv.v.x v8, a0
-; CHECK-NEXT: vadd.vx v8, v8, a1
; CHECK-NEXT: ret
%head.x = insertelement <vscale x 8 x i8> poison, i8 %x, i32 0
%splat.x = shufflevector <vscale x 8 x i8> %head.x, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
@@ -205,9 +184,9 @@ define <vscale x 8 x i8> @nxv8i8(i8 %x, i8 %y) {
define <vscale x 16 x i8> @nxv16i8(i8 %x, i8 %y) {
; CHECK-LABEL: nxv16i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e8, m2, ta, ma
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma
; CHECK-NEXT: vmv.v.x v8, a0
-; CHECK-NEXT: vadd.vx v8, v8, a1
; CHECK-NEXT: ret
%head.x = insertelement <vscale x 16 x i8> poison, i8 %x, i32 0
%splat.x = shufflevector <vscale x 16 x i8> %head.x, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
@@ -220,9 +199,9 @@ define <vscale x 16 x i8> @nxv16i8(i8 %x, i8 %y) {
define <vscale x 32 x i8> @nxv32i8(i8 %x, i8 %y) {
; CHECK-LABEL: nxv32i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e8, m4, ta, ma
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma
; CHECK-NEXT: vmv.v.x v8, a0
-; CHECK-NEXT: vadd.vx v8, v8, a1
; CHECK-NEXT: ret
%head.x = insertelement <vscale x 32 x i8> poison, i8 %x, i32 0
%splat.x = shufflevector <vscale x 32 x i8> %head.x, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer
@@ -235,9 +214,9 @@ define <vscale x 32 x i8> @nxv32i8(i8 %x, i8 %y) {
define <vscale x 64 x i8> @nxv64i8(i8 %x, i8 %y) {
; CHECK-LABEL: nxv64i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e8, m8, ta, ma
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: vsetvli a1, zero, e8, m8, ta, ma
; CHECK-NEXT: vmv.v.x v8, a0
-; CHECK-NEXT: vadd.vx v8, v8, a1
; CHECK-NEXT: ret
%head.x = insertelement <vscale x 64 x i8> poison, i8 %x, i32 0
%splat.x = shufflevector <vscale x 64 x i8> %head.x, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer
@@ -250,9 +229,9 @@ define <vscale x 64 x i8> @nxv64i8(i8 %x, i8 %y) {
define <vscale x 1 x i16> @nxv1i16(i16 %x, i16 %y) {
; CHECK-LABEL: nxv1i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
; CHECK-NEXT: vmv.v.x v8, a0
-; CHECK-NEXT: vadd.vx v8, v8, a1
; CHECK-NEXT: ret
%head.x = insertelement <vscale x 1 x i16> poison, i16 %x, i32 0
%splat.x = shufflevector <vscale x 1 x i16> %head.x, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
@@ -265,9 +244,9 @@ define <vscale x 1 x i16> @nxv1i16(i16 %x, i16 %y) {
define <vscale x 2 x i16> @nxv2i16(i16 %x, i16 %y) {
; CHECK-LABEL: nxv2i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
; CHECK-NEXT: vmv.v.x v8, a0
-; CHECK-NEXT: vadd.vx v8, v8, a1
; CHECK-NEXT: ret
%head.x = insertelement <vscale x 2 x i16> poison, i16 %x, i32 0
%splat.x = shufflevector <vscale x 2 x i16> %head.x, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
@@ -280,9 +259,9 @@ define <vscale x 2 x i16> @nxv2i16(i16 %x, i16 %y) {
define <vscale x 4 x i16> @nxv4i16(i16 %x, i16 %y) {
; CHECK-LABEL: nxv4i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; CHECK-NEXT: vmv.v.x v8, a0
-; CHECK-NEXT: vadd.vx v8, v8, a1
; CHECK-NEXT: ret
%head.x = insertelement <vscale x 4 x i16> poison, i16 %x, i32 0
%splat.x = shufflevector <vscale x 4 x i16> %head.x, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
@@ -295,9 +274,9 @@ define <vscale x 4 x i16> @nxv4i16(i16 %x, i16 %y) {
define <vscale x 8 x i16> @nxv8i16(i16 %x, i16 %y) {
; CHECK-LABEL: nxv8i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
; CHECK-NEXT: vmv.v.x v8, a0
-; CHECK-NEXT: vadd.vx v8, v8, a1
; CHECK-NEXT: ret
%head.x = insertelement <vscale x 8 x i16> poison, i16 %x, i32 0
%splat.x = shufflevector <vscale x 8 x i16> %head.x, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
@@ -310,9 +289,9 @@ define <vscale x 8 x i16> @nxv8i16(i16 %x, i16 %y) {
define <vscale x 16 x i16> @nxv16i16(i16 %x, i16 %y) {
; CHECK-LABEL: nxv16i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
; CHECK-NEXT: vmv.v.x v8, a0
-; CHECK-NEXT: vadd.vx v8, v8, a1
; CHECK-NEXT: ret
%head.x = insertelement <vscale x 16 x i16> poison, i16 %x, i32 0
%splat.x = shufflevector <vscale x 16 x i16> %head.x, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer
@@ -325,9 +304,9 @@ define <vscale x 16 x i16> @nxv16i16(i16 %x, i16 %y) {
define <vscale x 32 x i16> @nxv32i16(i16 %x, i16 %y) {
; CHECK-LABEL: nxv32i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e16, m8, ta, ma
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma
; CHECK-NEXT: vmv.v.x v8, a0
-; CHECK-NEXT: vadd.vx v8, v8, a1
; CHECK-NEXT: ret
%head.x = insertelement <vscale x 32 x i16> poison, i16 %x, i32 0
%splat.x = shufflevector <vscale x 32 x i16> %head.x, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer
@@ -338,19 +317,12 @@ define <vscale x 32 x i16> @nxv32i16(i16 %x, i16 %y) {
}
define <vscale x 1 x i32> @nxv1i32(i32 %x, i32 %y) {
-; RV32-LABEL: nxv1i32:
-; RV32: # %bb.0:
-; RV32-NEXT: add a0, a0, a1
-; RV32-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
-; RV32-NEXT: vmv.v.x v8, a0
-; RV32-NEXT: ret
-;
-; RV64-LABEL: nxv1i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a2, zero, e32, mf2, ta, ma
-; RV64-NEXT: vmv.v.x v8, a0
-; RV64-NEXT: vadd.vx v8, v8, a1
-; RV64-NEXT: ret
+; CHECK-LABEL: nxv1i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: ret
%head.x = insertelement <vscale x 1 x i32> poison, i32 %x, i32 0
%splat.x = shufflevector <vscale x 1 x i32> %head.x, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer
%head.y = insertelement <vscale x 1 x i32> poison, i32 %y, i32 0
@@ -360,19 +332,12 @@ define <vscale x 1 x i32> @nxv1i32(i32 %x, i32 %y) {
}
define <vscale x 2 x i32> @nxv2i32(i32 %x, i32 %y) {
-; RV32-LABEL: nxv2i32:
-; RV32: # %bb.0:
-; RV32-NEXT: add a0, a0, a1
-; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; RV32-NEXT: vmv.v.x v8, a0
-; RV32-NEXT: ret
-;
-; RV64-LABEL: nxv2i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a2, zero, e32, m1, ta, ma
-; RV64-NEXT: vmv.v.x v8, a0
-; RV64-NEXT: vadd.vx v8, v8, a1
-; RV64-NEXT: ret
+; CHECK-LABEL: nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: ret
%head.x = insertelement <vscale x 2 x i32> poison, i32 %x, i32 0
%splat.x = shufflevector <vscale x 2 x i32> %head.x, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
%head.y = insertelement <vscale x 2 x i32> poison, i32 %y, i32 0
@@ -382,19 +347,12 @@ define <vscale x 2 x i32> @nxv2i32(i32 %x, i32 %y) {
}
define <vscale x 4 x i32> @nxv4i32(i32 %x, i32 %y) {
-; RV32-LABEL: nxv4i32:
-; RV32: # %bb.0:
-; RV32-NEXT: add a0, a0, a1
-; RV32-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; RV32-NEXT: vmv.v.x v8, a0
-; RV32-NEXT: ret
-;
-; RV64-LABEL: nxv4i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; RV64-NEXT: vmv.v.x v8, a0
-; RV64-NEXT: vadd.vx v8, v8, a1
-; RV64-NEXT: ret
+; CHECK-LABEL: nxv4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: ret
%head.x = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0
%splat.x = shufflevector <vscale x 4 x i32> %head.x, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
%head.y = insertelement <vscale x 4 x i32> poison, i32 %y, i32 0
@@ -404,19 +362,12 @@ define <vscale x 4 x i32> @nxv4i32(i32 %x, i32 %y) {
}
define <vscale x 8 x i32> @nxv8i32(i32 %x, i32 %y) {
-; RV32-LABEL: nxv8i32:
-; RV32: # %bb.0:
-; RV32-NEXT: add a0, a0, a1
-; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
-; RV32-NEXT: vmv.v.x v8, a0
-; RV32-NEXT: ret
-;
-; RV64-LABEL: nxv8i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a2, zero, e32, m4, ta, ma
-; RV64-NEXT: vmv.v.x v8, a0
-; RV64-NEXT: vadd.vx v8, v8, a1
-; RV64-NEXT: ret
+; CHECK-LABEL: nxv8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: ret
%head.x = insertelement <vscale x 8 x i32> poison, i32 %x, i32 0
%splat.x = shufflevector <vscale x 8 x i32> %head.x, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
%head.y = insertelement <vscale x 8 x i32> poison, i32 %y, i32 0
@@ -426,19 +377,12 @@ define <vscale x 8 x i32> @nxv8i32(i32 %x, i32 %y) {
}
define <vscale x 16 x i32> @nxv16i32(i32 %x, i32 %y) {
-; RV32-LABEL: nxv16i32:
-; RV32: # %bb.0:
-; RV32-NEXT: add a0, a0, a1
-; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma
-; RV32-NEXT: vmv.v.x v8, a0
-; RV32-NEXT: ret
-;
-; RV64-LABEL: nxv16i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a2, zero, e32, m8, ta, ma
-; RV64-NEXT: vmv.v.x v8, a0
-; RV64-NEXT: vadd.vx v8, v8, a1
-; RV64-NEXT: ret
+; CHECK-LABEL: nxv16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: ret
%head.x = insertelement <vscale x 16 x i32> poison, i32 %x, i32 0
%splat.x = shufflevector <vscale x 16 x i32> %head.x, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
%head.y = insertelement <vscale x 16 x i32> poison, i32 %y, i32 0
@@ -452,16 +396,15 @@ define <vscale x 1 x i64> @nxv1i64(i64 %x, i64 %y) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: add a2, a0, a2
+; RV32-NEXT: sw a2, 8(sp)
+; RV32-NEXT: add a1, a1, a3
+; RV32-NEXT: sltu a0, a2, a0
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: sw a0, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v8, (a0), zero
-; RV32-NEXT: sw a3, 4(sp)
-; RV32-NEXT: sw a2, 0(sp)
-; RV32-NEXT: mv a0, sp
-; RV32-NEXT: vlse64.v v9, (a0), zero
-; RV32-NEXT: vadd.vv v8, v8, v9
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
@@ -484,16 +427,15 @@ define <vscale x 2 x i64> @nxv2i64(i64 %x, i64 %y) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: add a2, a0, a2
+; RV32-NEXT: sw a2, 8(sp)
+; RV32-NEXT: add a1, a1, a3
+; RV32-NEXT: sltu a0, a2, a0
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: sw a0, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v8, (a0), zero
-; RV32-NEXT: sw a3, 4(sp)
-; RV32-NEXT: sw a2, 0(sp)
-; RV32-NEXT: mv a0, sp
-; RV32-NEXT: vlse64.v v10, (a0), zero
-; RV32-NEXT: vadd.vv v8, v8, v10
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
@@ -516,16 +458,15 @@ define <vscale x 4 x i64> @nxv4i64(i64 %x, i64 %y) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: add a2, a0, a2
+; RV32-NEXT: sw a2, 8(sp)
+; RV32-NEXT: add a1, a1, a3
+; RV32-NEXT: sltu a0, a2, a0
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: sw a0, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v8, (a0), zero
-; RV32-NEXT: sw a3, 4(sp)
-; RV32-NEXT: sw a2, 0(sp)
-; RV32-NEXT: mv a0, sp
-; RV32-NEXT: vlse64.v v12, (a0), zero
-; RV32-NEXT: vadd.vv v8, v8, v12
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
@@ -548,16 +489,15 @@ define <vscale x 8 x i64> @nxv8i64(i64 %x, i64 %y) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: add a2, a0, a2
+; RV32-NEXT: sw a2, 8(sp)
+; RV32-NEXT: add a1, a1, a3
+; RV32-NEXT: sltu a0, a2, a0
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: sw a0, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v8, (a0), zero
-; RV32-NEXT: sw a3, 4(sp)
-; RV32-NEXT: sw a2, 0(sp)
-; RV32-NEXT: mv a0, sp
-; RV32-NEXT: vlse64.v v16, (a0), zero
-; RV32-NEXT: vadd.vv v8, v8, v16
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
@@ -619,3 +559,50 @@ define <vscale x 1 x double> @nxv2f64(double %x, double %y) {
%v = fadd <vscale x 1 x double> %splat.x, %splat.y
ret <vscale x 1 x double> %v
}
+
+define <vscale x 4 x i8> @uaddsatnxv4i8(i8 %x, i8 %y) {
+; CHECK-LABEL: uaddsatnxv4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: vsaddu.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %head.x = insertelement <vscale x 4 x i8> poison, i8 %x, i32 0
+ %splat.x = shufflevector <vscale x 4 x i8> %head.x, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
+ %head.y = insertelement <vscale x 4 x i8> poison, i8 %y, i32 0
+ %splat.y = shufflevector <vscale x 4 x i8> %head.y, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i8> @llvm.uadd.sat.nxv4i8(<vscale x 4 x i8> %splat.x, <vscale x 4 x i8> %splat.y)
+ ret <vscale x 4 x i8> %v
+}
+
+define <vscale x 1 x i64> @uaddsatnxv1i64(i64 %x, i64 %y) {
+; RV32-LABEL: uaddsatnxv1i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
+; RV32-NEXT: vlse64.v v8, (a0), zero
+; RV32-NEXT: sw a3, 4(sp)
+; RV32-NEXT: sw a2, 0(sp)
+; RV32-NEXT: mv a0, sp
+; RV32-NEXT: vlse64.v v9, (a0), zero
+; RV32-NEXT: vsaddu.vv v8, v8, v9
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: uaddsatnxv1i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a2, zero, e64, m1, ta, ma
+; RV64-NEXT: vmv.v.x v8, a0
+; RV64-NEXT: vsaddu.vx v8, v8, a1
+; RV64-NEXT: ret
+ %head.x = insertelement <vscale x 1 x i64> poison, i64 %x, i32 0
+ %splat.x = shufflevector <vscale x 1 x i64> %head.x, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+ %head.y = insertelement <vscale x 1 x i64> poison, i64 %y, i32 0
+ %splat.y = shufflevector <vscale x 1 x i64> %head.y, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i64> @llvm.uadd.sat.nxv1i64(<vscale x 1 x i64> %splat.x, <vscale x 1 x i64> %splat.y)
+ ret <vscale x 1 x i64> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-binop-splats.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-binop-splats.ll
index ee8c322961c7bd..8f40b02423094a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-binop-splats.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-binop-splats.ll
@@ -461,16 +461,15 @@ define <1 x i64> @v1i64(i64 %x, i64 %y) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: add a2, a0, a2
+; RV32-NEXT: sw a2, 8(sp)
+; RV32-NEXT: add a1, a1, a3
+; RV32-NEXT: sltu a0, a2, a0
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: sw a0, 12(sp)
; RV32-NEXT: addi a0, sp, 8
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v8, (a0), zero
-; RV32-NEXT: sw a3, 4(sp)
-; RV32-NEXT: sw a2, 0(sp)
-; RV32-NEXT: mv a0, sp
-; RV32-NEXT: vlse64.v v9, (a0), zero
-; RV32-NEXT: vadd.vv v8, v8, v9
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
@@ -493,17 +492,15 @@ define <2 x i64> @v2i64(i64 %x, i64 %y) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: add a2, a0, a2
+; RV32-NEXT: sw a2, 8(sp)
+; RV32-NEXT: add a1, a1, a3
+; RV32-NEXT: sltu a0, a2, a0
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: sw a0, 12(sp)
; RV32-NEXT: addi a0, sp, 8
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v8, (a0), zero
-; RV32-NEXT: sw a3, 4(sp)
-; RV32-NEXT: sw a2, 0(sp)
-; RV32-NEXT: mv a0, sp
-; RV32-NEXT: vlse64.v v9, (a0), zero
-; RV32-NEXT: vadd.vv v9, v8, v9
-; RV32-NEXT: vrgather.vi v8, v9, 0
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
@@ -527,17 +524,15 @@ define <4 x i64> @v4i64(i64 %x, i64 %y) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: add a2, a0, a2
+; RV32-NEXT: sw a2, 8(sp)
+; RV32-NEXT: add a1, a1, a3
+; RV32-NEXT: sltu a0, a2, a0
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: sw a0, 12(sp)
; RV32-NEXT: addi a0, sp, 8
-; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v8, (a0), zero
-; RV32-NEXT: sw a3, 4(sp)
-; RV32-NEXT: sw a2, 0(sp)
-; RV32-NEXT: mv a0, sp
-; RV32-NEXT: vlse64.v v10, (a0), zero
-; RV32-NEXT: vadd.vv v10, v8, v10
-; RV32-NEXT: vrgather.vi v8, v10, 0
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
@@ -561,17 +556,15 @@ define <8 x i64> @v8i64(i64 %x, i64 %y) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: add a2, a0, a2
+; RV32-NEXT: sw a2, 8(sp)
+; RV32-NEXT: add a1, a1, a3
+; RV32-NEXT: sltu a0, a2, a0
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: sw a0, 12(sp)
; RV32-NEXT: addi a0, sp, 8
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v8, (a0), zero
-; RV32-NEXT: sw a3, 4(sp)
-; RV32-NEXT: sw a2, 0(sp)
-; RV32-NEXT: mv a0, sp
-; RV32-NEXT: vlse64.v v12, (a0), zero
-; RV32-NEXT: vadd.vv v12, v8, v12
-; RV32-NEXT: vrgather.vi v8, v12, 0
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
diff --git a/llvm/test/CodeGen/RISCV/rvv/vadd-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vadd-sdnode.ll
index 27fceb0112ae32..2b141097366cfb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vadd-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vadd-sdnode.ll
@@ -773,16 +773,15 @@ define <vscale x 8 x i64> @vadd_xx_nxv8i64(i64 %a, i64 %b) nounwind {
; RV32-LABEL: vadd_xx_nxv8i64:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: add a2, a0, a2
+; RV32-NEXT: sw a2, 8(sp)
+; RV32-NEXT: add a1, a1, a3
+; RV32-NEXT: sltu a0, a2, a0
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: sw a0, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v8, (a0), zero
-; RV32-NEXT: sw a3, 4(sp)
-; RV32-NEXT: sw a2, 0(sp)
-; RV32-NEXT: mv a0, sp
-; RV32-NEXT: vlse64.v v16, (a0), zero
-; RV32-NEXT: vadd.vv v8, v8, v16
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
diff --git a/llvm/test/CodeGen/RISCV/rvv/vand-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vand-sdnode.ll
index 40d0d9aa9d1d6b..a84e2c984f669c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vand-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vand-sdnode.ll
@@ -1224,16 +1224,13 @@ define <vscale x 8 x i64> @vand_xx_nxv8i64(i64 %a, i64 %b) nounwind {
; RV32-LABEL: vand_xx_nxv8i64:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: and a1, a1, a3
; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: and a0, a0, a2
; RV32-NEXT: sw a0, 8(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v8, (a0), zero
-; RV32-NEXT: sw a3, 4(sp)
-; RV32-NEXT: sw a2, 0(sp)
-; RV32-NEXT: mv a0, sp
-; RV32-NEXT: vlse64.v v16, (a0), zero
-; RV32-NEXT: vand.vv v8, v8, v16
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmul-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vmul-sdnode.ll
index 1a6d5a1d0029da..f45694a88a8b00 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmul-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmul-sdnode.ll
@@ -864,22 +864,6 @@ define <vscale x 8 x i64> @vmul_vi_nxv8i64_2(<vscale x 8 x i64> %va) {
}
define <vscale x 8 x i64> @vmul_xx_nxv8i64(i64 %a, i64 %b) nounwind {
-; RV32-LABEL: vmul_xx_nxv8i64:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a0, 8(sp)
-; RV32-NEXT: addi a0, sp, 8
-; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v8, (a0), zero
-; RV32-NEXT: sw a3, 4(sp)
-; RV32-NEXT: sw a2, 0(sp)
-; RV32-NEXT: mv a0, sp
-; RV32-NEXT: vlse64.v v16, (a0), zero
-; RV32-NEXT: vmul.vv v8, v8, v16
-; RV32-NEXT: addi sp, sp, 16
-; RV32-NEXT: ret
-;
; RV64NOM-LABEL: vmul_xx_nxv8i64:
; RV64NOM: # %bb.0:
; RV64NOM-NEXT: vsetvli a2, zero, e64, m8, ta, ma
diff --git a/llvm/test/CodeGen/RISCV/rvv/vor-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vor-sdnode.ll
index fbbd71cb35445f..dcfe07c1fba658 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vor-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vor-sdnode.ll
@@ -1015,16 +1015,13 @@ define <vscale x 8 x i64> @vor_xx_nxv8i64(i64 %a, i64 %b) nounwind {
; RV32-LABEL: vor_xx_nxv8i64:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: or a1, a1, a3
; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: or a0, a0, a2
; RV32-NEXT: sw a0, 8(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v8, (a0), zero
-; RV32-NEXT: sw a3, 4(sp)
-; RV32-NEXT: sw a2, 0(sp)
-; RV32-NEXT: mv a0, sp
-; RV32-NEXT: vlse64.v v16, (a0), zero
-; RV32-NEXT: vor.vv v8, v8, v16
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsub-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vsub-sdnode.ll
index b7f404c8e5ac92..c2173c9a291fcf 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsub-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsub-sdnode.ll
@@ -795,16 +795,15 @@ define <vscale x 8 x i64> @vsub_xx_nxv8i64(i64 %a, i64 %b) nounwind {
; RV32-LABEL: vsub_xx_nxv8i64:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: sub a4, a0, a2
+; RV32-NEXT: sw a4, 8(sp)
+; RV32-NEXT: sltu a0, a0, a2
+; RV32-NEXT: sub a1, a1, a3
+; RV32-NEXT: sub a1, a1, a0
; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a0, 8(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v8, (a0), zero
-; RV32-NEXT: sw a3, 4(sp)
-; RV32-NEXT: sw a2, 0(sp)
-; RV32-NEXT: mv a0, sp
-; RV32-NEXT: vlse64.v v16, (a0), zero
-; RV32-NEXT: vsub.vv v8, v8, v16
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
diff --git a/llvm/test/CodeGen/RISCV/rvv/vxor-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vxor-sdnode.ll
index 3f10b10675ca70..b03a105610dfdf 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vxor-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vxor-sdnode.ll
@@ -1224,16 +1224,13 @@ define <vscale x 8 x i64> @vxor_xx_nxv8i64(i64 %a, i64 %b) nounwind {
; RV32-LABEL: vxor_xx_nxv8i64:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: xor a1, a1, a3
; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: xor a0, a0, a2
; RV32-NEXT: sw a0, 8(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v8, (a0), zero
-; RV32-NEXT: sw a3, 4(sp)
-; RV32-NEXT: sw a2, 0(sp)
-; RV32-NEXT: mv a0, sp
-; RV32-NEXT: vlse64.v v16, (a0), zero
-; RV32-NEXT: vxor.vv v8, v8, v16
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
diff --git a/llvm/test/CodeGen/WebAssembly/simd-shift-complex-splats.ll b/llvm/test/CodeGen/WebAssembly/simd-shift-complex-splats.ll
index 2b08f1c23b59ad..8b30473983d8c8 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-shift-complex-splats.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-shift-complex-splats.ll
@@ -5,16 +5,11 @@
target triple = "wasm32-unknown-unknown"
-;; TODO: Optimize this further by scalarizing the add
-
; CHECK-LABEL: shl_add:
; CHECK-NEXT: .functype shl_add (v128, i32, i32) -> (v128)
-; CHECK-NEXT: i8x16.splat $push1=, $1
-; CHECK-NEXT: i8x16.splat $push0=, $2
-; CHECK-NEXT: i8x16.add $push2=, $pop1, $pop0
-; CHECK-NEXT: i8x16.extract_lane_u $push3=, $pop2, 0
-; CHECK-NEXT: i8x16.shl $push4=, $0, $pop3
-; CHECK-NEXT: return $pop4
+; CHECK-NEXT: i32.add $push0=, $1, $2
+; CHECK-NEXT: i8x16.shl $push1=, $0, $pop0
+; CHECK-NEXT: return $pop1
define <16 x i8> @shl_add(<16 x i8> %v, i8 %a, i8 %b) {
%t1 = insertelement <16 x i8> undef, i8 %a, i32 0
%va = shufflevector <16 x i8> %t1, <16 x i8> undef, <16 x i32> zeroinitializer
@@ -44,13 +39,13 @@ define <16 x i8> @shl_abs(<16 x i8> %v, i8 %a) {
; CHECK-LABEL: shl_abs_add:
; CHECK-NEXT: .functype shl_abs_add (v128, i32, i32) -> (v128)
-; CHECK-NEXT: i8x16.splat $push1=, $1
-; CHECK-NEXT: i8x16.splat $push0=, $2
-; CHECK-NEXT: i8x16.add $push2=, $pop1, $pop0
-; CHECK-NEXT: i8x16.abs $push3=, $pop2
-; CHECK-NEXT: i8x16.extract_lane_u $push4=, $pop3, 0
-; CHECK-NEXT: i8x16.shl $push5=, $0, $pop4
-; CHECK-NEXT: return $pop5
+; CHECK-NEXT: i32.add $push0=, $1, $2
+; CHECK-NEXT: i8x16.splat $push1=, $pop0
+; CHECK-NEXT: i8x16.abs $push2=, $pop1
+; CHECK-NEXT: i8x16.extract_lane_u $push3=, $pop2, 0
+; CHECK-NEXT: i8x16.shl $push4=, $0, $pop3
+; CHECK-NEXT: return $pop4
+
define <16 x i8> @shl_abs_add(<16 x i8> %v, i8 %a, i8 %b) {
%t1 = insertelement <16 x i8> undef, i8 %a, i32 0
%va = shufflevector <16 x i8> %t1, <16 x i8> undef, <16 x i32> zeroinitializer
>From 866514067a3d73d6333b5a564a15530c1fc3c5ed Mon Sep 17 00:00:00 2001
From: Fros1er <34234343+Fros1er at users.noreply.github.com>
Date: Mon, 29 Jul 2024 16:24:36 +0800
Subject: [PATCH 2/4] optimize check and fix conflict in test
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 2 +-
llvm/test/CodeGen/RISCV/rvv/vmul-sdnode.ll | 35 +++++++++++++++++--
2 files changed, 34 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 972be0946a7080..9f1839c6b17d8e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -27004,7 +27004,7 @@ static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG,
Src0.getValueType().getVectorElementType() != EltVT ||
Src1.getValueType().getVectorElementType() != EltVT ||
!(IsBothSplatVector || TLI.isExtractVecEltCheap(VT, Index0)) ||
- (LegalTypes && !TLI.isOperationLegalOrCustom(Opcode, EltVT)) ||
+ (LegalTypes && !TLI.isTypeLegal(EltVT)) ||
!(EltAction == TargetLoweringBase::Legal ||
EltAction == TargetLoweringBase::Custom))
return SDValue();
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmul-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vmul-sdnode.ll
index f45694a88a8b00..ac97b1ae5bddc4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmul-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmul-sdnode.ll
@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32NOM
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64NOM
-; RUN: llc -mtriple=riscv32 -mattr=+v,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv32 -mattr=+v,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32M
; RUN: llc -mtriple=riscv64 -mattr=+v,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64M
define <vscale x 1 x i8> @vmul_vv_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb) {
@@ -864,6 +864,20 @@ define <vscale x 8 x i64> @vmul_vi_nxv8i64_2(<vscale x 8 x i64> %va) {
}
define <vscale x 8 x i64> @vmul_xx_nxv8i64(i64 %a, i64 %b) nounwind {
+; RV32NOM-LABEL: vmul_xx_nxv8i64:
+; RV32NOM: # %bb.0:
+; RV32NOM-NEXT: addi sp, sp, -16
+; RV32NOM-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32NOM-NEXT: call __muldi3
+; RV32NOM-NEXT: sw a1, 4(sp)
+; RV32NOM-NEXT: sw a0, 0(sp)
+; RV32NOM-NEXT: mv a0, sp
+; RV32NOM-NEXT: vsetvli a1, zero, e64, m8, ta, ma
+; RV32NOM-NEXT: vlse64.v v8, (a0), zero
+; RV32NOM-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32NOM-NEXT: addi sp, sp, 16
+; RV32NOM-NEXT: ret
+;
; RV64NOM-LABEL: vmul_xx_nxv8i64:
; RV64NOM: # %bb.0:
; RV64NOM-NEXT: vsetvli a2, zero, e64, m8, ta, ma
@@ -871,6 +885,23 @@ define <vscale x 8 x i64> @vmul_xx_nxv8i64(i64 %a, i64 %b) nounwind {
; RV64NOM-NEXT: vmul.vx v8, v8, a1
; RV64NOM-NEXT: ret
;
+; RV32M-LABEL: vmul_xx_nxv8i64:
+; RV32M: # %bb.0:
+; RV32M-NEXT: addi sp, sp, -16
+; RV32M-NEXT: mul a4, a0, a2
+; RV32M-NEXT: sw a4, 8(sp)
+; RV32M-NEXT: mul a3, a0, a3
+; RV32M-NEXT: mulhu a0, a0, a2
+; RV32M-NEXT: add a0, a0, a3
+; RV32M-NEXT: mul a1, a1, a2
+; RV32M-NEXT: add a0, a0, a1
+; RV32M-NEXT: sw a0, 12(sp)
+; RV32M-NEXT: addi a0, sp, 8
+; RV32M-NEXT: vsetvli a1, zero, e64, m8, ta, ma
+; RV32M-NEXT: vlse64.v v8, (a0), zero
+; RV32M-NEXT: addi sp, sp, 16
+; RV32M-NEXT: ret
+;
; RV64M-LABEL: vmul_xx_nxv8i64:
; RV64M: # %bb.0:
; RV64M-NEXT: mul a0, a0, a1
>From 526ffe7d664d5965c9626b6530ae3797fe99d251 Mon Sep 17 00:00:00 2001
From: Fros1er <34234343+Fros1er at users.noreply.github.com>
Date: Sat, 10 Aug 2024 12:33:02 +0800
Subject: [PATCH 3/4] Check if scalar types will be made legal
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 10 ++++++----
llvm/test/CodeGen/RISCV/rvv/vmul-sdnode.ll | 14 ++++++++------
2 files changed, 14 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 9f1839c6b17d8e..4ff8622d63f274 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -26999,14 +26999,16 @@ static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG,
// can do this only before LegalTypes, because it may generate illegal `op
// EltVT` from legal `op VT (splat EltVT)`, where EltVT is not legal type but
// the result type of splat is legal.
- auto EltAction = TLI.getOperationAction(Opcode, EltVT);
if (!Src0 || !Src1 || Index0 != Index1 ||
Src0.getValueType().getVectorElementType() != EltVT ||
Src1.getValueType().getVectorElementType() != EltVT ||
!(IsBothSplatVector || TLI.isExtractVecEltCheap(VT, Index0)) ||
- (LegalTypes && !TLI.isTypeLegal(EltVT)) ||
- !(EltAction == TargetLoweringBase::Legal ||
- EltAction == TargetLoweringBase::Custom))
+ // If before type legalization, allow scalar types that will eventually be
+ // made legal.
+ !TLI.isOperationLegalOrCustom(
+ Opcode, LegalTypes
+ ? EltVT
+ : TLI.getTypeToTransformTo(*DAG.getContext(), EltVT)))
return SDValue();
SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmul-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vmul-sdnode.ll
index ac97b1ae5bddc4..0b8620c90c62e0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmul-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmul-sdnode.ll
@@ -867,14 +867,16 @@ define <vscale x 8 x i64> @vmul_xx_nxv8i64(i64 %a, i64 %b) nounwind {
; RV32NOM-LABEL: vmul_xx_nxv8i64:
; RV32NOM: # %bb.0:
; RV32NOM-NEXT: addi sp, sp, -16
-; RV32NOM-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32NOM-NEXT: call __muldi3
-; RV32NOM-NEXT: sw a1, 4(sp)
-; RV32NOM-NEXT: sw a0, 0(sp)
-; RV32NOM-NEXT: mv a0, sp
+; RV32NOM-NEXT: sw a1, 12(sp)
+; RV32NOM-NEXT: sw a0, 8(sp)
+; RV32NOM-NEXT: addi a0, sp, 8
; RV32NOM-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32NOM-NEXT: vlse64.v v8, (a0), zero
-; RV32NOM-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32NOM-NEXT: sw a3, 4(sp)
+; RV32NOM-NEXT: sw a2, 0(sp)
+; RV32NOM-NEXT: mv a0, sp
+; RV32NOM-NEXT: vlse64.v v16, (a0), zero
+; RV32NOM-NEXT: vmul.vv v8, v8, v16
; RV32NOM-NEXT: addi sp, sp, 16
; RV32NOM-NEXT: ret
;
>From a299886fb23d971180b180b3382e4e43e4ea78c3 Mon Sep 17 00:00:00 2001
From: Fros1er <34234343+Fros1er at users.noreply.github.com>
Date: Wed, 14 Aug 2024 21:47:20 +0800
Subject: [PATCH 4/4] Remove unnecessary comments
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 5 -----
1 file changed, 5 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 4ff8622d63f274..6ffa322b7fe4d5 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -26994,11 +26994,6 @@ static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG,
bool IsBothSplatVector = N0.getOpcode() == ISD::SPLAT_VECTOR &&
N1.getOpcode() == ISD::SPLAT_VECTOR;
- // If binop is legal or custom on EltVT, scalarize should be profitable. The
- // check is the same as isOperationLegalOrCustom without isTypeLegal. We
- // can do this only before LegalTypes, because it may generate illegal `op
- // EltVT` from legal `op VT (splat EltVT)`, where EltVT is not legal type but
- // the result type of splat is legal.
if (!Src0 || !Src1 || Index0 != Index1 ||
Src0.getValueType().getVectorElementType() != EltVT ||
Src1.getValueType().getVectorElementType() != EltVT ||
More information about the llvm-commits
mailing list