[llvm] [RISCV][Isel] Use vaaddu with rounding mode rnu for ISD::AVGCEILU. (PR #77473)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 9 05:50:01 PST 2024
https://github.com/sun-jacobi created https://github.com/llvm/llvm-project/pull/77473
Similar to #76550, but for `ISD::AVGCEILU`.
Specifically, this patch uses `vaaddu` with the round-to-nearest-up rounding mode, rnu (i.e., vxrm[1:0] = 0b00), to lower `ISD::AVGCEILU`.
### Source code
```
define <vscale x 8 x i8> @vaaddu_vv_nxv8i8_ceil(<vscale x 8 x i8> %x, <vscale x 8 x i8> %y) {
%xzv = zext <vscale x 8 x i8> %x to <vscale x 8 x i16>
%yzv = zext <vscale x 8 x i8> %y to <vscale x 8 x i16>
%add = add nuw nsw <vscale x 8 x i16> %xzv, %yzv
%one = insertelement <vscale x 8 x i16> poison, i16 1, i32 0
%splat = shufflevector <vscale x 8 x i16> %one, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
%add1 = add nuw nsw <vscale x 8 x i16> %add, %splat
%div = lshr <vscale x 8 x i16> %add1, %splat
%ret = trunc <vscale x 8 x i16> %div to <vscale x 8 x i8>
ret <vscale x 8 x i8> %ret
}
```
### Before this patch
```
vaaddu_vv_nxv8i8_ceil:
vsetvli a0, zero, e8, m1, ta, ma
vwaddu.vv v10, v8, v9
vsetvli zero, zero, e16, m2, ta, ma
vadd.vi v10, v10, 1
vsetvli zero, zero, e8, m1, ta, ma
vnsrl.wi v8, v10, 1
ret
```
### After this patch
```
vaaddu_vv_nxv8i8_ceil:
vsetvli a0, zero, e8, m1, ta, ma
csrwi vxrm, 0
vaaddu.vv v8, v8, v9
ret
```
>From 3c24ae1f62bde1602f88ba56211bf719eb626ff3 Mon Sep 17 00:00:00 2001
From: sun-jacobi <sun1011jacobi at gmail.com>
Date: Tue, 9 Jan 2024 22:41:20 +0900
Subject: [PATCH] [RISCV][Isel] Use vaaddu with rounding mode rnu for
ISD::AVGCEILU
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 15 +-
llvm/lib/Target/RISCV/RISCVISelLowering.h | 2 +
.../Target/RISCV/RISCVInstrInfoVSDPatterns.td | 33 +-
.../Target/RISCV/RISCVInstrInfoVVLPatterns.td | 38 ++-
.../CodeGen/RISCV/rvv/fixed-vectors-vaaddu.ll | 305 ++++++++++++++++--
llvm/test/CodeGen/RISCV/rvv/vaaddu-sdnode.ll | 295 +++++++++++++++--
6 files changed, 604 insertions(+), 84 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index a5b33e8e293a17..1a5886f6d00f2a 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -814,8 +814,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT,
Custom);
setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom);
- setOperationAction({ISD::AVGFLOORU, ISD::SADDSAT, ISD::UADDSAT,
- ISD::SSUBSAT, ISD::USUBSAT},
+ setOperationAction({ISD::AVGFLOORU, ISD::AVGCEILU, ISD::SADDSAT,
+ ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT},
VT, Legal);
// Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
@@ -1185,8 +1185,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Custom);
- setOperationAction({ISD::AVGFLOORU, ISD::SADDSAT, ISD::UADDSAT,
- ISD::SSUBSAT, ISD::USUBSAT},
+ setOperationAction({ISD::AVGFLOORU, ISD::AVGCEILU, ISD::SADDSAT,
+ ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT},
VT, Custom);
setOperationAction(ISD::VSELECT, VT, Custom);
@@ -5467,6 +5467,7 @@ static unsigned getRISCVVLOp(SDValue Op) {
OP_CASE(SSUBSAT)
OP_CASE(USUBSAT)
OP_CASE(AVGFLOORU)
+ OP_CASE(AVGCEILU)
OP_CASE(FADD)
OP_CASE(FSUB)
OP_CASE(FMUL)
@@ -5571,7 +5572,7 @@ static bool hasMergeOp(unsigned Opcode) {
Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE &&
"not a RISC-V target specific op");
static_assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP ==
- 125 &&
+ 126 &&
RISCVISD::LAST_RISCV_STRICTFP_OPCODE -
ISD::FIRST_TARGET_STRICTFP_OPCODE ==
21 &&
@@ -5597,7 +5598,7 @@ static bool hasMaskOp(unsigned Opcode) {
Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE &&
"not a RISC-V target specific op");
static_assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP ==
- 125 &&
+ 126 &&
RISCVISD::LAST_RISCV_STRICTFP_OPCODE -
ISD::FIRST_TARGET_STRICTFP_OPCODE ==
21 &&
@@ -6462,6 +6463,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return SplitVectorOp(Op, DAG);
[[fallthrough]];
case ISD::AVGFLOORU:
+ case ISD::AVGCEILU:
case ISD::SADDSAT:
case ISD::UADDSAT:
case ISD::SSUBSAT:
@@ -18599,6 +18601,7 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(UREM_VL)
NODE_NAME_CASE(XOR_VL)
NODE_NAME_CASE(AVGFLOORU_VL)
+ NODE_NAME_CASE(AVGCEILU_VL)
NODE_NAME_CASE(SADDSAT_VL)
NODE_NAME_CASE(UADDSAT_VL)
NODE_NAME_CASE(SSUBSAT_VL)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 5d51fe168b04de..0d14e5b757bdd1 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -255,6 +255,8 @@ enum NodeType : unsigned {
// Averaging adds of unsigned integers.
AVGFLOORU_VL,
+ // Rounding averaging adds of unsigned integers.
+ AVGCEILU_VL,
MULHS_VL,
MULHU_VL,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
index 4f87c36506e520..8ebd8b89c11929 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
@@ -877,6 +877,23 @@ multiclass VPatMultiplyAddSDNode_VV_VX<SDNode op, string instruction_name> {
}
}
+multiclass VPatAVGADD_VV_VX_RM<SDNode vop, int vxrm> {
+ foreach vti = AllIntegerVectors in {
+ let Predicates = GetVTypePredicates<vti>.Predicates in {
+ def : Pat<(vop (vti.Vector vti.RegClass:$rs1),
+ (vti.Vector vti.RegClass:$rs2)),
+ (!cast<Instruction>("PseudoVAADDU_VV_"#vti.LMul.MX)
+ (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, vti.RegClass:$rs2,
+ vxrm, vti.AVL, vti.Log2SEW, TA_MA)>;
+ def : Pat<(vop (vti.Vector vti.RegClass:$rs1),
+ (vti.Vector (SplatPat (XLenVT GPR:$rs2)))),
+ (!cast<Instruction>("PseudoVAADDU_VX_"#vti.LMul.MX)
+ (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, GPR:$rs2,
+ vxrm, vti.AVL, vti.Log2SEW, TA_MA)>;
+ }
+ }
+}
+
//===----------------------------------------------------------------------===//
// Patterns.
//===----------------------------------------------------------------------===//
@@ -1132,20 +1149,8 @@ defm : VPatBinarySDNode_VV_VX<ssubsat, "PseudoVSSUB">;
defm : VPatBinarySDNode_VV_VX<usubsat, "PseudoVSSUBU">;
// 12.2. Vector Single-Width Averaging Add and Subtract
-foreach vti = AllIntegerVectors in {
- let Predicates = GetVTypePredicates<vti>.Predicates in {
- def : Pat<(avgflooru (vti.Vector vti.RegClass:$rs1),
- (vti.Vector vti.RegClass:$rs2)),
- (!cast<Instruction>("PseudoVAADDU_VV_"#vti.LMul.MX)
- (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, vti.RegClass:$rs2,
- 0b10, vti.AVL, vti.Log2SEW, TA_MA)>;
- def : Pat<(avgflooru (vti.Vector vti.RegClass:$rs1),
- (vti.Vector (SplatPat (XLenVT GPR:$rs2)))),
- (!cast<Instruction>("PseudoVAADDU_VX_"#vti.LMul.MX)
- (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, GPR:$rs2,
- 0b10, vti.AVL, vti.Log2SEW, TA_MA)>;
- }
-}
+defm : VPatAVGADD_VV_VX_RM<avgflooru, 0b10>;
+defm : VPatAVGADD_VV_VX_RM<avgceilu, 0b00>;
// 15. Vector Mask Instructions
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index d60ff4b5fab018..1deb9a709463e8 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -112,6 +112,7 @@ def riscv_cttz_vl : SDNode<"RISCVISD::CTTZ_VL", SDT_RISCVIntUnOp_VL>
def riscv_ctpop_vl : SDNode<"RISCVISD::CTPOP_VL", SDT_RISCVIntUnOp_VL>;
def riscv_avgflooru_vl : SDNode<"RISCVISD::AVGFLOORU_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>;
+def riscv_avgceilu_vl : SDNode<"RISCVISD::AVGCEILU_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>;
def riscv_saddsat_vl : SDNode<"RISCVISD::SADDSAT_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>;
def riscv_uaddsat_vl : SDNode<"RISCVISD::UADDSAT_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>;
def riscv_ssubsat_vl : SDNode<"RISCVISD::SSUBSAT_VL", SDT_RISCVIntBinOp_VL>;
@@ -2031,6 +2032,25 @@ multiclass VPatSlide1VL_VF<SDNode vop, string instruction_name> {
}
}
+multiclass VPatAVGADDVL_VV_VX_RM<SDNode vop, int vxrm> {
+ foreach vti = AllIntegerVectors in {
+ let Predicates = GetVTypePredicates<vti>.Predicates in {
+ def : Pat<(vop (vti.Vector vti.RegClass:$rs1),
+ (vti.Vector vti.RegClass:$rs2),
+ vti.RegClass:$merge, (vti.Mask V0), VLOpFrag),
+ (!cast<Instruction>("PseudoVAADDU_VV_"#vti.LMul.MX#"_MASK")
+ vti.RegClass:$merge, vti.RegClass:$rs1, vti.RegClass:$rs2,
+ (vti.Mask V0), vxrm, GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ def : Pat<(vop (vti.Vector vti.RegClass:$rs1),
+ (vti.Vector (SplatPat (XLenVT GPR:$rs2))),
+ vti.RegClass:$merge, (vti.Mask V0), VLOpFrag),
+ (!cast<Instruction>("PseudoVAADDU_VX_"#vti.LMul.MX#"_MASK")
+ vti.RegClass:$merge, vti.RegClass:$rs1, GPR:$rs2,
+ (vti.Mask V0), vxrm, GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ }
+ }
+}
+
//===----------------------------------------------------------------------===//
// Patterns.
//===----------------------------------------------------------------------===//
@@ -2308,22 +2328,8 @@ defm : VPatBinaryVL_VV_VX<riscv_ssubsat_vl, "PseudoVSSUB">;
defm : VPatBinaryVL_VV_VX<riscv_usubsat_vl, "PseudoVSSUBU">;
// 12.2. Vector Single-Width Averaging Add and Subtract
-foreach vti = AllIntegerVectors in {
- let Predicates = GetVTypePredicates<vti>.Predicates in {
- def : Pat<(riscv_avgflooru_vl (vti.Vector vti.RegClass:$rs1),
- (vti.Vector vti.RegClass:$rs2),
- vti.RegClass:$merge, (vti.Mask V0), VLOpFrag),
- (!cast<Instruction>("PseudoVAADDU_VV_"#vti.LMul.MX#"_MASK")
- vti.RegClass:$merge, vti.RegClass:$rs1, vti.RegClass:$rs2,
- (vti.Mask V0), 0b10, GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
- def : Pat<(riscv_avgflooru_vl (vti.Vector vti.RegClass:$rs1),
- (vti.Vector (SplatPat (XLenVT GPR:$rs2))),
- vti.RegClass:$merge, (vti.Mask V0), VLOpFrag),
- (!cast<Instruction>("PseudoVAADDU_VX_"#vti.LMul.MX#"_MASK")
- vti.RegClass:$merge, vti.RegClass:$rs1, GPR:$rs2,
- (vti.Mask V0), 0b10, GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
- }
-}
+defm : VPatAVGADDVL_VV_VX_RM<riscv_avgflooru_vl, 0b10>;
+defm : VPatAVGADDVL_VV_VX_RM<riscv_avgceilu_vl, 0b00>;
// 12.5. Vector Narrowing Fixed-Point Clip Instructions
class VPatTruncSatClipMaxMinBase<string inst,
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vaaddu.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vaaddu.ll
index f6bdeda946c40a..954edf872aff8d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vaaddu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vaaddu.ll
@@ -2,8 +2,8 @@
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
-define <8 x i8> @vaaddu_vv_v8i8(<8 x i8> %x, <8 x i8> %y) {
-; CHECK-LABEL: vaaddu_vv_v8i8:
+define <8 x i8> @vaaddu_vv_v8i8_floor(<8 x i8> %x, <8 x i8> %y) {
+; CHECK-LABEL: vaaddu_vv_v8i8_floor:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: csrwi vxrm, 2
@@ -17,8 +17,8 @@ define <8 x i8> @vaaddu_vv_v8i8(<8 x i8> %x, <8 x i8> %y) {
ret <8 x i8> %ret
}
-define <8 x i8> @vaaddu_vx_v8i8(<8 x i8> %x, i8 %y) {
-; CHECK-LABEL: vaaddu_vx_v8i8:
+define <8 x i8> @vaaddu_vx_v8i8_floor(<8 x i8> %x, i8 %y) {
+; CHECK-LABEL: vaaddu_vx_v8i8_floor:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: csrwi vxrm, 2
@@ -37,8 +37,8 @@ define <8 x i8> @vaaddu_vx_v8i8(<8 x i8> %x, i8 %y) {
}
-define <8 x i8> @vaaddu_vv_v8i8_sexti16(<8 x i8> %x, <8 x i8> %y) {
-; CHECK-LABEL: vaaddu_vv_v8i8_sexti16:
+define <8 x i8> @vaaddu_vv_v8i8_floor_sexti16(<8 x i8> %x, <8 x i8> %y) {
+; CHECK-LABEL: vaaddu_vv_v8i8_floor_sexti16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vwadd.vv v10, v8, v9
@@ -52,8 +52,8 @@ define <8 x i8> @vaaddu_vv_v8i8_sexti16(<8 x i8> %x, <8 x i8> %y) {
ret <8 x i8> %ret
}
-define <8 x i8> @vaaddu_vv_v8i8_zexti32(<8 x i8> %x, <8 x i8> %y) {
-; CHECK-LABEL: vaaddu_vv_v8i8_zexti32:
+define <8 x i8> @vaaddu_vv_v8i8_floor_zexti32(<8 x i8> %x, <8 x i8> %y) {
+; CHECK-LABEL: vaaddu_vv_v8i8_floor_zexti32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: csrwi vxrm, 2
@@ -67,8 +67,8 @@ define <8 x i8> @vaaddu_vv_v8i8_zexti32(<8 x i8> %x, <8 x i8> %y) {
ret <8 x i8> %ret
}
-define <8 x i8> @vaaddu_vv_v8i8_lshr2(<8 x i8> %x, <8 x i8> %y) {
-; CHECK-LABEL: vaaddu_vv_v8i8_lshr2:
+define <8 x i8> @vaaddu_vv_v8i8_floor_lshr2(<8 x i8> %x, <8 x i8> %y) {
+; CHECK-LABEL: vaaddu_vv_v8i8_floor_lshr2:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vwaddu.vv v10, v8, v9
@@ -82,8 +82,8 @@ define <8 x i8> @vaaddu_vv_v8i8_lshr2(<8 x i8> %x, <8 x i8> %y) {
ret <8 x i8> %ret
}
-define <8 x i16> @vaaddu_vv_v8i16(<8 x i16> %x, <8 x i16> %y) {
-; CHECK-LABEL: vaaddu_vv_v8i16:
+define <8 x i16> @vaaddu_vv_v8i16_floor(<8 x i16> %x, <8 x i16> %y) {
+; CHECK-LABEL: vaaddu_vv_v8i16_floor:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: csrwi vxrm, 2
@@ -97,8 +97,8 @@ define <8 x i16> @vaaddu_vv_v8i16(<8 x i16> %x, <8 x i16> %y) {
ret <8 x i16> %ret
}
-define <8 x i16> @vaaddu_vx_v8i16(<8 x i16> %x, i16 %y) {
-; CHECK-LABEL: vaaddu_vx_v8i16:
+define <8 x i16> @vaaddu_vx_v8i16_floor(<8 x i16> %x, i16 %y) {
+; CHECK-LABEL: vaaddu_vx_v8i16_floor:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: csrwi vxrm, 2
@@ -116,8 +116,8 @@ define <8 x i16> @vaaddu_vx_v8i16(<8 x i16> %x, i16 %y) {
ret <8 x i16> %ret
}
-define <8 x i32> @vaaddu_vv_v8i32(<8 x i32> %x, <8 x i32> %y) {
-; CHECK-LABEL: vaaddu_vv_v8i32:
+define <8 x i32> @vaaddu_vv_v8i32_floor(<8 x i32> %x, <8 x i32> %y) {
+; CHECK-LABEL: vaaddu_vv_v8i32_floor:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: csrwi vxrm, 2
@@ -131,8 +131,8 @@ define <8 x i32> @vaaddu_vv_v8i32(<8 x i32> %x, <8 x i32> %y) {
ret <8 x i32> %ret
}
-define <8 x i32> @vaaddu_vx_v8i32(<8 x i32> %x, i32 %y) {
-; CHECK-LABEL: vaaddu_vx_v8i32:
+define <8 x i32> @vaaddu_vx_v8i32_floor(<8 x i32> %x, i32 %y) {
+; CHECK-LABEL: vaaddu_vx_v8i32_floor:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: csrwi vxrm, 2
@@ -150,8 +150,8 @@ define <8 x i32> @vaaddu_vx_v8i32(<8 x i32> %x, i32 %y) {
ret <8 x i32> %ret
}
-define <8 x i64> @vaaddu_vv_v8i64(<8 x i64> %x, <8 x i64> %y) {
-; CHECK-LABEL: vaaddu_vv_v8i64:
+define <8 x i64> @vaaddu_vv_v8i64_floor(<8 x i64> %x, <8 x i64> %y) {
+; CHECK-LABEL: vaaddu_vv_v8i64_floor:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT: csrwi vxrm, 2
@@ -165,8 +165,8 @@ define <8 x i64> @vaaddu_vv_v8i64(<8 x i64> %x, <8 x i64> %y) {
ret <8 x i64> %ret
}
-define <8 x i1> @vaaddu_vv_v8i1(<8 x i1> %x, <8 x i1> %y) {
-; CHECK-LABEL: vaaddu_vv_v8i1:
+define <8 x i1> @vaaddu_vv_v8i1_floor(<8 x i1> %x, <8 x i1> %y) {
+; CHECK-LABEL: vaaddu_vv_v8i1_floor:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vmv.v.i v9, 0
@@ -186,8 +186,8 @@ define <8 x i1> @vaaddu_vv_v8i1(<8 x i1> %x, <8 x i1> %y) {
ret <8 x i1> %ret
}
-define <8 x i64> @vaaddu_vx_v8i64(<8 x i64> %x, i64 %y) {
-; RV32-LABEL: vaaddu_vx_v8i64:
+define <8 x i64> @vaaddu_vx_v8i64_floor(<8 x i64> %x, i64 %y) {
+; RV32-LABEL: vaaddu_vx_v8i64_floor:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
@@ -201,7 +201,7 @@ define <8 x i64> @vaaddu_vx_v8i64(<8 x i64> %x, i64 %y) {
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
-; RV64-LABEL: vaaddu_vx_v8i64:
+; RV64-LABEL: vaaddu_vx_v8i64_floor:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT: csrwi vxrm, 2
@@ -218,3 +218,258 @@ define <8 x i64> @vaaddu_vx_v8i64(<8 x i64> %x, i64 %y) {
%ret = trunc <8 x i128> %div to <8 x i64>
ret <8 x i64> %ret
}
+
+define <8 x i8> @vaaddu_vv_v8i8_ceil(<8 x i8> %x, <8 x i8> %y) {
+; CHECK-LABEL: vaaddu_vv_v8i8_ceil:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: csrwi vxrm, 0
+; CHECK-NEXT: vaaddu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %xzv = zext <8 x i8> %x to <8 x i16>
+ %yzv = zext <8 x i8> %y to <8 x i16>
+ %add = add nuw nsw <8 x i16> %xzv, %yzv
+ %add1 = add nuw nsw <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %div = lshr <8 x i16> %add1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %ret = trunc <8 x i16> %div to <8 x i8>
+ ret <8 x i8> %ret
+}
+
+define <8 x i8> @vaaddu_vx_v8i8_ceil(<8 x i8> %x, i8 %y) {
+; CHECK-LABEL: vaaddu_vx_v8i8_ceil:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: csrwi vxrm, 0
+; CHECK-NEXT: vaaddu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %xzv = zext <8 x i8> %x to <8 x i16>
+ %yhead = insertelement <8 x i8> poison, i8 %y, i32 0
+ %ysplat = shufflevector <8 x i8> %yhead, <8 x i8> poison, <8 x i32> zeroinitializer
+ %yzv = zext <8 x i8> %ysplat to <8 x i16>
+ %add = add nuw nsw <8 x i16> %xzv, %yzv
+ %add1 = add nuw nsw <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %one = insertelement <8 x i16> poison, i16 1, i32 0
+ %splat = shufflevector <8 x i16> %one, <8 x i16> poison, <8 x i32> zeroinitializer
+ %div = lshr <8 x i16> %add1, %splat
+ %ret = trunc <8 x i16> %div to <8 x i8>
+ ret <8 x i8> %ret
+}
+
+define <8 x i8> @vaaddu_vv_v8i8_ceil_sexti16(<8 x i8> %x, <8 x i8> %y) {
+; CHECK-LABEL: vaaddu_vv_v8i8_ceil_sexti16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vwadd.vv v10, v8, v9
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT: vadd.vi v8, v10, 1
+; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 1
+; CHECK-NEXT: ret
+ %xzv = sext <8 x i8> %x to <8 x i16>
+ %yzv = sext <8 x i8> %y to <8 x i16>
+ %add = add nuw nsw <8 x i16> %xzv, %yzv
+ %add1 = add nuw nsw <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %div = lshr <8 x i16> %add1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %ret = trunc <8 x i16> %div to <8 x i8>
+ ret <8 x i8> %ret
+}
+
+define <8 x i8> @vaaddu_vv_v8i8_ceil_zexti32(<8 x i8> %x, <8 x i8> %y) {
+; CHECK-LABEL: vaaddu_vv_v8i8_ceil_zexti32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: csrwi vxrm, 0
+; CHECK-NEXT: vaaddu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %xzv = zext <8 x i8> %x to <8 x i32>
+ %yzv = zext <8 x i8> %y to <8 x i32>
+ %add = add nuw nsw <8 x i32> %xzv, %yzv
+ %add1 = add nuw nsw <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %div = lshr <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %ret = trunc <8 x i32> %div to <8 x i8>
+ ret <8 x i8> %ret
+}
+
+define <8 x i8> @vaaddu_vv_v8i8_ceil_lshr2(<8 x i8> %x, <8 x i8> %y) {
+; CHECK-LABEL: vaaddu_vv_v8i8_ceil_lshr2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vwaddu.vv v10, v8, v9
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT: vadd.vi v8, v10, 2
+; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 2
+; CHECK-NEXT: ret
+ %xzv = zext <8 x i8> %x to <8 x i16>
+ %yzv = zext <8 x i8> %y to <8 x i16>
+ %add = add nuw nsw <8 x i16> %xzv, %yzv
+ %add1 = add nuw nsw <8 x i16> %add, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
+ %div = lshr <8 x i16> %add1, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
+ %ret = trunc <8 x i16> %div to <8 x i8>
+ ret <8 x i8> %ret
+}
+
+define <8 x i8> @vaaddu_vv_v8i8_ceil_add2(<8 x i8> %x, <8 x i8> %y) {
+; CHECK-LABEL: vaaddu_vv_v8i8_ceil_add2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vwaddu.vv v10, v8, v9
+; CHECK-NEXT: li a0, 2
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT: csrwi vxrm, 2
+; CHECK-NEXT: vaaddu.vx v8, v10, a0
+; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: ret
+ %xzv = zext <8 x i8> %x to <8 x i16>
+ %yzv = zext <8 x i8> %y to <8 x i16>
+ %add = add nuw nsw <8 x i16> %xzv, %yzv
+ %add1 = add nuw nsw <8 x i16> %add, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
+ %div = lshr <8 x i16> %add1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %ret = trunc <8 x i16> %div to <8 x i8>
+ ret <8 x i8> %ret
+}
+
+define <8 x i16> @vaaddu_vv_v8i16_ceil(<8 x i16> %x, <8 x i16> %y) {
+; CHECK-LABEL: vaaddu_vv_v8i16_ceil:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: csrwi vxrm, 0
+; CHECK-NEXT: vaaddu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %xzv = zext <8 x i16> %x to <8 x i32>
+ %yzv = zext <8 x i16> %y to <8 x i32>
+ %add = add nuw nsw <8 x i32> %xzv, %yzv
+ %add1 = add nuw nsw <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %div = lshr <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %ret = trunc <8 x i32> %div to <8 x i16>
+ ret <8 x i16> %ret
+}
+
+define <8 x i16> @vaaddu_vx_v8i16_ceil(<8 x i16> %x, i16 %y) {
+; CHECK-LABEL: vaaddu_vx_v8i16_ceil:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: csrwi vxrm, 0
+; CHECK-NEXT: vaaddu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %xzv = zext <8 x i16> %x to <8 x i32>
+ %yhead = insertelement <8 x i16> poison, i16 %y, i16 0
+ %ysplat = shufflevector <8 x i16> %yhead, <8 x i16> poison, <8 x i32> zeroinitializer
+ %yzv = zext <8 x i16> %ysplat to <8 x i32>
+ %add = add nuw nsw <8 x i32> %xzv, %yzv
+ %add1 = add nuw nsw <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %one = insertelement <8 x i32> poison, i32 1, i32 0
+ %splat = shufflevector <8 x i32> %one, <8 x i32> poison, <8 x i32> zeroinitializer
+ %div = lshr <8 x i32> %add1, %splat
+ %ret = trunc <8 x i32> %div to <8 x i16>
+ ret <8 x i16> %ret
+}
+
+define <8 x i32> @vaaddu_vv_v8i32_ceil(<8 x i32> %x, <8 x i32> %y) {
+; CHECK-LABEL: vaaddu_vv_v8i32_ceil:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT: csrwi vxrm, 0
+; CHECK-NEXT: vaaddu.vv v8, v8, v10
+; CHECK-NEXT: ret
+ %xzv = zext <8 x i32> %x to <8 x i64>
+ %yzv = zext <8 x i32> %y to <8 x i64>
+ %add = add nuw nsw <8 x i64> %xzv, %yzv
+ %add1 = add nuw nsw <8 x i64> %add, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
+ %div = lshr <8 x i64> %add1, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
+ %ret = trunc <8 x i64> %div to <8 x i32>
+ ret <8 x i32> %ret
+}
+
+define <8 x i32> @vaaddu_vx_v8i32_ceil(<8 x i32> %x, i32 %y) {
+; CHECK-LABEL: vaaddu_vx_v8i32_ceil:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT: csrwi vxrm, 0
+; CHECK-NEXT: vaaddu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %xzv = zext <8 x i32> %x to <8 x i64>
+ %yhead = insertelement <8 x i32> poison, i32 %y, i32 0
+ %ysplat = shufflevector <8 x i32> %yhead, <8 x i32> poison, <8 x i32> zeroinitializer
+ %yzv = zext <8 x i32> %ysplat to <8 x i64>
+ %add = add nuw nsw <8 x i64> %xzv, %yzv
+ %add1 = add nuw nsw <8 x i64> %add, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
+ %one = insertelement <8 x i64> poison, i64 1, i64 0
+ %splat = shufflevector <8 x i64> %one, <8 x i64> poison, <8 x i32> zeroinitializer
+ %div = lshr <8 x i64> %add1, %splat
+ %ret = trunc <8 x i64> %div to <8 x i32>
+ ret <8 x i32> %ret
+}
+
+define <8 x i64> @vaaddu_vv_v8i64_ceil(<8 x i64> %x, <8 x i64> %y) {
+; CHECK-LABEL: vaaddu_vv_v8i64_ceil:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; CHECK-NEXT: csrwi vxrm, 0
+; CHECK-NEXT: vaaddu.vv v8, v8, v12
+; CHECK-NEXT: ret
+ %xzv = zext <8 x i64> %x to <8 x i128>
+ %yzv = zext <8 x i64> %y to <8 x i128>
+ %add = add nuw nsw <8 x i128> %xzv, %yzv
+ %add1 = add nuw nsw <8 x i128> %add, <i128 1, i128 1, i128 1, i128 1, i128 1, i128 1, i128 1, i128 1>
+ %div = lshr <8 x i128> %add1, <i128 1, i128 1, i128 1, i128 1, i128 1, i128 1, i128 1, i128 1>
+ %ret = trunc <8 x i128> %div to <8 x i64>
+ ret <8 x i64> %ret
+}
+
+define <8 x i1> @vaaddu_vv_v8i1_ceil(<8 x i1> %x, <8 x i1> %y) {
+; CHECK-LABEL: vaaddu_vv_v8i1_ceil:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vmv.v.i v9, 0
+; CHECK-NEXT: vmerge.vim v10, v9, 1, v0
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vmerge.vim v8, v9, 1, v0
+; CHECK-NEXT: csrwi vxrm, 0
+; CHECK-NEXT: vaaddu.vv v8, v10, v8
+; CHECK-NEXT: vand.vi v8, v8, 1
+; CHECK-NEXT: vmsne.vi v0, v8, 0
+; CHECK-NEXT: ret
+ %xzv = zext <8 x i1> %x to <8 x i8>
+ %yzv = zext <8 x i1> %y to <8 x i8>
+ %add = add nuw nsw <8 x i8> %xzv, %yzv
+ %add1 = add nuw nsw <8 x i8> %add, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ %div = lshr <8 x i8> %add1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ %ret = trunc <8 x i8> %div to <8 x i1>
+ ret <8 x i1> %ret
+}
+
+define <8 x i64> @vaaddu_vx_v8i64_ceil(<8 x i64> %x, i64 %y) {
+; RV32-LABEL: vaaddu_vx_v8i64_ceil:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV32-NEXT: vlse64.v v12, (a0), zero
+; RV32-NEXT: csrwi vxrm, 0
+; RV32-NEXT: vaaddu.vv v8, v8, v12
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vaaddu_vx_v8i64_ceil:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64-NEXT: csrwi vxrm, 0
+; RV64-NEXT: vaaddu.vx v8, v8, a0
+; RV64-NEXT: ret
+ %xzv = zext <8 x i64> %x to <8 x i128>
+ %yhead = insertelement <8 x i64> poison, i64 %y, i64 0
+ %ysplat = shufflevector <8 x i64> %yhead, <8 x i64> poison, <8 x i32> zeroinitializer
+ %yzv = zext <8 x i64> %ysplat to <8 x i128>
+ %add = add nuw nsw <8 x i128> %xzv, %yzv
+ %add1 = add nuw nsw <8 x i128> %add, <i128 1, i128 1, i128 1, i128 1, i128 1, i128 1, i128 1, i128 1>
+ %one = insertelement <8 x i128> poison, i128 1, i128 0
+ %splat = shufflevector <8 x i128> %one, <8 x i128> poison, <8 x i32> zeroinitializer
+ %div = lshr <8 x i128> %add1, %splat
+ %ret = trunc <8 x i128> %div to <8 x i64>
+ ret <8 x i64> %ret
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vaaddu-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vaaddu-sdnode.ll
index 883d605e77e262..1cf57371455cfb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vaaddu-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vaaddu-sdnode.ll
@@ -2,8 +2,8 @@
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
-define <vscale x 8 x i8> @vaaddu_vv_nxv8i8(<vscale x 8 x i8> %x, <vscale x 8 x i8> %y) {
-; CHECK-LABEL: vaaddu_vv_nxv8i8:
+define <vscale x 8 x i8> @vaaddu_vv_nxv8i8_floor(<vscale x 8 x i8> %x, <vscale x 8 x i8> %y) {
+; CHECK-LABEL: vaaddu_vv_nxv8i8_floor:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
; CHECK-NEXT: csrwi vxrm, 2
@@ -19,8 +19,8 @@ define <vscale x 8 x i8> @vaaddu_vv_nxv8i8(<vscale x 8 x i8> %x, <vscale x 8 x i
ret <vscale x 8 x i8> %ret
}
-define <vscale x 8 x i8> @vaaddu_vx_nxv8i8(<vscale x 8 x i8> %x, i8 %y) {
-; CHECK-LABEL: vaaddu_vx_nxv8i8:
+define <vscale x 8 x i8> @vaaddu_vx_nxv8i8_floor(<vscale x 8 x i8> %x, i8 %y) {
+; CHECK-LABEL: vaaddu_vx_nxv8i8_floor:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
; CHECK-NEXT: csrwi vxrm, 2
@@ -38,8 +38,8 @@ define <vscale x 8 x i8> @vaaddu_vx_nxv8i8(<vscale x 8 x i8> %x, i8 %y) {
ret <vscale x 8 x i8> %ret
}
-define <vscale x 8 x i8> @vaaddu_vv_nxv8i8_sexti16(<vscale x 8 x i8> %x, <vscale x 8 x i8> %y) {
-; CHECK-LABEL: vaaddu_vv_nxv8i8_sexti16:
+define <vscale x 8 x i8> @vaaddu_vv_nxv8i8_floor_sexti16(<vscale x 8 x i8> %x, <vscale x 8 x i8> %y) {
+; CHECK-LABEL: vaaddu_vv_nxv8i8_floor_sexti16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
; CHECK-NEXT: vwadd.vv v10, v8, v9
@@ -55,8 +55,8 @@ define <vscale x 8 x i8> @vaaddu_vv_nxv8i8_sexti16(<vscale x 8 x i8> %x, <vscale
ret <vscale x 8 x i8> %ret
}
-define <vscale x 8 x i8> @vaaddu_vv_nxv8i8_zexti32(<vscale x 8 x i8> %x, <vscale x 8 x i8> %y) {
-; CHECK-LABEL: vaaddu_vv_nxv8i8_zexti32:
+define <vscale x 8 x i8> @vaaddu_vv_nxv8i8_floor_zexti32(<vscale x 8 x i8> %x, <vscale x 8 x i8> %y) {
+; CHECK-LABEL: vaaddu_vv_nxv8i8_floor_zexti32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
; CHECK-NEXT: csrwi vxrm, 2
@@ -72,8 +72,8 @@ define <vscale x 8 x i8> @vaaddu_vv_nxv8i8_zexti32(<vscale x 8 x i8> %x, <vscale
ret <vscale x 8 x i8> %ret
}
-define <vscale x 8 x i8> @vaaddu_vv_nxv8i8_lshr2(<vscale x 8 x i8> %x, <vscale x 8 x i8> %y) {
-; CHECK-LABEL: vaaddu_vv_nxv8i8_lshr2:
+define <vscale x 8 x i8> @vaaddu_vv_nxv8i8_floor_lshr2(<vscale x 8 x i8> %x, <vscale x 8 x i8> %y) {
+; CHECK-LABEL: vaaddu_vv_nxv8i8_floor_lshr2:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
; CHECK-NEXT: vwaddu.vv v10, v8, v9
@@ -89,8 +89,8 @@ define <vscale x 8 x i8> @vaaddu_vv_nxv8i8_lshr2(<vscale x 8 x i8> %x, <vscale x
ret <vscale x 8 x i8> %ret
}
-define <vscale x 8 x i16> @vaaddu_vv_nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i16> %y) {
-; CHECK-LABEL: vaaddu_vv_nxv8i16:
+define <vscale x 8 x i16> @vaaddu_vv_nxv8i16_floor(<vscale x 8 x i16> %x, <vscale x 8 x i16> %y) {
+; CHECK-LABEL: vaaddu_vv_nxv8i16_floor:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
; CHECK-NEXT: csrwi vxrm, 2
@@ -106,8 +106,8 @@ define <vscale x 8 x i16> @vaaddu_vv_nxv8i16(<vscale x 8 x i16> %x, <vscale x 8
ret <vscale x 8 x i16> %ret
}
-define <vscale x 8 x i16> @vaaddu_vx_nxv8i16(<vscale x 8 x i16> %x, i16 %y) {
-; CHECK-LABEL: vaaddu_vx_nxv8i16:
+define <vscale x 8 x i16> @vaaddu_vx_nxv8i16_floor(<vscale x 8 x i16> %x, i16 %y) {
+; CHECK-LABEL: vaaddu_vx_nxv8i16_floor:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
; CHECK-NEXT: csrwi vxrm, 2
@@ -125,8 +125,8 @@ define <vscale x 8 x i16> @vaaddu_vx_nxv8i16(<vscale x 8 x i16> %x, i16 %y) {
ret <vscale x 8 x i16> %ret
}
-define <vscale x 8 x i32> @vaaddu_vv_nxv8i32(<vscale x 8 x i32> %x, <vscale x 8 x i32> %y) {
-; CHECK-LABEL: vaaddu_vv_nxv8i32:
+define <vscale x 8 x i32> @vaaddu_vv_nxv8i32_floor(<vscale x 8 x i32> %x, <vscale x 8 x i32> %y) {
+; CHECK-LABEL: vaaddu_vv_nxv8i32_floor:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
; CHECK-NEXT: csrwi vxrm, 2
@@ -142,8 +142,8 @@ define <vscale x 8 x i32> @vaaddu_vv_nxv8i32(<vscale x 8 x i32> %x, <vscale x 8
ret <vscale x 8 x i32> %ret
}
-define <vscale x 8 x i32> @vaaddu_vx_nxv8i32(<vscale x 8 x i32> %x, i32 %y) {
-; CHECK-LABEL: vaaddu_vx_nxv8i32:
+define <vscale x 8 x i32> @vaaddu_vx_nxv8i32_floor(<vscale x 8 x i32> %x, i32 %y) {
+; CHECK-LABEL: vaaddu_vx_nxv8i32_floor:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
; CHECK-NEXT: csrwi vxrm, 2
@@ -161,8 +161,8 @@ define <vscale x 8 x i32> @vaaddu_vx_nxv8i32(<vscale x 8 x i32> %x, i32 %y) {
ret <vscale x 8 x i32> %ret
}
-define <vscale x 8 x i64> @vaaddu_vv_nxv8i64(<vscale x 8 x i64> %x, <vscale x 8 x i64> %y) {
-; CHECK-LABEL: vaaddu_vv_nxv8i64:
+define <vscale x 8 x i64> @vaaddu_vv_nxv8i64_floor(<vscale x 8 x i64> %x, <vscale x 8 x i64> %y) {
+; CHECK-LABEL: vaaddu_vv_nxv8i64_floor:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
; CHECK-NEXT: csrwi vxrm, 2
@@ -178,8 +178,8 @@ define <vscale x 8 x i64> @vaaddu_vv_nxv8i64(<vscale x 8 x i64> %x, <vscale x 8
ret <vscale x 8 x i64> %ret
}
-define <vscale x 8 x i64> @vaaddu_vx_nxv8i64(<vscale x 8 x i64> %x, i64 %y) {
-; RV32-LABEL: vaaddu_vx_nxv8i64:
+define <vscale x 8 x i64> @vaaddu_vx_nxv8i64_floor(<vscale x 8 x i64> %x, i64 %y) {
+; RV32-LABEL: vaaddu_vx_nxv8i64_floor:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
@@ -193,7 +193,7 @@ define <vscale x 8 x i64> @vaaddu_vx_nxv8i64(<vscale x 8 x i64> %x, i64 %y) {
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
-; RV64-LABEL: vaaddu_vx_nxv8i64:
+; RV64-LABEL: vaaddu_vx_nxv8i64_floor:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT: csrwi vxrm, 2
@@ -210,3 +210,252 @@ define <vscale x 8 x i64> @vaaddu_vx_nxv8i64(<vscale x 8 x i64> %x, i64 %y) {
%ret = trunc <vscale x 8 x i128> %div to <vscale x 8 x i64>
ret <vscale x 8 x i64> %ret
}
+
+define <vscale x 8 x i8> @vaaddu_vv_nxv8i8_ceil(<vscale x 8 x i8> %x, <vscale x 8 x i8> %y) { ; unsigned ceiling average of two i8 vectors: trunc((zext(x) + zext(y) + 1) >> 1); CHECK lines expect vaaddu.vv with vxrm = 0
+; CHECK-LABEL: vaaddu_vv_nxv8i8_ceil:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-NEXT: csrwi vxrm, 0
+; CHECK-NEXT: vaaddu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %xzv = zext <vscale x 8 x i8> %x to <vscale x 8 x i16> ; widen to i16 so x + y + 1 fits without wrapping
+ %yzv = zext <vscale x 8 x i8> %y to <vscale x 8 x i16>
+ %add = add nuw nsw <vscale x 8 x i16> %xzv, %yzv
+ %one = insertelement <vscale x 8 x i16> poison, i16 1, i32 0
+ %splat = shufflevector <vscale x 8 x i16> %one, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer ; splat of 1, reused as rounding addend and as shift amount
+ %add1 = add nuw nsw <vscale x 8 x i16> %add, %splat ; + 1 rounds the halved sum up
+ %div = lshr <vscale x 8 x i16> %add1, %splat ; >> 1
+ %ret = trunc <vscale x 8 x i16> %div to <vscale x 8 x i8> ; narrow back to the i8 element type
+ ret <vscale x 8 x i8> %ret
+}
+
+define <vscale x 8 x i8> @vaaddu_vx_nxv8i8_ceil(<vscale x 8 x i8> %x, i8 %y) { ; unsigned ceiling average of an i8 vector and a broadcast i8 scalar; CHECK lines expect vaaddu.vx with vxrm = 0
+; CHECK-LABEL: vaaddu_vx_nxv8i8_ceil:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
+; CHECK-NEXT: csrwi vxrm, 0
+; CHECK-NEXT: vaaddu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %xzv = zext <vscale x 8 x i8> %x to <vscale x 8 x i16> ; widen to i16 so x + y + 1 fits without wrapping
+ %yhead = insertelement <vscale x 8 x i8> poison, i8 %y, i32 0
+ %ysplat = shufflevector <vscale x 8 x i8> %yhead, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer ; broadcast the scalar %y to every lane
+ %yzv = zext <vscale x 8 x i8> %ysplat to <vscale x 8 x i16>
+ %add = add nuw nsw <vscale x 8 x i16> %xzv, %yzv
+ %one = insertelement <vscale x 8 x i16> poison, i16 1, i32 0
+ %splat = shufflevector <vscale x 8 x i16> %one, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer ; splat of 1, reused as rounding addend and as shift amount
+ %add1 = add nuw nsw <vscale x 8 x i16> %add, %splat ; + 1 rounds the halved sum up
+ %div = lshr <vscale x 8 x i16> %add1, %splat ; >> 1
+ %ret = trunc <vscale x 8 x i16> %div to <vscale x 8 x i8> ; narrow back to the i8 element type
+ ret <vscale x 8 x i8> %ret
+}
+
+define <vscale x 8 x i8> @vaaddu_vv_nxv8i8_ceil_sexti16(<vscale x 8 x i8> %x, <vscale x 8 x i8> %y) { ; negative test: operands are sign-extended, and the CHECK lines expect a widening add / add / narrowing shift sequence instead of vaaddu
+; CHECK-LABEL: vaaddu_vv_nxv8i8_ceil_sexti16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-NEXT: vwadd.vv v10, v8, v9
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vadd.vi v10, v10, 1
+; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v10, 1
+; CHECK-NEXT: ret
+ %xzv = sext <vscale x 8 x i8> %x to <vscale x 8 x i16> ; sext, despite the %xzv name
+ %yzv = sext <vscale x 8 x i8> %y to <vscale x 8 x i16> ; sext, despite the %yzv name
+ %add = add nuw nsw <vscale x 8 x i16> %xzv, %yzv
+ %one = insertelement <vscale x 8 x i16> poison, i16 1, i32 0
+ %splat = shufflevector <vscale x 8 x i16> %one, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer ; splat of 1, reused as addend and as shift amount
+ %add1 = add nuw nsw <vscale x 8 x i16> %add, %splat
+ %div = lshr <vscale x 8 x i16> %add1, %splat ; logical shift right by 1
+ %ret = trunc <vscale x 8 x i16> %div to <vscale x 8 x i8>
+ ret <vscale x 8 x i8> %ret
+}
+
+define <vscale x 8 x i8> @vaaddu_vv_nxv8i8_ceil_zexti32(<vscale x 8 x i8> %x, <vscale x 8 x i8> %y) { ; same ceiling average as the i16 variant but computed in i32; CHECK lines still expect vaaddu.vv with vxrm = 0
+; CHECK-LABEL: vaaddu_vv_nxv8i8_ceil_zexti32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-NEXT: csrwi vxrm, 0
+; CHECK-NEXT: vaaddu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %xzv = zext <vscale x 8 x i8> %x to <vscale x 8 x i32> ; widen i8 directly to i32 (four times the element width)
+ %yzv = zext <vscale x 8 x i8> %y to <vscale x 8 x i32>
+ %add = add nuw nsw <vscale x 8 x i32> %xzv, %yzv
+ %one = insertelement <vscale x 8 x i32> poison, i32 1, i32 0
+ %splat = shufflevector <vscale x 8 x i32> %one, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer ; splat of 1, reused as rounding addend and as shift amount
+ %add1 = add nuw nsw <vscale x 8 x i32> %add, %splat ; + 1 rounds the halved sum up
+ %div = lshr <vscale x 8 x i32> %add1, %splat ; >> 1
+ %ret = trunc <vscale x 8 x i32> %div to <vscale x 8 x i8> ; narrow back to the i8 element type
+ ret <vscale x 8 x i8> %ret
+}
+
+define <vscale x 8 x i8> @vaaddu_vv_nxv8i8_ceil_lshr2(<vscale x 8 x i8> %x, <vscale x 8 x i8> %y) { ; negative test: computes (zext(x) + zext(y) + 2) >> 2, and the CHECK lines expect no vaaddu
+; CHECK-LABEL: vaaddu_vv_nxv8i8_ceil_lshr2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-NEXT: vwaddu.vv v10, v8, v9
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vadd.vi v10, v10, 2
+; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v10, 2
+; CHECK-NEXT: ret
+ %xzv = zext <vscale x 8 x i8> %x to <vscale x 8 x i16>
+ %yzv = zext <vscale x 8 x i8> %y to <vscale x 8 x i16>
+ %add = add nuw nsw <vscale x 8 x i16> %xzv, %yzv
+ %one = insertelement <vscale x 8 x i16> poison, i16 2, i32 0 ; NOTE: despite its name, %one holds 2 in this test
+ %splat = shufflevector <vscale x 8 x i16> %one, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer ; splat of 2, used as both the addend and the shift amount
+ %add1 = add nuw nsw <vscale x 8 x i16> %add, %splat ; + 2
+ %div = lshr <vscale x 8 x i16> %add1, %splat ; >> 2
+ %ret = trunc <vscale x 8 x i16> %div to <vscale x 8 x i8>
+ ret <vscale x 8 x i8> %ret
+}
+
+define <vscale x 8 x i8> @vaaddu_vv_nxv8i8_ceil_add2(<vscale x 8 x i8> %x, <vscale x 8 x i8> %y) { ; negative test: addend is 2 while the shift stays 1, i.e. (zext(x) + zext(y) + 2) >> 1, so no vaaddu is expected
+; CHECK-LABEL: vaaddu_vv_nxv8i8_ceil_add2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-NEXT: vwaddu.vv v10, v8, v9
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vadd.vi v10, v10, 2
+; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v10, 1
+; CHECK-NEXT: ret
+ %xzv = zext <vscale x 8 x i8> %x to <vscale x 8 x i16>
+ %yzv = zext <vscale x 8 x i8> %y to <vscale x 8 x i16>
+ %add = add nuw nsw <vscale x 8 x i16> %xzv, %yzv
+ %one = insertelement <vscale x 8 x i16> poison, i16 1, i32 0 ; shift amount: 1
+ %splat1 = shufflevector <vscale x 8 x i16> %one, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+ %two = insertelement <vscale x 8 x i16> poison, i16 2, i32 0 ; addend: 2, which is what keeps this from being a ceiling average
+ %splat2 = shufflevector <vscale x 8 x i16> %two, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+ %add2 = add nuw nsw <vscale x 8 x i16> %add, %splat2 ; + 2
+ %div = lshr <vscale x 8 x i16> %add2, %splat1 ; >> 1 -- NOTE(review): CHECK lines were adjusted by hand for the shift-by-1; regenerate with update_llc_test_checks.py to confirm
+ %ret = trunc <vscale x 8 x i16> %div to <vscale x 8 x i8>
+ ret <vscale x 8 x i8> %ret
+}
+
+define <vscale x 8 x i16> @vaaddu_vv_nxv8i16_ceil(<vscale x 8 x i16> %x, <vscale x 8 x i16> %y) { ; unsigned ceiling average of two i16 vectors: trunc((zext(x) + zext(y) + 1) >> 1); CHECK lines expect vaaddu.vv with vxrm = 0
+; CHECK-LABEL: vaaddu_vv_nxv8i16_ceil:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: csrwi vxrm, 0
+; CHECK-NEXT: vaaddu.vv v8, v8, v10
+; CHECK-NEXT: ret
+ %xzv = zext <vscale x 8 x i16> %x to <vscale x 8 x i32> ; widen to i32 so x + y + 1 fits without wrapping
+ %yzv = zext <vscale x 8 x i16> %y to <vscale x 8 x i32>
+ %add = add nuw nsw <vscale x 8 x i32> %xzv, %yzv
+ %one = insertelement <vscale x 8 x i32> poison, i32 1, i32 0
+ %splat = shufflevector <vscale x 8 x i32> %one, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer ; splat of 1, reused as rounding addend and as shift amount
+ %add1 = add nuw nsw <vscale x 8 x i32> %add, %splat ; + 1 rounds the halved sum up
+ %div = lshr <vscale x 8 x i32> %add1, %splat ; >> 1
+ %ret = trunc <vscale x 8 x i32> %div to <vscale x 8 x i16> ; narrow back to the i16 element type
+ ret <vscale x 8 x i16> %ret
+}
+
+define <vscale x 8 x i16> @vaaddu_vx_nxv8i16_ceil(<vscale x 8 x i16> %x, i16 %y) { ; unsigned ceiling average of an i16 vector and a broadcast i16 scalar; CHECK lines expect vaaddu.vx with vxrm = 0
+; CHECK-LABEL: vaaddu_vx_nxv8i16_ceil:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: csrwi vxrm, 0
+; CHECK-NEXT: vaaddu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %xzv = zext <vscale x 8 x i16> %x to <vscale x 8 x i32> ; widen to i32 so x + y + 1 fits without wrapping
+ %yhead = insertelement <vscale x 8 x i16> poison, i16 %y, i16 0
+ %ysplat = shufflevector <vscale x 8 x i16> %yhead, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer ; broadcast the scalar %y to every lane
+ %yzv = zext <vscale x 8 x i16> %ysplat to <vscale x 8 x i32>
+ %add = add nuw nsw <vscale x 8 x i32> %xzv, %yzv
+ %one = insertelement <vscale x 8 x i32> poison, i32 1, i32 0
+ %splat = shufflevector <vscale x 8 x i32> %one, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer ; splat of 1, reused as rounding addend and as shift amount
+ %add1 = add nuw nsw <vscale x 8 x i32> %add, %splat ; + 1 rounds the halved sum up
+ %div = lshr <vscale x 8 x i32> %add1, %splat ; >> 1
+ %ret = trunc <vscale x 8 x i32> %div to <vscale x 8 x i16> ; narrow back to the i16 element type
+ ret <vscale x 8 x i16> %ret
+}
+
+define <vscale x 8 x i32> @vaaddu_vv_nxv8i32_ceil(<vscale x 8 x i32> %x, <vscale x 8 x i32> %y) { ; unsigned ceiling average of two i32 vectors: trunc((zext(x) + zext(y) + 1) >> 1); CHECK lines expect vaaddu.vv with vxrm = 0
+; CHECK-LABEL: vaaddu_vv_nxv8i32_ceil:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
+; CHECK-NEXT: csrwi vxrm, 0
+; CHECK-NEXT: vaaddu.vv v8, v8, v12
+; CHECK-NEXT: ret
+ %xzv = zext <vscale x 8 x i32> %x to <vscale x 8 x i64> ; widen to i64 so x + y + 1 fits without wrapping
+ %yzv = zext <vscale x 8 x i32> %y to <vscale x 8 x i64>
+ %add = add nuw nsw <vscale x 8 x i64> %xzv, %yzv
+ %one = insertelement <vscale x 8 x i64> poison, i64 1, i64 0
+ %splat = shufflevector <vscale x 8 x i64> %one, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer ; splat of 1, reused as rounding addend and as shift amount
+ %add1 = add nuw nsw <vscale x 8 x i64> %add, %splat ; + 1 rounds the halved sum up
+ %div = lshr <vscale x 8 x i64> %add1, %splat ; >> 1
+ %ret = trunc <vscale x 8 x i64> %div to <vscale x 8 x i32> ; narrow back to the i32 element type
+ ret <vscale x 8 x i32> %ret
+}
+
+define <vscale x 8 x i32> @vaaddu_vx_nxv8i32_ceil(<vscale x 8 x i32> %x, i32 %y) { ; unsigned ceiling average of an i32 vector and a broadcast i32 scalar; CHECK lines expect vaaddu.vx with vxrm = 0
+; CHECK-LABEL: vaaddu_vx_nxv8i32_ceil:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
+; CHECK-NEXT: csrwi vxrm, 0
+; CHECK-NEXT: vaaddu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %xzv = zext <vscale x 8 x i32> %x to <vscale x 8 x i64> ; widen to i64 so x + y + 1 fits without wrapping
+ %yhead = insertelement <vscale x 8 x i32> poison, i32 %y, i32 0
+ %ysplat = shufflevector <vscale x 8 x i32> %yhead, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer ; broadcast the scalar %y to every lane
+ %yzv = zext <vscale x 8 x i32> %ysplat to <vscale x 8 x i64>
+ %add = add nuw nsw <vscale x 8 x i64> %xzv, %yzv
+ %one = insertelement <vscale x 8 x i64> poison, i64 1, i64 0
+ %splat = shufflevector <vscale x 8 x i64> %one, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer ; splat of 1, reused as rounding addend and as shift amount
+ %add1 = add nuw nsw <vscale x 8 x i64> %add, %splat ; + 1 rounds the halved sum up
+ %div = lshr <vscale x 8 x i64> %add1, %splat ; >> 1
+ %ret = trunc <vscale x 8 x i64> %div to <vscale x 8 x i32> ; narrow back to the i32 element type
+ ret <vscale x 8 x i32> %ret
+}
+
+define <vscale x 8 x i64> @vaaddu_vv_nxv8i64_ceil(<vscale x 8 x i64> %x, <vscale x 8 x i64> %y) { ; unsigned ceiling average of two i64 vectors: trunc((zext(x) + zext(y) + 1) >> 1); CHECK lines expect vaaddu.vv with vxrm = 0
+; CHECK-LABEL: vaaddu_vv_nxv8i64_ceil:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
+; CHECK-NEXT: csrwi vxrm, 0
+; CHECK-NEXT: vaaddu.vv v8, v8, v16
+; CHECK-NEXT: ret
+ %xzv = zext <vscale x 8 x i64> %x to <vscale x 8 x i128> ; widen to i128 so x + y + 1 fits without wrapping
+ %yzv = zext <vscale x 8 x i64> %y to <vscale x 8 x i128>
+ %add = add nuw nsw <vscale x 8 x i128> %xzv, %yzv
+ %one = insertelement <vscale x 8 x i128> poison, i128 1, i128 0
+ %splat = shufflevector <vscale x 8 x i128> %one, <vscale x 8 x i128> poison, <vscale x 8 x i32> zeroinitializer ; splat of 1, reused as rounding addend and as shift amount
+ %add1 = add nuw nsw <vscale x 8 x i128> %add, %splat ; + 1 rounds the halved sum up
+ %div = lshr <vscale x 8 x i128> %add1, %splat ; >> 1
+ %ret = trunc <vscale x 8 x i128> %div to <vscale x 8 x i64> ; narrow back to the i64 element type
+ ret <vscale x 8 x i64> %ret
+}
+
+define <vscale x 8 x i64> @vaaddu_vx_nxv8i64_ceil(<vscale x 8 x i64> %x, i64 %y) { ; unsigned ceiling average of an i64 vector and a broadcast i64 scalar; expectations are split: RV32 stores the scalar to the stack, splats it with vlse64.v and uses vaaddu.vv, RV64 uses vaaddu.vx
+; RV32-LABEL: vaaddu_vx_nxv8i64_ceil:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
+; RV32-NEXT: vlse64.v v16, (a0), zero
+; RV32-NEXT: csrwi vxrm, 0
+; RV32-NEXT: vaaddu.vv v8, v8, v16
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vaaddu_vx_nxv8i64_ceil:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
+; RV64-NEXT: csrwi vxrm, 0
+; RV64-NEXT: vaaddu.vx v8, v8, a0
+; RV64-NEXT: ret
+ %xzv = zext <vscale x 8 x i64> %x to <vscale x 8 x i128> ; widen to i128 so x + y + 1 fits without wrapping
+ %yhead = insertelement <vscale x 8 x i64> poison, i64 %y, i64 0
+ %ysplat = shufflevector <vscale x 8 x i64> %yhead, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer ; broadcast the scalar %y to every lane
+ %yzv = zext <vscale x 8 x i64> %ysplat to <vscale x 8 x i128>
+ %add = add nuw nsw <vscale x 8 x i128> %xzv, %yzv
+ %one = insertelement <vscale x 8 x i128> poison, i128 1, i128 0
+ %splat = shufflevector <vscale x 8 x i128> %one, <vscale x 8 x i128> poison, <vscale x 8 x i32> zeroinitializer ; splat of 1, reused as rounding addend and as shift amount
+ %add1 = add nuw nsw <vscale x 8 x i128> %add, %splat ; + 1 rounds the halved sum up
+ %div = lshr <vscale x 8 x i128> %add1, %splat ; >> 1
+ %ret = trunc <vscale x 8 x i128> %div to <vscale x 8 x i64> ; narrow back to the i64 element type
+ ret <vscale x 8 x i64> %ret
+}
More information about the llvm-commits
mailing list