[llvm] [RISCV][ISel] Use vaaddu with rounding mode rdn for ISD::AVGFLOORU. (PR #76550)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 8 19:56:51 PST 2024
https://github.com/sun-jacobi updated https://github.com/llvm/llvm-project/pull/76550
From 24797e60b48c8e52f59dcd3e6b7c0d97366d12bf Mon Sep 17 00:00:00 2001
From: sun-jacobi <sun1011jacobi at gmail.com>
Date: Tue, 9 Jan 2024 12:54:51 +0900
Subject: [PATCH] [RISCV][ISel] Use vaaddu with rounding mode rdn for
ISD::AVGFLOORU.
This patch uses `vaaddu` with the rdn (round-down) rounding mode (i.e. `vxrm[1:0] = 0b10`) to lower `ISD::AVGFLOORU`.
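For reference, here is a minimal scalar sketch (not part of the patch; the helper names are illustrative) of why the two sides agree: per the RVV spec, `vaaddu` forms the sum with an extra carry bit and then shifts right by one, and with rdn the shifted-out bit is simply discarded, which is exactly the floor average that `ISD::AVGFLOORU` describes.
```
#include <cstdint>

// Scalar model of ISD::AVGFLOORU on i8 elements: zero-extend, add in a
// wider type so the sum cannot wrap, shift right by one, truncate.
uint8_t avgflooru_i8(uint8_t a, uint8_t b) {
  return static_cast<uint8_t>((uint16_t(a) + uint16_t(b)) >> 1);
}

// Scalar model of vaaddu with vxrm = rdn (vxrm[1:0] = 0b10): the sum is
// formed with an extra bit of precision and shifted right by one; rdn
// rounds toward negative infinity, i.e. the low bit is dropped.
uint8_t vaaddu_rdn_i8(uint8_t a, uint8_t b) {
  uint16_t sum = uint16_t(a) + uint16_t(b); // extra carry bit, no overflow
  return static_cast<uint8_t>(sum >> 1);    // rdn: drop the shifted-out bit
}
```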
Source code
```
define <8 x i8> @vaaddu_auto(ptr %x, ptr %y, ptr %z) {
%xv = load <8 x i8>, ptr %x, align 2
%yv = load <8 x i8>, ptr %y, align 2
%xzv = zext <8 x i8> %xv to <8 x i16>
%yzv = zext <8 x i8> %yv to <8 x i16>
%add = add nuw nsw <8 x i16> %xzv, %yzv
%div = lshr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%ret = trunc <8 x i16> %div to <8 x i8>
ret <8 x i8> %ret
}
```
Before this patch
```
vaaddu_auto:
vsetivli zero, 8, e8, mf2, ta, ma
vle8.v v8, (a0)
vle8.v v9, (a1)
vwaddu.vv v10, v8, v9
vnsrl.wi v8, v10, 1
ret
```
After this patch
```
vaaddu_auto:
vsetivli zero, 8, e8, mf2, ta, ma
vle8.v v8, (a0)
vle8.v v9, (a1)
csrwi vxrm, 2
vaaddu.vv v8, v8, v9
ret
```
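For completeness, this is the kind of source that typically produces the IR above once it is vectorized (an illustrative sketch only; whether and how it vectorizes depends on the target features and cost model):
```
#include <cstdint>

// Unsigned averaging loop: a[i] + b[i] is widened to 16 bits, shifted
// right by one, and truncated back to 8 bits, matching the AVGFLOORU
// pattern recognized by the middle end.
void average_u8(uint8_t *dst, const uint8_t *a, const uint8_t *b, int n) {
  for (int i = 0; i < n; ++i)
    dst[i] = static_cast<uint8_t>((uint16_t(a[i]) + uint16_t(b[i])) >> 1);
}
```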
Note on signed averaging addition
Per the RVV spec, there is also a signed variant of the averaging add, `vaadd`.
But as far as I understand, no choice of rounding mode lets `vaadd` reproduce the desired signed averaging semantics, so this patch only introduces patterns for `vaaddu`.
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 18 +-
llvm/lib/Target/RISCV/RISCVISelLowering.h | 4 +
.../Target/RISCV/RISCVInstrInfoVSDPatterns.td | 16 ++
.../Target/RISCV/RISCVInstrInfoVVLPatterns.td | 19 ++
.../CodeGen/RISCV/rvv/fixed-vectors-vaaddu.ll | 220 ++++++++++++++++++
llvm/test/CodeGen/RISCV/rvv/vaaddu-sdnode.ll | 212 +++++++++++++++++
6 files changed, 482 insertions(+), 7 deletions(-)
create mode 100644 llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vaaddu.ll
create mode 100644 llvm/test/CodeGen/RISCV/rvv/vaaddu-sdnode.ll
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 135b41c7a08502..9eef9538c6b920 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -814,8 +814,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT,
Custom);
setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom);
- setOperationAction(
- {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT}, VT, Legal);
+ setOperationAction({ISD::AVGFLOORU, ISD::SADDSAT, ISD::UADDSAT,
+ ISD::SSUBSAT, ISD::USUBSAT},
+ VT, Legal);
// Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
// nodes which truncate by one power of two at a time.
@@ -1184,9 +1185,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Custom);
- setOperationAction(
- {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT}, VT,
- Custom);
+ setOperationAction({ISD::AVGFLOORU, ISD::SADDSAT, ISD::UADDSAT,
+ ISD::SSUBSAT, ISD::USUBSAT},
+ VT, Custom);
setOperationAction(ISD::VSELECT, VT, Custom);
setOperationAction(ISD::SELECT_CC, VT, Expand);
@@ -5465,6 +5466,7 @@ static unsigned getRISCVVLOp(SDValue Op) {
OP_CASE(UADDSAT)
OP_CASE(SSUBSAT)
OP_CASE(USUBSAT)
+ OP_CASE(AVGFLOORU)
OP_CASE(FADD)
OP_CASE(FSUB)
OP_CASE(FMUL)
@@ -5569,7 +5571,7 @@ static bool hasMergeOp(unsigned Opcode) {
Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE &&
"not a RISC-V target specific op");
static_assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP ==
- 124 &&
+ 125 &&
RISCVISD::LAST_RISCV_STRICTFP_OPCODE -
ISD::FIRST_TARGET_STRICTFP_OPCODE ==
21 &&
@@ -5595,7 +5597,7 @@ static bool hasMaskOp(unsigned Opcode) {
Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE &&
"not a RISC-V target specific op");
static_assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP ==
- 124 &&
+ 125 &&
RISCVISD::LAST_RISCV_STRICTFP_OPCODE -
ISD::FIRST_TARGET_STRICTFP_OPCODE ==
21 &&
@@ -6459,6 +6461,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
!Subtarget.hasVInstructionsF16()))
return SplitVectorOp(Op, DAG);
[[fallthrough]];
+ case ISD::AVGFLOORU:
case ISD::SADDSAT:
case ISD::UADDSAT:
case ISD::SSUBSAT:
@@ -18595,6 +18598,7 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(UDIV_VL)
NODE_NAME_CASE(UREM_VL)
NODE_NAME_CASE(XOR_VL)
+ NODE_NAME_CASE(AVGFLOORU_VL)
NODE_NAME_CASE(SADDSAT_VL)
NODE_NAME_CASE(UADDSAT_VL)
NODE_NAME_CASE(SSUBSAT_VL)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 18f58057558166..5d51fe168b04de 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -253,6 +253,9 @@ enum NodeType : unsigned {
SSUBSAT_VL,
USUBSAT_VL,
+ // Averaging adds of unsigned integers.
+ AVGFLOORU_VL,
+
MULHS_VL,
MULHU_VL,
FADD_VL,
@@ -902,6 +905,7 @@ class RISCVTargetLowering : public TargetLowering {
SDValue lowerFixedLengthVectorSelectToRVV(SDValue Op,
SelectionDAG &DAG) const;
SDValue lowerToScalableOp(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerUnsignedAvgFloor(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerIS_FPCLASS(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVPOp(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerLogicVPOp(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
index b7c8457037947c..4f87c36506e520 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
@@ -1131,6 +1131,22 @@ defm : VPatBinarySDNode_VV_VX_VI<uaddsat, "PseudoVSADDU">;
defm : VPatBinarySDNode_VV_VX<ssubsat, "PseudoVSSUB">;
defm : VPatBinarySDNode_VV_VX<usubsat, "PseudoVSSUBU">;
+// 12.2. Vector Single-Width Averaging Add and Subtract
+foreach vti = AllIntegerVectors in {
+ let Predicates = GetVTypePredicates<vti>.Predicates in {
+ def : Pat<(avgflooru (vti.Vector vti.RegClass:$rs1),
+ (vti.Vector vti.RegClass:$rs2)),
+ (!cast<Instruction>("PseudoVAADDU_VV_"#vti.LMul.MX)
+ (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, vti.RegClass:$rs2,
+ 0b10, vti.AVL, vti.Log2SEW, TA_MA)>;
+ def : Pat<(avgflooru (vti.Vector vti.RegClass:$rs1),
+ (vti.Vector (SplatPat (XLenVT GPR:$rs2)))),
+ (!cast<Instruction>("PseudoVAADDU_VX_"#vti.LMul.MX)
+ (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, GPR:$rs2,
+ 0b10, vti.AVL, vti.Log2SEW, TA_MA)>;
+ }
+}
+
// 15. Vector Mask Instructions
// 15.1. Vector Mask-Register Logical Instructions
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index ca9e37b9144b7a..d60ff4b5fab018 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -111,6 +111,7 @@ def riscv_ctlz_vl : SDNode<"RISCVISD::CTLZ_VL", SDT_RISCVIntUnOp_VL>
def riscv_cttz_vl : SDNode<"RISCVISD::CTTZ_VL", SDT_RISCVIntUnOp_VL>;
def riscv_ctpop_vl : SDNode<"RISCVISD::CTPOP_VL", SDT_RISCVIntUnOp_VL>;
+def riscv_avgflooru_vl : SDNode<"RISCVISD::AVGFLOORU_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>;
def riscv_saddsat_vl : SDNode<"RISCVISD::SADDSAT_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>;
def riscv_uaddsat_vl : SDNode<"RISCVISD::UADDSAT_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>;
def riscv_ssubsat_vl : SDNode<"RISCVISD::SSUBSAT_VL", SDT_RISCVIntBinOp_VL>;
@@ -2306,6 +2307,24 @@ defm : VPatBinaryVL_VV_VX_VI<riscv_uaddsat_vl, "PseudoVSADDU">;
defm : VPatBinaryVL_VV_VX<riscv_ssubsat_vl, "PseudoVSSUB">;
defm : VPatBinaryVL_VV_VX<riscv_usubsat_vl, "PseudoVSSUBU">;
+// 12.2. Vector Single-Width Averaging Add and Subtract
+foreach vti = AllIntegerVectors in {
+ let Predicates = GetVTypePredicates<vti>.Predicates in {
+ def : Pat<(riscv_avgflooru_vl (vti.Vector vti.RegClass:$rs1),
+ (vti.Vector vti.RegClass:$rs2),
+ vti.RegClass:$merge, (vti.Mask V0), VLOpFrag),
+ (!cast<Instruction>("PseudoVAADDU_VV_"#vti.LMul.MX#"_MASK")
+ vti.RegClass:$merge, vti.RegClass:$rs1, vti.RegClass:$rs2,
+ (vti.Mask V0), 0b10, GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ def : Pat<(riscv_avgflooru_vl (vti.Vector vti.RegClass:$rs1),
+ (vti.Vector (SplatPat (XLenVT GPR:$rs2))),
+ vti.RegClass:$merge, (vti.Mask V0), VLOpFrag),
+ (!cast<Instruction>("PseudoVAADDU_VX_"#vti.LMul.MX#"_MASK")
+ vti.RegClass:$merge, vti.RegClass:$rs1, GPR:$rs2,
+ (vti.Mask V0), 0b10, GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ }
+}
+
// 12.5. Vector Narrowing Fixed-Point Clip Instructions
class VPatTruncSatClipMaxMinBase<string inst,
VTypeInfo vti,
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vaaddu.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vaaddu.ll
new file mode 100644
index 00000000000000..f6bdeda946c40a
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vaaddu.ll
@@ -0,0 +1,220 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+
+define <8 x i8> @vaaddu_vv_v8i8(<8 x i8> %x, <8 x i8> %y) {
+; CHECK-LABEL: vaaddu_vv_v8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: csrwi vxrm, 2
+; CHECK-NEXT: vaaddu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %xzv = zext <8 x i8> %x to <8 x i16>
+ %yzv = zext <8 x i8> %y to <8 x i16>
+ %add = add nuw nsw <8 x i16> %xzv, %yzv
+ %div = lshr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %ret = trunc <8 x i16> %div to <8 x i8>
+ ret <8 x i8> %ret
+}
+
+define <8 x i8> @vaaddu_vx_v8i8(<8 x i8> %x, i8 %y) {
+; CHECK-LABEL: vaaddu_vx_v8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: csrwi vxrm, 2
+; CHECK-NEXT: vaaddu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %xzv = zext <8 x i8> %x to <8 x i16>
+ %yhead = insertelement <8 x i8> poison, i8 %y, i32 0
+ %ysplat = shufflevector <8 x i8> %yhead, <8 x i8> poison, <8 x i32> zeroinitializer
+ %yzv = zext <8 x i8> %ysplat to <8 x i16>
+ %add = add nuw nsw <8 x i16> %xzv, %yzv
+ %one = insertelement <8 x i16> poison, i16 1, i32 0
+ %splat = shufflevector <8 x i16> %one, <8 x i16> poison, <8 x i32> zeroinitializer
+ %div = lshr <8 x i16> %add, %splat
+ %ret = trunc <8 x i16> %div to <8 x i8>
+ ret <8 x i8> %ret
+}
+
+
+define <8 x i8> @vaaddu_vv_v8i8_sexti16(<8 x i8> %x, <8 x i8> %y) {
+; CHECK-LABEL: vaaddu_vv_v8i8_sexti16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vwadd.vv v10, v8, v9
+; CHECK-NEXT: vnsrl.wi v8, v10, 1
+; CHECK-NEXT: ret
+ %xzv = sext <8 x i8> %x to <8 x i16>
+ %yzv = sext <8 x i8> %y to <8 x i16>
+ %add = add nuw nsw <8 x i16> %xzv, %yzv
+ %div = lshr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %ret = trunc <8 x i16> %div to <8 x i8>
+ ret <8 x i8> %ret
+}
+
+define <8 x i8> @vaaddu_vv_v8i8_zexti32(<8 x i8> %x, <8 x i8> %y) {
+; CHECK-LABEL: vaaddu_vv_v8i8_zexti32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: csrwi vxrm, 2
+; CHECK-NEXT: vaaddu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %xzv = zext <8 x i8> %x to <8 x i32>
+ %yzv = zext <8 x i8> %y to <8 x i32>
+ %add = add nuw nsw <8 x i32> %xzv, %yzv
+ %div = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %ret = trunc <8 x i32> %div to <8 x i8>
+ ret <8 x i8> %ret
+}
+
+define <8 x i8> @vaaddu_vv_v8i8_lshr2(<8 x i8> %x, <8 x i8> %y) {
+; CHECK-LABEL: vaaddu_vv_v8i8_lshr2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vwaddu.vv v10, v8, v9
+; CHECK-NEXT: vnsrl.wi v8, v10, 2
+; CHECK-NEXT: ret
+ %xzv = zext <8 x i8> %x to <8 x i16>
+ %yzv = zext <8 x i8> %y to <8 x i16>
+ %add = add nuw nsw <8 x i16> %xzv, %yzv
+ %div = lshr <8 x i16> %add, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
+ %ret = trunc <8 x i16> %div to <8 x i8>
+ ret <8 x i8> %ret
+}
+
+define <8 x i16> @vaaddu_vv_v8i16(<8 x i16> %x, <8 x i16> %y) {
+; CHECK-LABEL: vaaddu_vv_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: csrwi vxrm, 2
+; CHECK-NEXT: vaaddu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %xzv = zext <8 x i16> %x to <8 x i32>
+ %yzv = zext <8 x i16> %y to <8 x i32>
+ %add = add nuw nsw <8 x i32> %xzv, %yzv
+ %div = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %ret = trunc <8 x i32> %div to <8 x i16>
+ ret <8 x i16> %ret
+}
+
+define <8 x i16> @vaaddu_vx_v8i16(<8 x i16> %x, i16 %y) {
+; CHECK-LABEL: vaaddu_vx_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: csrwi vxrm, 2
+; CHECK-NEXT: vaaddu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %xzv = zext <8 x i16> %x to <8 x i32>
+ %yhead = insertelement <8 x i16> poison, i16 %y, i16 0
+ %ysplat = shufflevector <8 x i16> %yhead, <8 x i16> poison, <8 x i32> zeroinitializer
+ %yzv = zext <8 x i16> %ysplat to <8 x i32>
+ %add = add nuw nsw <8 x i32> %xzv, %yzv
+ %one = insertelement <8 x i32> poison, i32 1, i32 0
+ %splat = shufflevector <8 x i32> %one, <8 x i32> poison, <8 x i32> zeroinitializer
+ %div = lshr <8 x i32> %add, %splat
+ %ret = trunc <8 x i32> %div to <8 x i16>
+ ret <8 x i16> %ret
+}
+
+define <8 x i32> @vaaddu_vv_v8i32(<8 x i32> %x, <8 x i32> %y) {
+; CHECK-LABEL: vaaddu_vv_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT: csrwi vxrm, 2
+; CHECK-NEXT: vaaddu.vv v8, v8, v10
+; CHECK-NEXT: ret
+ %xzv = zext <8 x i32> %x to <8 x i64>
+ %yzv = zext <8 x i32> %y to <8 x i64>
+ %add = add nuw nsw <8 x i64> %xzv, %yzv
+ %div = lshr <8 x i64> %add, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
+ %ret = trunc <8 x i64> %div to <8 x i32>
+ ret <8 x i32> %ret
+}
+
+define <8 x i32> @vaaddu_vx_v8i32(<8 x i32> %x, i32 %y) {
+; CHECK-LABEL: vaaddu_vx_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT: csrwi vxrm, 2
+; CHECK-NEXT: vaaddu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %xzv = zext <8 x i32> %x to <8 x i64>
+ %yhead = insertelement <8 x i32> poison, i32 %y, i32 0
+ %ysplat = shufflevector <8 x i32> %yhead, <8 x i32> poison, <8 x i32> zeroinitializer
+ %yzv = zext <8 x i32> %ysplat to <8 x i64>
+ %add = add nuw nsw <8 x i64> %xzv, %yzv
+ %one = insertelement <8 x i64> poison, i64 1, i64 0
+ %splat = shufflevector <8 x i64> %one, <8 x i64> poison, <8 x i32> zeroinitializer
+ %div = lshr <8 x i64> %add, %splat
+ %ret = trunc <8 x i64> %div to <8 x i32>
+ ret <8 x i32> %ret
+}
+
+define <8 x i64> @vaaddu_vv_v8i64(<8 x i64> %x, <8 x i64> %y) {
+; CHECK-LABEL: vaaddu_vv_v8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; CHECK-NEXT: csrwi vxrm, 2
+; CHECK-NEXT: vaaddu.vv v8, v8, v12
+; CHECK-NEXT: ret
+ %xzv = zext <8 x i64> %x to <8 x i128>
+ %yzv = zext <8 x i64> %y to <8 x i128>
+ %add = add nuw nsw <8 x i128> %xzv, %yzv
+ %div = lshr <8 x i128> %add, <i128 1, i128 1, i128 1, i128 1, i128 1, i128 1, i128 1, i128 1>
+ %ret = trunc <8 x i128> %div to <8 x i64>
+ ret <8 x i64> %ret
+}
+
+define <8 x i1> @vaaddu_vv_v8i1(<8 x i1> %x, <8 x i1> %y) {
+; CHECK-LABEL: vaaddu_vv_v8i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vmv.v.i v9, 0
+; CHECK-NEXT: vmerge.vim v10, v9, 1, v0
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vmerge.vim v8, v9, 1, v0
+; CHECK-NEXT: csrwi vxrm, 2
+; CHECK-NEXT: vaaddu.vv v8, v10, v8
+; CHECK-NEXT: vand.vi v8, v8, 1
+; CHECK-NEXT: vmsne.vi v0, v8, 0
+; CHECK-NEXT: ret
+ %xzv = zext <8 x i1> %x to <8 x i8>
+ %yzv = zext <8 x i1> %y to <8 x i8>
+ %add = add nuw nsw <8 x i8> %xzv, %yzv
+ %div = lshr <8 x i8> %add, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ %ret = trunc <8 x i8> %div to <8 x i1>
+ ret <8 x i1> %ret
+}
+
+define <8 x i64> @vaaddu_vx_v8i64(<8 x i64> %x, i64 %y) {
+; RV32-LABEL: vaaddu_vx_v8i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV32-NEXT: vlse64.v v12, (a0), zero
+; RV32-NEXT: csrwi vxrm, 2
+; RV32-NEXT: vaaddu.vv v8, v8, v12
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vaaddu_vx_v8i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64-NEXT: csrwi vxrm, 2
+; RV64-NEXT: vaaddu.vx v8, v8, a0
+; RV64-NEXT: ret
+ %xzv = zext <8 x i64> %x to <8 x i128>
+ %yhead = insertelement <8 x i64> poison, i64 %y, i64 0
+ %ysplat = shufflevector <8 x i64> %yhead, <8 x i64> poison, <8 x i32> zeroinitializer
+ %yzv = zext <8 x i64> %ysplat to <8 x i128>
+ %add = add nuw nsw <8 x i128> %xzv, %yzv
+ %one = insertelement <8 x i128> poison, i128 1, i128 0
+ %splat = shufflevector <8 x i128> %one, <8 x i128> poison, <8 x i32> zeroinitializer
+ %div = lshr <8 x i128> %add, %splat
+ %ret = trunc <8 x i128> %div to <8 x i64>
+ ret <8 x i64> %ret
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vaaddu-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vaaddu-sdnode.ll
new file mode 100644
index 00000000000000..883d605e77e262
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vaaddu-sdnode.ll
@@ -0,0 +1,212 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+
+define <vscale x 8 x i8> @vaaddu_vv_nxv8i8(<vscale x 8 x i8> %x, <vscale x 8 x i8> %y) {
+; CHECK-LABEL: vaaddu_vv_nxv8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-NEXT: csrwi vxrm, 2
+; CHECK-NEXT: vaaddu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %xzv = zext <vscale x 8 x i8> %x to <vscale x 8 x i16>
+ %yzv = zext <vscale x 8 x i8> %y to <vscale x 8 x i16>
+ %add = add nuw nsw <vscale x 8 x i16> %xzv, %yzv
+ %one = insertelement <vscale x 8 x i16> poison, i16 1, i32 0
+ %splat = shufflevector <vscale x 8 x i16> %one, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+ %div = lshr <vscale x 8 x i16> %add, %splat
+ %ret = trunc <vscale x 8 x i16> %div to <vscale x 8 x i8>
+ ret <vscale x 8 x i8> %ret
+}
+
+define <vscale x 8 x i8> @vaaddu_vx_nxv8i8(<vscale x 8 x i8> %x, i8 %y) {
+; CHECK-LABEL: vaaddu_vx_nxv8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
+; CHECK-NEXT: csrwi vxrm, 2
+; CHECK-NEXT: vaaddu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %xzv = zext <vscale x 8 x i8> %x to <vscale x 8 x i16>
+ %yhead = insertelement <vscale x 8 x i8> poison, i8 %y, i32 0
+ %ysplat = shufflevector <vscale x 8 x i8> %yhead, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
+ %yzv = zext <vscale x 8 x i8> %ysplat to <vscale x 8 x i16>
+ %add = add nuw nsw <vscale x 8 x i16> %xzv, %yzv
+ %one = insertelement <vscale x 8 x i16> poison, i16 1, i32 0
+ %splat = shufflevector <vscale x 8 x i16> %one, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+ %div = lshr <vscale x 8 x i16> %add, %splat
+ %ret = trunc <vscale x 8 x i16> %div to <vscale x 8 x i8>
+ ret <vscale x 8 x i8> %ret
+}
+
+define <vscale x 8 x i8> @vaaddu_vv_nxv8i8_sexti16(<vscale x 8 x i8> %x, <vscale x 8 x i8> %y) {
+; CHECK-LABEL: vaaddu_vv_nxv8i8_sexti16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-NEXT: vwadd.vv v10, v8, v9
+; CHECK-NEXT: vnsrl.wi v8, v10, 1
+; CHECK-NEXT: ret
+ %xzv = sext <vscale x 8 x i8> %x to <vscale x 8 x i16>
+ %yzv = sext <vscale x 8 x i8> %y to <vscale x 8 x i16>
+ %add = add nuw nsw <vscale x 8 x i16> %xzv, %yzv
+ %one = insertelement <vscale x 8 x i16> poison, i16 1, i32 0
+ %splat = shufflevector <vscale x 8 x i16> %one, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+ %div = lshr <vscale x 8 x i16> %add, %splat
+ %ret = trunc <vscale x 8 x i16> %div to <vscale x 8 x i8>
+ ret <vscale x 8 x i8> %ret
+}
+
+define <vscale x 8 x i8> @vaaddu_vv_nxv8i8_zexti32(<vscale x 8 x i8> %x, <vscale x 8 x i8> %y) {
+; CHECK-LABEL: vaaddu_vv_nxv8i8_zexti32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-NEXT: csrwi vxrm, 2
+; CHECK-NEXT: vaaddu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %xzv = zext <vscale x 8 x i8> %x to <vscale x 8 x i32>
+ %yzv = zext <vscale x 8 x i8> %y to <vscale x 8 x i32>
+ %add = add nuw nsw <vscale x 8 x i32> %xzv, %yzv
+ %one = insertelement <vscale x 8 x i32> poison, i32 1, i32 0
+ %splat = shufflevector <vscale x 8 x i32> %one, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
+ %div = lshr <vscale x 8 x i32> %add, %splat
+ %ret = trunc <vscale x 8 x i32> %div to <vscale x 8 x i8>
+ ret <vscale x 8 x i8> %ret
+}
+
+define <vscale x 8 x i8> @vaaddu_vv_nxv8i8_lshr2(<vscale x 8 x i8> %x, <vscale x 8 x i8> %y) {
+; CHECK-LABEL: vaaddu_vv_nxv8i8_lshr2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-NEXT: vwaddu.vv v10, v8, v9
+; CHECK-NEXT: vnsrl.wi v8, v10, 2
+; CHECK-NEXT: ret
+ %xzv = zext <vscale x 8 x i8> %x to <vscale x 8 x i16>
+ %yzv = zext <vscale x 8 x i8> %y to <vscale x 8 x i16>
+ %add = add nuw nsw <vscale x 8 x i16> %xzv, %yzv
+ %one = insertelement <vscale x 8 x i16> poison, i16 2, i32 0
+ %splat = shufflevector <vscale x 8 x i16> %one, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+ %div = lshr <vscale x 8 x i16> %add, %splat
+ %ret = trunc <vscale x 8 x i16> %div to <vscale x 8 x i8>
+ ret <vscale x 8 x i8> %ret
+}
+
+define <vscale x 8 x i16> @vaaddu_vv_nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i16> %y) {
+; CHECK-LABEL: vaaddu_vv_nxv8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: csrwi vxrm, 2
+; CHECK-NEXT: vaaddu.vv v8, v8, v10
+; CHECK-NEXT: ret
+ %xzv = zext <vscale x 8 x i16> %x to <vscale x 8 x i32>
+ %yzv = zext <vscale x 8 x i16> %y to <vscale x 8 x i32>
+ %add = add nuw nsw <vscale x 8 x i32> %xzv, %yzv
+ %one = insertelement <vscale x 8 x i32> poison, i32 1, i32 0
+ %splat = shufflevector <vscale x 8 x i32> %one, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
+ %div = lshr <vscale x 8 x i32> %add, %splat
+ %ret = trunc <vscale x 8 x i32> %div to <vscale x 8 x i16>
+ ret <vscale x 8 x i16> %ret
+}
+
+define <vscale x 8 x i16> @vaaddu_vx_nxv8i16(<vscale x 8 x i16> %x, i16 %y) {
+; CHECK-LABEL: vaaddu_vx_nxv8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: csrwi vxrm, 2
+; CHECK-NEXT: vaaddu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %xzv = zext <vscale x 8 x i16> %x to <vscale x 8 x i32>
+ %yhead = insertelement <vscale x 8 x i16> poison, i16 %y, i16 0
+ %ysplat = shufflevector <vscale x 8 x i16> %yhead, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+ %yzv = zext <vscale x 8 x i16> %ysplat to <vscale x 8 x i32>
+ %add = add nuw nsw <vscale x 8 x i32> %xzv, %yzv
+ %one = insertelement <vscale x 8 x i32> poison, i32 1, i32 0
+ %splat = shufflevector <vscale x 8 x i32> %one, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
+ %div = lshr <vscale x 8 x i32> %add, %splat
+ %ret = trunc <vscale x 8 x i32> %div to <vscale x 8 x i16>
+ ret <vscale x 8 x i16> %ret
+}
+
+define <vscale x 8 x i32> @vaaddu_vv_nxv8i32(<vscale x 8 x i32> %x, <vscale x 8 x i32> %y) {
+; CHECK-LABEL: vaaddu_vv_nxv8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
+; CHECK-NEXT: csrwi vxrm, 2
+; CHECK-NEXT: vaaddu.vv v8, v8, v12
+; CHECK-NEXT: ret
+ %xzv = zext <vscale x 8 x i32> %x to <vscale x 8 x i64>
+ %yzv = zext <vscale x 8 x i32> %y to <vscale x 8 x i64>
+ %add = add nuw nsw <vscale x 8 x i64> %xzv, %yzv
+ %one = insertelement <vscale x 8 x i64> poison, i64 1, i64 0
+ %splat = shufflevector <vscale x 8 x i64> %one, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+ %div = lshr <vscale x 8 x i64> %add, %splat
+ %ret = trunc <vscale x 8 x i64> %div to <vscale x 8 x i32>
+ ret <vscale x 8 x i32> %ret
+}
+
+define <vscale x 8 x i32> @vaaddu_vx_nxv8i32(<vscale x 8 x i32> %x, i32 %y) {
+; CHECK-LABEL: vaaddu_vx_nxv8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
+; CHECK-NEXT: csrwi vxrm, 2
+; CHECK-NEXT: vaaddu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %xzv = zext <vscale x 8 x i32> %x to <vscale x 8 x i64>
+ %yhead = insertelement <vscale x 8 x i32> poison, i32 %y, i32 0
+ %ysplat = shufflevector <vscale x 8 x i32> %yhead, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
+ %yzv = zext <vscale x 8 x i32> %ysplat to <vscale x 8 x i64>
+ %add = add nuw nsw <vscale x 8 x i64> %xzv, %yzv
+ %one = insertelement <vscale x 8 x i64> poison, i64 1, i64 0
+ %splat = shufflevector <vscale x 8 x i64> %one, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+ %div = lshr <vscale x 8 x i64> %add, %splat
+ %ret = trunc <vscale x 8 x i64> %div to <vscale x 8 x i32>
+ ret <vscale x 8 x i32> %ret
+}
+
+define <vscale x 8 x i64> @vaaddu_vv_nxv8i64(<vscale x 8 x i64> %x, <vscale x 8 x i64> %y) {
+; CHECK-LABEL: vaaddu_vv_nxv8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
+; CHECK-NEXT: csrwi vxrm, 2
+; CHECK-NEXT: vaaddu.vv v8, v8, v16
+; CHECK-NEXT: ret
+ %xzv = zext <vscale x 8 x i64> %x to <vscale x 8 x i128>
+ %yzv = zext <vscale x 8 x i64> %y to <vscale x 8 x i128>
+ %add = add nuw nsw <vscale x 8 x i128> %xzv, %yzv
+ %one = insertelement <vscale x 8 x i128> poison, i128 1, i128 0
+ %splat = shufflevector <vscale x 8 x i128> %one, <vscale x 8 x i128> poison, <vscale x 8 x i32> zeroinitializer
+ %div = lshr <vscale x 8 x i128> %add, %splat
+ %ret = trunc <vscale x 8 x i128> %div to <vscale x 8 x i64>
+ ret <vscale x 8 x i64> %ret
+}
+
+define <vscale x 8 x i64> @vaaddu_vx_nxv8i64(<vscale x 8 x i64> %x, i64 %y) {
+; RV32-LABEL: vaaddu_vx_nxv8i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
+; RV32-NEXT: vlse64.v v16, (a0), zero
+; RV32-NEXT: csrwi vxrm, 2
+; RV32-NEXT: vaaddu.vv v8, v8, v16
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vaaddu_vx_nxv8i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
+; RV64-NEXT: csrwi vxrm, 2
+; RV64-NEXT: vaaddu.vx v8, v8, a0
+; RV64-NEXT: ret
+ %xzv = zext <vscale x 8 x i64> %x to <vscale x 8 x i128>
+ %yhead = insertelement <vscale x 8 x i64> poison, i64 %y, i64 0
+ %ysplat = shufflevector <vscale x 8 x i64> %yhead, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+ %yzv = zext <vscale x 8 x i64> %ysplat to <vscale x 8 x i128>
+ %add = add nuw nsw <vscale x 8 x i128> %xzv, %yzv
+ %one = insertelement <vscale x 8 x i128> poison, i128 1, i128 0
+ %splat = shufflevector <vscale x 8 x i128> %one, <vscale x 8 x i128> poison, <vscale x 8 x i32> zeroinitializer
+ %div = lshr <vscale x 8 x i128> %add, %splat
+ %ret = trunc <vscale x 8 x i128> %div to <vscale x 8 x i64>
+ ret <vscale x 8 x i64> %ret
+}