[llvm] [RISCV][ISel] Fold trunc (lshr (add (zext X), (zext Y)), 1) -> vaaddu X, Y (PR #76550)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 8 03:57:16 PST 2024
https://github.com/sun-jacobi updated https://github.com/llvm/llvm-project/pull/76550
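For reference, this is the shape the series targets, in the form exercised by the tests added below (a minimal sketch adapted from those tests; the function name is illustrative only): an unsigned averaging add written as zero-extend + widening add, a logical shift right by one, and a truncate back to the source element type.

define <8 x i8> @vaaddu_sketch(<8 x i8> %x, <8 x i8> %y) {
  %xz  = zext <8 x i8> %x to <8 x i16>
  %yz  = zext <8 x i8> %y to <8 x i16>
  %add = add nuw nsw <8 x i16> %xz, %yz
  %div = lshr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %ret = trunc <8 x i16> %div to <8 x i8>
  ret <8 x i8> %ret
}

With the patches applied, the llc checks below expect this to select to "csrwi vxrm, 2" followed by "vaaddu.vv", rather than a widening add (vwaddu.vv) plus a narrowing shift (vnsrl.wi). Sign-extended operands and shift amounts other than 1 are deliberately left to the existing lowering, as the negative tests show.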
>From 73947743e193fb3b6148a184df3e4a59cb69475a Mon Sep 17 00:00:00 2001
From: sun-jacobi <sun1011jacobi at gmail.com>
Date: Fri, 5 Jan 2024 00:23:30 +0900
Subject: [PATCH 1/8] [RISCV][ISel] Implement combineUnsignedAvgFloor.
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 97 +++++++++++++++++--
llvm/lib/Target/RISCV/RISCVISelLowering.h | 4 +
.../Target/RISCV/RISCVInstrInfoVVLPatterns.td | 24 +++++
3 files changed, 119 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 27bb69dc9868c8..5fb1b9bfcfb74f 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -859,6 +859,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);
+ setOperationAction(ISD::AVGFLOORU, VT, Custom);
+
// Splice
setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
@@ -1177,6 +1179,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::UREM, ISD::SHL, ISD::SRA, ISD::SRL},
VT, Custom);
+ setOperationAction(ISD::AVGFLOORU, VT, Custom);
+
setOperationAction(
{ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX, ISD::ABS}, VT, Custom);
@@ -1375,7 +1379,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setTargetDAGCombine({ISD::INTRINSIC_VOID, ISD::INTRINSIC_W_CHAIN,
ISD::INTRINSIC_WO_CHAIN, ISD::ADD, ISD::SUB, ISD::AND,
- ISD::OR, ISD::XOR, ISD::SETCC, ISD::SELECT});
+ ISD::TRUNCATE, ISD::OR, ISD::XOR, ISD::SETCC,
+ ISD::SELECT});
if (Subtarget.is64Bit())
setTargetDAGCombine(ISD::SRA);
@@ -1385,9 +1390,6 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
if (Subtarget.hasStdExtZbb())
setTargetDAGCombine({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN});
- if (Subtarget.hasStdExtZbs() && Subtarget.is64Bit())
- setTargetDAGCombine(ISD::TRUNCATE);
-
if (Subtarget.hasStdExtZbkb())
setTargetDAGCombine(ISD::BITREVERSE);
if (Subtarget.hasStdExtZfhminOrZhinxmin())
@@ -5501,6 +5503,8 @@ static unsigned getRISCVVLOp(SDValue Op) {
VP_CASE(CTLZ) // VP_CTLZ
VP_CASE(CTTZ) // VP_CTTZ
VP_CASE(CTPOP) // VP_CTPOP
+ case ISD::AVGFLOORU:
+ return RISCVISD::UAVGADD_VL;
case ISD::CTLZ_ZERO_UNDEF:
case ISD::VP_CTLZ_ZERO_UNDEF:
return RISCVISD::CTLZ_VL;
@@ -5563,7 +5567,7 @@ static bool hasMergeOp(unsigned Opcode) {
Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE &&
"not a RISC-V target specific op");
static_assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP ==
- 125 &&
+ 126 &&
RISCVISD::LAST_RISCV_STRICTFP_OPCODE -
ISD::FIRST_TARGET_STRICTFP_OPCODE ==
21 &&
@@ -5589,7 +5593,7 @@ static bool hasMaskOp(unsigned Opcode) {
Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE &&
"not a RISC-V target specific op");
static_assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP ==
- 125 &&
+ 126 &&
RISCVISD::LAST_RISCV_STRICTFP_OPCODE -
ISD::FIRST_TARGET_STRICTFP_OPCODE ==
21 &&
@@ -6438,6 +6442,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
"Unexpected custom legalisation");
return SDValue();
+ case ISD::AVGFLOORU:
+ return lowerUnsignedAvgFloor(Op, DAG);
case ISD::FADD:
case ISD::FSUB:
case ISD::FMUL:
@@ -10298,6 +10304,36 @@ SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op,
return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
}
+// Lower vector AVGFLOORU(X, Y)
+SDValue RISCVTargetLowering::lowerUnsignedAvgFloor(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ assert((Op.getOpcode() == ISD::AVGFLOORU) &&
+ "Opcode should be ISD::AVGFLOORU");
+
+ MVT VT = Op.getSimpleValueType();
+ SDValue X = Op.getOperand(0);
+ SDValue Y = Op.getOperand(1);
+
+ MVT ContainerVT = VT;
+ if (VT.isFixedLengthVector()) {
+ ContainerVT = getContainerForFixedLengthVector(VT);
+ X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
+ Y = convertToScalableVector(ContainerVT, Y, DAG, Subtarget);
+ }
+
+ auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
+
+ SDValue RM = DAG.getTargetConstant(0b10, DL, Subtarget.getXLenVT());
+ SDValue Result = DAG.getNode(RISCVISD::UAVGADD_VL, DL, ContainerVT,
+ {X, Y, DAG.getUNDEF(ContainerVT), Mask, VL, RM});
+
+ if (VT.isFixedLengthVector())
+ Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
+
+ return Result;
+}
+
// Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node:
// * Operands of each node are assumed to be in the same order.
// * The EVL operand is promoted from i32 to i64 on RV64.
@@ -12357,6 +12393,51 @@ static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG) {
N0.getOperand(0));
}
+static SDValue combineUnsignedAvgFloor(SDNode *N, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+
+ if (!Subtarget.hasVInstructions())
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ if (!VT.isVector() || !VT.isInteger())
+ return SDValue();
+
+ assert(N->getOpcode() == ISD::TRUNCATE && "Opcode should be ISD::TRUNCATE");
+
+ if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
+ return SDValue();
+
+ SDValue Srl = N->getOperand(0);
+
+ // (lshr X, 1)
+ if (!Srl.hasOneUse() || Srl.getOpcode() != ISD::SRL ||
+ !isOneOrOneSplat(Srl->getOperand(1)))
+ return SDValue();
+
+ SDValue WiddenAdd = Srl.getOperand(0);
+
+ if (!WiddenAdd.hasOneUse() || WiddenAdd.getOpcode() != ISD::ADD)
+ return SDValue();
+
+ SDValue N0 = WiddenAdd.getOperand(0);
+ SDValue N1 = WiddenAdd.getOperand(1);
+
+ auto IsZext = [&](SDValue V) {
+ if (V.getOpcode() != ISD::ZERO_EXTEND)
+ return false;
+
+ return V.getOperand(0)->getValueType(0) == VT;
+ };
+
+ if (!IsZext(N0) || !IsZext(N1))
+ return SDValue();
+
+ SDLoc DL(N);
+ return DAG.getNode(ISD::AVGFLOORU, DL, VT, N0->getOperand(0),
+ N1->getOperand(0));
+}
+
static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
if (SDValue V = combineAddOfBooleanXor(N, DAG))
@@ -12490,6 +12571,9 @@ static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG) {
static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
+ if (SDValue V = combineUnsignedAvgFloor(N, DAG, Subtarget))
+ return V;
+
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
@@ -18619,6 +18703,7 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(SMAX_VL)
NODE_NAME_CASE(UMIN_VL)
NODE_NAME_CASE(UMAX_VL)
+ NODE_NAME_CASE(UAVGADD_VL)
NODE_NAME_CASE(BITREVERSE_VL)
NODE_NAME_CASE(BSWAP_VL)
NODE_NAME_CASE(CTLZ_VL)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 58ed611efc83d1..911b2fcf2aec05 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -252,6 +252,9 @@ enum NodeType : unsigned {
UADDSAT_VL,
SSUBSAT_VL,
USUBSAT_VL,
+
+ // Averaging adds of unsigned integers.
+ UAVGADD_VL,
MULHS_VL,
MULHU_VL,
@@ -903,6 +906,7 @@ class RISCVTargetLowering : public TargetLowering {
SDValue lowerFixedLengthVectorSelectToRVV(SDValue Op,
SelectionDAG &DAG) const;
SDValue lowerToScalableOp(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerUnsignedAvgFloor(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerIS_FPCLASS(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVPOp(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerLogicVPOp(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index 5b50a4a78c018b..570bca5ca49086 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -57,6 +57,15 @@ def SDT_RISCVCopySign_VL : SDTypeProfile<1, 5, [SDTCisSameAs<0, 1>,
SDTCisSameNumEltsAs<0, 4>,
SDTCisVT<5, XLenVT>]>;
+def SDT_RISCVIntBinOp_RM_VL : SDTypeProfile<1, 6, [SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>,
+ SDTCisVec<0>, SDTCisInt<0>,
+ SDTCisSameAs<0, 3>,
+ SDTCVecEltisVT<4, i1>,
+ SDTCisSameNumEltsAs<0, 4>,
+ SDTCisVT<5, XLenVT>,
+ SDTCisVT<6, XLenVT>]>; // Rounding Mode
+
def riscv_vmv_v_v_vl : SDNode<"RISCVISD::VMV_V_V_VL",
SDTypeProfile<1, 3, [SDTCisVec<0>,
SDTCisSameAs<0, 1>,
@@ -115,6 +124,7 @@ def riscv_saddsat_vl : SDNode<"RISCVISD::SADDSAT_VL", SDT_RISCVIntBinOp_VL, [S
def riscv_uaddsat_vl : SDNode<"RISCVISD::UADDSAT_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>;
def riscv_ssubsat_vl : SDNode<"RISCVISD::SSUBSAT_VL", SDT_RISCVIntBinOp_VL>;
def riscv_usubsat_vl : SDNode<"RISCVISD::USUBSAT_VL", SDT_RISCVIntBinOp_VL>;
+def riscv_uavgadd_vl : SDNode<"RISCVISD::UAVGADD_VL", SDT_RISCVIntBinOp_RM_VL, [SDNPCommutative]>;
def riscv_fadd_vl : SDNode<"RISCVISD::FADD_VL", SDT_RISCVFPBinOp_VL, [SDNPCommutative]>;
def riscv_fsub_vl : SDNode<"RISCVISD::FSUB_VL", SDT_RISCVFPBinOp_VL>;
@@ -2338,6 +2348,20 @@ defm : VPatBinaryVL_VV_VX_VI<riscv_uaddsat_vl, "PseudoVSADDU">;
defm : VPatBinaryVL_VV_VX<riscv_ssubsat_vl, "PseudoVSSUB">;
defm : VPatBinaryVL_VV_VX<riscv_usubsat_vl, "PseudoVSSUBU">;
+// 12.2. Vector Single-Width Averaging Add and Subtract
+foreach vti = AllIntegerVectors in {
+ let Predicates = GetVTypePredicates<vti>.Predicates in {
+ def : Pat<(riscv_uavgadd_vl (vti.Vector vti.RegClass:$rs1),
+ (vti.Vector vti.RegClass:$rs2),
+ vti.RegClass:$merge, (vti.Mask V0), VLOpFrag,
+ (XLenVT timm:$rounding_mode)),
+ (!cast<Instruction>("PseudoVAADDU_VV_"# vti.LMul.MX#"_MASK")
+ vti.RegClass:$merge, vti.RegClass:$rs1, vti.RegClass:$rs2,
+ (vti.Mask V0), (XLenVT timm:$rounding_mode),
+ GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ }
+}
+
// 12.5. Vector Narrowing Fixed-Point Clip Instructions
class VPatTruncSatClipMaxMinBase<string inst,
VTypeInfo vti,
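A note on the rounding-mode operand hardwired above (RM = 0b10, matching the "csrwi vxrm, 2" emitted in the tests that follow): to my reading of the V spec's fixed-point rounding modes (rnu=0, rne=1, rdn=2, rod=3), vxrm=2 selects round-down, so vaaddu.vv computes the truncated average at double-width precision and cannot wrap. A quick worked case with i8 operands x = 255, y = 1:

  zext to i16:  255 + 1 = 256
  lshr by 1:    256 >> 1 = 128
  trunc to i8:  128

vaaddu.vv with vxrm=2 yields the same 128, whereas an 8-bit add would first wrap to 0, which is why the fold requires both operands to be zero-extended.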
>From e6b47bff4da74cf90a410ba5274732910d96dc87 Mon Sep 17 00:00:00 2001
From: sun-jacobi <sun1011jacobi at gmail.com>
Date: Fri, 5 Jan 2024 00:32:47 +0900
Subject: [PATCH 2/8] [RISCV][ISel] add vaadd autogen test.
---
llvm/test/CodeGen/RISCV/rvv/vaadd-autogen.ll | 336 +++++++++++++++++++
1 file changed, 336 insertions(+)
create mode 100644 llvm/test/CodeGen/RISCV/rvv/vaadd-autogen.ll
diff --git a/llvm/test/CodeGen/RISCV/rvv/vaadd-autogen.ll b/llvm/test/CodeGen/RISCV/rvv/vaadd-autogen.ll
new file mode 100644
index 00000000000000..9fe865146c0eaf
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vaadd-autogen.ll
@@ -0,0 +1,336 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s
+
+
+define <8 x i8> @vaaddu_i8(ptr %x, ptr %y) {
+; CHECK-LABEL: vaaddu_i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vle8.v v8, (a0)
+; CHECK-NEXT: vle8.v v9, (a1)
+; CHECK-NEXT: csrwi vxrm, 2
+; CHECK-NEXT: vaaddu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %xv = load <8 x i8>, ptr %x, align 2
+ %yv = load <8 x i8>, ptr %y, align 2
+ %xzv = zext <8 x i8> %xv to <8 x i16>
+ %yzv = zext <8 x i8> %yv to <8 x i16>
+ %add = add nuw nsw <8 x i16> %xzv, %yzv
+ %div = lshr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %ret = trunc <8 x i16> %div to <8 x i8>
+ ret <8 x i8> %ret
+}
+
+
+define <8 x i8> @vaaddu_i8_arg(<8 x i8> %x, <8 x i8> %y) {
+; CHECK-LABEL: vaaddu_i8_arg:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: csrwi vxrm, 2
+; CHECK-NEXT: vaaddu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %xz = zext <8 x i8> %x to <8 x i16>
+ %yz = zext <8 x i8> %y to <8 x i16>
+ %add = add nuw nsw <8 x i16> %xz, %yz
+ %div = lshr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %ret = trunc <8 x i16> %div to <8 x i8>
+ ret <8 x i8> %ret
+}
+
+define <8 x i8> @vaaddu_i8_sext(ptr %x, ptr %y) {
+; CHECK-LABEL: vaaddu_i8_sext:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vle8.v v8, (a0)
+; CHECK-NEXT: vle8.v v9, (a1)
+; CHECK-NEXT: vwadd.vv v10, v8, v9
+; CHECK-NEXT: vnsrl.wi v8, v10, 1
+; CHECK-NEXT: ret
+ %xv = load <8 x i8>, ptr %x, align 2
+ %yv = load <8 x i8>, ptr %y, align 2
+ %xzv = sext <8 x i8> %xv to <8 x i16>
+ %yzv = sext <8 x i8> %yv to <8 x i16>
+ %add = add nuw nsw <8 x i16> %xzv, %yzv
+ %div = lshr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %ret = trunc <8 x i16> %div to <8 x i8>
+ ret <8 x i8> %ret
+}
+
+define <8 x i8> @vaaddu_i8_zexti32(ptr %x, ptr %y) {
+; CHECK-LABEL: vaaddu_i8_zexti32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vle8.v v8, (a0)
+; CHECK-NEXT: vle8.v v9, (a1)
+; CHECK-NEXT: csrwi vxrm, 2
+; CHECK-NEXT: vaaddu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %xv = load <8 x i8>, ptr %x, align 2
+ %yv = load <8 x i8>, ptr %y, align 2
+ %xzv = zext <8 x i8> %xv to <8 x i32>
+ %yzv = zext <8 x i8> %yv to <8 x i32>
+ %add = add nuw nsw <8 x i32> %xzv, %yzv
+ %div = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %ret = trunc <8 x i32> %div to <8 x i8>
+ ret <8 x i8> %ret
+}
+
+define <8 x i8> @vaaddu_i8_lshr2(ptr %x, ptr %y) {
+; CHECK-LABEL: vaaddu_i8_lshr2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vle8.v v8, (a0)
+; CHECK-NEXT: vle8.v v9, (a1)
+; CHECK-NEXT: vwaddu.vv v10, v8, v9
+; CHECK-NEXT: vnsrl.wi v8, v10, 2
+; CHECK-NEXT: ret
+ %xv = load <8 x i8>, ptr %x, align 2
+ %yv = load <8 x i8>, ptr %y, align 2
+ %xzv = zext <8 x i8> %xv to <8 x i16>
+ %yzv = zext <8 x i8> %yv to <8 x i16>
+ %add = add nuw nsw <8 x i16> %xzv, %yzv
+ %div = lshr <8 x i16> %add, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
+ %ret = trunc <8 x i16> %div to <8 x i8>
+ ret <8 x i8> %ret
+}
+
+define <8 x i16> @vaaddu_i16(ptr %x, ptr %y) {
+; CHECK-LABEL: vaaddu_i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vle16.v v9, (a1)
+; CHECK-NEXT: csrwi vxrm, 2
+; CHECK-NEXT: vaaddu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %xv = load <8 x i16>, ptr %x, align 2
+ %yv = load <8 x i16>, ptr %y, align 2
+ %xzv = zext <8 x i16> %xv to <8 x i32>
+ %yzv = zext <8 x i16> %yv to <8 x i32>
+ %add = add nuw nsw <8 x i32> %xzv, %yzv
+ %div = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %ret = trunc <8 x i32> %div to <8 x i16>
+ ret <8 x i16> %ret
+}
+
+
+define <8 x i32> @vaaddu_i32(ptr %x, ptr %y) {
+; CHECK-LABEL: vaaddu_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a2, 32
+; CHECK-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-NEXT: vle8.v v8, (a0)
+; CHECK-NEXT: vle8.v v10, (a1)
+; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT: csrwi vxrm, 2
+; CHECK-NEXT: vaaddu.vv v8, v8, v10
+; CHECK-NEXT: ret
+ %xv = load <8 x i32>, ptr %x, align 2
+ %yv = load <8 x i32>, ptr %y, align 2
+ %xzv = zext <8 x i32> %xv to <8 x i64>
+ %yzv = zext <8 x i32> %yv to <8 x i64>
+ %add = add nuw nsw <8 x i64> %xzv, %yzv
+ %div = lshr <8 x i64> %add, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
+ %ret = trunc <8 x i64> %div to <8 x i32>
+ ret <8 x i32> %ret
+}
+
+define <8 x i64> @vaaddu_i64(ptr %x, ptr %y) {
+; CHECK-LABEL: vaaddu_i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a2, 64
+; CHECK-NEXT: vsetvli zero, a2, e8, m4, ta, ma
+; CHECK-NEXT: vle8.v v8, (a0)
+; CHECK-NEXT: vle8.v v12, (a1)
+; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; CHECK-NEXT: csrwi vxrm, 2
+; CHECK-NEXT: vaaddu.vv v8, v8, v12
+; CHECK-NEXT: ret
+ %xv = load <8 x i64>, ptr %x, align 2
+ %yv = load <8 x i64>, ptr %y, align 2
+ %xzv = zext <8 x i64> %xv to <8 x i128>
+ %yzv = zext <8 x i64> %yv to <8 x i128>
+ %add = add nuw nsw <8 x i128> %xzv, %yzv
+ %div = lshr <8 x i128> %add, <i128 1, i128 1, i128 1, i128 1, i128 1, i128 1, i128 1, i128 1>
+ %ret = trunc <8 x i128> %div to <8 x i64>
+ ret <8 x i64> %ret
+}
+
+
+define <8 x i1> @vaaddu_i1(ptr %x, ptr %y) {
+; CHECK-LABEL: vaaddu_i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vlm.v v0, (a0)
+; CHECK-NEXT: vlm.v v8, (a1)
+; CHECK-NEXT: vmv.v.i v9, 0
+; CHECK-NEXT: vmerge.vim v10, v9, 1, v0
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vmerge.vim v8, v9, 1, v0
+; CHECK-NEXT: csrwi vxrm, 2
+; CHECK-NEXT: vaaddu.vv v8, v10, v8
+; CHECK-NEXT: vand.vi v8, v8, 1
+; CHECK-NEXT: vmsne.vi v0, v8, 0
+; CHECK-NEXT: ret
+ %xv = load <8 x i1>, ptr %x, align 2
+ %yv = load <8 x i1>, ptr %y, align 2
+ %xzv = zext <8 x i1> %xv to <8 x i8>
+ %yzv = zext <8 x i1> %yv to <8 x i8>
+ %add = add nuw nsw <8 x i8> %xzv, %yzv
+ %div = lshr <8 x i8> %add, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ %ret = trunc <8 x i8> %div to <8 x i1>
+ ret <8 x i1> %ret
+}
+
+
+define <vscale x 8 x i8> @vaaddu_i8_vscale(ptr %x, ptr %y) {
+; CHECK-LABEL: vaaddu_i8_vscale:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl1r.v v8, (a0)
+; CHECK-NEXT: vl1r.v v9, (a1)
+; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-NEXT: csrwi vxrm, 2
+; CHECK-NEXT: vaaddu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %xv = load <vscale x 8 x i8>, ptr %x, align 2
+ %yv = load <vscale x 8 x i8>, ptr %y, align 2
+ %xzv = zext <vscale x 8 x i8> %xv to <vscale x 8 x i16>
+ %yzv = zext <vscale x 8 x i8> %yv to <vscale x 8 x i16>
+ %add = add nuw nsw <vscale x 8 x i16> %xzv, %yzv
+ %one = insertelement <vscale x 8 x i16> poison, i16 1, i32 0
+ %splat = shufflevector <vscale x 8 x i16> %one, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+ %div = lshr <vscale x 8 x i16> %add, %splat
+ %ret = trunc <vscale x 8 x i16> %div to <vscale x 8 x i8>
+ ret <vscale x 8 x i8> %ret
+}
+
+
+define <vscale x 8 x i8> @vaaddu_i8_sext_vscale(ptr %x, ptr %y) {
+; CHECK-LABEL: vaaddu_i8_sext_vscale:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl1r.v v8, (a0)
+; CHECK-NEXT: vl1r.v v9, (a1)
+; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-NEXT: vwadd.vv v10, v8, v9
+; CHECK-NEXT: vnsrl.wi v8, v10, 1
+; CHECK-NEXT: ret
+ %xv = load <vscale x 8 x i8>, ptr %x, align 2
+ %yv = load <vscale x 8 x i8>, ptr %y, align 2
+ %xzv = sext <vscale x 8 x i8> %xv to <vscale x 8 x i16>
+ %yzv = sext <vscale x 8 x i8> %yv to <vscale x 8 x i16>
+ %add = add nuw nsw <vscale x 8 x i16> %xzv, %yzv
+ %one = insertelement <vscale x 8 x i16> poison, i16 1, i32 0
+ %splat = shufflevector <vscale x 8 x i16> %one, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+ %div = lshr <vscale x 8 x i16> %add, %splat
+ %ret = trunc <vscale x 8 x i16> %div to <vscale x 8 x i8>
+ ret <vscale x 8 x i8> %ret
+}
+
+define <vscale x 8 x i8> @vaaddu_i8_zexti32_vscale(ptr %x, ptr %y) {
+; CHECK-LABEL: vaaddu_i8_zexti32_vscale:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl1r.v v8, (a0)
+; CHECK-NEXT: vl1r.v v9, (a1)
+; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-NEXT: csrwi vxrm, 2
+; CHECK-NEXT: vaaddu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %xv = load <vscale x 8 x i8>, ptr %x, align 2
+ %yv = load <vscale x 8 x i8>, ptr %y, align 2
+ %xzv = zext <vscale x 8 x i8> %xv to <vscale x 8 x i32>
+ %yzv = zext <vscale x 8 x i8> %yv to <vscale x 8 x i32>
+ %add = add nuw nsw <vscale x 8 x i32> %xzv, %yzv
+ %one = insertelement <vscale x 8 x i32> poison, i32 1, i32 0
+ %splat = shufflevector <vscale x 8 x i32> %one, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
+ %div = lshr <vscale x 8 x i32> %add, %splat
+ %ret = trunc <vscale x 8 x i32> %div to <vscale x 8 x i8>
+ ret <vscale x 8 x i8> %ret
+}
+
+define <vscale x 8 x i8> @vaaddu_i8_lshr2_vscale(ptr %x, ptr %y) {
+; CHECK-LABEL: vaaddu_i8_lshr2_vscale:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl1r.v v8, (a0)
+; CHECK-NEXT: vl1r.v v9, (a1)
+; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-NEXT: vwaddu.vv v10, v8, v9
+; CHECK-NEXT: vnsrl.wi v8, v10, 2
+; CHECK-NEXT: ret
+ %xv = load <vscale x 8 x i8>, ptr %x, align 2
+ %yv = load <vscale x 8 x i8>, ptr %y, align 2
+ %xzv = zext <vscale x 8 x i8> %xv to <vscale x 8 x i16>
+ %yzv = zext <vscale x 8 x i8> %yv to <vscale x 8 x i16>
+ %add = add nuw nsw <vscale x 8 x i16> %xzv, %yzv
+ %one = insertelement <vscale x 8 x i16> poison, i16 2, i32 0
+ %splat = shufflevector <vscale x 8 x i16> %one, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+ %div = lshr <vscale x 8 x i16> %add, %splat
+ %ret = trunc <vscale x 8 x i16> %div to <vscale x 8 x i8>
+ ret <vscale x 8 x i8> %ret
+}
+
+define <vscale x 8 x i16> @vaaddu_i16_vscale(ptr %x, ptr %y) {
+; CHECK-LABEL: vaaddu_i16_vscale:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl2re16.v v8, (a0)
+; CHECK-NEXT: vl2re16.v v10, (a1)
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: csrwi vxrm, 2
+; CHECK-NEXT: vaaddu.vv v8, v8, v10
+; CHECK-NEXT: ret
+ %xv = load <vscale x 8 x i16>, ptr %x, align 2
+ %yv = load <vscale x 8 x i16>, ptr %y, align 2
+ %xzv = zext <vscale x 8 x i16> %xv to <vscale x 8 x i32>
+ %yzv = zext <vscale x 8 x i16> %yv to <vscale x 8 x i32>
+ %add = add nuw nsw <vscale x 8 x i32> %xzv, %yzv
+ %one = insertelement <vscale x 8 x i32> poison, i32 1, i32 0
+ %splat = shufflevector <vscale x 8 x i32> %one, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
+ %div = lshr <vscale x 8 x i32> %add, %splat
+ %ret = trunc <vscale x 8 x i32> %div to <vscale x 8 x i16>
+ ret <vscale x 8 x i16> %ret
+}
+
+
+define <vscale x 8 x i32> @vaaddu_i32_vscale(ptr %x, ptr %y) {
+; CHECK-LABEL: vaaddu_i32_vscale:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl4r.v v8, (a0)
+; CHECK-NEXT: vl4r.v v12, (a1)
+; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
+; CHECK-NEXT: csrwi vxrm, 2
+; CHECK-NEXT: vaaddu.vv v8, v8, v12
+; CHECK-NEXT: ret
+ %xv = load <vscale x 8 x i32>, ptr %x, align 2
+ %yv = load <vscale x 8 x i32>, ptr %y, align 2
+ %xzv = zext <vscale x 8 x i32> %xv to <vscale x 8 x i64>
+ %yzv = zext <vscale x 8 x i32> %yv to <vscale x 8 x i64>
+ %add = add nuw nsw <vscale x 8 x i64> %xzv, %yzv
+ %one = insertelement <vscale x 8 x i64> poison, i64 1, i64 0
+ %splat = shufflevector <vscale x 8 x i64> %one, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+ %div = lshr <vscale x 8 x i64> %add, %splat
+ %ret = trunc <vscale x 8 x i64> %div to <vscale x 8 x i32>
+ ret <vscale x 8 x i32> %ret
+}
+
+define <vscale x 8 x i64> @vaaddu_i64_vscale(ptr %x, ptr %y, ptr %z) {
+; CHECK-LABEL: vaaddu_i64_vscale:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl8r.v v8, (a0)
+; CHECK-NEXT: vl8r.v v16, (a1)
+; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
+; CHECK-NEXT: csrwi vxrm, 2
+; CHECK-NEXT: vaaddu.vv v8, v8, v16
+; CHECK-NEXT: ret
+ %xv = load <vscale x 8 x i64>, ptr %x, align 2
+ %yv = load <vscale x 8 x i64>, ptr %y, align 2
+ %xzv = zext <vscale x 8 x i64> %xv to <vscale x 8 x i128>
+ %yzv = zext <vscale x 8 x i64> %yv to <vscale x 8 x i128>
+ %add = add nuw nsw <vscale x 8 x i128> %xzv, %yzv
+ %one = insertelement <vscale x 8 x i128> poison, i128 1, i128 0
+ %splat = shufflevector <vscale x 8 x i128> %one, <vscale x 8 x i128> poison, <vscale x 8 x i32> zeroinitializer
+ %div = lshr <vscale x 8 x i128> %add, %splat
+ %ret = trunc <vscale x 8 x i128> %div to <vscale x 8 x i64>
+ ret <vscale x 8 x i64> %ret
+}
>From 42c04b90af7fa4b4f66ddd930c75b9a633ad79b3 Mon Sep 17 00:00:00 2001
From: sun-jacobi <sun1011jacobi at gmail.com>
Date: Mon, 8 Jan 2024 16:46:22 +0900
Subject: [PATCH 3/8] [RISCV][Isel] removed combineUnsignedAvgFloor.
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 48 -------------------
.../{vaadd-autogen.ll => vaaddu-autogen.ll} | 6 +--
2 files changed, 2 insertions(+), 52 deletions(-)
rename llvm/test/CodeGen/RISCV/rvv/{vaadd-autogen.ll => vaaddu-autogen.ll} (98%)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 5fb1b9bfcfb74f..8b887d13196eb4 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -12393,51 +12393,6 @@ static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG) {
N0.getOperand(0));
}
-static SDValue combineUnsignedAvgFloor(SDNode *N, SelectionDAG &DAG,
- const RISCVSubtarget &Subtarget) {
-
- if (!Subtarget.hasVInstructions())
- return SDValue();
-
- EVT VT = N->getValueType(0);
- if (!VT.isVector() || !VT.isInteger())
- return SDValue();
-
- assert(N->getOpcode() == ISD::TRUNCATE && "Opcode should be ISD::TRUNCATE");
-
- if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
- return SDValue();
-
- SDValue Srl = N->getOperand(0);
-
- // (lshr X, 1)
- if (!Srl.hasOneUse() || Srl.getOpcode() != ISD::SRL ||
- !isOneOrOneSplat(Srl->getOperand(1)))
- return SDValue();
-
- SDValue WiddenAdd = Srl.getOperand(0);
-
- if (!WiddenAdd.hasOneUse() || WiddenAdd.getOpcode() != ISD::ADD)
- return SDValue();
-
- SDValue N0 = WiddenAdd.getOperand(0);
- SDValue N1 = WiddenAdd.getOperand(1);
-
- auto IsZext = [&](SDValue V) {
- if (V.getOpcode() != ISD::ZERO_EXTEND)
- return false;
-
- return V.getOperand(0)->getValueType(0) == VT;
- };
-
- if (!IsZext(N0) || !IsZext(N1))
- return SDValue();
-
- SDLoc DL(N);
- return DAG.getNode(ISD::AVGFLOORU, DL, VT, N0->getOperand(0),
- N1->getOperand(0));
-}
-
static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
if (SDValue V = combineAddOfBooleanXor(N, DAG))
@@ -12571,9 +12526,6 @@ static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG) {
static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
- if (SDValue V = combineUnsignedAvgFloor(N, DAG, Subtarget))
- return V;
-
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
diff --git a/llvm/test/CodeGen/RISCV/rvv/vaadd-autogen.ll b/llvm/test/CodeGen/RISCV/rvv/vaaddu-autogen.ll
similarity index 98%
rename from llvm/test/CodeGen/RISCV/rvv/vaadd-autogen.ll
rename to llvm/test/CodeGen/RISCV/rvv/vaaddu-autogen.ll
index 9fe865146c0eaf..428cc28c43f3d8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vaadd-autogen.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vaaddu-autogen.ll
@@ -1,8 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
define <8 x i8> @vaaddu_i8(ptr %x, ptr %y) {
>From 63e39fcb128fea59ec776ae1cf90cb8e11b24ba9 Mon Sep 17 00:00:00 2001
From: sun-jacobi <sun1011jacobi at gmail.com>
Date: Mon, 8 Jan 2024 18:36:36 +0900
Subject: [PATCH 4/8] [RISCV][Isel] modify setTargetDAGCombine(ISD::TRUNCATE)
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 8b887d13196eb4..20f94749efd8c6 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1379,8 +1379,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setTargetDAGCombine({ISD::INTRINSIC_VOID, ISD::INTRINSIC_W_CHAIN,
ISD::INTRINSIC_WO_CHAIN, ISD::ADD, ISD::SUB, ISD::AND,
- ISD::TRUNCATE, ISD::OR, ISD::XOR, ISD::SETCC,
- ISD::SELECT});
+ ISD::OR, ISD::XOR, ISD::SETCC, ISD::SELECT});
if (Subtarget.is64Bit())
setTargetDAGCombine(ISD::SRA);
@@ -1390,6 +1389,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
if (Subtarget.hasStdExtZbb())
setTargetDAGCombine({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN});
+ if (Subtarget.hasStdExtZbs() && Subtarget.is64Bit())
+ setTargetDAGCombine(ISD::TRUNCATE);
+
if (Subtarget.hasStdExtZbkb())
setTargetDAGCombine(ISD::BITREVERSE);
if (Subtarget.hasStdExtZfhminOrZhinxmin())
>From 4638ea5245ed4b01b11017fed211bc73f7cdcf66 Mon Sep 17 00:00:00 2001
From: sun-jacobi <sun1011jacobi at gmail.com>
Date: Mon, 8 Jan 2024 18:38:43 +0900
Subject: [PATCH 5/8] [RISCV][Isel] add fixed-vectors-vaaddu.ll and
vaaddu-sdnode.ll.
---
.../CodeGen/RISCV/rvv/fixed-vectors-vaaddu.ll | 186 ++++++++++++++++++
.../{vaaddu-autogen.ll => vaaddu-sdnode.ll} | 182 -----------------
2 files changed, 186 insertions(+), 182 deletions(-)
create mode 100644 llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vaaddu.ll
rename llvm/test/CodeGen/RISCV/rvv/{vaaddu-autogen.ll => vaaddu-sdnode.ll} (51%)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vaaddu.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vaaddu.ll
new file mode 100644
index 00000000000000..7610ce3e218c5c
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vaaddu.ll
@@ -0,0 +1,186 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+
+
+define <8 x i8> @vaaddu_i8(ptr %x, ptr %y) {
+; CHECK-LABEL: vaaddu_i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vle8.v v8, (a0)
+; CHECK-NEXT: vle8.v v9, (a1)
+; CHECK-NEXT: csrwi vxrm, 2
+; CHECK-NEXT: vaaddu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %xv = load <8 x i8>, ptr %x, align 2
+ %yv = load <8 x i8>, ptr %y, align 2
+ %xzv = zext <8 x i8> %xv to <8 x i16>
+ %yzv = zext <8 x i8> %yv to <8 x i16>
+ %add = add nuw nsw <8 x i16> %xzv, %yzv
+ %div = lshr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %ret = trunc <8 x i16> %div to <8 x i8>
+ ret <8 x i8> %ret
+}
+
+
+define <8 x i8> @vaaddu_i8_arg(<8 x i8> %x, <8 x i8> %y) {
+; CHECK-LABEL: vaaddu_i8_arg:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: csrwi vxrm, 2
+; CHECK-NEXT: vaaddu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %xz = zext <8 x i8> %x to <8 x i16>
+ %yz = zext <8 x i8> %y to <8 x i16>
+ %add = add nuw nsw <8 x i16> %xz, %yz
+ %div = lshr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %ret = trunc <8 x i16> %div to <8 x i8>
+ ret <8 x i8> %ret
+}
+
+define <8 x i8> @vaaddu_i8_sext(ptr %x, ptr %y) {
+; CHECK-LABEL: vaaddu_i8_sext:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vle8.v v8, (a0)
+; CHECK-NEXT: vle8.v v9, (a1)
+; CHECK-NEXT: vwadd.vv v10, v8, v9
+; CHECK-NEXT: vnsrl.wi v8, v10, 1
+; CHECK-NEXT: ret
+ %xv = load <8 x i8>, ptr %x, align 2
+ %yv = load <8 x i8>, ptr %y, align 2
+ %xzv = sext <8 x i8> %xv to <8 x i16>
+ %yzv = sext <8 x i8> %yv to <8 x i16>
+ %add = add nuw nsw <8 x i16> %xzv, %yzv
+ %div = lshr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %ret = trunc <8 x i16> %div to <8 x i8>
+ ret <8 x i8> %ret
+}
+
+define <8 x i8> @vaaddu_i8_zexti32(ptr %x, ptr %y) {
+; CHECK-LABEL: vaaddu_i8_zexti32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vle8.v v8, (a0)
+; CHECK-NEXT: vle8.v v9, (a1)
+; CHECK-NEXT: csrwi vxrm, 2
+; CHECK-NEXT: vaaddu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %xv = load <8 x i8>, ptr %x, align 2
+ %yv = load <8 x i8>, ptr %y, align 2
+ %xzv = zext <8 x i8> %xv to <8 x i32>
+ %yzv = zext <8 x i8> %yv to <8 x i32>
+ %add = add nuw nsw <8 x i32> %xzv, %yzv
+ %div = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %ret = trunc <8 x i32> %div to <8 x i8>
+ ret <8 x i8> %ret
+}
+
+define <8 x i8> @vaaddu_i8_lshr2(ptr %x, ptr %y) {
+; CHECK-LABEL: vaaddu_i8_lshr2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vle8.v v8, (a0)
+; CHECK-NEXT: vle8.v v9, (a1)
+; CHECK-NEXT: vwaddu.vv v10, v8, v9
+; CHECK-NEXT: vnsrl.wi v8, v10, 2
+; CHECK-NEXT: ret
+ %xv = load <8 x i8>, ptr %x, align 2
+ %yv = load <8 x i8>, ptr %y, align 2
+ %xzv = zext <8 x i8> %xv to <8 x i16>
+ %yzv = zext <8 x i8> %yv to <8 x i16>
+ %add = add nuw nsw <8 x i16> %xzv, %yzv
+ %div = lshr <8 x i16> %add, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
+ %ret = trunc <8 x i16> %div to <8 x i8>
+ ret <8 x i8> %ret
+}
+
+define <8 x i16> @vaaddu_i16(ptr %x, ptr %y) {
+; CHECK-LABEL: vaaddu_i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vle16.v v9, (a1)
+; CHECK-NEXT: csrwi vxrm, 2
+; CHECK-NEXT: vaaddu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %xv = load <8 x i16>, ptr %x, align 2
+ %yv = load <8 x i16>, ptr %y, align 2
+ %xzv = zext <8 x i16> %xv to <8 x i32>
+ %yzv = zext <8 x i16> %yv to <8 x i32>
+ %add = add nuw nsw <8 x i32> %xzv, %yzv
+ %div = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %ret = trunc <8 x i32> %div to <8 x i16>
+ ret <8 x i16> %ret
+}
+
+
+define <8 x i32> @vaaddu_i32(ptr %x, ptr %y) {
+; CHECK-LABEL: vaaddu_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a2, 32
+; CHECK-NEXT: vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-NEXT: vle8.v v8, (a0)
+; CHECK-NEXT: vle8.v v10, (a1)
+; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT: csrwi vxrm, 2
+; CHECK-NEXT: vaaddu.vv v8, v8, v10
+; CHECK-NEXT: ret
+ %xv = load <8 x i32>, ptr %x, align 2
+ %yv = load <8 x i32>, ptr %y, align 2
+ %xzv = zext <8 x i32> %xv to <8 x i64>
+ %yzv = zext <8 x i32> %yv to <8 x i64>
+ %add = add nuw nsw <8 x i64> %xzv, %yzv
+ %div = lshr <8 x i64> %add, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
+ %ret = trunc <8 x i64> %div to <8 x i32>
+ ret <8 x i32> %ret
+}
+
+define <8 x i64> @vaaddu_i64(ptr %x, ptr %y) {
+; CHECK-LABEL: vaaddu_i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a2, 64
+; CHECK-NEXT: vsetvli zero, a2, e8, m4, ta, ma
+; CHECK-NEXT: vle8.v v8, (a0)
+; CHECK-NEXT: vle8.v v12, (a1)
+; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; CHECK-NEXT: csrwi vxrm, 2
+; CHECK-NEXT: vaaddu.vv v8, v8, v12
+; CHECK-NEXT: ret
+ %xv = load <8 x i64>, ptr %x, align 2
+ %yv = load <8 x i64>, ptr %y, align 2
+ %xzv = zext <8 x i64> %xv to <8 x i128>
+ %yzv = zext <8 x i64> %yv to <8 x i128>
+ %add = add nuw nsw <8 x i128> %xzv, %yzv
+ %div = lshr <8 x i128> %add, <i128 1, i128 1, i128 1, i128 1, i128 1, i128 1, i128 1, i128 1>
+ %ret = trunc <8 x i128> %div to <8 x i64>
+ ret <8 x i64> %ret
+}
+
+
+define <8 x i1> @vaaddu_i1(ptr %x, ptr %y) {
+; CHECK-LABEL: vaaddu_i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vlm.v v0, (a0)
+; CHECK-NEXT: vlm.v v8, (a1)
+; CHECK-NEXT: vmv.v.i v9, 0
+; CHECK-NEXT: vmerge.vim v10, v9, 1, v0
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vmerge.vim v8, v9, 1, v0
+; CHECK-NEXT: csrwi vxrm, 2
+; CHECK-NEXT: vaaddu.vv v8, v10, v8
+; CHECK-NEXT: vand.vi v8, v8, 1
+; CHECK-NEXT: vmsne.vi v0, v8, 0
+; CHECK-NEXT: ret
+ %xv = load <8 x i1>, ptr %x, align 2
+ %yv = load <8 x i1>, ptr %y, align 2
+ %xzv = zext <8 x i1> %xv to <8 x i8>
+ %yzv = zext <8 x i1> %yv to <8 x i8>
+ %add = add nuw nsw <8 x i8> %xzv, %yzv
+ %div = lshr <8 x i8> %add, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ %ret = trunc <8 x i8> %div to <8 x i1>
+ ret <8 x i1> %ret
+}
+
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vaaddu-autogen.ll b/llvm/test/CodeGen/RISCV/rvv/vaaddu-sdnode.ll
similarity index 51%
rename from llvm/test/CodeGen/RISCV/rvv/vaaddu-autogen.ll
rename to llvm/test/CodeGen/RISCV/rvv/vaaddu-sdnode.ll
index 428cc28c43f3d8..8066d3505c0156 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vaaddu-autogen.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vaaddu-sdnode.ll
@@ -2,188 +2,6 @@
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
-
-define <8 x i8> @vaaddu_i8(ptr %x, ptr %y) {
-; CHECK-LABEL: vaaddu_i8:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vle8.v v9, (a1)
-; CHECK-NEXT: csrwi vxrm, 2
-; CHECK-NEXT: vaaddu.vv v8, v8, v9
-; CHECK-NEXT: ret
- %xv = load <8 x i8>, ptr %x, align 2
- %yv = load <8 x i8>, ptr %y, align 2
- %xzv = zext <8 x i8> %xv to <8 x i16>
- %yzv = zext <8 x i8> %yv to <8 x i16>
- %add = add nuw nsw <8 x i16> %xzv, %yzv
- %div = lshr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
- %ret = trunc <8 x i16> %div to <8 x i8>
- ret <8 x i8> %ret
-}
-
-
-define <8 x i8> @vaaddu_i8_arg(<8 x i8> %x, <8 x i8> %y) {
-; CHECK-LABEL: vaaddu_i8_arg:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: csrwi vxrm, 2
-; CHECK-NEXT: vaaddu.vv v8, v8, v9
-; CHECK-NEXT: ret
- %xz = zext <8 x i8> %x to <8 x i16>
- %yz = zext <8 x i8> %y to <8 x i16>
- %add = add nuw nsw <8 x i16> %xz, %yz
- %div = lshr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
- %ret = trunc <8 x i16> %div to <8 x i8>
- ret <8 x i8> %ret
-}
-
-define <8 x i8> @vaaddu_i8_sext(ptr %x, ptr %y) {
-; CHECK-LABEL: vaaddu_i8_sext:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vle8.v v9, (a1)
-; CHECK-NEXT: vwadd.vv v10, v8, v9
-; CHECK-NEXT: vnsrl.wi v8, v10, 1
-; CHECK-NEXT: ret
- %xv = load <8 x i8>, ptr %x, align 2
- %yv = load <8 x i8>, ptr %y, align 2
- %xzv = sext <8 x i8> %xv to <8 x i16>
- %yzv = sext <8 x i8> %yv to <8 x i16>
- %add = add nuw nsw <8 x i16> %xzv, %yzv
- %div = lshr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
- %ret = trunc <8 x i16> %div to <8 x i8>
- ret <8 x i8> %ret
-}
-
-define <8 x i8> @vaaddu_i8_zexti32(ptr %x, ptr %y) {
-; CHECK-LABEL: vaaddu_i8_zexti32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vle8.v v9, (a1)
-; CHECK-NEXT: csrwi vxrm, 2
-; CHECK-NEXT: vaaddu.vv v8, v8, v9
-; CHECK-NEXT: ret
- %xv = load <8 x i8>, ptr %x, align 2
- %yv = load <8 x i8>, ptr %y, align 2
- %xzv = zext <8 x i8> %xv to <8 x i32>
- %yzv = zext <8 x i8> %yv to <8 x i32>
- %add = add nuw nsw <8 x i32> %xzv, %yzv
- %div = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
- %ret = trunc <8 x i32> %div to <8 x i8>
- ret <8 x i8> %ret
-}
-
-define <8 x i8> @vaaddu_i8_lshr2(ptr %x, ptr %y) {
-; CHECK-LABEL: vaaddu_i8_lshr2:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vle8.v v9, (a1)
-; CHECK-NEXT: vwaddu.vv v10, v8, v9
-; CHECK-NEXT: vnsrl.wi v8, v10, 2
-; CHECK-NEXT: ret
- %xv = load <8 x i8>, ptr %x, align 2
- %yv = load <8 x i8>, ptr %y, align 2
- %xzv = zext <8 x i8> %xv to <8 x i16>
- %yzv = zext <8 x i8> %yv to <8 x i16>
- %add = add nuw nsw <8 x i16> %xzv, %yzv
- %div = lshr <8 x i16> %add, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
- %ret = trunc <8 x i16> %div to <8 x i8>
- ret <8 x i8> %ret
-}
-
-define <8 x i16> @vaaddu_i16(ptr %x, ptr %y) {
-; CHECK-LABEL: vaaddu_i16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vle16.v v9, (a1)
-; CHECK-NEXT: csrwi vxrm, 2
-; CHECK-NEXT: vaaddu.vv v8, v8, v9
-; CHECK-NEXT: ret
- %xv = load <8 x i16>, ptr %x, align 2
- %yv = load <8 x i16>, ptr %y, align 2
- %xzv = zext <8 x i16> %xv to <8 x i32>
- %yzv = zext <8 x i16> %yv to <8 x i32>
- %add = add nuw nsw <8 x i32> %xzv, %yzv
- %div = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
- %ret = trunc <8 x i32> %div to <8 x i16>
- ret <8 x i16> %ret
-}
-
-
-define <8 x i32> @vaaddu_i32(ptr %x, ptr %y) {
-; CHECK-LABEL: vaaddu_i32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: li a2, 32
-; CHECK-NEXT: vsetvli zero, a2, e8, m2, ta, ma
-; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vle8.v v10, (a1)
-; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT: csrwi vxrm, 2
-; CHECK-NEXT: vaaddu.vv v8, v8, v10
-; CHECK-NEXT: ret
- %xv = load <8 x i32>, ptr %x, align 2
- %yv = load <8 x i32>, ptr %y, align 2
- %xzv = zext <8 x i32> %xv to <8 x i64>
- %yzv = zext <8 x i32> %yv to <8 x i64>
- %add = add nuw nsw <8 x i64> %xzv, %yzv
- %div = lshr <8 x i64> %add, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
- %ret = trunc <8 x i64> %div to <8 x i32>
- ret <8 x i32> %ret
-}
-
-define <8 x i64> @vaaddu_i64(ptr %x, ptr %y) {
-; CHECK-LABEL: vaaddu_i64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: li a2, 64
-; CHECK-NEXT: vsetvli zero, a2, e8, m4, ta, ma
-; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vle8.v v12, (a1)
-; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT: csrwi vxrm, 2
-; CHECK-NEXT: vaaddu.vv v8, v8, v12
-; CHECK-NEXT: ret
- %xv = load <8 x i64>, ptr %x, align 2
- %yv = load <8 x i64>, ptr %y, align 2
- %xzv = zext <8 x i64> %xv to <8 x i128>
- %yzv = zext <8 x i64> %yv to <8 x i128>
- %add = add nuw nsw <8 x i128> %xzv, %yzv
- %div = lshr <8 x i128> %add, <i128 1, i128 1, i128 1, i128 1, i128 1, i128 1, i128 1, i128 1>
- %ret = trunc <8 x i128> %div to <8 x i64>
- ret <8 x i64> %ret
-}
-
-
-define <8 x i1> @vaaddu_i1(ptr %x, ptr %y) {
-; CHECK-LABEL: vaaddu_i1:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vlm.v v0, (a0)
-; CHECK-NEXT: vlm.v v8, (a1)
-; CHECK-NEXT: vmv.v.i v9, 0
-; CHECK-NEXT: vmerge.vim v10, v9, 1, v0
-; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vim v8, v9, 1, v0
-; CHECK-NEXT: csrwi vxrm, 2
-; CHECK-NEXT: vaaddu.vv v8, v10, v8
-; CHECK-NEXT: vand.vi v8, v8, 1
-; CHECK-NEXT: vmsne.vi v0, v8, 0
-; CHECK-NEXT: ret
- %xv = load <8 x i1>, ptr %x, align 2
- %yv = load <8 x i1>, ptr %y, align 2
- %xzv = zext <8 x i1> %xv to <8 x i8>
- %yzv = zext <8 x i1> %yv to <8 x i8>
- %add = add nuw nsw <8 x i8> %xzv, %yzv
- %div = lshr <8 x i8> %add, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
- %ret = trunc <8 x i8> %div to <8 x i1>
- ret <8 x i1> %ret
-}
-
-
define <vscale x 8 x i8> @vaaddu_i8_vscale(ptr %x, ptr %y) {
; CHECK-LABEL: vaaddu_i8_vscale:
; CHECK: # %bb.0:
>From 7e6e29f05864d52b27533bc7f39aa9464ba923a5 Mon Sep 17 00:00:00 2001
From: sun-jacobi <sun1011jacobi at gmail.com>
Date: Mon, 8 Jan 2024 20:44:44 +0900
Subject: [PATCH 6/8] [RISCV][Isel] add avgflooru pattern in
RISCVInstrInfoVSDPatterns.td.
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 53 ++++---------------
llvm/lib/Target/RISCV/RISCVISelLowering.h | 2 +-
.../Target/RISCV/RISCVInstrInfoVSDPatterns.td | 11 ++++
.../Target/RISCV/RISCVInstrInfoVVLPatterns.td | 23 +++-----
4 files changed, 27 insertions(+), 62 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 20f94749efd8c6..cfe6e52eaa0771 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -814,8 +814,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT,
Custom);
setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom);
- setOperationAction(
- {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT}, VT, Legal);
+ setOperationAction({ISD::AVGFLOORU, ISD::SADDSAT, ISD::UADDSAT,
+ ISD::SSUBSAT, ISD::USUBSAT},
+ VT, Legal);
// Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
// nodes which truncate by one power of two at a time.
@@ -859,8 +860,6 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);
- setOperationAction(ISD::AVGFLOORU, VT, Custom);
-
// Splice
setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
@@ -1179,8 +1178,6 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::UREM, ISD::SHL, ISD::SRA, ISD::SRL},
VT, Custom);
- setOperationAction(ISD::AVGFLOORU, VT, Custom);
-
setOperationAction(
{ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX, ISD::ABS}, VT, Custom);
@@ -1188,9 +1185,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Custom);
- setOperationAction(
- {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT}, VT,
- Custom);
+ setOperationAction({ISD::AVGFLOORU, ISD::SADDSAT, ISD::UADDSAT,
+ ISD::SSUBSAT, ISD::USUBSAT},
+ VT, Custom);
setOperationAction(ISD::VSELECT, VT, Custom);
setOperationAction(ISD::SELECT_CC, VT, Expand);
@@ -5462,6 +5459,7 @@ static unsigned getRISCVVLOp(SDValue Op) {
OP_CASE(UADDSAT)
OP_CASE(SSUBSAT)
OP_CASE(USUBSAT)
+ OP_CASE(AVGFLOORU)
OP_CASE(FADD)
OP_CASE(FSUB)
OP_CASE(FMUL)
@@ -5505,8 +5503,6 @@ static unsigned getRISCVVLOp(SDValue Op) {
VP_CASE(CTLZ) // VP_CTLZ
VP_CASE(CTTZ) // VP_CTTZ
VP_CASE(CTPOP) // VP_CTPOP
- case ISD::AVGFLOORU:
- return RISCVISD::UAVGADD_VL;
case ISD::CTLZ_ZERO_UNDEF:
case ISD::VP_CTLZ_ZERO_UNDEF:
return RISCVISD::CTLZ_VL;
@@ -6444,8 +6440,6 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
"Unexpected custom legalisation");
return SDValue();
- case ISD::AVGFLOORU:
- return lowerUnsignedAvgFloor(Op, DAG);
case ISD::FADD:
case ISD::FSUB:
case ISD::FMUL:
@@ -6461,6 +6455,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
!Subtarget.hasVInstructionsF16()))
return SplitVectorOp(Op, DAG);
[[fallthrough]];
+ case ISD::AVGFLOORU:
case ISD::SADDSAT:
case ISD::UADDSAT:
case ISD::SSUBSAT:
@@ -10306,36 +10301,6 @@ SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op,
return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
}
-// Lower vector AVGFLOORU(X, Y)
-SDValue RISCVTargetLowering::lowerUnsignedAvgFloor(SDValue Op,
- SelectionDAG &DAG) const {
- SDLoc DL(Op);
- assert((Op.getOpcode() == ISD::AVGFLOORU) &&
- "Opcode should be ISD::AVGFLOORU");
-
- MVT VT = Op.getSimpleValueType();
- SDValue X = Op.getOperand(0);
- SDValue Y = Op.getOperand(1);
-
- MVT ContainerVT = VT;
- if (VT.isFixedLengthVector()) {
- ContainerVT = getContainerForFixedLengthVector(VT);
- X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
- Y = convertToScalableVector(ContainerVT, Y, DAG, Subtarget);
- }
-
- auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
-
- SDValue RM = DAG.getTargetConstant(0b10, DL, Subtarget.getXLenVT());
- SDValue Result = DAG.getNode(RISCVISD::UAVGADD_VL, DL, ContainerVT,
- {X, Y, DAG.getUNDEF(ContainerVT), Mask, VL, RM});
-
- if (VT.isFixedLengthVector())
- Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
-
- return Result;
-}
-
// Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node:
// * Operands of each node are assumed to be in the same order.
// * The EVL operand is promoted from i32 to i64 on RV64.
@@ -18632,6 +18597,7 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(UDIV_VL)
NODE_NAME_CASE(UREM_VL)
NODE_NAME_CASE(XOR_VL)
+ NODE_NAME_CASE(AVGFLOORU_VL)
NODE_NAME_CASE(SADDSAT_VL)
NODE_NAME_CASE(UADDSAT_VL)
NODE_NAME_CASE(SSUBSAT_VL)
@@ -18657,7 +18623,6 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(SMAX_VL)
NODE_NAME_CASE(UMIN_VL)
NODE_NAME_CASE(UMAX_VL)
- NODE_NAME_CASE(UAVGADD_VL)
NODE_NAME_CASE(BITREVERSE_VL)
NODE_NAME_CASE(BSWAP_VL)
NODE_NAME_CASE(CTLZ_VL)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 911b2fcf2aec05..25fd872ac2b082 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -254,7 +254,7 @@ enum NodeType : unsigned {
USUBSAT_VL,
// Averaging adds of unsigned integers.
- UAVGADD_VL,
+ AVGFLOORU_VL,
MULHS_VL,
MULHU_VL,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
index b7c8457037947c..8f8b0a029639e8 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
@@ -1131,6 +1131,17 @@ defm : VPatBinarySDNode_VV_VX_VI<uaddsat, "PseudoVSADDU">;
defm : VPatBinarySDNode_VV_VX<ssubsat, "PseudoVSSUB">;
defm : VPatBinarySDNode_VV_VX<usubsat, "PseudoVSSUBU">;
+// 12.2. Vector Single-Width Averaging Add and Subtract
+foreach vti = AllIntegerVectors in {
+ let Predicates = GetVTypePredicates<vti>.Predicates in {
+ def : Pat<(avgflooru (vti.Vector vti.RegClass:$rs1),
+ (vti.Vector vti.RegClass:$rs2)),
+ (!cast<Instruction>("PseudoVAADDU_VV_"# vti.LMul.MX)
+ (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, vti.RegClass:$rs2,
+ 0b10, vti.AVL, vti.Log2SEW, TA_MA)>;
+ }
+}
+
// 15. Vector Mask Instructions
// 15.1. Vector Mask-Register Logical Instructions
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index 570bca5ca49086..943ed02995d129 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -57,15 +57,6 @@ def SDT_RISCVCopySign_VL : SDTypeProfile<1, 5, [SDTCisSameAs<0, 1>,
SDTCisSameNumEltsAs<0, 4>,
SDTCisVT<5, XLenVT>]>;
-def SDT_RISCVIntBinOp_RM_VL : SDTypeProfile<1, 6, [SDTCisSameAs<0, 1>,
- SDTCisSameAs<0, 2>,
- SDTCisVec<0>, SDTCisInt<0>,
- SDTCisSameAs<0, 3>,
- SDTCVecEltisVT<4, i1>,
- SDTCisSameNumEltsAs<0, 4>,
- SDTCisVT<5, XLenVT>,
- SDTCisVT<6, XLenVT>]>; // Rounding Mode
-
def riscv_vmv_v_v_vl : SDNode<"RISCVISD::VMV_V_V_VL",
SDTypeProfile<1, 3, [SDTCisVec<0>,
SDTCisSameAs<0, 1>,
@@ -120,11 +111,11 @@ def riscv_ctlz_vl : SDNode<"RISCVISD::CTLZ_VL", SDT_RISCVIntUnOp_VL>
def riscv_cttz_vl : SDNode<"RISCVISD::CTTZ_VL", SDT_RISCVIntUnOp_VL>;
def riscv_ctpop_vl : SDNode<"RISCVISD::CTPOP_VL", SDT_RISCVIntUnOp_VL>;
+def riscv_avgflooru_vl : SDNode<"RISCVISD::AVGFLOORU_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>;
def riscv_saddsat_vl : SDNode<"RISCVISD::SADDSAT_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>;
def riscv_uaddsat_vl : SDNode<"RISCVISD::UADDSAT_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>;
def riscv_ssubsat_vl : SDNode<"RISCVISD::SSUBSAT_VL", SDT_RISCVIntBinOp_VL>;
def riscv_usubsat_vl : SDNode<"RISCVISD::USUBSAT_VL", SDT_RISCVIntBinOp_VL>;
-def riscv_uavgadd_vl : SDNode<"RISCVISD::UAVGADD_VL", SDT_RISCVIntBinOp_RM_VL, [SDNPCommutative]>;
def riscv_fadd_vl : SDNode<"RISCVISD::FADD_VL", SDT_RISCVFPBinOp_VL, [SDNPCommutative]>;
def riscv_fsub_vl : SDNode<"RISCVISD::FSUB_VL", SDT_RISCVFPBinOp_VL>;
@@ -2351,14 +2342,12 @@ defm : VPatBinaryVL_VV_VX<riscv_usubsat_vl, "PseudoVSSUBU">;
// 12.2. Vector Single-Width Averaging Add and Subtract
foreach vti = AllIntegerVectors in {
let Predicates = GetVTypePredicates<vti>.Predicates in {
- def : Pat<(riscv_uavgadd_vl (vti.Vector vti.RegClass:$rs1),
- (vti.Vector vti.RegClass:$rs2),
- vti.RegClass:$merge, (vti.Mask V0), VLOpFrag,
- (XLenVT timm:$rounding_mode)),
+ def : Pat<(riscv_avgflooru_vl (vti.Vector vti.RegClass:$rs1),
+ (vti.Vector vti.RegClass:$rs2),
+ vti.RegClass:$merge, (vti.Mask V0), VLOpFrag),
(!cast<Instruction>("PseudoVAADDU_VV_"# vti.LMul.MX#"_MASK")
- vti.RegClass:$merge, vti.RegClass:$rs1, vti.RegClass:$rs2,
- (vti.Mask V0), (XLenVT timm:$rounding_mode),
- GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ vti.RegClass:$merge, vti.RegClass:$rs1, vti.RegClass:$rs2,
+ (vti.Mask V0), 0b10, GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
}
}
>From 73a42075a709a707aa866aa576355407cf39ce85 Mon Sep 17 00:00:00 2001
From: sun-jacobi <sun1011jacobi at gmail.com>
Date: Mon, 8 Jan 2024 20:46:15 +0900
Subject: [PATCH 7/8] [RISCV][Isel] remove extra space.
---
llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td | 2 +-
llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
index 8f8b0a029639e8..43aca2436e59f9 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
@@ -1136,7 +1136,7 @@ foreach vti = AllIntegerVectors in {
let Predicates = GetVTypePredicates<vti>.Predicates in {
def : Pat<(avgflooru (vti.Vector vti.RegClass:$rs1),
(vti.Vector vti.RegClass:$rs2)),
- (!cast<Instruction>("PseudoVAADDU_VV_"# vti.LMul.MX)
+ (!cast<Instruction>("PseudoVAADDU_VV_"#vti.LMul.MX)
(vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, vti.RegClass:$rs2,
0b10, vti.AVL, vti.Log2SEW, TA_MA)>;
}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index 943ed02995d129..ab1841090d4d20 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -2345,7 +2345,7 @@ foreach vti = AllIntegerVectors in {
def : Pat<(riscv_avgflooru_vl (vti.Vector vti.RegClass:$rs1),
(vti.Vector vti.RegClass:$rs2),
vti.RegClass:$merge, (vti.Mask V0), VLOpFrag),
- (!cast<Instruction>("PseudoVAADDU_VV_"# vti.LMul.MX#"_MASK")
+ (!cast<Instruction>("PseudoVAADDU_VV_"#vti.LMul.MX#"_MASK")
vti.RegClass:$merge, vti.RegClass:$rs1, vti.RegClass:$rs2,
(vti.Mask V0), 0b10, GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
}
>From 80841a3338d0e3d70adc841d1941937d7ab36f7a Mon Sep 17 00:00:00 2001
From: sun-jacobi <sun1011jacobi at gmail.com>
Date: Mon, 8 Jan 2024 20:56:59 +0900
Subject: [PATCH 8/8] [RISCV][Isel] refactor vaaddu-sdnode.ll and
fixed-vectors-vaaddu.ll
---
.../CodeGen/RISCV/rvv/fixed-vectors-vaaddu.ll | 113 +++++-------------
llvm/test/CodeGen/RISCV/rvv/vaaddu-sdnode.ll | 84 +++++--------
2 files changed, 59 insertions(+), 138 deletions(-)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vaaddu.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vaaddu.ll
index 7610ce3e218c5c..e2ba7dc41ead84 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vaaddu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vaaddu.ll
@@ -3,19 +3,15 @@
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
-define <8 x i8> @vaaddu_i8(ptr %x, ptr %y) {
-; CHECK-LABEL: vaaddu_i8:
+define <8 x i8> @vaaddu_vv_v8i8(<8 x i8> %x, <8 x i8> %y) {
+; CHECK-LABEL: vaaddu_vv_v8i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vle8.v v9, (a1)
; CHECK-NEXT: csrwi vxrm, 2
; CHECK-NEXT: vaaddu.vv v8, v8, v9
; CHECK-NEXT: ret
- %xv = load <8 x i8>, ptr %x, align 2
- %yv = load <8 x i8>, ptr %y, align 2
- %xzv = zext <8 x i8> %xv to <8 x i16>
- %yzv = zext <8 x i8> %yv to <8 x i16>
+ %xzv = zext <8 x i8> %x to <8 x i16>
+ %yzv = zext <8 x i8> %y to <8 x i16>
%add = add nuw nsw <8 x i16> %xzv, %yzv
%div = lshr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%ret = trunc <8 x i16> %div to <8 x i8>
@@ -23,91 +19,60 @@ define <8 x i8> @vaaddu_i8(ptr %x, ptr %y) {
}
-define <8 x i8> @vaaddu_i8_arg(<8 x i8> %x, <8 x i8> %y) {
-; CHECK-LABEL: vaaddu_i8_arg:
+define <8 x i8> @vaaddu_vv_v8i8_sexti16(<8 x i8> %x, <8 x i8> %y) {
+; CHECK-LABEL: vaaddu_vv_v8i8_sexti16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: csrwi vxrm, 2
-; CHECK-NEXT: vaaddu.vv v8, v8, v9
-; CHECK-NEXT: ret
- %xz = zext <8 x i8> %x to <8 x i16>
- %yz = zext <8 x i8> %y to <8 x i16>
- %add = add nuw nsw <8 x i16> %xz, %yz
- %div = lshr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
- %ret = trunc <8 x i16> %div to <8 x i8>
- ret <8 x i8> %ret
-}
-
-define <8 x i8> @vaaddu_i8_sext(ptr %x, ptr %y) {
-; CHECK-LABEL: vaaddu_i8_sext:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vle8.v v9, (a1)
; CHECK-NEXT: vwadd.vv v10, v8, v9
; CHECK-NEXT: vnsrl.wi v8, v10, 1
; CHECK-NEXT: ret
- %xv = load <8 x i8>, ptr %x, align 2
- %yv = load <8 x i8>, ptr %y, align 2
- %xzv = sext <8 x i8> %xv to <8 x i16>
- %yzv = sext <8 x i8> %yv to <8 x i16>
+ %xzv = sext <8 x i8> %x to <8 x i16>
+ %yzv = sext <8 x i8> %y to <8 x i16>
%add = add nuw nsw <8 x i16> %xzv, %yzv
%div = lshr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%ret = trunc <8 x i16> %div to <8 x i8>
ret <8 x i8> %ret
}
-define <8 x i8> @vaaddu_i8_zexti32(ptr %x, ptr %y) {
-; CHECK-LABEL: vaaddu_i8_zexti32:
+define <8 x i8> @vaaddu_vv_v8i8_zexti32(<8 x i8> %x, <8 x i8> %y) {
+; CHECK-LABEL: vaaddu_vv_v8i8_zexti32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vle8.v v9, (a1)
; CHECK-NEXT: csrwi vxrm, 2
; CHECK-NEXT: vaaddu.vv v8, v8, v9
; CHECK-NEXT: ret
- %xv = load <8 x i8>, ptr %x, align 2
- %yv = load <8 x i8>, ptr %y, align 2
- %xzv = zext <8 x i8> %xv to <8 x i32>
- %yzv = zext <8 x i8> %yv to <8 x i32>
+ %xzv = zext <8 x i8> %x to <8 x i32>
+ %yzv = zext <8 x i8> %y to <8 x i32>
%add = add nuw nsw <8 x i32> %xzv, %yzv
%div = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%ret = trunc <8 x i32> %div to <8 x i8>
ret <8 x i8> %ret
}
-define <8 x i8> @vaaddu_i8_lshr2(ptr %x, ptr %y) {
+define <8 x i8> @vaaddu_i8_lshr2(<8 x i8> %x, <8 x i8> %y) {
; CHECK-LABEL: vaaddu_i8_lshr2:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vle8.v v9, (a1)
; CHECK-NEXT: vwaddu.vv v10, v8, v9
; CHECK-NEXT: vnsrl.wi v8, v10, 2
; CHECK-NEXT: ret
- %xv = load <8 x i8>, ptr %x, align 2
- %yv = load <8 x i8>, ptr %y, align 2
- %xzv = zext <8 x i8> %xv to <8 x i16>
- %yzv = zext <8 x i8> %yv to <8 x i16>
+ %xzv = zext <8 x i8> %x to <8 x i16>
+ %yzv = zext <8 x i8> %y to <8 x i16>
%add = add nuw nsw <8 x i16> %xzv, %yzv
%div = lshr <8 x i16> %add, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
%ret = trunc <8 x i16> %div to <8 x i8>
ret <8 x i8> %ret
}
-define <8 x i16> @vaaddu_i16(ptr %x, ptr %y) {
-; CHECK-LABEL: vaaddu_i16:
+define <8 x i16> @vaaddu_vv_v8i16(<8 x i16> %x, <8 x i16> %y) {
+; CHECK-LABEL: vaaddu_vv_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vle16.v v9, (a1)
; CHECK-NEXT: csrwi vxrm, 2
; CHECK-NEXT: vaaddu.vv v8, v8, v9
; CHECK-NEXT: ret
- %xv = load <8 x i16>, ptr %x, align 2
- %yv = load <8 x i16>, ptr %y, align 2
- %xzv = zext <8 x i16> %xv to <8 x i32>
- %yzv = zext <8 x i16> %yv to <8 x i32>
+ %xzv = zext <8 x i16> %x to <8 x i32>
+ %yzv = zext <8 x i16> %y to <8 x i32>
%add = add nuw nsw <8 x i32> %xzv, %yzv
%div = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%ret = trunc <8 x i32> %div to <8 x i16>
@@ -115,42 +80,30 @@ define <8 x i16> @vaaddu_i16(ptr %x, ptr %y) {
}
-define <8 x i32> @vaaddu_i32(ptr %x, ptr %y) {
-; CHECK-LABEL: vaaddu_i32:
+define <8 x i32> @vaaddu_vv_v8i32(<8 x i32> %x, <8 x i32> %y) {
+; CHECK-LABEL: vaaddu_vv_v8i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a2, 32
-; CHECK-NEXT: vsetvli zero, a2, e8, m2, ta, ma
-; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vle8.v v10, (a1)
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: csrwi vxrm, 2
; CHECK-NEXT: vaaddu.vv v8, v8, v10
; CHECK-NEXT: ret
- %xv = load <8 x i32>, ptr %x, align 2
- %yv = load <8 x i32>, ptr %y, align 2
- %xzv = zext <8 x i32> %xv to <8 x i64>
- %yzv = zext <8 x i32> %yv to <8 x i64>
+ %xzv = zext <8 x i32> %x to <8 x i64>
+ %yzv = zext <8 x i32> %y to <8 x i64>
%add = add nuw nsw <8 x i64> %xzv, %yzv
%div = lshr <8 x i64> %add, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
%ret = trunc <8 x i64> %div to <8 x i32>
ret <8 x i32> %ret
}
-define <8 x i64> @vaaddu_i64(ptr %x, ptr %y) {
-; CHECK-LABEL: vaaddu_i64:
+define <8 x i64> @vaaddu_vv_v8i64(<8 x i64> %x, <8 x i64> %y) {
+; CHECK-LABEL: vaaddu_vv_v8i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a2, 64
-; CHECK-NEXT: vsetvli zero, a2, e8, m4, ta, ma
-; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vle8.v v12, (a1)
; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT: csrwi vxrm, 2
; CHECK-NEXT: vaaddu.vv v8, v8, v12
; CHECK-NEXT: ret
- %xv = load <8 x i64>, ptr %x, align 2
- %yv = load <8 x i64>, ptr %y, align 2
- %xzv = zext <8 x i64> %xv to <8 x i128>
- %yzv = zext <8 x i64> %yv to <8 x i128>
+ %xzv = zext <8 x i64> %x to <8 x i128>
+ %yzv = zext <8 x i64> %y to <8 x i128>
%add = add nuw nsw <8 x i128> %xzv, %yzv
%div = lshr <8 x i128> %add, <i128 1, i128 1, i128 1, i128 1, i128 1, i128 1, i128 1, i128 1>
%ret = trunc <8 x i128> %div to <8 x i64>
@@ -158,12 +111,10 @@ define <8 x i64> @vaaddu_i64(ptr %x, ptr %y) {
}
-define <8 x i1> @vaaddu_i1(ptr %x, ptr %y) {
-; CHECK-LABEL: vaaddu_i1:
+define <8 x i1> @vaaddu_vv_v8i1(<8 x i1> %x, <8 x i1> %y) {
+; CHECK-LABEL: vaaddu_vv_v8i1:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vlm.v v0, (a0)
-; CHECK-NEXT: vlm.v v8, (a1)
; CHECK-NEXT: vmv.v.i v9, 0
; CHECK-NEXT: vmerge.vim v10, v9, 1, v0
; CHECK-NEXT: vmv1r.v v0, v8
@@ -173,10 +124,8 @@ define <8 x i1> @vaaddu_i1(ptr %x, ptr %y) {
; CHECK-NEXT: vand.vi v8, v8, 1
; CHECK-NEXT: vmsne.vi v0, v8, 0
; CHECK-NEXT: ret
- %xv = load <8 x i1>, ptr %x, align 2
- %yv = load <8 x i1>, ptr %y, align 2
- %xzv = zext <8 x i1> %xv to <8 x i8>
- %yzv = zext <8 x i1> %yv to <8 x i8>
+ %xzv = zext <8 x i1> %x to <8 x i8>
+ %yzv = zext <8 x i1> %y to <8 x i8>
%add = add nuw nsw <8 x i8> %xzv, %yzv
%div = lshr <8 x i8> %add, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
%ret = trunc <8 x i8> %div to <8 x i1>
diff --git a/llvm/test/CodeGen/RISCV/rvv/vaaddu-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vaaddu-sdnode.ll
index 8066d3505c0156..19be07e2844b61 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vaaddu-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vaaddu-sdnode.ll
@@ -2,19 +2,15 @@
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
-define <vscale x 8 x i8> @vaaddu_i8_vscale(ptr %x, ptr %y) {
-; CHECK-LABEL: vaaddu_i8_vscale:
+define <vscale x 8 x i8> @vaaddu_vv_nxv8i8(<vscale x 8 x i8> %x, <vscale x 8 x i8> %y) {
+; CHECK-LABEL: vaaddu_vv_nxv8i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vl1r.v v8, (a0)
-; CHECK-NEXT: vl1r.v v9, (a1)
; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
; CHECK-NEXT: csrwi vxrm, 2
; CHECK-NEXT: vaaddu.vv v8, v8, v9
; CHECK-NEXT: ret
- %xv = load <vscale x 8 x i8>, ptr %x, align 2
- %yv = load <vscale x 8 x i8>, ptr %y, align 2
- %xzv = zext <vscale x 8 x i8> %xv to <vscale x 8 x i16>
- %yzv = zext <vscale x 8 x i8> %yv to <vscale x 8 x i16>
+ %xzv = zext <vscale x 8 x i8> %x to <vscale x 8 x i16>
+ %yzv = zext <vscale x 8 x i8> %y to <vscale x 8 x i16>
%add = add nuw nsw <vscale x 8 x i16> %xzv, %yzv
%one = insertelement <vscale x 8 x i16> poison, i16 1, i32 0
%splat = shufflevector <vscale x 8 x i16> %one, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
@@ -24,19 +20,15 @@ define <vscale x 8 x i8> @vaaddu_i8_vscale(ptr %x, ptr %y) {
}
-define <vscale x 8 x i8> @vaaddu_i8_sext_vscale(ptr %x, ptr %y) {
-; CHECK-LABEL: vaaddu_i8_sext_vscale:
+define <vscale x 8 x i8> @vaaddu_vv_nxv8i8_sexti16(<vscale x 8 x i8> %x, <vscale x 8 x i8> %y) {
+; CHECK-LABEL: vaaddu_vv_nxv8i8_sexti16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vl1r.v v8, (a0)
-; CHECK-NEXT: vl1r.v v9, (a1)
; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
; CHECK-NEXT: vwadd.vv v10, v8, v9
; CHECK-NEXT: vnsrl.wi v8, v10, 1
; CHECK-NEXT: ret
- %xv = load <vscale x 8 x i8>, ptr %x, align 2
- %yv = load <vscale x 8 x i8>, ptr %y, align 2
- %xzv = sext <vscale x 8 x i8> %xv to <vscale x 8 x i16>
- %yzv = sext <vscale x 8 x i8> %yv to <vscale x 8 x i16>
+ %xzv = sext <vscale x 8 x i8> %x to <vscale x 8 x i16>
+ %yzv = sext <vscale x 8 x i8> %y to <vscale x 8 x i16>
%add = add nuw nsw <vscale x 8 x i16> %xzv, %yzv
%one = insertelement <vscale x 8 x i16> poison, i16 1, i32 0
%splat = shufflevector <vscale x 8 x i16> %one, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
@@ -45,19 +37,15 @@ define <vscale x 8 x i8> @vaaddu_i8_sext_vscale(ptr %x, ptr %y) {
ret <vscale x 8 x i8> %ret
}
-define <vscale x 8 x i8> @vaaddu_i8_zexti32_vscale(ptr %x, ptr %y) {
-; CHECK-LABEL: vaaddu_i8_zexti32_vscale:
+define <vscale x 8 x i8> @vaaddu_vv_nxv8i8_zexti32(<vscale x 8 x i8> %x, <vscale x 8 x i8> %y) {
+; CHECK-LABEL: vaaddu_vv_nxv8i8_zexti32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vl1r.v v8, (a0)
-; CHECK-NEXT: vl1r.v v9, (a1)
; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
; CHECK-NEXT: csrwi vxrm, 2
; CHECK-NEXT: vaaddu.vv v8, v8, v9
; CHECK-NEXT: ret
- %xv = load <vscale x 8 x i8>, ptr %x, align 2
- %yv = load <vscale x 8 x i8>, ptr %y, align 2
- %xzv = zext <vscale x 8 x i8> %xv to <vscale x 8 x i32>
- %yzv = zext <vscale x 8 x i8> %yv to <vscale x 8 x i32>
+ %xzv = zext <vscale x 8 x i8> %x to <vscale x 8 x i32>
+ %yzv = zext <vscale x 8 x i8> %y to <vscale x 8 x i32>
%add = add nuw nsw <vscale x 8 x i32> %xzv, %yzv
%one = insertelement <vscale x 8 x i32> poison, i32 1, i32 0
%splat = shufflevector <vscale x 8 x i32> %one, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
@@ -66,19 +54,15 @@ define <vscale x 8 x i8> @vaaddu_i8_zexti32_vscale(ptr %x, ptr %y) {
ret <vscale x 8 x i8> %ret
}
-define <vscale x 8 x i8> @vaaddu_i8_lshr2_vscale(ptr %x, ptr %y) {
-; CHECK-LABEL: vaaddu_i8_lshr2_vscale:
+define <vscale x 8 x i8> @vaaddu_vv_nxv8i8_lshr2(<vscale x 8 x i8> %x, <vscale x 8 x i8> %y) {
+; CHECK-LABEL: vaaddu_vv_nxv8i8_lshr2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vl1r.v v8, (a0)
-; CHECK-NEXT: vl1r.v v9, (a1)
; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
; CHECK-NEXT: vwaddu.vv v10, v8, v9
; CHECK-NEXT: vnsrl.wi v8, v10, 2
; CHECK-NEXT: ret
- %xv = load <vscale x 8 x i8>, ptr %x, align 2
- %yv = load <vscale x 8 x i8>, ptr %y, align 2
- %xzv = zext <vscale x 8 x i8> %xv to <vscale x 8 x i16>
- %yzv = zext <vscale x 8 x i8> %yv to <vscale x 8 x i16>
+ %xzv = zext <vscale x 8 x i8> %x to <vscale x 8 x i16>
+ %yzv = zext <vscale x 8 x i8> %y to <vscale x 8 x i16>
%add = add nuw nsw <vscale x 8 x i16> %xzv, %yzv
%one = insertelement <vscale x 8 x i16> poison, i16 2, i32 0
%splat = shufflevector <vscale x 8 x i16> %one, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
@@ -87,19 +71,15 @@ define <vscale x 8 x i8> @vaaddu_i8_lshr2_vscale(ptr %x, ptr %y) {
ret <vscale x 8 x i8> %ret
}
-define <vscale x 8 x i16> @vaaddu_i16_vscale(ptr %x, ptr %y) {
-; CHECK-LABEL: vaaddu_i16_vscale:
+define <vscale x 8 x i16> @vaaddu_vv_nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i16> %y) {
+; CHECK-LABEL: vaaddu_vv_nxv8i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vl2re16.v v8, (a0)
-; CHECK-NEXT: vl2re16.v v10, (a1)
; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
; CHECK-NEXT: csrwi vxrm, 2
; CHECK-NEXT: vaaddu.vv v8, v8, v10
; CHECK-NEXT: ret
- %xv = load <vscale x 8 x i16>, ptr %x, align 2
- %yv = load <vscale x 8 x i16>, ptr %y, align 2
- %xzv = zext <vscale x 8 x i16> %xv to <vscale x 8 x i32>
- %yzv = zext <vscale x 8 x i16> %yv to <vscale x 8 x i32>
+ %xzv = zext <vscale x 8 x i16> %x to <vscale x 8 x i32>
+ %yzv = zext <vscale x 8 x i16> %y to <vscale x 8 x i32>
%add = add nuw nsw <vscale x 8 x i32> %xzv, %yzv
%one = insertelement <vscale x 8 x i32> poison, i32 1, i32 0
%splat = shufflevector <vscale x 8 x i32> %one, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
@@ -109,19 +89,15 @@ define <vscale x 8 x i16> @vaaddu_i16_vscale(ptr %x, ptr %y) {
}
-define <vscale x 8 x i32> @vaaddu_i32_vscale(ptr %x, ptr %y) {
-; CHECK-LABEL: vaaddu_i32_vscale:
+define <vscale x 8 x i32> @vaaddu_vv_nxv8i32(<vscale x 8 x i32> %x, <vscale x 8 x i32> %y) {
+; CHECK-LABEL: vaaddu_vv_nxv8i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vl4r.v v8, (a0)
-; CHECK-NEXT: vl4r.v v12, (a1)
; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
; CHECK-NEXT: csrwi vxrm, 2
; CHECK-NEXT: vaaddu.vv v8, v8, v12
; CHECK-NEXT: ret
- %xv = load <vscale x 8 x i32>, ptr %x, align 2
- %yv = load <vscale x 8 x i32>, ptr %y, align 2
- %xzv = zext <vscale x 8 x i32> %xv to <vscale x 8 x i64>
- %yzv = zext <vscale x 8 x i32> %yv to <vscale x 8 x i64>
+ %xzv = zext <vscale x 8 x i32> %x to <vscale x 8 x i64>
+ %yzv = zext <vscale x 8 x i32> %y to <vscale x 8 x i64>
%add = add nuw nsw <vscale x 8 x i64> %xzv, %yzv
%one = insertelement <vscale x 8 x i64> poison, i64 1, i64 0
%splat = shufflevector <vscale x 8 x i64> %one, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
@@ -130,19 +106,15 @@ define <vscale x 8 x i32> @vaaddu_i32_vscale(ptr %x, ptr %y) {
ret <vscale x 8 x i32> %ret
}
-define <vscale x 8 x i64> @vaaddu_i64_vscale(ptr %x, ptr %y, ptr %z) {
-; CHECK-LABEL: vaaddu_i64_vscale:
+define <vscale x 8 x i64> @vaaddu_vv_nxv8i64(<vscale x 8 x i64> %x, <vscale x 8 x i64> %y) {
+; CHECK-LABEL: vaaddu_vv_nxv8i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vl8r.v v8, (a0)
-; CHECK-NEXT: vl8r.v v16, (a1)
; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
; CHECK-NEXT: csrwi vxrm, 2
; CHECK-NEXT: vaaddu.vv v8, v8, v16
; CHECK-NEXT: ret
- %xv = load <vscale x 8 x i64>, ptr %x, align 2
- %yv = load <vscale x 8 x i64>, ptr %y, align 2
- %xzv = zext <vscale x 8 x i64> %xv to <vscale x 8 x i128>
- %yzv = zext <vscale x 8 x i64> %yv to <vscale x 8 x i128>
+ %xzv = zext <vscale x 8 x i64> %x to <vscale x 8 x i128>
+ %yzv = zext <vscale x 8 x i64> %y to <vscale x 8 x i128>
%add = add nuw nsw <vscale x 8 x i128> %xzv, %yzv
%one = insertelement <vscale x 8 x i128> poison, i128 1, i128 0
%splat = shufflevector <vscale x 8 x i128> %one, <vscale x 8 x i128> poison, <vscale x 8 x i32> zeroinitializer
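For context only (not part of the patch): a minimal C sketch of the scalar loop whose vectorized form yields the zext/add/lshr/trunc pattern these tests exercise. The function and variable names below are illustrative, not taken from the patch, and whether a given frontend/optimizer pipeline produces exactly this IR is not guaranteed. The point of the widening is that the 16-bit add cannot overflow, so the shift right by 1 is an exact floor average, which is what vaaddu.vv computes when vxrm is set to 2 (round-down), matching the `csrwi vxrm, 2` in the CHECK lines above.

  #include <stddef.h>
  #include <stdint.h>

  /* Floor average of two unsigned byte arrays.
     Names are illustrative only. The add is done in 16 bits so it cannot
     overflow; the shift right by 1 then rounds the sum down, i.e. the
     result is floor((a[i] + b[i]) / 2). */
  void avg_floor_u8(uint8_t *dst, const uint8_t *a, const uint8_t *b,
                    size_t n) {
    for (size_t i = 0; i < n; ++i)
      dst[i] = (uint8_t)(((uint16_t)a[i] + (uint16_t)b[i]) >> 1);
  }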