[llvm] 4bccd25 - [AArch64] LowerAVG - fallback to default expansion (#95416)
Fri Jun 14 02:22:16 PDT 2024
Author: Simon Pilgrim
Date: 2024-06-14T10:22:13+01:00
New Revision: 4bccd25467ce591869dad41c8b7c550093c20f1b
URL: https://github.com/llvm/llvm-project/commit/4bccd25467ce591869dad41c8b7c550093c20f1b
DIFF: https://github.com/llvm/llvm-project/commit/4bccd25467ce591869dad41c8b7c550093c20f1b.diff
LOG: [AArch64] LowerAVG - fallback to default expansion (#95416)
The TargetLowering::expandAVG implementations now match or are better than the AArch64 override.
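For reference (not part of the commit): the default expansion that the updated SVE codegen below reflects is based on the identities avgfloor(x, y) == (x & y) + ((x ^ y) >> 1) and avgceil(x, y) == (x | y) - ((x ^ y) >> 1), visible as the eor/and/asr/add and eor/orr/asr/sub sequences in the new test output (asr for the signed forms, lsr for the unsigned forms). A minimal standalone C++ sketch, not LLVM code, that checks the unsigned i8 forms exhaustively:

#include <cassert>
#include <cstdint>

// avgflooru: halving add rounding down -- the and/eor/lsr/add pattern.
// Works because x + y == 2*(x & y) + (x ^ y), so the shared bits are
// added in full and the differing bits contribute half each.
static uint8_t avgflooru(uint8_t x, uint8_t y) {
  return (uint8_t)((x & y) + ((x ^ y) >> 1));
}

// avgceilu: halving add rounding up -- the orr/eor/lsr/sub pattern.
// Works because x + y == 2*(x | y) - (x ^ y).
static uint8_t avgceilu(uint8_t x, uint8_t y) {
  return (uint8_t)((x | y) - ((x ^ y) >> 1));
}

int main() {
  for (unsigned x = 0; x < 256; ++x)
    for (unsigned y = 0; y < 256; ++y) {
      unsigned sum = x + y; // at most 9 bits, so no overflow here
      assert(avgflooru((uint8_t)x, (uint8_t)y) == sum / 2);
      assert(avgceilu((uint8_t)x, (uint8_t)y) == (sum + 1) / 2);
    }
  return 0;
}

Unlike the removed AArch64-specific sequence, neither identity needs the (x & y & 1) / ((x | y) & 1) correction term, which is why the new output is one instruction shorter per test.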
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/test/CodeGen/AArch64/sve-hadd.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index af8b9d9576ff7..394b741f1c1d0 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -15007,55 +15007,13 @@ AArch64TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
return SDValue();
}
-// When x and y are extended, lower:
-// avgfloor(x, y) -> (x + y) >> 1
-// avgceil(x, y) -> (x + y + 1) >> 1
-
-// Otherwise, lower to:
-// avgfloor(x, y) -> (x >> 1) + (y >> 1) + (x & y & 1)
-// avgceil(x, y) -> (x >> 1) + (y >> 1) + ((x || y) & 1)
SDValue AArch64TargetLowering::LowerAVG(SDValue Op, SelectionDAG &DAG,
unsigned NewOp) const {
if (Subtarget->hasSVE2())
return LowerToPredicatedOp(Op, DAG, NewOp);
- SDLoc dl(Op);
- SDValue OpA = Op->getOperand(0);
- SDValue OpB = Op->getOperand(1);
- EVT VT = Op.getValueType();
- bool IsCeil =
- (Op->getOpcode() == ISD::AVGCEILS || Op->getOpcode() == ISD::AVGCEILU);
- bool IsSigned =
- (Op->getOpcode() == ISD::AVGFLOORS || Op->getOpcode() == ISD::AVGCEILS);
- unsigned ShiftOpc = IsSigned ? ISD::SRA : ISD::SRL;
-
- assert(VT.isScalableVector() && "Only expect to lower scalable vector op!");
-
- auto IsZeroExtended = [&DAG](SDValue &Node) {
- KnownBits Known = DAG.computeKnownBits(Node, 0);
- return Known.Zero.isSignBitSet();
- };
-
- auto IsSignExtended = [&DAG](SDValue &Node) {
- return (DAG.ComputeNumSignBits(Node, 0) > 1);
- };
-
- SDValue ConstantOne = DAG.getConstant(1, dl, VT);
- if ((!IsSigned && IsZeroExtended(OpA) && IsZeroExtended(OpB)) ||
- (IsSigned && IsSignExtended(OpA) && IsSignExtended(OpB))) {
- SDValue Add = DAG.getNode(ISD::ADD, dl, VT, OpA, OpB);
- if (IsCeil)
- Add = DAG.getNode(ISD::ADD, dl, VT, Add, ConstantOne);
- return DAG.getNode(ShiftOpc, dl, VT, Add, ConstantOne);
- }
-
- SDValue ShiftOpA = DAG.getNode(ShiftOpc, dl, VT, OpA, ConstantOne);
- SDValue ShiftOpB = DAG.getNode(ShiftOpc, dl, VT, OpB, ConstantOne);
-
- SDValue tmp = DAG.getNode(IsCeil ? ISD::OR : ISD::AND, dl, VT, OpA, OpB);
- tmp = DAG.getNode(ISD::AND, dl, VT, tmp, ConstantOne);
- SDValue Add = DAG.getNode(ISD::ADD, dl, VT, ShiftOpA, ShiftOpB);
- return DAG.getNode(ISD::ADD, dl, VT, Add, tmp);
+ // Default to expand.
+ return SDValue();
}
SDValue AArch64TargetLowering::LowerVSCALE(SDValue Op,
diff --git a/llvm/test/CodeGen/AArch64/sve-hadd.ll b/llvm/test/CodeGen/AArch64/sve-hadd.ll
index 3fead88780e7d..6017e13ce0035 100644
--- a/llvm/test/CodeGen/AArch64/sve-hadd.ll
+++ b/llvm/test/CodeGen/AArch64/sve-hadd.ll
@@ -5,12 +5,10 @@
define <vscale x 2 x i64> @hadds_v2i64(<vscale x 2 x i64> %s0, <vscale x 2 x i64> %s1) {
; SVE-LABEL: hadds_v2i64:
; SVE: // %bb.0: // %entry
-; SVE-NEXT: asr z2.d, z1.d, #1
-; SVE-NEXT: asr z3.d, z0.d, #1
+; SVE-NEXT: eor z2.d, z0.d, z1.d
; SVE-NEXT: and z0.d, z0.d, z1.d
-; SVE-NEXT: add z1.d, z3.d, z2.d
-; SVE-NEXT: and z0.d, z0.d, #0x1
-; SVE-NEXT: add z0.d, z1.d, z0.d
+; SVE-NEXT: asr z1.d, z2.d, #1
+; SVE-NEXT: add z0.d, z0.d, z1.d
; SVE-NEXT: ret
;
; SVE2-LABEL: hadds_v2i64:
@@ -30,12 +28,10 @@ entry:
define <vscale x 2 x i64> @hadds_v2i64_lsh(<vscale x 2 x i64> %s0, <vscale x 2 x i64> %s1) {
; SVE-LABEL: hadds_v2i64_lsh:
; SVE: // %bb.0: // %entry
-; SVE-NEXT: asr z2.d, z1.d, #1
-; SVE-NEXT: asr z3.d, z0.d, #1
+; SVE-NEXT: eor z2.d, z0.d, z1.d
; SVE-NEXT: and z0.d, z0.d, z1.d
-; SVE-NEXT: add z1.d, z3.d, z2.d
-; SVE-NEXT: and z0.d, z0.d, #0x1
-; SVE-NEXT: add z0.d, z1.d, z0.d
+; SVE-NEXT: asr z1.d, z2.d, #1
+; SVE-NEXT: add z0.d, z0.d, z1.d
; SVE-NEXT: ret
;
; SVE2-LABEL: hadds_v2i64_lsh:
@@ -55,12 +51,10 @@ entry:
define <vscale x 2 x i64> @haddu_v2i64(<vscale x 2 x i64> %s0, <vscale x 2 x i64> %s1) {
; SVE-LABEL: haddu_v2i64:
; SVE: // %bb.0: // %entry
-; SVE-NEXT: lsr z2.d, z1.d, #1
-; SVE-NEXT: lsr z3.d, z0.d, #1
+; SVE-NEXT: eor z2.d, z0.d, z1.d
; SVE-NEXT: and z0.d, z0.d, z1.d
-; SVE-NEXT: add z1.d, z3.d, z2.d
-; SVE-NEXT: and z0.d, z0.d, #0x1
-; SVE-NEXT: add z0.d, z1.d, z0.d
+; SVE-NEXT: lsr z1.d, z2.d, #1
+; SVE-NEXT: add z0.d, z0.d, z1.d
; SVE-NEXT: ret
;
; SVE2-LABEL: haddu_v2i64:
@@ -146,12 +140,10 @@ entry:
define <vscale x 4 x i32> @hadds_v4i32(<vscale x 4 x i32> %s0, <vscale x 4 x i32> %s1) {
; SVE-LABEL: hadds_v4i32:
; SVE: // %bb.0: // %entry
-; SVE-NEXT: asr z2.s, z1.s, #1
-; SVE-NEXT: asr z3.s, z0.s, #1
+; SVE-NEXT: eor z2.d, z0.d, z1.d
; SVE-NEXT: and z0.d, z0.d, z1.d
-; SVE-NEXT: add z1.s, z3.s, z2.s
-; SVE-NEXT: and z0.s, z0.s, #0x1
-; SVE-NEXT: add z0.s, z1.s, z0.s
+; SVE-NEXT: asr z1.s, z2.s, #1
+; SVE-NEXT: add z0.s, z0.s, z1.s
; SVE-NEXT: ret
;
; SVE2-LABEL: hadds_v4i32:
@@ -171,12 +163,10 @@ entry:
define <vscale x 4 x i32> @hadds_v4i32_lsh(<vscale x 4 x i32> %s0, <vscale x 4 x i32> %s1) {
; SVE-LABEL: hadds_v4i32_lsh:
; SVE: // %bb.0: // %entry
-; SVE-NEXT: asr z2.s, z1.s, #1
-; SVE-NEXT: asr z3.s, z0.s, #1
+; SVE-NEXT: eor z2.d, z0.d, z1.d
; SVE-NEXT: and z0.d, z0.d, z1.d
-; SVE-NEXT: add z1.s, z3.s, z2.s
-; SVE-NEXT: and z0.s, z0.s, #0x1
-; SVE-NEXT: add z0.s, z1.s, z0.s
+; SVE-NEXT: asr z1.s, z2.s, #1
+; SVE-NEXT: add z0.s, z0.s, z1.s
; SVE-NEXT: ret
;
; SVE2-LABEL: hadds_v4i32_lsh:
@@ -196,12 +186,10 @@ entry:
define <vscale x 4 x i32> @haddu_v4i32(<vscale x 4 x i32> %s0, <vscale x 4 x i32> %s1) {
; SVE-LABEL: haddu_v4i32:
; SVE: // %bb.0: // %entry
-; SVE-NEXT: lsr z2.s, z1.s, #1
-; SVE-NEXT: lsr z3.s, z0.s, #1
+; SVE-NEXT: eor z2.d, z0.d, z1.d
; SVE-NEXT: and z0.d, z0.d, z1.d
-; SVE-NEXT: add z1.s, z3.s, z2.s
-; SVE-NEXT: and z0.s, z0.s, #0x1
-; SVE-NEXT: add z0.s, z1.s, z0.s
+; SVE-NEXT: lsr z1.s, z2.s, #1
+; SVE-NEXT: add z0.s, z0.s, z1.s
; SVE-NEXT: ret
;
; SVE2-LABEL: haddu_v4i32:
@@ -360,12 +348,10 @@ entry:
define <vscale x 8 x i16> @hadds_v8i16(<vscale x 8 x i16> %s0, <vscale x 8 x i16> %s1) {
; SVE-LABEL: hadds_v8i16:
; SVE: // %bb.0: // %entry
-; SVE-NEXT: asr z2.h, z1.h, #1
-; SVE-NEXT: asr z3.h, z0.h, #1
+; SVE-NEXT: eor z2.d, z0.d, z1.d
; SVE-NEXT: and z0.d, z0.d, z1.d
-; SVE-NEXT: add z1.h, z3.h, z2.h
-; SVE-NEXT: and z0.h, z0.h, #0x1
-; SVE-NEXT: add z0.h, z1.h, z0.h
+; SVE-NEXT: asr z1.h, z2.h, #1
+; SVE-NEXT: add z0.h, z0.h, z1.h
; SVE-NEXT: ret
;
; SVE2-LABEL: hadds_v8i16:
@@ -385,12 +371,10 @@ entry:
define <vscale x 8 x i16> @hadds_v8i16_lsh(<vscale x 8 x i16> %s0, <vscale x 8 x i16> %s1) {
; SVE-LABEL: hadds_v8i16_lsh:
; SVE: // %bb.0: // %entry
-; SVE-NEXT: asr z2.h, z1.h, #1
-; SVE-NEXT: asr z3.h, z0.h, #1
+; SVE-NEXT: eor z2.d, z0.d, z1.d
; SVE-NEXT: and z0.d, z0.d, z1.d
-; SVE-NEXT: add z1.h, z3.h, z2.h
-; SVE-NEXT: and z0.h, z0.h, #0x1
-; SVE-NEXT: add z0.h, z1.h, z0.h
+; SVE-NEXT: asr z1.h, z2.h, #1
+; SVE-NEXT: add z0.h, z0.h, z1.h
; SVE-NEXT: ret
;
; SVE2-LABEL: hadds_v8i16_lsh:
@@ -410,12 +394,10 @@ entry:
define <vscale x 8 x i16> @haddu_v8i16(<vscale x 8 x i16> %s0, <vscale x 8 x i16> %s1) {
; SVE-LABEL: haddu_v8i16:
; SVE: // %bb.0: // %entry
-; SVE-NEXT: lsr z2.h, z1.h, #1
-; SVE-NEXT: lsr z3.h, z0.h, #1
+; SVE-NEXT: eor z2.d, z0.d, z1.d
; SVE-NEXT: and z0.d, z0.d, z1.d
-; SVE-NEXT: add z1.h, z3.h, z2.h
-; SVE-NEXT: and z0.h, z0.h, #0x1
-; SVE-NEXT: add z0.h, z1.h, z0.h
+; SVE-NEXT: lsr z1.h, z2.h, #1
+; SVE-NEXT: add z0.h, z0.h, z1.h
; SVE-NEXT: ret
;
; SVE2-LABEL: haddu_v8i16:
@@ -574,12 +556,10 @@ entry:
define <vscale x 16 x i8> @hadds_v16i8(<vscale x 16 x i8> %s0, <vscale x 16 x i8> %s1) {
; SVE-LABEL: hadds_v16i8:
; SVE: // %bb.0: // %entry
-; SVE-NEXT: asr z2.b, z1.b, #1
-; SVE-NEXT: asr z3.b, z0.b, #1
+; SVE-NEXT: eor z2.d, z0.d, z1.d
; SVE-NEXT: and z0.d, z0.d, z1.d
-; SVE-NEXT: add z1.b, z3.b, z2.b
-; SVE-NEXT: and z0.b, z0.b, #0x1
-; SVE-NEXT: add z0.b, z1.b, z0.b
+; SVE-NEXT: asr z1.b, z2.b, #1
+; SVE-NEXT: add z0.b, z0.b, z1.b
; SVE-NEXT: ret
;
; SVE2-LABEL: hadds_v16i8:
@@ -599,12 +579,10 @@ entry:
define <vscale x 16 x i8> @hadds_v16i8_lsh(<vscale x 16 x i8> %s0, <vscale x 16 x i8> %s1) {
; SVE-LABEL: hadds_v16i8_lsh:
; SVE: // %bb.0: // %entry
-; SVE-NEXT: asr z2.b, z1.b, #1
-; SVE-NEXT: asr z3.b, z0.b, #1
+; SVE-NEXT: eor z2.d, z0.d, z1.d
; SVE-NEXT: and z0.d, z0.d, z1.d
-; SVE-NEXT: add z1.b, z3.b, z2.b
-; SVE-NEXT: and z0.b, z0.b, #0x1
-; SVE-NEXT: add z0.b, z1.b, z0.b
+; SVE-NEXT: asr z1.b, z2.b, #1
+; SVE-NEXT: add z0.b, z0.b, z1.b
; SVE-NEXT: ret
;
; SVE2-LABEL: hadds_v16i8_lsh:
@@ -624,12 +602,10 @@ entry:
define <vscale x 16 x i8> @haddu_v16i8(<vscale x 16 x i8> %s0, <vscale x 16 x i8> %s1) {
; SVE-LABEL: haddu_v16i8:
; SVE: // %bb.0: // %entry
-; SVE-NEXT: lsr z2.b, z1.b, #1
-; SVE-NEXT: lsr z3.b, z0.b, #1
+; SVE-NEXT: eor z2.d, z0.d, z1.d
; SVE-NEXT: and z0.d, z0.d, z1.d
-; SVE-NEXT: add z1.b, z3.b, z2.b
-; SVE-NEXT: and z0.b, z0.b, #0x1
-; SVE-NEXT: add z0.b, z1.b, z0.b
+; SVE-NEXT: lsr z1.b, z2.b, #1
+; SVE-NEXT: add z0.b, z0.b, z1.b
; SVE-NEXT: ret
;
; SVE2-LABEL: haddu_v16i8:
@@ -649,12 +625,10 @@ entry:
define <vscale x 2 x i64> @rhadds_v2i64(<vscale x 2 x i64> %s0, <vscale x 2 x i64> %s1) {
; SVE-LABEL: rhadds_v2i64:
; SVE: // %bb.0: // %entry
-; SVE-NEXT: asr z2.d, z1.d, #1
-; SVE-NEXT: asr z3.d, z0.d, #1
+; SVE-NEXT: eor z2.d, z0.d, z1.d
; SVE-NEXT: orr z0.d, z0.d, z1.d
-; SVE-NEXT: add z1.d, z3.d, z2.d
-; SVE-NEXT: and z0.d, z0.d, #0x1
-; SVE-NEXT: add z0.d, z1.d, z0.d
+; SVE-NEXT: asr z1.d, z2.d, #1
+; SVE-NEXT: sub z0.d, z0.d, z1.d
; SVE-NEXT: ret
;
; SVE2-LABEL: rhadds_v2i64:
@@ -675,12 +649,10 @@ entry:
define <vscale x 2 x i64> @rhadds_v2i64_lsh(<vscale x 2 x i64> %s0, <vscale x 2 x i64> %s1) {
; SVE-LABEL: rhadds_v2i64_lsh:
; SVE: // %bb.0: // %entry
-; SVE-NEXT: asr z2.d, z1.d, #1
-; SVE-NEXT: asr z3.d, z0.d, #1
+; SVE-NEXT: eor z2.d, z0.d, z1.d
; SVE-NEXT: orr z0.d, z0.d, z1.d
-; SVE-NEXT: add z1.d, z3.d, z2.d
-; SVE-NEXT: and z0.d, z0.d, #0x1
-; SVE-NEXT: add z0.d, z1.d, z0.d
+; SVE-NEXT: asr z1.d, z2.d, #1
+; SVE-NEXT: sub z0.d, z0.d, z1.d
; SVE-NEXT: ret
;
; SVE2-LABEL: rhadds_v2i64_lsh:
@@ -701,12 +673,10 @@ entry:
define <vscale x 2 x i64> @rhaddu_v2i64(<vscale x 2 x i64> %s0, <vscale x 2 x i64> %s1) {
; SVE-LABEL: rhaddu_v2i64:
; SVE: // %bb.0: // %entry
-; SVE-NEXT: lsr z2.d, z1.d, #1
-; SVE-NEXT: lsr z3.d, z0.d, #1
+; SVE-NEXT: eor z2.d, z0.d, z1.d
; SVE-NEXT: orr z0.d, z0.d, z1.d
-; SVE-NEXT: add z1.d, z3.d, z2.d
-; SVE-NEXT: and z0.d, z0.d, #0x1
-; SVE-NEXT: add z0.d, z1.d, z0.d
+; SVE-NEXT: lsr z1.d, z2.d, #1
+; SVE-NEXT: sub z0.d, z0.d, z1.d
; SVE-NEXT: ret
;
; SVE2-LABEL: rhaddu_v2i64:
@@ -805,12 +775,10 @@ entry:
define <vscale x 4 x i32> @rhadds_v4i32(<vscale x 4 x i32> %s0, <vscale x 4 x i32> %s1) {
; SVE-LABEL: rhadds_v4i32:
; SVE: // %bb.0: // %entry
-; SVE-NEXT: asr z2.s, z1.s, #1
-; SVE-NEXT: asr z3.s, z0.s, #1
+; SVE-NEXT: eor z2.d, z0.d, z1.d
; SVE-NEXT: orr z0.d, z0.d, z1.d
-; SVE-NEXT: add z1.s, z3.s, z2.s
-; SVE-NEXT: and z0.s, z0.s, #0x1
-; SVE-NEXT: add z0.s, z1.s, z0.s
+; SVE-NEXT: asr z1.s, z2.s, #1
+; SVE-NEXT: sub z0.s, z0.s, z1.s
; SVE-NEXT: ret
;
; SVE2-LABEL: rhadds_v4i32:
@@ -831,12 +799,10 @@ entry:
define <vscale x 4 x i32> @rhadds_v4i32_lsh(<vscale x 4 x i32> %s0, <vscale x 4 x i32> %s1) {
; SVE-LABEL: rhadds_v4i32_lsh:
; SVE: // %bb.0: // %entry
-; SVE-NEXT: asr z2.s, z1.s, #1
-; SVE-NEXT: asr z3.s, z0.s, #1
+; SVE-NEXT: eor z2.d, z0.d, z1.d
; SVE-NEXT: orr z0.d, z0.d, z1.d
-; SVE-NEXT: add z1.s, z3.s, z2.s
-; SVE-NEXT: and z0.s, z0.s, #0x1
-; SVE-NEXT: add z0.s, z1.s, z0.s
+; SVE-NEXT: asr z1.s, z2.s, #1
+; SVE-NEXT: sub z0.s, z0.s, z1.s
; SVE-NEXT: ret
;
; SVE2-LABEL: rhadds_v4i32_lsh:
@@ -857,12 +823,10 @@ entry:
define <vscale x 4 x i32> @rhaddu_v4i32(<vscale x 4 x i32> %s0, <vscale x 4 x i32> %s1) {
; SVE-LABEL: rhaddu_v4i32:
; SVE: // %bb.0: // %entry
-; SVE-NEXT: lsr z2.s, z1.s, #1
-; SVE-NEXT: lsr z3.s, z0.s, #1
+; SVE-NEXT: eor z2.d, z0.d, z1.d
; SVE-NEXT: orr z0.d, z0.d, z1.d
-; SVE-NEXT: add z1.s, z3.s, z2.s
-; SVE-NEXT: and z0.s, z0.s, #0x1
-; SVE-NEXT: add z0.s, z1.s, z0.s
+; SVE-NEXT: lsr z1.s, z2.s, #1
+; SVE-NEXT: sub z0.s, z0.s, z1.s
; SVE-NEXT: ret
;
; SVE2-LABEL: rhaddu_v4i32:
@@ -1040,12 +1004,10 @@ entry:
define <vscale x 8 x i16> @rhadds_v8i16(<vscale x 8 x i16> %s0, <vscale x 8 x i16> %s1) {
; SVE-LABEL: rhadds_v8i16:
; SVE: // %bb.0: // %entry
-; SVE-NEXT: asr z2.h, z1.h, #1
-; SVE-NEXT: asr z3.h, z0.h, #1
+; SVE-NEXT: eor z2.d, z0.d, z1.d
; SVE-NEXT: orr z0.d, z0.d, z1.d
-; SVE-NEXT: add z1.h, z3.h, z2.h
-; SVE-NEXT: and z0.h, z0.h, #0x1
-; SVE-NEXT: add z0.h, z1.h, z0.h
+; SVE-NEXT: asr z1.h, z2.h, #1
+; SVE-NEXT: sub z0.h, z0.h, z1.h
; SVE-NEXT: ret
;
; SVE2-LABEL: rhadds_v8i16:
@@ -1066,12 +1028,10 @@ entry:
define <vscale x 8 x i16> @rhadds_v8i16_lsh(<vscale x 8 x i16> %s0, <vscale x 8 x i16> %s1) {
; SVE-LABEL: rhadds_v8i16_lsh:
; SVE: // %bb.0: // %entry
-; SVE-NEXT: asr z2.h, z1.h, #1
-; SVE-NEXT: asr z3.h, z0.h, #1
+; SVE-NEXT: eor z2.d, z0.d, z1.d
; SVE-NEXT: orr z0.d, z0.d, z1.d
-; SVE-NEXT: add z1.h, z3.h, z2.h
-; SVE-NEXT: and z0.h, z0.h, #0x1
-; SVE-NEXT: add z0.h, z1.h, z0.h
+; SVE-NEXT: asr z1.h, z2.h, #1
+; SVE-NEXT: sub z0.h, z0.h, z1.h
; SVE-NEXT: ret
;
; SVE2-LABEL: rhadds_v8i16_lsh:
@@ -1092,12 +1052,10 @@ entry:
define <vscale x 8 x i16> @rhaddu_v8i16(<vscale x 8 x i16> %s0, <vscale x 8 x i16> %s1) {
; SVE-LABEL: rhaddu_v8i16:
; SVE: // %bb.0: // %entry
-; SVE-NEXT: lsr z2.h, z1.h, #1
-; SVE-NEXT: lsr z3.h, z0.h, #1
+; SVE-NEXT: eor z2.d, z0.d, z1.d
; SVE-NEXT: orr z0.d, z0.d, z1.d
-; SVE-NEXT: add z1.h, z3.h, z2.h
-; SVE-NEXT: and z0.h, z0.h, #0x1
-; SVE-NEXT: add z0.h, z1.h, z0.h
+; SVE-NEXT: lsr z1.h, z2.h, #1
+; SVE-NEXT: sub z0.h, z0.h, z1.h
; SVE-NEXT: ret
;
; SVE2-LABEL: rhaddu_v8i16:
@@ -1275,12 +1233,10 @@ entry:
define <vscale x 16 x i8> @rhadds_v16i8(<vscale x 16 x i8> %s0, <vscale x 16 x i8> %s1) {
; SVE-LABEL: rhadds_v16i8:
; SVE: // %bb.0: // %entry
-; SVE-NEXT: asr z2.b, z1.b, #1
-; SVE-NEXT: asr z3.b, z0.b, #1
+; SVE-NEXT: eor z2.d, z0.d, z1.d
; SVE-NEXT: orr z0.d, z0.d, z1.d
-; SVE-NEXT: add z1.b, z3.b, z2.b
-; SVE-NEXT: and z0.b, z0.b, #0x1
-; SVE-NEXT: add z0.b, z1.b, z0.b
+; SVE-NEXT: asr z1.b, z2.b, #1
+; SVE-NEXT: sub z0.b, z0.b, z1.b
; SVE-NEXT: ret
;
; SVE2-LABEL: rhadds_v16i8:
@@ -1301,12 +1257,10 @@ entry:
define <vscale x 16 x i8> @rhadds_v16i8_lsh(<vscale x 16 x i8> %s0, <vscale x 16 x i8> %s1) {
; SVE-LABEL: rhadds_v16i8_lsh:
; SVE: // %bb.0: // %entry
-; SVE-NEXT: asr z2.b, z1.b, #1
-; SVE-NEXT: asr z3.b, z0.b, #1
+; SVE-NEXT: eor z2.d, z0.d, z1.d
; SVE-NEXT: orr z0.d, z0.d, z1.d
-; SVE-NEXT: add z1.b, z3.b, z2.b
-; SVE-NEXT: and z0.b, z0.b, #0x1
-; SVE-NEXT: add z0.b, z1.b, z0.b
+; SVE-NEXT: asr z1.b, z2.b, #1
+; SVE-NEXT: sub z0.b, z0.b, z1.b
; SVE-NEXT: ret
;
; SVE2-LABEL: rhadds_v16i8_lsh:
@@ -1327,12 +1281,10 @@ entry:
define <vscale x 16 x i8> @rhaddu_v16i8(<vscale x 16 x i8> %s0, <vscale x 16 x i8> %s1) {
; SVE-LABEL: rhaddu_v16i8:
; SVE: // %bb.0: // %entry
-; SVE-NEXT: lsr z2.b, z1.b, #1
-; SVE-NEXT: lsr z3.b, z0.b, #1
+; SVE-NEXT: eor z2.d, z0.d, z1.d
; SVE-NEXT: orr z0.d, z0.d, z1.d
-; SVE-NEXT: add z1.b, z3.b, z2.b
-; SVE-NEXT: and z0.b, z0.b, #0x1
-; SVE-NEXT: add z0.b, z1.b, z0.b
+; SVE-NEXT: lsr z1.b, z2.b, #1
+; SVE-NEXT: sub z0.b, z0.b, z1.b
; SVE-NEXT: ret
;
; SVE2-LABEL: rhaddu_v16i8: