[llvm] [AArch64] LowerAVG - fallback to default expansion (PR #95416)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 13 07:21:51 PDT 2024
llvmbot wrote:
@llvm/pr-subscribers-backend-aarch64
Author: Simon Pilgrim (RKSimon)
Changes:
The TargetLowering::expandAVG implementations now match or improve on the AArch64 override, so the override's manual expansion can be dropped in favor of the default expansion.
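
For context, the generic expansion computes the average with the standard overflow-free bit identities, which is exactly the shorter `eor`/`and` (or `orr`)/shift/`add` (or `sub`) sequence in the updated CHECK lines below. A minimal C++ sketch of those identities (illustrative only; the function names are mine, not LLVM's), exhaustively checked for i8:

```cpp
#include <cassert>
#include <cstdint>

// Overflow-free halving-add identities (unsigned shown here):
//   avgfloor(x, y) = (x & y) + ((x ^ y) >> 1)
//   avgceil(x, y)  = (x | y) - ((x ^ y) >> 1)
// No intermediate value ever exceeds the element width.
static uint8_t avgflooru(uint8_t x, uint8_t y) {
  return (x & y) + ((x ^ y) >> 1);
}
static uint8_t avgceilu(uint8_t x, uint8_t y) {
  return (x | y) - ((x ^ y) >> 1);
}

int main() {
  // Exhaustive check against the widened reference computation.
  for (int x = 0; x < 256; ++x)
    for (int y = 0; y < 256; ++y) {
      assert(avgflooru(x, y) == (x + y) / 2);
      assert(avgceilu(x, y) == (x + y + 1) / 2);
    }
  return 0;
}
```

The signed AVGFLOORS/AVGCEILS forms use the same shape with an arithmetic shift, matching the `asr` variants in the tests.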
---
Full diff: https://github.com/llvm/llvm-project/pull/95416.diff
2 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+2-44)
- (modified) llvm/test/CodeGen/AArch64/sve-hadd.ll (+72-120)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index c4f819f5fcdd2..74421db97ff23 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -14995,55 +14995,13 @@ AArch64TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
return SDValue();
}
-// When x and y are extended, lower:
-// avgfloor(x, y) -> (x + y) >> 1
-// avgceil(x, y) -> (x + y + 1) >> 1
-
-// Otherwise, lower to:
-// avgfloor(x, y) -> (x >> 1) + (y >> 1) + (x & y & 1)
-// avgceil(x, y) -> (x >> 1) + (y >> 1) + ((x || y) & 1)
SDValue AArch64TargetLowering::LowerAVG(SDValue Op, SelectionDAG &DAG,
unsigned NewOp) const {
if (Subtarget->hasSVE2())
return LowerToPredicatedOp(Op, DAG, NewOp);
- SDLoc dl(Op);
- SDValue OpA = Op->getOperand(0);
- SDValue OpB = Op->getOperand(1);
- EVT VT = Op.getValueType();
- bool IsCeil =
- (Op->getOpcode() == ISD::AVGCEILS || Op->getOpcode() == ISD::AVGCEILU);
- bool IsSigned =
- (Op->getOpcode() == ISD::AVGFLOORS || Op->getOpcode() == ISD::AVGCEILS);
- unsigned ShiftOpc = IsSigned ? ISD::SRA : ISD::SRL;
-
- assert(VT.isScalableVector() && "Only expect to lower scalable vector op!");
-
- auto IsZeroExtended = [&DAG](SDValue &Node) {
- KnownBits Known = DAG.computeKnownBits(Node, 0);
- return Known.Zero.isSignBitSet();
- };
-
- auto IsSignExtended = [&DAG](SDValue &Node) {
- return (DAG.ComputeNumSignBits(Node, 0) > 1);
- };
-
- SDValue ConstantOne = DAG.getConstant(1, dl, VT);
- if ((!IsSigned && IsZeroExtended(OpA) && IsZeroExtended(OpB)) ||
- (IsSigned && IsSignExtended(OpA) && IsSignExtended(OpB))) {
- SDValue Add = DAG.getNode(ISD::ADD, dl, VT, OpA, OpB);
- if (IsCeil)
- Add = DAG.getNode(ISD::ADD, dl, VT, Add, ConstantOne);
- return DAG.getNode(ShiftOpc, dl, VT, Add, ConstantOne);
- }
-
- SDValue ShiftOpA = DAG.getNode(ShiftOpc, dl, VT, OpA, ConstantOne);
- SDValue ShiftOpB = DAG.getNode(ShiftOpc, dl, VT, OpB, ConstantOne);
-
- SDValue tmp = DAG.getNode(IsCeil ? ISD::OR : ISD::AND, dl, VT, OpA, OpB);
- tmp = DAG.getNode(ISD::AND, dl, VT, tmp, ConstantOne);
- SDValue Add = DAG.getNode(ISD::ADD, dl, VT, ShiftOpA, ShiftOpB);
- return DAG.getNode(ISD::ADD, dl, VT, Add, tmp);
+ // Default to expand.
+ return SDValue();
}
SDValue AArch64TargetLowering::LowerVSCALE(SDValue Op,
diff --git a/llvm/test/CodeGen/AArch64/sve-hadd.ll b/llvm/test/CodeGen/AArch64/sve-hadd.ll
index 3fead88780e7d..6017e13ce0035 100644
--- a/llvm/test/CodeGen/AArch64/sve-hadd.ll
+++ b/llvm/test/CodeGen/AArch64/sve-hadd.ll
@@ -5,12 +5,10 @@
define <vscale x 2 x i64> @hadds_v2i64(<vscale x 2 x i64> %s0, <vscale x 2 x i64> %s1) {
; SVE-LABEL: hadds_v2i64:
; SVE: // %bb.0: // %entry
-; SVE-NEXT: asr z2.d, z1.d, #1
-; SVE-NEXT: asr z3.d, z0.d, #1
+; SVE-NEXT: eor z2.d, z0.d, z1.d
; SVE-NEXT: and z0.d, z0.d, z1.d
-; SVE-NEXT: add z1.d, z3.d, z2.d
-; SVE-NEXT: and z0.d, z0.d, #0x1
-; SVE-NEXT: add z0.d, z1.d, z0.d
+; SVE-NEXT: asr z1.d, z2.d, #1
+; SVE-NEXT: add z0.d, z0.d, z1.d
; SVE-NEXT: ret
;
; SVE2-LABEL: hadds_v2i64:
@@ -30,12 +28,10 @@ entry:
define <vscale x 2 x i64> @hadds_v2i64_lsh(<vscale x 2 x i64> %s0, <vscale x 2 x i64> %s1) {
; SVE-LABEL: hadds_v2i64_lsh:
; SVE: // %bb.0: // %entry
-; SVE-NEXT: asr z2.d, z1.d, #1
-; SVE-NEXT: asr z3.d, z0.d, #1
+; SVE-NEXT: eor z2.d, z0.d, z1.d
; SVE-NEXT: and z0.d, z0.d, z1.d
-; SVE-NEXT: add z1.d, z3.d, z2.d
-; SVE-NEXT: and z0.d, z0.d, #0x1
-; SVE-NEXT: add z0.d, z1.d, z0.d
+; SVE-NEXT: asr z1.d, z2.d, #1
+; SVE-NEXT: add z0.d, z0.d, z1.d
; SVE-NEXT: ret
;
; SVE2-LABEL: hadds_v2i64_lsh:
@@ -55,12 +51,10 @@ entry:
define <vscale x 2 x i64> @haddu_v2i64(<vscale x 2 x i64> %s0, <vscale x 2 x i64> %s1) {
; SVE-LABEL: haddu_v2i64:
; SVE: // %bb.0: // %entry
-; SVE-NEXT: lsr z2.d, z1.d, #1
-; SVE-NEXT: lsr z3.d, z0.d, #1
+; SVE-NEXT: eor z2.d, z0.d, z1.d
; SVE-NEXT: and z0.d, z0.d, z1.d
-; SVE-NEXT: add z1.d, z3.d, z2.d
-; SVE-NEXT: and z0.d, z0.d, #0x1
-; SVE-NEXT: add z0.d, z1.d, z0.d
+; SVE-NEXT: lsr z1.d, z2.d, #1
+; SVE-NEXT: add z0.d, z0.d, z1.d
; SVE-NEXT: ret
;
; SVE2-LABEL: haddu_v2i64:
@@ -146,12 +140,10 @@ entry:
define <vscale x 4 x i32> @hadds_v4i32(<vscale x 4 x i32> %s0, <vscale x 4 x i32> %s1) {
; SVE-LABEL: hadds_v4i32:
; SVE: // %bb.0: // %entry
-; SVE-NEXT: asr z2.s, z1.s, #1
-; SVE-NEXT: asr z3.s, z0.s, #1
+; SVE-NEXT: eor z2.d, z0.d, z1.d
; SVE-NEXT: and z0.d, z0.d, z1.d
-; SVE-NEXT: add z1.s, z3.s, z2.s
-; SVE-NEXT: and z0.s, z0.s, #0x1
-; SVE-NEXT: add z0.s, z1.s, z0.s
+; SVE-NEXT: asr z1.s, z2.s, #1
+; SVE-NEXT: add z0.s, z0.s, z1.s
; SVE-NEXT: ret
;
; SVE2-LABEL: hadds_v4i32:
@@ -171,12 +163,10 @@ entry:
define <vscale x 4 x i32> @hadds_v4i32_lsh(<vscale x 4 x i32> %s0, <vscale x 4 x i32> %s1) {
; SVE-LABEL: hadds_v4i32_lsh:
; SVE: // %bb.0: // %entry
-; SVE-NEXT: asr z2.s, z1.s, #1
-; SVE-NEXT: asr z3.s, z0.s, #1
+; SVE-NEXT: eor z2.d, z0.d, z1.d
; SVE-NEXT: and z0.d, z0.d, z1.d
-; SVE-NEXT: add z1.s, z3.s, z2.s
-; SVE-NEXT: and z0.s, z0.s, #0x1
-; SVE-NEXT: add z0.s, z1.s, z0.s
+; SVE-NEXT: asr z1.s, z2.s, #1
+; SVE-NEXT: add z0.s, z0.s, z1.s
; SVE-NEXT: ret
;
; SVE2-LABEL: hadds_v4i32_lsh:
@@ -196,12 +186,10 @@ entry:
define <vscale x 4 x i32> @haddu_v4i32(<vscale x 4 x i32> %s0, <vscale x 4 x i32> %s1) {
; SVE-LABEL: haddu_v4i32:
; SVE: // %bb.0: // %entry
-; SVE-NEXT: lsr z2.s, z1.s, #1
-; SVE-NEXT: lsr z3.s, z0.s, #1
+; SVE-NEXT: eor z2.d, z0.d, z1.d
; SVE-NEXT: and z0.d, z0.d, z1.d
-; SVE-NEXT: add z1.s, z3.s, z2.s
-; SVE-NEXT: and z0.s, z0.s, #0x1
-; SVE-NEXT: add z0.s, z1.s, z0.s
+; SVE-NEXT: lsr z1.s, z2.s, #1
+; SVE-NEXT: add z0.s, z0.s, z1.s
; SVE-NEXT: ret
;
; SVE2-LABEL: haddu_v4i32:
@@ -360,12 +348,10 @@ entry:
define <vscale x 8 x i16> @hadds_v8i16(<vscale x 8 x i16> %s0, <vscale x 8 x i16> %s1) {
; SVE-LABEL: hadds_v8i16:
; SVE: // %bb.0: // %entry
-; SVE-NEXT: asr z2.h, z1.h, #1
-; SVE-NEXT: asr z3.h, z0.h, #1
+; SVE-NEXT: eor z2.d, z0.d, z1.d
; SVE-NEXT: and z0.d, z0.d, z1.d
-; SVE-NEXT: add z1.h, z3.h, z2.h
-; SVE-NEXT: and z0.h, z0.h, #0x1
-; SVE-NEXT: add z0.h, z1.h, z0.h
+; SVE-NEXT: asr z1.h, z2.h, #1
+; SVE-NEXT: add z0.h, z0.h, z1.h
; SVE-NEXT: ret
;
; SVE2-LABEL: hadds_v8i16:
@@ -385,12 +371,10 @@ entry:
define <vscale x 8 x i16> @hadds_v8i16_lsh(<vscale x 8 x i16> %s0, <vscale x 8 x i16> %s1) {
; SVE-LABEL: hadds_v8i16_lsh:
; SVE: // %bb.0: // %entry
-; SVE-NEXT: asr z2.h, z1.h, #1
-; SVE-NEXT: asr z3.h, z0.h, #1
+; SVE-NEXT: eor z2.d, z0.d, z1.d
; SVE-NEXT: and z0.d, z0.d, z1.d
-; SVE-NEXT: add z1.h, z3.h, z2.h
-; SVE-NEXT: and z0.h, z0.h, #0x1
-; SVE-NEXT: add z0.h, z1.h, z0.h
+; SVE-NEXT: asr z1.h, z2.h, #1
+; SVE-NEXT: add z0.h, z0.h, z1.h
; SVE-NEXT: ret
;
; SVE2-LABEL: hadds_v8i16_lsh:
@@ -410,12 +394,10 @@ entry:
define <vscale x 8 x i16> @haddu_v8i16(<vscale x 8 x i16> %s0, <vscale x 8 x i16> %s1) {
; SVE-LABEL: haddu_v8i16:
; SVE: // %bb.0: // %entry
-; SVE-NEXT: lsr z2.h, z1.h, #1
-; SVE-NEXT: lsr z3.h, z0.h, #1
+; SVE-NEXT: eor z2.d, z0.d, z1.d
; SVE-NEXT: and z0.d, z0.d, z1.d
-; SVE-NEXT: add z1.h, z3.h, z2.h
-; SVE-NEXT: and z0.h, z0.h, #0x1
-; SVE-NEXT: add z0.h, z1.h, z0.h
+; SVE-NEXT: lsr z1.h, z2.h, #1
+; SVE-NEXT: add z0.h, z0.h, z1.h
; SVE-NEXT: ret
;
; SVE2-LABEL: haddu_v8i16:
@@ -574,12 +556,10 @@ entry:
define <vscale x 16 x i8> @hadds_v16i8(<vscale x 16 x i8> %s0, <vscale x 16 x i8> %s1) {
; SVE-LABEL: hadds_v16i8:
; SVE: // %bb.0: // %entry
-; SVE-NEXT: asr z2.b, z1.b, #1
-; SVE-NEXT: asr z3.b, z0.b, #1
+; SVE-NEXT: eor z2.d, z0.d, z1.d
; SVE-NEXT: and z0.d, z0.d, z1.d
-; SVE-NEXT: add z1.b, z3.b, z2.b
-; SVE-NEXT: and z0.b, z0.b, #0x1
-; SVE-NEXT: add z0.b, z1.b, z0.b
+; SVE-NEXT: asr z1.b, z2.b, #1
+; SVE-NEXT: add z0.b, z0.b, z1.b
; SVE-NEXT: ret
;
; SVE2-LABEL: hadds_v16i8:
@@ -599,12 +579,10 @@ entry:
define <vscale x 16 x i8> @hadds_v16i8_lsh(<vscale x 16 x i8> %s0, <vscale x 16 x i8> %s1) {
; SVE-LABEL: hadds_v16i8_lsh:
; SVE: // %bb.0: // %entry
-; SVE-NEXT: asr z2.b, z1.b, #1
-; SVE-NEXT: asr z3.b, z0.b, #1
+; SVE-NEXT: eor z2.d, z0.d, z1.d
; SVE-NEXT: and z0.d, z0.d, z1.d
-; SVE-NEXT: add z1.b, z3.b, z2.b
-; SVE-NEXT: and z0.b, z0.b, #0x1
-; SVE-NEXT: add z0.b, z1.b, z0.b
+; SVE-NEXT: asr z1.b, z2.b, #1
+; SVE-NEXT: add z0.b, z0.b, z1.b
; SVE-NEXT: ret
;
; SVE2-LABEL: hadds_v16i8_lsh:
@@ -624,12 +602,10 @@ entry:
define <vscale x 16 x i8> @haddu_v16i8(<vscale x 16 x i8> %s0, <vscale x 16 x i8> %s1) {
; SVE-LABEL: haddu_v16i8:
; SVE: // %bb.0: // %entry
-; SVE-NEXT: lsr z2.b, z1.b, #1
-; SVE-NEXT: lsr z3.b, z0.b, #1
+; SVE-NEXT: eor z2.d, z0.d, z1.d
; SVE-NEXT: and z0.d, z0.d, z1.d
-; SVE-NEXT: add z1.b, z3.b, z2.b
-; SVE-NEXT: and z0.b, z0.b, #0x1
-; SVE-NEXT: add z0.b, z1.b, z0.b
+; SVE-NEXT: lsr z1.b, z2.b, #1
+; SVE-NEXT: add z0.b, z0.b, z1.b
; SVE-NEXT: ret
;
; SVE2-LABEL: haddu_v16i8:
@@ -649,12 +625,10 @@ entry:
define <vscale x 2 x i64> @rhadds_v2i64(<vscale x 2 x i64> %s0, <vscale x 2 x i64> %s1) {
; SVE-LABEL: rhadds_v2i64:
; SVE: // %bb.0: // %entry
-; SVE-NEXT: asr z2.d, z1.d, #1
-; SVE-NEXT: asr z3.d, z0.d, #1
+; SVE-NEXT: eor z2.d, z0.d, z1.d
; SVE-NEXT: orr z0.d, z0.d, z1.d
-; SVE-NEXT: add z1.d, z3.d, z2.d
-; SVE-NEXT: and z0.d, z0.d, #0x1
-; SVE-NEXT: add z0.d, z1.d, z0.d
+; SVE-NEXT: asr z1.d, z2.d, #1
+; SVE-NEXT: sub z0.d, z0.d, z1.d
; SVE-NEXT: ret
;
; SVE2-LABEL: rhadds_v2i64:
@@ -675,12 +649,10 @@ entry:
define <vscale x 2 x i64> @rhadds_v2i64_lsh(<vscale x 2 x i64> %s0, <vscale x 2 x i64> %s1) {
; SVE-LABEL: rhadds_v2i64_lsh:
; SVE: // %bb.0: // %entry
-; SVE-NEXT: asr z2.d, z1.d, #1
-; SVE-NEXT: asr z3.d, z0.d, #1
+; SVE-NEXT: eor z2.d, z0.d, z1.d
; SVE-NEXT: orr z0.d, z0.d, z1.d
-; SVE-NEXT: add z1.d, z3.d, z2.d
-; SVE-NEXT: and z0.d, z0.d, #0x1
-; SVE-NEXT: add z0.d, z1.d, z0.d
+; SVE-NEXT: asr z1.d, z2.d, #1
+; SVE-NEXT: sub z0.d, z0.d, z1.d
; SVE-NEXT: ret
;
; SVE2-LABEL: rhadds_v2i64_lsh:
@@ -701,12 +673,10 @@ entry:
define <vscale x 2 x i64> @rhaddu_v2i64(<vscale x 2 x i64> %s0, <vscale x 2 x i64> %s1) {
; SVE-LABEL: rhaddu_v2i64:
; SVE: // %bb.0: // %entry
-; SVE-NEXT: lsr z2.d, z1.d, #1
-; SVE-NEXT: lsr z3.d, z0.d, #1
+; SVE-NEXT: eor z2.d, z0.d, z1.d
; SVE-NEXT: orr z0.d, z0.d, z1.d
-; SVE-NEXT: add z1.d, z3.d, z2.d
-; SVE-NEXT: and z0.d, z0.d, #0x1
-; SVE-NEXT: add z0.d, z1.d, z0.d
+; SVE-NEXT: lsr z1.d, z2.d, #1
+; SVE-NEXT: sub z0.d, z0.d, z1.d
; SVE-NEXT: ret
;
; SVE2-LABEL: rhaddu_v2i64:
@@ -805,12 +775,10 @@ entry:
define <vscale x 4 x i32> @rhadds_v4i32(<vscale x 4 x i32> %s0, <vscale x 4 x i32> %s1) {
; SVE-LABEL: rhadds_v4i32:
; SVE: // %bb.0: // %entry
-; SVE-NEXT: asr z2.s, z1.s, #1
-; SVE-NEXT: asr z3.s, z0.s, #1
+; SVE-NEXT: eor z2.d, z0.d, z1.d
; SVE-NEXT: orr z0.d, z0.d, z1.d
-; SVE-NEXT: add z1.s, z3.s, z2.s
-; SVE-NEXT: and z0.s, z0.s, #0x1
-; SVE-NEXT: add z0.s, z1.s, z0.s
+; SVE-NEXT: asr z1.s, z2.s, #1
+; SVE-NEXT: sub z0.s, z0.s, z1.s
; SVE-NEXT: ret
;
; SVE2-LABEL: rhadds_v4i32:
@@ -831,12 +799,10 @@ entry:
define <vscale x 4 x i32> @rhadds_v4i32_lsh(<vscale x 4 x i32> %s0, <vscale x 4 x i32> %s1) {
; SVE-LABEL: rhadds_v4i32_lsh:
; SVE: // %bb.0: // %entry
-; SVE-NEXT: asr z2.s, z1.s, #1
-; SVE-NEXT: asr z3.s, z0.s, #1
+; SVE-NEXT: eor z2.d, z0.d, z1.d
; SVE-NEXT: orr z0.d, z0.d, z1.d
-; SVE-NEXT: add z1.s, z3.s, z2.s
-; SVE-NEXT: and z0.s, z0.s, #0x1
-; SVE-NEXT: add z0.s, z1.s, z0.s
+; SVE-NEXT: asr z1.s, z2.s, #1
+; SVE-NEXT: sub z0.s, z0.s, z1.s
; SVE-NEXT: ret
;
; SVE2-LABEL: rhadds_v4i32_lsh:
@@ -857,12 +823,10 @@ entry:
define <vscale x 4 x i32> @rhaddu_v4i32(<vscale x 4 x i32> %s0, <vscale x 4 x i32> %s1) {
; SVE-LABEL: rhaddu_v4i32:
; SVE: // %bb.0: // %entry
-; SVE-NEXT: lsr z2.s, z1.s, #1
-; SVE-NEXT: lsr z3.s, z0.s, #1
+; SVE-NEXT: eor z2.d, z0.d, z1.d
; SVE-NEXT: orr z0.d, z0.d, z1.d
-; SVE-NEXT: add z1.s, z3.s, z2.s
-; SVE-NEXT: and z0.s, z0.s, #0x1
-; SVE-NEXT: add z0.s, z1.s, z0.s
+; SVE-NEXT: lsr z1.s, z2.s, #1
+; SVE-NEXT: sub z0.s, z0.s, z1.s
; SVE-NEXT: ret
;
; SVE2-LABEL: rhaddu_v4i32:
@@ -1040,12 +1004,10 @@ entry:
define <vscale x 8 x i16> @rhadds_v8i16(<vscale x 8 x i16> %s0, <vscale x 8 x i16> %s1) {
; SVE-LABEL: rhadds_v8i16:
; SVE: // %bb.0: // %entry
-; SVE-NEXT: asr z2.h, z1.h, #1
-; SVE-NEXT: asr z3.h, z0.h, #1
+; SVE-NEXT: eor z2.d, z0.d, z1.d
; SVE-NEXT: orr z0.d, z0.d, z1.d
-; SVE-NEXT: add z1.h, z3.h, z2.h
-; SVE-NEXT: and z0.h, z0.h, #0x1
-; SVE-NEXT: add z0.h, z1.h, z0.h
+; SVE-NEXT: asr z1.h, z2.h, #1
+; SVE-NEXT: sub z0.h, z0.h, z1.h
; SVE-NEXT: ret
;
; SVE2-LABEL: rhadds_v8i16:
@@ -1066,12 +1028,10 @@ entry:
define <vscale x 8 x i16> @rhadds_v8i16_lsh(<vscale x 8 x i16> %s0, <vscale x 8 x i16> %s1) {
; SVE-LABEL: rhadds_v8i16_lsh:
; SVE: // %bb.0: // %entry
-; SVE-NEXT: asr z2.h, z1.h, #1
-; SVE-NEXT: asr z3.h, z0.h, #1
+; SVE-NEXT: eor z2.d, z0.d, z1.d
; SVE-NEXT: orr z0.d, z0.d, z1.d
-; SVE-NEXT: add z1.h, z3.h, z2.h
-; SVE-NEXT: and z0.h, z0.h, #0x1
-; SVE-NEXT: add z0.h, z1.h, z0.h
+; SVE-NEXT: asr z1.h, z2.h, #1
+; SVE-NEXT: sub z0.h, z0.h, z1.h
; SVE-NEXT: ret
;
; SVE2-LABEL: rhadds_v8i16_lsh:
@@ -1092,12 +1052,10 @@ entry:
define <vscale x 8 x i16> @rhaddu_v8i16(<vscale x 8 x i16> %s0, <vscale x 8 x i16> %s1) {
; SVE-LABEL: rhaddu_v8i16:
; SVE: // %bb.0: // %entry
-; SVE-NEXT: lsr z2.h, z1.h, #1
-; SVE-NEXT: lsr z3.h, z0.h, #1
+; SVE-NEXT: eor z2.d, z0.d, z1.d
; SVE-NEXT: orr z0.d, z0.d, z1.d
-; SVE-NEXT: add z1.h, z3.h, z2.h
-; SVE-NEXT: and z0.h, z0.h, #0x1
-; SVE-NEXT: add z0.h, z1.h, z0.h
+; SVE-NEXT: lsr z1.h, z2.h, #1
+; SVE-NEXT: sub z0.h, z0.h, z1.h
; SVE-NEXT: ret
;
; SVE2-LABEL: rhaddu_v8i16:
@@ -1275,12 +1233,10 @@ entry:
define <vscale x 16 x i8> @rhadds_v16i8(<vscale x 16 x i8> %s0, <vscale x 16 x i8> %s1) {
; SVE-LABEL: rhadds_v16i8:
; SVE: // %bb.0: // %entry
-; SVE-NEXT: asr z2.b, z1.b, #1
-; SVE-NEXT: asr z3.b, z0.b, #1
+; SVE-NEXT: eor z2.d, z0.d, z1.d
; SVE-NEXT: orr z0.d, z0.d, z1.d
-; SVE-NEXT: add z1.b, z3.b, z2.b
-; SVE-NEXT: and z0.b, z0.b, #0x1
-; SVE-NEXT: add z0.b, z1.b, z0.b
+; SVE-NEXT: asr z1.b, z2.b, #1
+; SVE-NEXT: sub z0.b, z0.b, z1.b
; SVE-NEXT: ret
;
; SVE2-LABEL: rhadds_v16i8:
@@ -1301,12 +1257,10 @@ entry:
define <vscale x 16 x i8> @rhadds_v16i8_lsh(<vscale x 16 x i8> %s0, <vscale x 16 x i8> %s1) {
; SVE-LABEL: rhadds_v16i8_lsh:
; SVE: // %bb.0: // %entry
-; SVE-NEXT: asr z2.b, z1.b, #1
-; SVE-NEXT: asr z3.b, z0.b, #1
+; SVE-NEXT: eor z2.d, z0.d, z1.d
; SVE-NEXT: orr z0.d, z0.d, z1.d
-; SVE-NEXT: add z1.b, z3.b, z2.b
-; SVE-NEXT: and z0.b, z0.b, #0x1
-; SVE-NEXT: add z0.b, z1.b, z0.b
+; SVE-NEXT: asr z1.b, z2.b, #1
+; SVE-NEXT: sub z0.b, z0.b, z1.b
; SVE-NEXT: ret
;
; SVE2-LABEL: rhadds_v16i8_lsh:
@@ -1327,12 +1281,10 @@ entry:
define <vscale x 16 x i8> @rhaddu_v16i8(<vscale x 16 x i8> %s0, <vscale x 16 x i8> %s1) {
; SVE-LABEL: rhaddu_v16i8:
; SVE: // %bb.0: // %entry
-; SVE-NEXT: lsr z2.b, z1.b, #1
-; SVE-NEXT: lsr z3.b, z0.b, #1
+; SVE-NEXT: eor z2.d, z0.d, z1.d
; SVE-NEXT: orr z0.d, z0.d, z1.d
-; SVE-NEXT: add z1.b, z3.b, z2.b
-; SVE-NEXT: and z0.b, z0.b, #0x1
-; SVE-NEXT: add z0.b, z1.b, z0.b
+; SVE-NEXT: lsr z1.b, z2.b, #1
+; SVE-NEXT: sub z0.b, z0.b, z1.b
; SVE-NEXT: ret
;
; SVE2-LABEL: rhaddu_v16i8:
``````````
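As a sanity check on the "match or better" claim: the removed override's general-case formula and the generic identities agree on every input, while the generic form needs fewer operations (four SVE instructions instead of six in the CHECK lines above). A hedged C++ sketch, exhaustive over unsigned i8:

```cpp
#include <cassert>
#include <cstdint>

int main() {
  for (int x = 0; x < 256; ++x)
    for (int y = 0; y < 256; ++y) {
      // Removed override's general case: shift each operand, then fix up
      // the dropped low bits -- five data ops, six instructions.
      uint8_t oldFloor = (x >> 1) + (y >> 1) + (x & y & 1);
      uint8_t oldCeil  = (x >> 1) + (y >> 1) + ((x | y) & 1);
      // Generic expansion: four data ops, four instructions.
      uint8_t newFloor = (x & y) + ((x ^ y) >> 1);
      uint8_t newCeil  = (x | y) - ((x ^ y) >> 1);
      assert(oldFloor == newFloor && oldCeil == newCeil);
    }
  return 0;
}
```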
https://github.com/llvm/llvm-project/pull/95416