[llvm] ee27e5d - [TargetLowering][ARM][AArch64] Remove usage of NoSignedWrap/NoUnsignedWrap from AVGFLOOR/CEIL transform.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Thu Jun 1 14:20:53 PDT 2023


Author: Craig Topper
Date: 2023-06-01T14:18:08-07:00
New Revision: ee27e5df9e67bffbc629ea8638524ee7725d12ab

URL: https://github.com/llvm/llvm-project/commit/ee27e5df9e67bffbc629ea8638524ee7725d12ab
DIFF: https://github.com/llvm/llvm-project/commit/ee27e5df9e67bffbc629ea8638524ee7725d12ab.diff

LOG: [TargetLowering][ARM][AArch64] Remove usage of NoSignedWrap/NoUnsignedWrap from AVGFLOOR/CEIL transform.

Use computeOverflowForUnsignedAdd and computeOverflowForSignedAdd
instead. Unfortunately, this recomputes some known bits and sign bits
we may have already computed, but this was the easiest fix without a
lot of restructuring.

This recovers the regressions from D151472.

Reviewed By: RKSimon

Differential Revision: https://reviews.llvm.org/D151858
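
For context, combineShiftToAVG rewrites a truncated "(a + b) >> 1" into an
ISD::AVGFLOOR/AVGCEIL node, and may instead perform the transform in the
wider type when the add provably cannot wrap. The new guard can be sketched
roughly as follows; this is a simplified paraphrase of the hunk below, not
the committed code, though computeOverflowForAdd and OFK_Never are the
actual SelectionDAG APIs the patch uses:

    #include "llvm/CodeGen/SelectionDAG.h"
    using namespace llvm;

    // Sketch: decide whether an ADD node can never overflow, using value
    // analysis rather than the nuw/nsw flags that earlier DAG combines
    // may have dropped.
    static bool addNeverOverflows(SelectionDAG &DAG, bool IsSigned,
                                  SDValue Add) {
      // computeOverflowForAdd dispatches to computeOverflowForSignedAdd
      // or computeOverflowForUnsignedAdd, which rely on known bits and
      // sign bits of the operands.
      return DAG.computeOverflowForAdd(IsSigned, Add.getOperand(0),
                                       Add.getOperand(1)) ==
             SelectionDAG::OFK_Never;
    }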

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
    llvm/test/CodeGen/AArch64/arm64-vhadd.ll
    llvm/test/CodeGen/AArch64/sve-hadd.ll
    llvm/test/CodeGen/Thumb2/mve-vhadd.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 58f6e2a36b212..4c443600f9413 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1033,13 +1033,17 @@ static SDValue combineShiftToAVG(SDValue Op, SelectionDAG &DAG,
   if (!TLI.isOperationLegalOrCustom(AVGOpc, NVT)) {
     // If we could not transform, and (both) adds are nuw/nsw, we can use the
     // larger type size to do the transform.
-    if (((!IsSigned && Add->getFlags().hasNoUnsignedWrap() &&
-          (!Add2 || Add2->getFlags().hasNoUnsignedWrap())) ||
-         (IsSigned && Add->getFlags().hasNoSignedWrap() &&
-          (!Add2 || Add2->getFlags().hasNoSignedWrap()))) &&
-        TLI.isOperationLegalOrCustom(AVGOpc, VT)) {
+    if (!TLI.isOperationLegalOrCustom(AVGOpc, VT))
+      return SDValue();
+
+    if (DAG.computeOverflowForAdd(IsSigned, Add.getOperand(0),
+                                  Add.getOperand(1)) ==
+            SelectionDAG::OFK_Never &&
+        (!Add2 || DAG.computeOverflowForAdd(IsSigned, Add2.getOperand(0),
+                                            Add2.getOperand(1)) ==
+                      SelectionDAG::OFK_Never))
       NVT = VT;
-    } else
+    else
       return SDValue();
   }
 

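The test updates below all follow the same shape. As an illustration (the
function is hypothetical, mirroring hadd8x2_zext_lsr in arm64-vhadd.ll),
IR like the following now selects a halving add even though the add carries
no nuw flag, because the zexts let known-bits analysis prove the i16 add
cannot wrap:

    define <2 x i16> @uhadd_sketch(<2 x i8> %a, <2 x i8> %b) {
      %za = zext <2 x i8> %a to <2 x i16>
      %zb = zext <2 x i8> %b to <2 x i16>
      ; no nuw needed: both operands fit in 8 bits, so the i16 add cannot wrap
      %add = add <2 x i16> %za, %zb
      %avg = lshr <2 x i16> %add, <i16 1, i16 1>
      ret <2 x i16> %avg
    }
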
diff --git a/llvm/test/CodeGen/AArch64/arm64-vhadd.ll b/llvm/test/CodeGen/AArch64/arm64-vhadd.ll
index 0626d9e1147e6..c6848d44c3d6a 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vhadd.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vhadd.ll
@@ -873,8 +873,8 @@ define <2 x i16> @hadd8x2_sext_asr(<2 x i8> %src1, <2 x i8> %src2) {
 ; CHECK-NEXT:    shl.2s v0, v0, #24
 ; CHECK-NEXT:    shl.2s v1, v1, #24
 ; CHECK-NEXT:    sshr.2s v0, v0, #24
-; CHECK-NEXT:    ssra.2s v0, v1, #24
-; CHECK-NEXT:    sshr.2s v0, v0, #1
+; CHECK-NEXT:    sshr.2s v1, v1, #24
+; CHECK-NEXT:    shadd.2s v0, v0, v1
 ; CHECK-NEXT:    ret
   %zextsrc1 = sext <2 x i8> %src1 to <2 x i16>
   %zextsrc2 = sext <2 x i8> %src2 to <2 x i16>
@@ -889,8 +889,7 @@ define <2 x i16> @hadd8x2_zext_asr(<2 x i8> %src1, <2 x i8> %src2) {
 ; CHECK-NEXT:    movi d2, #0x0000ff000000ff
 ; CHECK-NEXT:    and.8b v0, v0, v2
 ; CHECK-NEXT:    and.8b v1, v1, v2
-; CHECK-NEXT:    add.2s v0, v0, v1
-; CHECK-NEXT:    ushr.2s v0, v0, #1
+; CHECK-NEXT:    uhadd.2s v0, v0, v1
 ; CHECK-NEXT:    ret
   %zextsrc1 = zext <2 x i8> %src1 to <2 x i16>
   %zextsrc2 = zext <2 x i8> %src2 to <2 x i16>
@@ -923,8 +922,7 @@ define <2 x i16> @hadd8x2_zext_lsr(<2 x i8> %src1, <2 x i8> %src2) {
 ; CHECK-NEXT:    movi d2, #0x0000ff000000ff
 ; CHECK-NEXT:    and.8b v0, v0, v2
 ; CHECK-NEXT:    and.8b v1, v1, v2
-; CHECK-NEXT:    add.2s v0, v0, v1
-; CHECK-NEXT:    ushr.2s v0, v0, #1
+; CHECK-NEXT:    uhadd.2s v0, v0, v1
 ; CHECK-NEXT:    ret
   %zextsrc1 = zext <2 x i8> %src1 to <2 x i16>
   %zextsrc2 = zext <2 x i8> %src2 to <2 x i16>
@@ -1006,9 +1004,7 @@ define <2 x i16> @rhadd8x2_sext_asr(<2 x i8> %src1, <2 x i8> %src2) {
 ; CHECK-NEXT:    shl.2s v1, v1, #24
 ; CHECK-NEXT:    sshr.2s v0, v0, #24
 ; CHECK-NEXT:    sshr.2s v1, v1, #24
-; CHECK-NEXT:    mvn.8b v0, v0
-; CHECK-NEXT:    sub.2s v0, v1, v0
-; CHECK-NEXT:    sshr.2s v0, v0, #1
+; CHECK-NEXT:    srhadd.2s v0, v0, v1
 ; CHECK-NEXT:    ret
   %zextsrc1 = sext <2 x i8> %src1 to <2 x i16>
   %zextsrc2 = sext <2 x i8> %src2 to <2 x i16>
@@ -1024,9 +1020,7 @@ define <2 x i16> @rhadd8x2_zext_asr(<2 x i8> %src1, <2 x i8> %src2) {
 ; CHECK-NEXT:    movi d2, #0x0000ff000000ff
 ; CHECK-NEXT:    and.8b v0, v0, v2
 ; CHECK-NEXT:    and.8b v1, v1, v2
-; CHECK-NEXT:    mvn.8b v0, v0
-; CHECK-NEXT:    sub.2s v0, v1, v0
-; CHECK-NEXT:    ushr.2s v0, v0, #1
+; CHECK-NEXT:    urhadd.2s v0, v0, v1
 ; CHECK-NEXT:    ret
   %zextsrc1 = zext <2 x i8> %src1 to <2 x i16>
   %zextsrc2 = zext <2 x i8> %src2 to <2 x i16>
@@ -1063,9 +1057,7 @@ define <2 x i16> @rhadd8x2_zext_lsr(<2 x i8> %src1, <2 x i8> %src2) {
 ; CHECK-NEXT:    movi d2, #0x0000ff000000ff
 ; CHECK-NEXT:    and.8b v0, v0, v2
 ; CHECK-NEXT:    and.8b v1, v1, v2
-; CHECK-NEXT:    mvn.8b v0, v0
-; CHECK-NEXT:    sub.2s v0, v1, v0
-; CHECK-NEXT:    ushr.2s v0, v0, #1
+; CHECK-NEXT:    urhadd.2s v0, v0, v1
 ; CHECK-NEXT:    ret
   %zextsrc1 = zext <2 x i8> %src1 to <2 x i16>
   %zextsrc2 = zext <2 x i8> %src2 to <2 x i16>

diff --git a/llvm/test/CodeGen/AArch64/sve-hadd.ll b/llvm/test/CodeGen/AArch64/sve-hadd.ll
index 28242547b5720..7936094af1c0a 100644
--- a/llvm/test/CodeGen/AArch64/sve-hadd.ll
+++ b/llvm/test/CodeGen/AArch64/sve-hadd.ll
@@ -219,14 +219,22 @@ entry:
 }
 
 define <vscale x 2 x i16> @hadds_v2i16(<vscale x 2 x i16> %s0, <vscale x 2 x i16> %s1) {
-; CHECK-LABEL: hadds_v2i16:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    sxth z0.d, p0/m, z0.d
-; CHECK-NEXT:    sxth z1.d, p0/m, z1.d
-; CHECK-NEXT:    add z0.d, z0.d, z1.d
-; CHECK-NEXT:    asr z0.d, z0.d, #1
-; CHECK-NEXT:    ret
+; SVE-LABEL: hadds_v2i16:
+; SVE:       // %bb.0: // %entry
+; SVE-NEXT:    ptrue p0.d
+; SVE-NEXT:    sxth z0.d, p0/m, z0.d
+; SVE-NEXT:    sxth z1.d, p0/m, z1.d
+; SVE-NEXT:    add z0.d, z0.d, z1.d
+; SVE-NEXT:    asr z0.d, z0.d, #1
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: hadds_v2i16:
+; SVE2:       // %bb.0: // %entry
+; SVE2-NEXT:    ptrue p0.d
+; SVE2-NEXT:    sxth z0.d, p0/m, z0.d
+; SVE2-NEXT:    sxth z1.d, p0/m, z1.d
+; SVE2-NEXT:    shadd z0.d, p0/m, z0.d, z1.d
+; SVE2-NEXT:    ret
 entry:
   %s0s = sext <vscale x 2 x i16> %s0 to <vscale x 2 x i32>
   %s1s = sext <vscale x 2 x i16> %s1 to <vscale x 2 x i32>
@@ -256,13 +264,21 @@ entry:
 }
 
 define <vscale x 2 x i16> @haddu_v2i16(<vscale x 2 x i16> %s0, <vscale x 2 x i16> %s1) {
-; CHECK-LABEL: haddu_v2i16:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    and z0.d, z0.d, #0xffff
-; CHECK-NEXT:    and z1.d, z1.d, #0xffff
-; CHECK-NEXT:    add z0.d, z0.d, z1.d
-; CHECK-NEXT:    lsr z0.d, z0.d, #1
-; CHECK-NEXT:    ret
+; SVE-LABEL: haddu_v2i16:
+; SVE:       // %bb.0: // %entry
+; SVE-NEXT:    and z0.d, z0.d, #0xffff
+; SVE-NEXT:    and z1.d, z1.d, #0xffff
+; SVE-NEXT:    add z0.d, z0.d, z1.d
+; SVE-NEXT:    lsr z0.d, z0.d, #1
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: haddu_v2i16:
+; SVE2:       // %bb.0: // %entry
+; SVE2-NEXT:    ptrue p0.d
+; SVE2-NEXT:    and z0.d, z0.d, #0xffff
+; SVE2-NEXT:    and z1.d, z1.d, #0xffff
+; SVE2-NEXT:    uhadd z0.d, p0/m, z0.d, z1.d
+; SVE2-NEXT:    ret
 entry:
   %s0s = zext <vscale x 2 x i16> %s0 to <vscale x 2 x i32>
   %s1s = zext <vscale x 2 x i16> %s1 to <vscale x 2 x i32>
@@ -417,14 +433,22 @@ entry:
 }
 
 define <vscale x 4 x i8> @hadds_v4i8(<vscale x 4 x i8> %s0, <vscale x 4 x i8> %s1) {
-; CHECK-LABEL: hadds_v4i8:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    sxtb z0.s, p0/m, z0.s
-; CHECK-NEXT:    sxtb z1.s, p0/m, z1.s
-; CHECK-NEXT:    add z0.s, z0.s, z1.s
-; CHECK-NEXT:    asr z0.s, z0.s, #1
-; CHECK-NEXT:    ret
+; SVE-LABEL: hadds_v4i8:
+; SVE:       // %bb.0: // %entry
+; SVE-NEXT:    ptrue p0.s
+; SVE-NEXT:    sxtb z0.s, p0/m, z0.s
+; SVE-NEXT:    sxtb z1.s, p0/m, z1.s
+; SVE-NEXT:    add z0.s, z0.s, z1.s
+; SVE-NEXT:    asr z0.s, z0.s, #1
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: hadds_v4i8:
+; SVE2:       // %bb.0: // %entry
+; SVE2-NEXT:    ptrue p0.s
+; SVE2-NEXT:    sxtb z0.s, p0/m, z0.s
+; SVE2-NEXT:    sxtb z1.s, p0/m, z1.s
+; SVE2-NEXT:    shadd z0.s, p0/m, z0.s, z1.s
+; SVE2-NEXT:    ret
 entry:
   %s0s = sext <vscale x 4 x i8> %s0 to <vscale x 4 x i16>
   %s1s = sext <vscale x 4 x i8> %s1 to <vscale x 4 x i16>
@@ -454,13 +478,21 @@ entry:
 }
 
 define <vscale x 4 x i8> @haddu_v4i8(<vscale x 4 x i8> %s0, <vscale x 4 x i8> %s1) {
-; CHECK-LABEL: haddu_v4i8:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    and z0.s, z0.s, #0xff
-; CHECK-NEXT:    and z1.s, z1.s, #0xff
-; CHECK-NEXT:    add z0.s, z0.s, z1.s
-; CHECK-NEXT:    lsr z0.s, z0.s, #1
-; CHECK-NEXT:    ret
+; SVE-LABEL: haddu_v4i8:
+; SVE:       // %bb.0: // %entry
+; SVE-NEXT:    and z0.s, z0.s, #0xff
+; SVE-NEXT:    and z1.s, z1.s, #0xff
+; SVE-NEXT:    add z0.s, z0.s, z1.s
+; SVE-NEXT:    lsr z0.s, z0.s, #1
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: haddu_v4i8:
+; SVE2:       // %bb.0: // %entry
+; SVE2-NEXT:    ptrue p0.s
+; SVE2-NEXT:    and z0.s, z0.s, #0xff
+; SVE2-NEXT:    and z1.s, z1.s, #0xff
+; SVE2-NEXT:    uhadd z0.s, p0/m, z0.s, z1.s
+; SVE2-NEXT:    ret
 entry:
   %s0s = zext <vscale x 4 x i8> %s0 to <vscale x 4 x i16>
   %s1s = zext <vscale x 4 x i8> %s1 to <vscale x 4 x i16>
@@ -693,16 +725,24 @@ entry:
 }
 
 define <vscale x 2 x i32> @rhadds_v2i32(<vscale x 2 x i32> %s0, <vscale x 2 x i32> %s1) {
-; CHECK-LABEL: rhadds_v2i32:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov z2.d, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    sxtw z0.d, p0/m, z0.d
-; CHECK-NEXT:    sxtw z1.d, p0/m, z1.d
-; CHECK-NEXT:    eor z0.d, z0.d, z2.d
-; CHECK-NEXT:    sub z0.d, z1.d, z0.d
-; CHECK-NEXT:    asr z0.d, z0.d, #1
-; CHECK-NEXT:    ret
+; SVE-LABEL: rhadds_v2i32:
+; SVE:       // %bb.0: // %entry
+; SVE-NEXT:    ptrue p0.d
+; SVE-NEXT:    mov z2.d, #-1 // =0xffffffffffffffff
+; SVE-NEXT:    sxtw z0.d, p0/m, z0.d
+; SVE-NEXT:    sxtw z1.d, p0/m, z1.d
+; SVE-NEXT:    eor z0.d, z0.d, z2.d
+; SVE-NEXT:    sub z0.d, z1.d, z0.d
+; SVE-NEXT:    asr z0.d, z0.d, #1
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: rhadds_v2i32:
+; SVE2:       // %bb.0: // %entry
+; SVE2-NEXT:    ptrue p0.d
+; SVE2-NEXT:    sxtw z0.d, p0/m, z0.d
+; SVE2-NEXT:    sxtw z1.d, p0/m, z1.d
+; SVE2-NEXT:    srhadd z0.d, p0/m, z0.d, z1.d
+; SVE2-NEXT:    ret
 entry:
   %s0s = sext <vscale x 2 x i32> %s0 to <vscale x 2 x i64>
   %s1s = sext <vscale x 2 x i32> %s1 to <vscale x 2 x i64>
@@ -884,15 +924,23 @@ entry:
 }
 
 define <vscale x 2 x i16> @rhaddu_v2i16(<vscale x 2 x i16> %s0, <vscale x 2 x i16> %s1) {
-; CHECK-LABEL: rhaddu_v2i16:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov z2.d, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    and z0.d, z0.d, #0xffff
-; CHECK-NEXT:    and z1.d, z1.d, #0xffff
-; CHECK-NEXT:    eor z0.d, z0.d, z2.d
-; CHECK-NEXT:    sub z0.d, z1.d, z0.d
-; CHECK-NEXT:    lsr z0.d, z0.d, #1
-; CHECK-NEXT:    ret
+; SVE-LABEL: rhaddu_v2i16:
+; SVE:       // %bb.0: // %entry
+; SVE-NEXT:    mov z2.d, #-1 // =0xffffffffffffffff
+; SVE-NEXT:    and z0.d, z0.d, #0xffff
+; SVE-NEXT:    and z1.d, z1.d, #0xffff
+; SVE-NEXT:    eor z0.d, z0.d, z2.d
+; SVE-NEXT:    sub z0.d, z1.d, z0.d
+; SVE-NEXT:    lsr z0.d, z0.d, #1
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: rhaddu_v2i16:
+; SVE2:       // %bb.0: // %entry
+; SVE2-NEXT:    ptrue p0.d
+; SVE2-NEXT:    and z0.d, z0.d, #0xffff
+; SVE2-NEXT:    and z1.d, z1.d, #0xffff
+; SVE2-NEXT:    urhadd z0.d, p0/m, z0.d, z1.d
+; SVE2-NEXT:    ret
 entry:
   %s0s = zext <vscale x 2 x i16> %s0 to <vscale x 2 x i32>
   %s1s = zext <vscale x 2 x i16> %s1 to <vscale x 2 x i32>
@@ -904,16 +952,24 @@ entry:
 }
 
 define <vscale x 4 x i16> @rhadds_v4i16(<vscale x 4 x i16> %s0, <vscale x 4 x i16> %s1) {
-; CHECK-LABEL: rhadds_v4i16:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    mov z2.s, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    sxth z0.s, p0/m, z0.s
-; CHECK-NEXT:    sxth z1.s, p0/m, z1.s
-; CHECK-NEXT:    eor z0.d, z0.d, z2.d
-; CHECK-NEXT:    sub z0.s, z1.s, z0.s
-; CHECK-NEXT:    asr z0.s, z0.s, #1
-; CHECK-NEXT:    ret
+; SVE-LABEL: rhadds_v4i16:
+; SVE:       // %bb.0: // %entry
+; SVE-NEXT:    ptrue p0.s
+; SVE-NEXT:    mov z2.s, #-1 // =0xffffffffffffffff
+; SVE-NEXT:    sxth z0.s, p0/m, z0.s
+; SVE-NEXT:    sxth z1.s, p0/m, z1.s
+; SVE-NEXT:    eor z0.d, z0.d, z2.d
+; SVE-NEXT:    sub z0.s, z1.s, z0.s
+; SVE-NEXT:    asr z0.s, z0.s, #1
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: rhadds_v4i16:
+; SVE2:       // %bb.0: // %entry
+; SVE2-NEXT:    ptrue p0.s
+; SVE2-NEXT:    sxth z0.s, p0/m, z0.s
+; SVE2-NEXT:    sxth z1.s, p0/m, z1.s
+; SVE2-NEXT:    srhadd z0.s, p0/m, z0.s, z1.s
+; SVE2-NEXT:    ret
 entry:
   %s0s = sext <vscale x 4 x i16> %s0 to <vscale x 4 x i32>
   %s1s = sext <vscale x 4 x i16> %s1 to <vscale x 4 x i32>
@@ -1095,15 +1151,23 @@ entry:
 }
 
 define <vscale x 4 x i8> @rhaddu_v4i8(<vscale x 4 x i8> %s0, <vscale x 4 x i8> %s1) {
-; CHECK-LABEL: rhaddu_v4i8:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov z2.s, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    and z0.s, z0.s, #0xff
-; CHECK-NEXT:    and z1.s, z1.s, #0xff
-; CHECK-NEXT:    eor z0.d, z0.d, z2.d
-; CHECK-NEXT:    sub z0.s, z1.s, z0.s
-; CHECK-NEXT:    lsr z0.s, z0.s, #1
-; CHECK-NEXT:    ret
+; SVE-LABEL: rhaddu_v4i8:
+; SVE:       // %bb.0: // %entry
+; SVE-NEXT:    mov z2.s, #-1 // =0xffffffffffffffff
+; SVE-NEXT:    and z0.s, z0.s, #0xff
+; SVE-NEXT:    and z1.s, z1.s, #0xff
+; SVE-NEXT:    eor z0.d, z0.d, z2.d
+; SVE-NEXT:    sub z0.s, z1.s, z0.s
+; SVE-NEXT:    lsr z0.s, z0.s, #1
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: rhaddu_v4i8:
+; SVE2:       // %bb.0: // %entry
+; SVE2-NEXT:    ptrue p0.s
+; SVE2-NEXT:    and z0.s, z0.s, #0xff
+; SVE2-NEXT:    and z1.s, z1.s, #0xff
+; SVE2-NEXT:    urhadd z0.s, p0/m, z0.s, z1.s
+; SVE2-NEXT:    ret
 entry:
   %s0s = zext <vscale x 4 x i8> %s0 to <vscale x 4 x i16>
   %s1s = zext <vscale x 4 x i8> %s1 to <vscale x 4 x i16>
@@ -1115,16 +1179,24 @@ entry:
 }
 
 define <vscale x 8 x i8> @rhadds_v8i8(<vscale x 8 x i8> %s0, <vscale x 8 x i8> %s1) {
-; CHECK-LABEL: rhadds_v8i8:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    mov z2.h, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    sxtb z0.h, p0/m, z0.h
-; CHECK-NEXT:    sxtb z1.h, p0/m, z1.h
-; CHECK-NEXT:    eor z0.d, z0.d, z2.d
-; CHECK-NEXT:    sub z0.h, z1.h, z0.h
-; CHECK-NEXT:    asr z0.h, z0.h, #1
-; CHECK-NEXT:    ret
+; SVE-LABEL: rhadds_v8i8:
+; SVE:       // %bb.0: // %entry
+; SVE-NEXT:    ptrue p0.h
+; SVE-NEXT:    mov z2.h, #-1 // =0xffffffffffffffff
+; SVE-NEXT:    sxtb z0.h, p0/m, z0.h
+; SVE-NEXT:    sxtb z1.h, p0/m, z1.h
+; SVE-NEXT:    eor z0.d, z0.d, z2.d
+; SVE-NEXT:    sub z0.h, z1.h, z0.h
+; SVE-NEXT:    asr z0.h, z0.h, #1
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: rhadds_v8i8:
+; SVE2:       // %bb.0: // %entry
+; SVE2-NEXT:    ptrue p0.h
+; SVE2-NEXT:    sxtb z0.h, p0/m, z0.h
+; SVE2-NEXT:    sxtb z1.h, p0/m, z1.h
+; SVE2-NEXT:    srhadd z0.h, p0/m, z0.h, z1.h
+; SVE2-NEXT:    ret
 entry:
   %s0s = sext <vscale x 8 x i8> %s0 to <vscale x 8 x i16>
   %s1s = sext <vscale x 8 x i8> %s1 to <vscale x 8 x i16>

diff --git a/llvm/test/CodeGen/Thumb2/mve-vhadd.ll b/llvm/test/CodeGen/Thumb2/mve-vhadd.ll
index 9b64dde23627f..82a286627a9e5 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vhadd.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vhadd.ll
@@ -116,8 +116,7 @@ define arm_aapcs_vfpcc <4 x i8> @vhaddu_v4i8(<4 x i8> %s0, <4 x i8> %s1) {
 ; CHECK-NEXT:    vmov.i32 q2, #0xff
 ; CHECK-NEXT:    vand q1, q1, q2
 ; CHECK-NEXT:    vand q0, q0, q2
-; CHECK-NEXT:    vadd.i32 q0, q0, q1
-; CHECK-NEXT:    vshr.u32 q0, q0, #1
+; CHECK-NEXT:    vhadd.u32 q0, q0, q1
 ; CHECK-NEXT:    bx lr
 entry:
   %s0s = zext <4 x i8> %s0 to <4 x i16>
@@ -313,12 +312,9 @@ define arm_aapcs_vfpcc <4 x i8> @vrhaddu_v4i8(<4 x i8> %s0, <4 x i8> %s1) {
 ; CHECK-LABEL: vrhaddu_v4i8:
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vmov.i32 q2, #0xff
-; CHECK-NEXT:    movs r0, #1
 ; CHECK-NEXT:    vand q1, q1, q2
 ; CHECK-NEXT:    vand q0, q0, q2
-; CHECK-NEXT:    vadd.i32 q0, q0, q1
-; CHECK-NEXT:    vadd.i32 q0, q0, r0
-; CHECK-NEXT:    vshr.u32 q0, q0, #1
+; CHECK-NEXT:    vrhadd.u32 q0, q0, q1
 ; CHECK-NEXT:    bx lr
 entry:
   %s0s = zext <4 x i8> %s0 to <4 x i16>

