[llvm] ee27e5d - [TargetLowering][ARM][AArch64] Remove usage of NoSignedWrap/NoUnsignedWrap from AVGFLOOR/CEIL transform.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 1 14:20:53 PDT 2023
Author: Craig Topper
Date: 2023-06-01T14:18:08-07:00
New Revision: ee27e5df9e67bffbc629ea8638524ee7725d12ab
URL: https://github.com/llvm/llvm-project/commit/ee27e5df9e67bffbc629ea8638524ee7725d12ab
DIFF: https://github.com/llvm/llvm-project/commit/ee27e5df9e67bffbc629ea8638524ee7725d12ab.diff
LOG: [TargetLowering][ARM][AArch64] Remove usage of NoSignedWrap/NoUnsignedWrap from AVGFLOOR/CEIL transform.
Use computeOverflowForUnsignedAdd and computeOverflowForSignedAdd
instead. Unfortunately, this recomputes some known bits and sign bits
that we may have already computed, but it was the easiest fix without a lot
of restructuring.
This recovers the regressions from D151472.
Reviewed By: RKSimon
Differential Revision: https://reviews.llvm.org/D151858
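For context, combineShiftToAVG matches truncated-average patterns of the
kind sketched below. This is a minimal hypothetical illustration, not code
taken from the patch or its tests; the function names @uhadd_sketch and
@urhadd_sketch are made up. Because the adds are performed in a type wide
enough to hold the full sum of the extended operands, SelectionDAG's
overflow analysis can return OFK_Never even when the adds carry no nuw/nsw
flags, which is what the computeOverflowForAdd query in this change relies
on.

  ; unsigned floor average: trunc((zext(a) + zext(b)) >> 1), selectable as uhadd/vhadd
  define <4 x i8> @uhadd_sketch(<4 x i8> %a, <4 x i8> %b) {
    %za  = zext <4 x i8> %a to <4 x i16>
    %zb  = zext <4 x i8> %b to <4 x i16>
    %add = add <4 x i16> %za, %zb
    %shr = lshr <4 x i16> %add, <i16 1, i16 1, i16 1, i16 1>
    %res = trunc <4 x i16> %shr to <4 x i8>
    ret <4 x i8> %res
  }

  ; unsigned rounding average: trunc((zext(a) + zext(b) + 1) >> 1), selectable as urhadd/vrhadd
  define <4 x i8> @urhadd_sketch(<4 x i8> %a, <4 x i8> %b) {
    %za  = zext <4 x i8> %a to <4 x i16>
    %zb  = zext <4 x i8> %b to <4 x i16>
    %add = add <4 x i16> %za, %zb
    %inc = add <4 x i16> %add, <i16 1, i16 1, i16 1, i16 1>
    %shr = lshr <4 x i16> %inc, <i16 1, i16 1, i16 1, i16 1>
    %res = trunc <4 x i16> %shr to <4 x i8>
    ret <4 x i8> %res
  }

The signed variants use sext and ashr instead of zext and lshr. The updated
AArch64, SVE and MVE tests below show such patterns now selecting
shadd/uhadd and srhadd/urhadd again.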
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/test/CodeGen/AArch64/arm64-vhadd.ll
llvm/test/CodeGen/AArch64/sve-hadd.ll
llvm/test/CodeGen/Thumb2/mve-vhadd.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 58f6e2a36b212..4c443600f9413 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1033,13 +1033,17 @@ static SDValue combineShiftToAVG(SDValue Op, SelectionDAG &DAG,
if (!TLI.isOperationLegalOrCustom(AVGOpc, NVT)) {
// If we could not transform, and (both) adds are nuw/nsw, we can use the
// larger type size to do the transform.
- if (((!IsSigned && Add->getFlags().hasNoUnsignedWrap() &&
- (!Add2 || Add2->getFlags().hasNoUnsignedWrap())) ||
- (IsSigned && Add->getFlags().hasNoSignedWrap() &&
- (!Add2 || Add2->getFlags().hasNoSignedWrap()))) &&
- TLI.isOperationLegalOrCustom(AVGOpc, VT)) {
+ if (!TLI.isOperationLegalOrCustom(AVGOpc, VT))
+ return SDValue();
+
+ if (DAG.computeOverflowForAdd(IsSigned, Add.getOperand(0),
+ Add.getOperand(1)) ==
+ SelectionDAG::OFK_Never &&
+ (!Add2 || DAG.computeOverflowForAdd(IsSigned, Add2.getOperand(0),
+ Add2.getOperand(1)) ==
+ SelectionDAG::OFK_Never))
NVT = VT;
- } else
+ else
return SDValue();
}
diff --git a/llvm/test/CodeGen/AArch64/arm64-vhadd.ll b/llvm/test/CodeGen/AArch64/arm64-vhadd.ll
index 0626d9e1147e6..c6848d44c3d6a 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vhadd.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vhadd.ll
@@ -873,8 +873,8 @@ define <2 x i16> @hadd8x2_sext_asr(<2 x i8> %src1, <2 x i8> %src2) {
; CHECK-NEXT: shl.2s v0, v0, #24
; CHECK-NEXT: shl.2s v1, v1, #24
; CHECK-NEXT: sshr.2s v0, v0, #24
-; CHECK-NEXT: ssra.2s v0, v1, #24
-; CHECK-NEXT: sshr.2s v0, v0, #1
+; CHECK-NEXT: sshr.2s v1, v1, #24
+; CHECK-NEXT: shadd.2s v0, v0, v1
; CHECK-NEXT: ret
%zextsrc1 = sext <2 x i8> %src1 to <2 x i16>
%zextsrc2 = sext <2 x i8> %src2 to <2 x i16>
@@ -889,8 +889,7 @@ define <2 x i16> @hadd8x2_zext_asr(<2 x i8> %src1, <2 x i8> %src2) {
; CHECK-NEXT: movi d2, #0x0000ff000000ff
; CHECK-NEXT: and.8b v0, v0, v2
; CHECK-NEXT: and.8b v1, v1, v2
-; CHECK-NEXT: add.2s v0, v0, v1
-; CHECK-NEXT: ushr.2s v0, v0, #1
+; CHECK-NEXT: uhadd.2s v0, v0, v1
; CHECK-NEXT: ret
%zextsrc1 = zext <2 x i8> %src1 to <2 x i16>
%zextsrc2 = zext <2 x i8> %src2 to <2 x i16>
@@ -923,8 +922,7 @@ define <2 x i16> @hadd8x2_zext_lsr(<2 x i8> %src1, <2 x i8> %src2) {
; CHECK-NEXT: movi d2, #0x0000ff000000ff
; CHECK-NEXT: and.8b v0, v0, v2
; CHECK-NEXT: and.8b v1, v1, v2
-; CHECK-NEXT: add.2s v0, v0, v1
-; CHECK-NEXT: ushr.2s v0, v0, #1
+; CHECK-NEXT: uhadd.2s v0, v0, v1
; CHECK-NEXT: ret
%zextsrc1 = zext <2 x i8> %src1 to <2 x i16>
%zextsrc2 = zext <2 x i8> %src2 to <2 x i16>
@@ -1006,9 +1004,7 @@ define <2 x i16> @rhadd8x2_sext_asr(<2 x i8> %src1, <2 x i8> %src2) {
; CHECK-NEXT: shl.2s v1, v1, #24
; CHECK-NEXT: sshr.2s v0, v0, #24
; CHECK-NEXT: sshr.2s v1, v1, #24
-; CHECK-NEXT: mvn.8b v0, v0
-; CHECK-NEXT: sub.2s v0, v1, v0
-; CHECK-NEXT: sshr.2s v0, v0, #1
+; CHECK-NEXT: srhadd.2s v0, v0, v1
; CHECK-NEXT: ret
%zextsrc1 = sext <2 x i8> %src1 to <2 x i16>
%zextsrc2 = sext <2 x i8> %src2 to <2 x i16>
@@ -1024,9 +1020,7 @@ define <2 x i16> @rhadd8x2_zext_asr(<2 x i8> %src1, <2 x i8> %src2) {
; CHECK-NEXT: movi d2, #0x0000ff000000ff
; CHECK-NEXT: and.8b v0, v0, v2
; CHECK-NEXT: and.8b v1, v1, v2
-; CHECK-NEXT: mvn.8b v0, v0
-; CHECK-NEXT: sub.2s v0, v1, v0
-; CHECK-NEXT: ushr.2s v0, v0, #1
+; CHECK-NEXT: urhadd.2s v0, v0, v1
; CHECK-NEXT: ret
%zextsrc1 = zext <2 x i8> %src1 to <2 x i16>
%zextsrc2 = zext <2 x i8> %src2 to <2 x i16>
@@ -1063,9 +1057,7 @@ define <2 x i16> @rhadd8x2_zext_lsr(<2 x i8> %src1, <2 x i8> %src2) {
; CHECK-NEXT: movi d2, #0x0000ff000000ff
; CHECK-NEXT: and.8b v0, v0, v2
; CHECK-NEXT: and.8b v1, v1, v2
-; CHECK-NEXT: mvn.8b v0, v0
-; CHECK-NEXT: sub.2s v0, v1, v0
-; CHECK-NEXT: ushr.2s v0, v0, #1
+; CHECK-NEXT: urhadd.2s v0, v0, v1
; CHECK-NEXT: ret
%zextsrc1 = zext <2 x i8> %src1 to <2 x i16>
%zextsrc2 = zext <2 x i8> %src2 to <2 x i16>
diff --git a/llvm/test/CodeGen/AArch64/sve-hadd.ll b/llvm/test/CodeGen/AArch64/sve-hadd.ll
index 28242547b5720..7936094af1c0a 100644
--- a/llvm/test/CodeGen/AArch64/sve-hadd.ll
+++ b/llvm/test/CodeGen/AArch64/sve-hadd.ll
@@ -219,14 +219,22 @@ entry:
}
define <vscale x 2 x i16> @hadds_v2i16(<vscale x 2 x i16> %s0, <vscale x 2 x i16> %s1) {
-; CHECK-LABEL: hadds_v2i16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: sxth z0.d, p0/m, z0.d
-; CHECK-NEXT: sxth z1.d, p0/m, z1.d
-; CHECK-NEXT: add z0.d, z0.d, z1.d
-; CHECK-NEXT: asr z0.d, z0.d, #1
-; CHECK-NEXT: ret
+; SVE-LABEL: hadds_v2i16:
+; SVE: // %bb.0: // %entry
+; SVE-NEXT: ptrue p0.d
+; SVE-NEXT: sxth z0.d, p0/m, z0.d
+; SVE-NEXT: sxth z1.d, p0/m, z1.d
+; SVE-NEXT: add z0.d, z0.d, z1.d
+; SVE-NEXT: asr z0.d, z0.d, #1
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: hadds_v2i16:
+; SVE2: // %bb.0: // %entry
+; SVE2-NEXT: ptrue p0.d
+; SVE2-NEXT: sxth z0.d, p0/m, z0.d
+; SVE2-NEXT: sxth z1.d, p0/m, z1.d
+; SVE2-NEXT: shadd z0.d, p0/m, z0.d, z1.d
+; SVE2-NEXT: ret
entry:
%s0s = sext <vscale x 2 x i16> %s0 to <vscale x 2 x i32>
%s1s = sext <vscale x 2 x i16> %s1 to <vscale x 2 x i32>
@@ -256,13 +264,21 @@ entry:
}
define <vscale x 2 x i16> @haddu_v2i16(<vscale x 2 x i16> %s0, <vscale x 2 x i16> %s1) {
-; CHECK-LABEL: haddu_v2i16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: and z0.d, z0.d, #0xffff
-; CHECK-NEXT: and z1.d, z1.d, #0xffff
-; CHECK-NEXT: add z0.d, z0.d, z1.d
-; CHECK-NEXT: lsr z0.d, z0.d, #1
-; CHECK-NEXT: ret
+; SVE-LABEL: haddu_v2i16:
+; SVE: // %bb.0: // %entry
+; SVE-NEXT: and z0.d, z0.d, #0xffff
+; SVE-NEXT: and z1.d, z1.d, #0xffff
+; SVE-NEXT: add z0.d, z0.d, z1.d
+; SVE-NEXT: lsr z0.d, z0.d, #1
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: haddu_v2i16:
+; SVE2: // %bb.0: // %entry
+; SVE2-NEXT: ptrue p0.d
+; SVE2-NEXT: and z0.d, z0.d, #0xffff
+; SVE2-NEXT: and z1.d, z1.d, #0xffff
+; SVE2-NEXT: uhadd z0.d, p0/m, z0.d, z1.d
+; SVE2-NEXT: ret
entry:
%s0s = zext <vscale x 2 x i16> %s0 to <vscale x 2 x i32>
%s1s = zext <vscale x 2 x i16> %s1 to <vscale x 2 x i32>
@@ -417,14 +433,22 @@ entry:
}
define <vscale x 4 x i8> @hadds_v4i8(<vscale x 4 x i8> %s0, <vscale x 4 x i8> %s1) {
-; CHECK-LABEL: hadds_v4i8:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: sxtb z0.s, p0/m, z0.s
-; CHECK-NEXT: sxtb z1.s, p0/m, z1.s
-; CHECK-NEXT: add z0.s, z0.s, z1.s
-; CHECK-NEXT: asr z0.s, z0.s, #1
-; CHECK-NEXT: ret
+; SVE-LABEL: hadds_v4i8:
+; SVE: // %bb.0: // %entry
+; SVE-NEXT: ptrue p0.s
+; SVE-NEXT: sxtb z0.s, p0/m, z0.s
+; SVE-NEXT: sxtb z1.s, p0/m, z1.s
+; SVE-NEXT: add z0.s, z0.s, z1.s
+; SVE-NEXT: asr z0.s, z0.s, #1
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: hadds_v4i8:
+; SVE2: // %bb.0: // %entry
+; SVE2-NEXT: ptrue p0.s
+; SVE2-NEXT: sxtb z0.s, p0/m, z0.s
+; SVE2-NEXT: sxtb z1.s, p0/m, z1.s
+; SVE2-NEXT: shadd z0.s, p0/m, z0.s, z1.s
+; SVE2-NEXT: ret
entry:
%s0s = sext <vscale x 4 x i8> %s0 to <vscale x 4 x i16>
%s1s = sext <vscale x 4 x i8> %s1 to <vscale x 4 x i16>
@@ -454,13 +478,21 @@ entry:
}
define <vscale x 4 x i8> @haddu_v4i8(<vscale x 4 x i8> %s0, <vscale x 4 x i8> %s1) {
-; CHECK-LABEL: haddu_v4i8:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: and z0.s, z0.s, #0xff
-; CHECK-NEXT: and z1.s, z1.s, #0xff
-; CHECK-NEXT: add z0.s, z0.s, z1.s
-; CHECK-NEXT: lsr z0.s, z0.s, #1
-; CHECK-NEXT: ret
+; SVE-LABEL: haddu_v4i8:
+; SVE: // %bb.0: // %entry
+; SVE-NEXT: and z0.s, z0.s, #0xff
+; SVE-NEXT: and z1.s, z1.s, #0xff
+; SVE-NEXT: add z0.s, z0.s, z1.s
+; SVE-NEXT: lsr z0.s, z0.s, #1
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: haddu_v4i8:
+; SVE2: // %bb.0: // %entry
+; SVE2-NEXT: ptrue p0.s
+; SVE2-NEXT: and z0.s, z0.s, #0xff
+; SVE2-NEXT: and z1.s, z1.s, #0xff
+; SVE2-NEXT: uhadd z0.s, p0/m, z0.s, z1.s
+; SVE2-NEXT: ret
entry:
%s0s = zext <vscale x 4 x i8> %s0 to <vscale x 4 x i16>
%s1s = zext <vscale x 4 x i8> %s1 to <vscale x 4 x i16>
@@ -693,16 +725,24 @@ entry:
}
define <vscale x 2 x i32> @rhadds_v2i32(<vscale x 2 x i32> %s0, <vscale x 2 x i32> %s1) {
-; CHECK-LABEL: rhadds_v2i32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: mov z2.d, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: sxtw z0.d, p0/m, z0.d
-; CHECK-NEXT: sxtw z1.d, p0/m, z1.d
-; CHECK-NEXT: eor z0.d, z0.d, z2.d
-; CHECK-NEXT: sub z0.d, z1.d, z0.d
-; CHECK-NEXT: asr z0.d, z0.d, #1
-; CHECK-NEXT: ret
+; SVE-LABEL: rhadds_v2i32:
+; SVE: // %bb.0: // %entry
+; SVE-NEXT: ptrue p0.d
+; SVE-NEXT: mov z2.d, #-1 // =0xffffffffffffffff
+; SVE-NEXT: sxtw z0.d, p0/m, z0.d
+; SVE-NEXT: sxtw z1.d, p0/m, z1.d
+; SVE-NEXT: eor z0.d, z0.d, z2.d
+; SVE-NEXT: sub z0.d, z1.d, z0.d
+; SVE-NEXT: asr z0.d, z0.d, #1
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: rhadds_v2i32:
+; SVE2: // %bb.0: // %entry
+; SVE2-NEXT: ptrue p0.d
+; SVE2-NEXT: sxtw z0.d, p0/m, z0.d
+; SVE2-NEXT: sxtw z1.d, p0/m, z1.d
+; SVE2-NEXT: srhadd z0.d, p0/m, z0.d, z1.d
+; SVE2-NEXT: ret
entry:
%s0s = sext <vscale x 2 x i32> %s0 to <vscale x 2 x i64>
%s1s = sext <vscale x 2 x i32> %s1 to <vscale x 2 x i64>
@@ -884,15 +924,23 @@ entry:
}
define <vscale x 2 x i16> @rhaddu_v2i16(<vscale x 2 x i16> %s0, <vscale x 2 x i16> %s1) {
-; CHECK-LABEL: rhaddu_v2i16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov z2.d, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: and z0.d, z0.d, #0xffff
-; CHECK-NEXT: and z1.d, z1.d, #0xffff
-; CHECK-NEXT: eor z0.d, z0.d, z2.d
-; CHECK-NEXT: sub z0.d, z1.d, z0.d
-; CHECK-NEXT: lsr z0.d, z0.d, #1
-; CHECK-NEXT: ret
+; SVE-LABEL: rhaddu_v2i16:
+; SVE: // %bb.0: // %entry
+; SVE-NEXT: mov z2.d, #-1 // =0xffffffffffffffff
+; SVE-NEXT: and z0.d, z0.d, #0xffff
+; SVE-NEXT: and z1.d, z1.d, #0xffff
+; SVE-NEXT: eor z0.d, z0.d, z2.d
+; SVE-NEXT: sub z0.d, z1.d, z0.d
+; SVE-NEXT: lsr z0.d, z0.d, #1
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: rhaddu_v2i16:
+; SVE2: // %bb.0: // %entry
+; SVE2-NEXT: ptrue p0.d
+; SVE2-NEXT: and z0.d, z0.d, #0xffff
+; SVE2-NEXT: and z1.d, z1.d, #0xffff
+; SVE2-NEXT: urhadd z0.d, p0/m, z0.d, z1.d
+; SVE2-NEXT: ret
entry:
%s0s = zext <vscale x 2 x i16> %s0 to <vscale x 2 x i32>
%s1s = zext <vscale x 2 x i16> %s1 to <vscale x 2 x i32>
@@ -904,16 +952,24 @@ entry:
}
define <vscale x 4 x i16> @rhadds_v4i16(<vscale x 4 x i16> %s0, <vscale x 4 x i16> %s1) {
-; CHECK-LABEL: rhadds_v4i16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: mov z2.s, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: sxth z0.s, p0/m, z0.s
-; CHECK-NEXT: sxth z1.s, p0/m, z1.s
-; CHECK-NEXT: eor z0.d, z0.d, z2.d
-; CHECK-NEXT: sub z0.s, z1.s, z0.s
-; CHECK-NEXT: asr z0.s, z0.s, #1
-; CHECK-NEXT: ret
+; SVE-LABEL: rhadds_v4i16:
+; SVE: // %bb.0: // %entry
+; SVE-NEXT: ptrue p0.s
+; SVE-NEXT: mov z2.s, #-1 // =0xffffffffffffffff
+; SVE-NEXT: sxth z0.s, p0/m, z0.s
+; SVE-NEXT: sxth z1.s, p0/m, z1.s
+; SVE-NEXT: eor z0.d, z0.d, z2.d
+; SVE-NEXT: sub z0.s, z1.s, z0.s
+; SVE-NEXT: asr z0.s, z0.s, #1
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: rhadds_v4i16:
+; SVE2: // %bb.0: // %entry
+; SVE2-NEXT: ptrue p0.s
+; SVE2-NEXT: sxth z0.s, p0/m, z0.s
+; SVE2-NEXT: sxth z1.s, p0/m, z1.s
+; SVE2-NEXT: srhadd z0.s, p0/m, z0.s, z1.s
+; SVE2-NEXT: ret
entry:
%s0s = sext <vscale x 4 x i16> %s0 to <vscale x 4 x i32>
%s1s = sext <vscale x 4 x i16> %s1 to <vscale x 4 x i32>
@@ -1095,15 +1151,23 @@ entry:
}
define <vscale x 4 x i8> @rhaddu_v4i8(<vscale x 4 x i8> %s0, <vscale x 4 x i8> %s1) {
-; CHECK-LABEL: rhaddu_v4i8:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov z2.s, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: and z0.s, z0.s, #0xff
-; CHECK-NEXT: and z1.s, z1.s, #0xff
-; CHECK-NEXT: eor z0.d, z0.d, z2.d
-; CHECK-NEXT: sub z0.s, z1.s, z0.s
-; CHECK-NEXT: lsr z0.s, z0.s, #1
-; CHECK-NEXT: ret
+; SVE-LABEL: rhaddu_v4i8:
+; SVE: // %bb.0: // %entry
+; SVE-NEXT: mov z2.s, #-1 // =0xffffffffffffffff
+; SVE-NEXT: and z0.s, z0.s, #0xff
+; SVE-NEXT: and z1.s, z1.s, #0xff
+; SVE-NEXT: eor z0.d, z0.d, z2.d
+; SVE-NEXT: sub z0.s, z1.s, z0.s
+; SVE-NEXT: lsr z0.s, z0.s, #1
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: rhaddu_v4i8:
+; SVE2: // %bb.0: // %entry
+; SVE2-NEXT: ptrue p0.s
+; SVE2-NEXT: and z0.s, z0.s, #0xff
+; SVE2-NEXT: and z1.s, z1.s, #0xff
+; SVE2-NEXT: urhadd z0.s, p0/m, z0.s, z1.s
+; SVE2-NEXT: ret
entry:
%s0s = zext <vscale x 4 x i8> %s0 to <vscale x 4 x i16>
%s1s = zext <vscale x 4 x i8> %s1 to <vscale x 4 x i16>
@@ -1115,16 +1179,24 @@ entry:
}
define <vscale x 8 x i8> @rhadds_v8i8(<vscale x 8 x i8> %s0, <vscale x 8 x i8> %s1) {
-; CHECK-LABEL: rhadds_v8i8:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: mov z2.h, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: sxtb z0.h, p0/m, z0.h
-; CHECK-NEXT: sxtb z1.h, p0/m, z1.h
-; CHECK-NEXT: eor z0.d, z0.d, z2.d
-; CHECK-NEXT: sub z0.h, z1.h, z0.h
-; CHECK-NEXT: asr z0.h, z0.h, #1
-; CHECK-NEXT: ret
+; SVE-LABEL: rhadds_v8i8:
+; SVE: // %bb.0: // %entry
+; SVE-NEXT: ptrue p0.h
+; SVE-NEXT: mov z2.h, #-1 // =0xffffffffffffffff
+; SVE-NEXT: sxtb z0.h, p0/m, z0.h
+; SVE-NEXT: sxtb z1.h, p0/m, z1.h
+; SVE-NEXT: eor z0.d, z0.d, z2.d
+; SVE-NEXT: sub z0.h, z1.h, z0.h
+; SVE-NEXT: asr z0.h, z0.h, #1
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: rhadds_v8i8:
+; SVE2: // %bb.0: // %entry
+; SVE2-NEXT: ptrue p0.h
+; SVE2-NEXT: sxtb z0.h, p0/m, z0.h
+; SVE2-NEXT: sxtb z1.h, p0/m, z1.h
+; SVE2-NEXT: srhadd z0.h, p0/m, z0.h, z1.h
+; SVE2-NEXT: ret
entry:
%s0s = sext <vscale x 8 x i8> %s0 to <vscale x 8 x i16>
%s1s = sext <vscale x 8 x i8> %s1 to <vscale x 8 x i16>
diff --git a/llvm/test/CodeGen/Thumb2/mve-vhadd.ll b/llvm/test/CodeGen/Thumb2/mve-vhadd.ll
index 9b64dde23627f..82a286627a9e5 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vhadd.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vhadd.ll
@@ -116,8 +116,7 @@ define arm_aapcs_vfpcc <4 x i8> @vhaddu_v4i8(<4 x i8> %s0, <4 x i8> %s1) {
; CHECK-NEXT: vmov.i32 q2, #0xff
; CHECK-NEXT: vand q1, q1, q2
; CHECK-NEXT: vand q0, q0, q2
-; CHECK-NEXT: vadd.i32 q0, q0, q1
-; CHECK-NEXT: vshr.u32 q0, q0, #1
+; CHECK-NEXT: vhadd.u32 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
%s0s = zext <4 x i8> %s0 to <4 x i16>
@@ -313,12 +312,9 @@ define arm_aapcs_vfpcc <4 x i8> @vrhaddu_v4i8(<4 x i8> %s0, <4 x i8> %s1) {
; CHECK-LABEL: vrhaddu_v4i8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.i32 q2, #0xff
-; CHECK-NEXT: movs r0, #1
; CHECK-NEXT: vand q1, q1, q2
; CHECK-NEXT: vand q0, q0, q2
-; CHECK-NEXT: vadd.i32 q0, q0, q1
-; CHECK-NEXT: vadd.i32 q0, q0, r0
-; CHECK-NEXT: vshr.u32 q0, q0, #1
+; CHECK-NEXT: vrhadd.u32 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
%s0s = zext <4 x i8> %s0 to <4 x i16>