[llvm] [DAG] fold `avgs(sext(x), sext(y))` -> `sext(avgs(x, y))` (PR #95365)

via llvm-commits llvm-commits at lists.llvm.org
Thu Jun 13 01:57:25 PDT 2024


https://github.com/c8ef updated https://github.com/llvm/llvm-project/pull/95365

From df7448f15f2ced82e343678da0f5ab4b545c8f85 Mon Sep 17 00:00:00 2001
From: c8ef <c8ef at outlook.com>
Date: Thu, 13 Jun 2024 07:29:41 +0000
Subject: [PATCH 1/3] fold avgu(sext(x), sext(y)) -> sext(avgu(x, y))

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 15 ++++
 llvm/test/CodeGen/AArch64/avg.ll              | 84 +++++++++++++++++++
 2 files changed, 99 insertions(+)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 78970bc4fe4ab..0d4df4a7ecda5 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -5237,6 +5237,7 @@ SDValue DAGCombiner::visitAVG(SDNode *N) {
                        DAG.getShiftAmountConstant(1, VT, DL));
 
   // fold avgu(zext(x), zext(y)) -> zext(avgu(x, y))
+  // fold avgu(sext(x), sext(y)) -> sext(avgu(x, y))
   if (sd_match(
           N, m_BinOp(ISD::AVGFLOORU, m_ZExt(m_Value(X)), m_ZExt(m_Value(Y)))) &&
       X.getValueType() == Y.getValueType() &&
@@ -5251,6 +5252,20 @@ SDValue DAGCombiner::visitAVG(SDNode *N) {
     SDValue AvgCeilU = DAG.getNode(ISD::AVGCEILU, DL, X.getValueType(), X, Y);
     return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, AvgCeilU);
   }
+  if (sd_match(
+          N, m_BinOp(ISD::AVGFLOORU, m_SExt(m_Value(X)), m_SExt(m_Value(Y)))) &&
+      X.getValueType() == Y.getValueType() &&
+      hasOperation(ISD::AVGFLOORU, X.getValueType())) {
+    SDValue AvgFloorU = DAG.getNode(ISD::AVGFLOORU, DL, X.getValueType(), X, Y);
+    return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, AvgFloorU);
+  }
+  if (sd_match(
+          N, m_BinOp(ISD::AVGCEILU, m_SExt(m_Value(X)), m_SExt(m_Value(Y)))) &&
+      X.getValueType() == Y.getValueType() &&
+      hasOperation(ISD::AVGCEILU, X.getValueType())) {
+    SDValue AvgCeilU = DAG.getNode(ISD::AVGCEILU, DL, X.getValueType(), X, Y);
+    return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, AvgCeilU);
+  }
 
   // Fold avgflooru(x,y) -> avgceilu(x,y-1) iff y != 0
   // Fold avgflooru(x,y) -> avgceilu(x-1,y) iff x != 0
diff --git a/llvm/test/CodeGen/AArch64/avg.ll b/llvm/test/CodeGen/AArch64/avg.ll
index dc87708555987..e61b47772b7d7 100644
--- a/llvm/test/CodeGen/AArch64/avg.ll
+++ b/llvm/test/CodeGen/AArch64/avg.ll
@@ -68,3 +68,87 @@ define <16 x i16> @zext_avgceilu_mismatch(<16 x i4> %a0, <16 x i8> %a1) {
   %avg = sub <16 x i16> %or, %shift
   ret <16 x i16> %avg
 }
+
+define <16 x i16> @sext_avgflooru(<16 x i8> %a0, <16 x i8> %a1) {
+; CHECK-LABEL: sext_avgflooru:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sshll v2.8h, v0.8b, #0
+; CHECK-NEXT:    sshll2 v0.8h, v0.16b, #0
+; CHECK-NEXT:    sshll v3.8h, v1.8b, #0
+; CHECK-NEXT:    sshll2 v1.8h, v1.16b, #0
+; CHECK-NEXT:    shadd v1.8h, v0.8h, v1.8h
+; CHECK-NEXT:    shadd v0.8h, v2.8h, v3.8h
+; CHECK-NEXT:    ret
+  %x0 = sext <16 x i8> %a0 to <16 x i16>
+  %x1 = sext <16 x i8> %a1 to <16 x i16>
+  %and = and <16 x i16> %x0, %x1
+  %xor = xor <16 x i16> %x0, %x1
+  %shift = ashr <16 x i16> %xor, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %avg = add <16 x i16> %and, %shift
+  ret <16 x i16> %avg
+}
+
+define <16 x i16> @sext_avgflooru_mismatch(<16 x i8> %a0, <16 x i4> %a1) {
+; CHECK-LABEL: sext_avgflooru_mismatch:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ushll2 v2.8h, v1.16b, #0
+; CHECK-NEXT:    ushll v1.8h, v1.8b, #0
+; CHECK-NEXT:    sshll v3.8h, v0.8b, #0
+; CHECK-NEXT:    sshll2 v0.8h, v0.16b, #0
+; CHECK-NEXT:    shl v1.8h, v1.8h, #12
+; CHECK-NEXT:    shl v2.8h, v2.8h, #12
+; CHECK-NEXT:    sshr v4.8h, v1.8h, #12
+; CHECK-NEXT:    sshr v1.8h, v2.8h, #12
+; CHECK-NEXT:    shadd v1.8h, v0.8h, v1.8h
+; CHECK-NEXT:    shadd v0.8h, v3.8h, v4.8h
+; CHECK-NEXT:    ret
+  %x0 = sext <16 x i8> %a0 to <16 x i16>
+  %x1 = sext <16 x i4> %a1 to <16 x i16>
+  %and = and <16 x i16> %x0, %x1
+  %xor = xor <16 x i16> %x0, %x1
+  %shift = ashr <16 x i16> %xor, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %avg = add <16 x i16> %and, %shift
+  ret <16 x i16> %avg
+}
+
+define <16 x i16> @sext_avgceilu(<16 x i8> %a0, <16 x i8> %a1) {
+; CHECK-LABEL: sext_avgceilu:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sshll v2.8h, v0.8b, #0
+; CHECK-NEXT:    sshll2 v0.8h, v0.16b, #0
+; CHECK-NEXT:    sshll v3.8h, v1.8b, #0
+; CHECK-NEXT:    sshll2 v1.8h, v1.16b, #0
+; CHECK-NEXT:    srhadd v1.8h, v0.8h, v1.8h
+; CHECK-NEXT:    srhadd v0.8h, v2.8h, v3.8h
+; CHECK-NEXT:    ret
+  %x0 = sext <16 x i8> %a0 to <16 x i16>
+  %x1 = sext <16 x i8> %a1 to <16 x i16>
+  %or = or <16 x i16> %x0, %x1
+  %xor = xor <16 x i16> %x0, %x1
+  %shift = ashr <16 x i16> %xor, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %avg = sub <16 x i16> %or, %shift
+  ret <16 x i16> %avg
+}
+
+define <16 x i16> @sext_avgceilu_mismatch(<16 x i4> %a0, <16 x i8> %a1) {
+; CHECK-LABEL: sext_avgceilu_mismatch:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ushll v2.8h, v0.8b, #0
+; CHECK-NEXT:    ushll2 v0.8h, v0.16b, #0
+; CHECK-NEXT:    sshll v3.8h, v1.8b, #0
+; CHECK-NEXT:    sshll2 v1.8h, v1.16b, #0
+; CHECK-NEXT:    shl v2.8h, v2.8h, #12
+; CHECK-NEXT:    shl v0.8h, v0.8h, #12
+; CHECK-NEXT:    sshr v2.8h, v2.8h, #12
+; CHECK-NEXT:    sshr v0.8h, v0.8h, #12
+; CHECK-NEXT:    srhadd v1.8h, v0.8h, v1.8h
+; CHECK-NEXT:    srhadd v0.8h, v2.8h, v3.8h
+; CHECK-NEXT:    ret
+  %x0 = sext <16 x i4> %a0 to <16 x i16>
+  %x1 = sext <16 x i8> %a1 to <16 x i16>
+  %or = or <16 x i16> %x0, %x1
+  %xor = xor <16 x i16> %x0, %x1
+  %shift = ashr <16 x i16> %xor, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %avg = sub <16 x i16> %or, %shift
+  ret <16 x i16> %avg
+}
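
A quick note on the signedness: the unsigned averages do not commute with
sign extension. Take x = -1 and y = 1 as i8: sext gives 0xffff and 0x0001,
so AVGFLOORU over i16 is 0x8000, while narrowing to AVGFLOORU over i8 gives
(0xff + 0x01) >> 1 = 0x80, which sign-extends to 0xff80. The signed averages
do commute, because sign extension preserves the value and the signed
average of two i8 values always fits back in i8; the follow-up commit below
switches the matched opcodes to AVGFLOORS/AVGCEILS accordingly.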

From fbe39f42ab0559fef068b810768ee81456a0b15e Mon Sep 17 00:00:00 2001
From: c8ef <c8ef at outlook.com>
Date: Thu, 13 Jun 2024 08:02:56 +0000
Subject: [PATCH 2/3] handle avgs, not avgu

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 18 +++++-----
 llvm/test/CodeGen/AArch64/avg.ll              | 34 ++++++++-----------
 2 files changed, 23 insertions(+), 29 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 0d4df4a7ecda5..80b8d48251472 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -5237,7 +5237,7 @@ SDValue DAGCombiner::visitAVG(SDNode *N) {
                        DAG.getShiftAmountConstant(1, VT, DL));
 
   // fold avgu(zext(x), zext(y)) -> zext(avgu(x, y))
-  // fold avgu(sext(x), sext(y)) -> sext(avgu(x, y))
+  // fold avgs(sext(x), sext(y)) -> sext(avgs(x, y))
   if (sd_match(
           N, m_BinOp(ISD::AVGFLOORU, m_ZExt(m_Value(X)), m_ZExt(m_Value(Y)))) &&
       X.getValueType() == Y.getValueType() &&
@@ -5253,18 +5253,18 @@ SDValue DAGCombiner::visitAVG(SDNode *N) {
     return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, AvgCeilU);
   }
   if (sd_match(
-          N, m_BinOp(ISD::AVGFLOORU, m_SExt(m_Value(X)), m_SExt(m_Value(Y)))) &&
+          N, m_BinOp(ISD::AVGFLOORS, m_SExt(m_Value(X)), m_SExt(m_Value(Y)))) &&
       X.getValueType() == Y.getValueType() &&
-      hasOperation(ISD::AVGFLOORU, X.getValueType())) {
-    SDValue AvgFloorU = DAG.getNode(ISD::AVGFLOORU, DL, X.getValueType(), X, Y);
-    return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, AvgFloorU);
+      hasOperation(ISD::AVGFLOORS, X.getValueType())) {
+    SDValue AvgFloorS = DAG.getNode(ISD::AVGFLOORS, DL, X.getValueType(), X, Y);
+    return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, AvgFloorS);
   }
   if (sd_match(
-          N, m_BinOp(ISD::AVGCEILU, m_SExt(m_Value(X)), m_SExt(m_Value(Y)))) &&
+          N, m_BinOp(ISD::AVGCEILS, m_SExt(m_Value(X)), m_SExt(m_Value(Y)))) &&
       X.getValueType() == Y.getValueType() &&
-      hasOperation(ISD::AVGCEILU, X.getValueType())) {
-    SDValue AvgCeilU = DAG.getNode(ISD::AVGCEILU, DL, X.getValueType(), X, Y);
-    return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, AvgCeilU);
+      hasOperation(ISD::AVGCEILS, X.getValueType())) {
+    SDValue AvgCeilS = DAG.getNode(ISD::AVGCEILS, DL, X.getValueType(), X, Y);
+    return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, AvgCeilS);
   }
 
   // Fold avgflooru(x,y) -> avgceilu(x,y-1) iff y != 0
diff --git a/llvm/test/CodeGen/AArch64/avg.ll b/llvm/test/CodeGen/AArch64/avg.ll
index e61b47772b7d7..cabc0d346b806 100644
--- a/llvm/test/CodeGen/AArch64/avg.ll
+++ b/llvm/test/CodeGen/AArch64/avg.ll
@@ -69,15 +69,12 @@ define <16 x i16> @zext_avgceilu_mismatch(<16 x i4> %a0, <16 x i8> %a1) {
   ret <16 x i16> %avg
 }
 
-define <16 x i16> @sext_avgflooru(<16 x i8> %a0, <16 x i8> %a1) {
-; CHECK-LABEL: sext_avgflooru:
+define <16 x i16> @sext_avgfloors(<16 x i8> %a0, <16 x i8> %a1) {
+; CHECK-LABEL: sext_avgfloors:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sshll v2.8h, v0.8b, #0
-; CHECK-NEXT:    sshll2 v0.8h, v0.16b, #0
-; CHECK-NEXT:    sshll v3.8h, v1.8b, #0
-; CHECK-NEXT:    sshll2 v1.8h, v1.16b, #0
-; CHECK-NEXT:    shadd v1.8h, v0.8h, v1.8h
-; CHECK-NEXT:    shadd v0.8h, v2.8h, v3.8h
+; CHECK-NEXT:    shadd v0.16b, v0.16b, v1.16b
+; CHECK-NEXT:    sshll2 v1.8h, v0.16b, #0
+; CHECK-NEXT:    sshll v0.8h, v0.8b, #0
 ; CHECK-NEXT:    ret
   %x0 = sext <16 x i8> %a0 to <16 x i16>
   %x1 = sext <16 x i8> %a1 to <16 x i16>
@@ -88,8 +85,8 @@ define <16 x i16> @sext_avgflooru(<16 x i8> %a0, <16 x i8> %a1) {
   ret <16 x i16> %avg
 }
 
-define <16 x i16> @sext_avgflooru_mismatch(<16 x i8> %a0, <16 x i4> %a1) {
-; CHECK-LABEL: sext_avgflooru_mismatch:
+define <16 x i16> @sext_avgfloors_mismatch(<16 x i8> %a0, <16 x i4> %a1) {
+; CHECK-LABEL: sext_avgfloors_mismatch:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ushll2 v2.8h, v1.16b, #0
 ; CHECK-NEXT:    ushll v1.8h, v1.8b, #0
@@ -111,15 +108,12 @@ define <16 x i16> @sext_avgflooru_mismatch(<16 x i8> %a0, <16 x i4> %a1) {
   ret <16 x i16> %avg
 }
 
-define <16 x i16> @sext_avgceilu(<16 x i8> %a0, <16 x i8> %a1) {
-; CHECK-LABEL: sext_avgceilu:
+define <16 x i16> @sext_avgceils(<16 x i8> %a0, <16 x i8> %a1) {
+; CHECK-LABEL: sext_avgceils:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sshll v2.8h, v0.8b, #0
-; CHECK-NEXT:    sshll2 v0.8h, v0.16b, #0
-; CHECK-NEXT:    sshll v3.8h, v1.8b, #0
-; CHECK-NEXT:    sshll2 v1.8h, v1.16b, #0
-; CHECK-NEXT:    srhadd v1.8h, v0.8h, v1.8h
-; CHECK-NEXT:    srhadd v0.8h, v2.8h, v3.8h
+; CHECK-NEXT:    srhadd v0.16b, v0.16b, v1.16b
+; CHECK-NEXT:    sshll2 v1.8h, v0.16b, #0
+; CHECK-NEXT:    sshll v0.8h, v0.8b, #0
 ; CHECK-NEXT:    ret
   %x0 = sext <16 x i8> %a0 to <16 x i16>
   %x1 = sext <16 x i8> %a1 to <16 x i16>
@@ -130,8 +124,8 @@ define <16 x i16> @sext_avgceilu(<16 x i8> %a0, <16 x i8> %a1) {
   ret <16 x i16> %avg
 }
 
-define <16 x i16> @sext_avgceilu_mismatch(<16 x i4> %a0, <16 x i8> %a1) {
-; CHECK-LABEL: sext_avgceilu_mismatch:
+define <16 x i16> @sext_avgceils_mismatch(<16 x i4> %a0, <16 x i8> %a1) {
+; CHECK-LABEL: sext_avgceils_mismatch:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ushll v2.8h, v0.8b, #0
 ; CHECK-NEXT:    ushll2 v0.8h, v0.16b, #0
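
For reference, a minimal standalone check of the identity the updated
combine relies on (illustrative only, not part of the patch): it brute-forces
all i8 pairs and confirms that the signed floor/ceil averages commute with
sign extension, i.e. avgfloors(sext(x), sext(y)) == sext(avgfloors(x, y)),
and likewise for avgceils. The helpers mirror the and/xor/ashr and
or/xor/ashr expansions used in the tests above; all names below are made up
for this sketch.

  // Illustrative brute-force check; assumes arithmetic >> on negative
  // values (true on common compilers, guaranteed since C++20).
  #include <cassert>
  #include <cstdint>
  #include <cstdio>

  // floor((a + b) / 2) without intermediate overflow (and/xor/ashr/add).
  static int16_t avgfloors16(int16_t a, int16_t b) {
    return (int16_t)((a & b) + ((int16_t)(a ^ b) >> 1));
  }
  static int8_t avgfloors8(int8_t a, int8_t b) {
    return (int8_t)((a & b) + ((int8_t)(a ^ b) >> 1));
  }
  // ceil((a + b) / 2) without intermediate overflow (or/xor/ashr/sub).
  static int16_t avgceils16(int16_t a, int16_t b) {
    return (int16_t)((a | b) - ((int16_t)(a ^ b) >> 1));
  }
  static int8_t avgceils8(int8_t a, int8_t b) {
    return (int8_t)((a | b) - ((int8_t)(a ^ b) >> 1));
  }

  int main() {
    for (int x = -128; x <= 127; ++x)
      for (int y = -128; y <= 127; ++y) {
        int16_t wx = (int16_t)x, wy = (int16_t)y; // sext i8 -> i16
        assert(avgfloors16(wx, wy) ==
               (int16_t)avgfloors8((int8_t)x, (int8_t)y));
        assert(avgceils16(wx, wy) ==
               (int16_t)avgceils8((int8_t)x, (int8_t)y));
      }
    puts("avgfloors/avgceils commute with sext for all i8 pairs");
    return 0;
  }

The identity itself is target-independent; the hasOperation() guard in the
combine only checks that the narrower node is actually supported, so the
fold is worth forming on that target.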

From 2cc9ea1a1b53abfdc8216aa5c15aac921971e69a Mon Sep 17 00:00:00 2001
From: c8ef <c8ef at outlook.com>
Date: Thu, 13 Jun 2024 08:57:14 +0000
Subject: [PATCH 3/3] regen test

---
 .../AArch64/aarch64-known-bits-hadd.ll        |   6 +-
 llvm/test/CodeGen/AArch64/arm64-vhadd.ll      |  16 --
 llvm/test/CodeGen/AArch64/sve-hadd.ll         | 139 ++++++++----------
 3 files changed, 62 insertions(+), 99 deletions(-)

diff --git a/llvm/test/CodeGen/AArch64/aarch64-known-bits-hadd.ll b/llvm/test/CodeGen/AArch64/aarch64-known-bits-hadd.ll
index b2cf089d8145f..0506e1ed9710b 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-known-bits-hadd.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-known-bits-hadd.ll
@@ -95,9 +95,8 @@ define <8 x i16> @urhadd_sext(<8 x i8> %a0, <8 x i8> %a1) {
 define <8 x i16> @hadds_sext(<8 x i8> %a0, <8 x i8> %a1) {
 ; CHECK-LABEL: hadds_sext:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    shadd v0.8b, v0.8b, v1.8b
 ; CHECK-NEXT:    sshll v0.8h, v0.8b, #0
-; CHECK-NEXT:    sshll v1.8h, v1.8b, #0
-; CHECK-NEXT:    shadd v0.8h, v0.8h, v1.8h
 ; CHECK-NEXT:    bic v0.8h, #254, lsl #8
 ; CHECK-NEXT:    ret
   %x0 = sext <8 x i8> %a0 to <8 x i16>
@@ -110,9 +109,8 @@ define <8 x i16> @hadds_sext(<8 x i8> %a0, <8 x i8> %a1) {
 define <8 x i16> @shaddu_sext(<8 x i8> %a0, <8 x i8> %a1) {
 ; CHECK-LABEL: shaddu_sext:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    srhadd v0.8b, v0.8b, v1.8b
 ; CHECK-NEXT:    sshll v0.8h, v0.8b, #0
-; CHECK-NEXT:    sshll v1.8h, v1.8b, #0
-; CHECK-NEXT:    srhadd v0.8h, v0.8h, v1.8h
 ; CHECK-NEXT:    bic v0.8h, #254, lsl #8
 ; CHECK-NEXT:    ret
   %x0 = sext <8 x i8> %a0 to <8 x i16>
diff --git a/llvm/test/CodeGen/AArch64/arm64-vhadd.ll b/llvm/test/CodeGen/AArch64/arm64-vhadd.ll
index 076cbf7fce6cc..1c770dce76a77 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vhadd.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vhadd.ll
@@ -810,10 +810,6 @@ define <4 x i64> @hadd32_zext_lsr(<4 x i32> %src1, <4 x i32> %src2) {
 define <4 x i16> @hadd8_sext_asr(<4 x i8> %src1, <4 x i8> %src2) {
 ; CHECK-LABEL: hadd8_sext_asr:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    shl.4h v1, v1, #8
-; CHECK-NEXT:    shl.4h v0, v0, #8
-; CHECK-NEXT:    sshr.4h v1, v1, #8
-; CHECK-NEXT:    sshr.4h v0, v0, #8
 ; CHECK-NEXT:    shadd.4h v0, v0, v1
 ; CHECK-NEXT:    ret
   %zextsrc1 = sext <4 x i8> %src1 to <4 x i16>
@@ -870,10 +866,6 @@ define <4 x i16> @hadd8_zext_lsr(<4 x i8> %src1, <4 x i8> %src2) {
 define <2 x i16> @hadd8x2_sext_asr(<2 x i8> %src1, <2 x i8> %src2) {
 ; CHECK-LABEL: hadd8x2_sext_asr:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    shl.2s v1, v1, #24
-; CHECK-NEXT:    shl.2s v0, v0, #24
-; CHECK-NEXT:    sshr.2s v1, v1, #24
-; CHECK-NEXT:    sshr.2s v0, v0, #24
 ; CHECK-NEXT:    shadd.2s v0, v0, v1
 ; CHECK-NEXT:    ret
   %zextsrc1 = sext <2 x i8> %src1 to <2 x i16>
@@ -934,10 +926,6 @@ define <2 x i16> @hadd8x2_zext_lsr(<2 x i8> %src1, <2 x i8> %src2) {
 define <4 x i16> @rhadd8_sext_asr(<4 x i8> %src1, <4 x i8> %src2) {
 ; CHECK-LABEL: rhadd8_sext_asr:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    shl.4h v1, v1, #8
-; CHECK-NEXT:    shl.4h v0, v0, #8
-; CHECK-NEXT:    sshr.4h v1, v1, #8
-; CHECK-NEXT:    sshr.4h v0, v0, #8
 ; CHECK-NEXT:    srhadd.4h v0, v0, v1
 ; CHECK-NEXT:    ret
   %zextsrc1 = sext <4 x i8> %src1 to <4 x i16>
@@ -1000,10 +988,6 @@ define <4 x i16> @rhadd8_zext_lsr(<4 x i8> %src1, <4 x i8> %src2) {
 define <2 x i16> @rhadd8x2_sext_asr(<2 x i8> %src1, <2 x i8> %src2) {
 ; CHECK-LABEL: rhadd8x2_sext_asr:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    shl.2s v1, v1, #24
-; CHECK-NEXT:    shl.2s v0, v0, #24
-; CHECK-NEXT:    sshr.2s v1, v1, #24
-; CHECK-NEXT:    sshr.2s v0, v0, #24
 ; CHECK-NEXT:    srhadd.2s v0, v0, v1
 ; CHECK-NEXT:    ret
   %zextsrc1 = sext <2 x i8> %src1 to <2 x i16>
diff --git a/llvm/test/CodeGen/AArch64/sve-hadd.ll b/llvm/test/CodeGen/AArch64/sve-hadd.ll
index 3fead88780e7d..6151d9ce04c3a 100644
--- a/llvm/test/CodeGen/AArch64/sve-hadd.ll
+++ b/llvm/test/CodeGen/AArch64/sve-hadd.ll
@@ -80,17 +80,17 @@ entry:
 define <vscale x 2 x i32> @hadds_v2i32(<vscale x 2 x i32> %s0, <vscale x 2 x i32> %s1) {
 ; SVE-LABEL: hadds_v2i32:
 ; SVE:       // %bb.0: // %entry
-; SVE-NEXT:    ptrue p0.d
-; SVE-NEXT:    sxtw z0.d, p0/m, z0.d
-; SVE-NEXT:    adr z0.d, [z0.d, z1.d, sxtw]
-; SVE-NEXT:    asr z0.d, z0.d, #1
+; SVE-NEXT:    asr z2.d, z1.d, #1
+; SVE-NEXT:    asr z3.d, z0.d, #1
+; SVE-NEXT:    and z0.d, z0.d, z1.d
+; SVE-NEXT:    add z1.d, z3.d, z2.d
+; SVE-NEXT:    and z0.d, z0.d, #0x1
+; SVE-NEXT:    add z0.d, z1.d, z0.d
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: hadds_v2i32:
 ; SVE2:       // %bb.0: // %entry
 ; SVE2-NEXT:    ptrue p0.d
-; SVE2-NEXT:    sxtw z1.d, p0/m, z1.d
-; SVE2-NEXT:    sxtw z0.d, p0/m, z0.d
 ; SVE2-NEXT:    shadd z0.d, p0/m, z0.d, z1.d
 ; SVE2-NEXT:    ret
 entry:
@@ -221,18 +221,17 @@ entry:
 define <vscale x 2 x i16> @hadds_v2i16(<vscale x 2 x i16> %s0, <vscale x 2 x i16> %s1) {
 ; SVE-LABEL: hadds_v2i16:
 ; SVE:       // %bb.0: // %entry
-; SVE-NEXT:    ptrue p0.d
-; SVE-NEXT:    sxth z1.d, p0/m, z1.d
-; SVE-NEXT:    sxth z0.d, p0/m, z0.d
-; SVE-NEXT:    add z0.d, z0.d, z1.d
-; SVE-NEXT:    asr z0.d, z0.d, #1
+; SVE-NEXT:    asr z2.d, z1.d, #1
+; SVE-NEXT:    asr z3.d, z0.d, #1
+; SVE-NEXT:    and z0.d, z0.d, z1.d
+; SVE-NEXT:    add z1.d, z3.d, z2.d
+; SVE-NEXT:    and z0.d, z0.d, #0x1
+; SVE-NEXT:    add z0.d, z1.d, z0.d
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: hadds_v2i16:
 ; SVE2:       // %bb.0: // %entry
 ; SVE2-NEXT:    ptrue p0.d
-; SVE2-NEXT:    sxth z1.d, p0/m, z1.d
-; SVE2-NEXT:    sxth z0.d, p0/m, z0.d
 ; SVE2-NEXT:    shadd z0.d, p0/m, z0.d, z1.d
 ; SVE2-NEXT:    ret
 entry:
@@ -291,18 +290,17 @@ entry:
 define <vscale x 4 x i16> @hadds_v4i16(<vscale x 4 x i16> %s0, <vscale x 4 x i16> %s1) {
 ; SVE-LABEL: hadds_v4i16:
 ; SVE:       // %bb.0: // %entry
-; SVE-NEXT:    ptrue p0.s
-; SVE-NEXT:    sxth z1.s, p0/m, z1.s
-; SVE-NEXT:    sxth z0.s, p0/m, z0.s
-; SVE-NEXT:    add z0.s, z0.s, z1.s
-; SVE-NEXT:    asr z0.s, z0.s, #1
+; SVE-NEXT:    asr z2.s, z1.s, #1
+; SVE-NEXT:    asr z3.s, z0.s, #1
+; SVE-NEXT:    and z0.d, z0.d, z1.d
+; SVE-NEXT:    add z1.s, z3.s, z2.s
+; SVE-NEXT:    and z0.s, z0.s, #0x1
+; SVE-NEXT:    add z0.s, z1.s, z0.s
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: hadds_v4i16:
 ; SVE2:       // %bb.0: // %entry
 ; SVE2-NEXT:    ptrue p0.s
-; SVE2-NEXT:    sxth z1.s, p0/m, z1.s
-; SVE2-NEXT:    sxth z0.s, p0/m, z0.s
 ; SVE2-NEXT:    shadd z0.s, p0/m, z0.s, z1.s
 ; SVE2-NEXT:    ret
 entry:
@@ -435,18 +433,17 @@ entry:
 define <vscale x 4 x i8> @hadds_v4i8(<vscale x 4 x i8> %s0, <vscale x 4 x i8> %s1) {
 ; SVE-LABEL: hadds_v4i8:
 ; SVE:       // %bb.0: // %entry
-; SVE-NEXT:    ptrue p0.s
-; SVE-NEXT:    sxtb z1.s, p0/m, z1.s
-; SVE-NEXT:    sxtb z0.s, p0/m, z0.s
-; SVE-NEXT:    add z0.s, z0.s, z1.s
-; SVE-NEXT:    asr z0.s, z0.s, #1
+; SVE-NEXT:    asr z2.s, z1.s, #1
+; SVE-NEXT:    asr z3.s, z0.s, #1
+; SVE-NEXT:    and z0.d, z0.d, z1.d
+; SVE-NEXT:    add z1.s, z3.s, z2.s
+; SVE-NEXT:    and z0.s, z0.s, #0x1
+; SVE-NEXT:    add z0.s, z1.s, z0.s
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: hadds_v4i8:
 ; SVE2:       // %bb.0: // %entry
 ; SVE2-NEXT:    ptrue p0.s
-; SVE2-NEXT:    sxtb z1.s, p0/m, z1.s
-; SVE2-NEXT:    sxtb z0.s, p0/m, z0.s
 ; SVE2-NEXT:    shadd z0.s, p0/m, z0.s, z1.s
 ; SVE2-NEXT:    ret
 entry:
@@ -505,18 +502,17 @@ entry:
 define <vscale x 8 x i8> @hadds_v8i8(<vscale x 8 x i8> %s0, <vscale x 8 x i8> %s1) {
 ; SVE-LABEL: hadds_v8i8:
 ; SVE:       // %bb.0: // %entry
-; SVE-NEXT:    ptrue p0.h
-; SVE-NEXT:    sxtb z1.h, p0/m, z1.h
-; SVE-NEXT:    sxtb z0.h, p0/m, z0.h
-; SVE-NEXT:    add z0.h, z0.h, z1.h
-; SVE-NEXT:    asr z0.h, z0.h, #1
+; SVE-NEXT:    asr z2.h, z1.h, #1
+; SVE-NEXT:    asr z3.h, z0.h, #1
+; SVE-NEXT:    and z0.d, z0.d, z1.d
+; SVE-NEXT:    add z1.h, z3.h, z2.h
+; SVE-NEXT:    and z0.h, z0.h, #0x1
+; SVE-NEXT:    add z0.h, z1.h, z0.h
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: hadds_v8i8:
 ; SVE2:       // %bb.0: // %entry
 ; SVE2-NEXT:    ptrue p0.h
-; SVE2-NEXT:    sxtb z1.h, p0/m, z1.h
-; SVE2-NEXT:    sxtb z0.h, p0/m, z0.h
 ; SVE2-NEXT:    shadd z0.h, p0/m, z0.h, z1.h
 ; SVE2-NEXT:    ret
 entry:
@@ -727,20 +723,17 @@ entry:
 define <vscale x 2 x i32> @rhadds_v2i32(<vscale x 2 x i32> %s0, <vscale x 2 x i32> %s1) {
 ; SVE-LABEL: rhadds_v2i32:
 ; SVE:       // %bb.0: // %entry
-; SVE-NEXT:    ptrue p0.d
-; SVE-NEXT:    mov z2.d, #-1 // =0xffffffffffffffff
-; SVE-NEXT:    sxtw z0.d, p0/m, z0.d
-; SVE-NEXT:    sxtw z1.d, p0/m, z1.d
-; SVE-NEXT:    eor z0.d, z0.d, z2.d
-; SVE-NEXT:    sub z0.d, z1.d, z0.d
-; SVE-NEXT:    asr z0.d, z0.d, #1
+; SVE-NEXT:    asr z2.d, z1.d, #1
+; SVE-NEXT:    asr z3.d, z0.d, #1
+; SVE-NEXT:    orr z0.d, z0.d, z1.d
+; SVE-NEXT:    add z1.d, z3.d, z2.d
+; SVE-NEXT:    and z0.d, z0.d, #0x1
+; SVE-NEXT:    add z0.d, z1.d, z0.d
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: rhadds_v2i32:
 ; SVE2:       // %bb.0: // %entry
 ; SVE2-NEXT:    ptrue p0.d
-; SVE2-NEXT:    sxtw z1.d, p0/m, z1.d
-; SVE2-NEXT:    sxtw z0.d, p0/m, z0.d
 ; SVE2-NEXT:    srhadd z0.d, p0/m, z0.d, z1.d
 ; SVE2-NEXT:    ret
 entry:
@@ -883,20 +876,17 @@ entry:
 define <vscale x 2 x i16> @rhadds_v2i16(<vscale x 2 x i16> %s0, <vscale x 2 x i16> %s1) {
 ; SVE-LABEL: rhadds_v2i16:
 ; SVE:       // %bb.0: // %entry
-; SVE-NEXT:    ptrue p0.d
-; SVE-NEXT:    mov z2.d, #-1 // =0xffffffffffffffff
-; SVE-NEXT:    sxth z0.d, p0/m, z0.d
-; SVE-NEXT:    sxth z1.d, p0/m, z1.d
-; SVE-NEXT:    eor z0.d, z0.d, z2.d
-; SVE-NEXT:    sub z0.d, z1.d, z0.d
-; SVE-NEXT:    asr z0.d, z0.d, #1
+; SVE-NEXT:    asr z2.d, z1.d, #1
+; SVE-NEXT:    asr z3.d, z0.d, #1
+; SVE-NEXT:    orr z0.d, z0.d, z1.d
+; SVE-NEXT:    add z1.d, z3.d, z2.d
+; SVE-NEXT:    and z0.d, z0.d, #0x1
+; SVE-NEXT:    add z0.d, z1.d, z0.d
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: rhadds_v2i16:
 ; SVE2:       // %bb.0: // %entry
 ; SVE2-NEXT:    ptrue p0.d
-; SVE2-NEXT:    sxth z1.d, p0/m, z1.d
-; SVE2-NEXT:    sxth z0.d, p0/m, z0.d
 ; SVE2-NEXT:    srhadd z0.d, p0/m, z0.d, z1.d
 ; SVE2-NEXT:    ret
 entry:
@@ -962,20 +952,17 @@ entry:
 define <vscale x 4 x i16> @rhadds_v4i16(<vscale x 4 x i16> %s0, <vscale x 4 x i16> %s1) {
 ; SVE-LABEL: rhadds_v4i16:
 ; SVE:       // %bb.0: // %entry
-; SVE-NEXT:    ptrue p0.s
-; SVE-NEXT:    mov z2.s, #-1 // =0xffffffffffffffff
-; SVE-NEXT:    sxth z0.s, p0/m, z0.s
-; SVE-NEXT:    sxth z1.s, p0/m, z1.s
-; SVE-NEXT:    eor z0.d, z0.d, z2.d
-; SVE-NEXT:    sub z0.s, z1.s, z0.s
-; SVE-NEXT:    asr z0.s, z0.s, #1
+; SVE-NEXT:    asr z2.s, z1.s, #1
+; SVE-NEXT:    asr z3.s, z0.s, #1
+; SVE-NEXT:    orr z0.d, z0.d, z1.d
+; SVE-NEXT:    add z1.s, z3.s, z2.s
+; SVE-NEXT:    and z0.s, z0.s, #0x1
+; SVE-NEXT:    add z0.s, z1.s, z0.s
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: rhadds_v4i16:
 ; SVE2:       // %bb.0: // %entry
 ; SVE2-NEXT:    ptrue p0.s
-; SVE2-NEXT:    sxth z1.s, p0/m, z1.s
-; SVE2-NEXT:    sxth z0.s, p0/m, z0.s
 ; SVE2-NEXT:    srhadd z0.s, p0/m, z0.s, z1.s
 ; SVE2-NEXT:    ret
 entry:
@@ -1118,20 +1105,17 @@ entry:
 define <vscale x 4 x i8> @rhadds_v4i8(<vscale x 4 x i8> %s0, <vscale x 4 x i8> %s1) {
 ; SVE-LABEL: rhadds_v4i8:
 ; SVE:       // %bb.0: // %entry
-; SVE-NEXT:    ptrue p0.s
-; SVE-NEXT:    mov z2.s, #-1 // =0xffffffffffffffff
-; SVE-NEXT:    sxtb z0.s, p0/m, z0.s
-; SVE-NEXT:    sxtb z1.s, p0/m, z1.s
-; SVE-NEXT:    eor z0.d, z0.d, z2.d
-; SVE-NEXT:    sub z0.s, z1.s, z0.s
-; SVE-NEXT:    asr z0.s, z0.s, #1
+; SVE-NEXT:    asr z2.s, z1.s, #1
+; SVE-NEXT:    asr z3.s, z0.s, #1
+; SVE-NEXT:    orr z0.d, z0.d, z1.d
+; SVE-NEXT:    add z1.s, z3.s, z2.s
+; SVE-NEXT:    and z0.s, z0.s, #0x1
+; SVE-NEXT:    add z0.s, z1.s, z0.s
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: rhadds_v4i8:
 ; SVE2:       // %bb.0: // %entry
 ; SVE2-NEXT:    ptrue p0.s
-; SVE2-NEXT:    sxtb z1.s, p0/m, z1.s
-; SVE2-NEXT:    sxtb z0.s, p0/m, z0.s
 ; SVE2-NEXT:    srhadd z0.s, p0/m, z0.s, z1.s
 ; SVE2-NEXT:    ret
 entry:
@@ -1197,20 +1181,17 @@ entry:
 define <vscale x 8 x i8> @rhadds_v8i8(<vscale x 8 x i8> %s0, <vscale x 8 x i8> %s1) {
 ; SVE-LABEL: rhadds_v8i8:
 ; SVE:       // %bb.0: // %entry
-; SVE-NEXT:    ptrue p0.h
-; SVE-NEXT:    mov z2.h, #-1 // =0xffffffffffffffff
-; SVE-NEXT:    sxtb z0.h, p0/m, z0.h
-; SVE-NEXT:    sxtb z1.h, p0/m, z1.h
-; SVE-NEXT:    eor z0.d, z0.d, z2.d
-; SVE-NEXT:    sub z0.h, z1.h, z0.h
-; SVE-NEXT:    asr z0.h, z0.h, #1
+; SVE-NEXT:    asr z2.h, z1.h, #1
+; SVE-NEXT:    asr z3.h, z0.h, #1
+; SVE-NEXT:    orr z0.d, z0.d, z1.d
+; SVE-NEXT:    add z1.h, z3.h, z2.h
+; SVE-NEXT:    and z0.h, z0.h, #0x1
+; SVE-NEXT:    add z0.h, z1.h, z0.h
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: rhadds_v8i8:
 ; SVE2:       // %bb.0: // %entry
 ; SVE2-NEXT:    ptrue p0.h
-; SVE2-NEXT:    sxtb z1.h, p0/m, z1.h
-; SVE2-NEXT:    sxtb z0.h, p0/m, z0.h
 ; SVE2-NEXT:    srhadd z0.h, p0/m, z0.h, z1.h
 ; SVE2-NEXT:    ret
 entry:


