[llvm] [AArch64][GlobalISel] Added support for hadd family of intrinsics (PR #163985)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 17 09:07:46 PDT 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-aarch64
Author: Joshua Rodriguez (JoshdRod)
<details>
<summary>Changes</summary>
GlobalISel now selects the hadd family of intrinsics directly, without falling back to SelectionDAG.
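For illustration, a minimal IR sketch (function name chosen here for the example; the intrinsic declaration matches the ones already used in aarch64-known-bits-hadd.ll) of the kind of call affected. Before this patch, `llc -global-isel` reported a fallback to SelectionDAG for such calls; with it, the intrinsic is legalized to the new target-specific generic opcode and selected as a single `uhadd`:

```llvm
; Hypothetical example: compile with
;   llc -mtriple=aarch64 -global-isel example.ll
declare <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16>, <8 x i16>)

define <8 x i16> @uhadd_example(<8 x i16> %a, <8 x i16> %b) {
  ; Legalized to AArch64::G_UHADD and selected as "uhadd v0.8h, v0.8h, v1.8h".
  %r = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i16> %r
}
```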
---
Full diff: https://github.com/llvm/llvm-project/pull/163985.diff
4 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64InstrGISel.td (+29)
- (modified) llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp (+8)
- (modified) llvm/test/CodeGen/AArch64/aarch64-known-bits-hadd.ll (+123-48)
- (modified) llvm/test/CodeGen/AArch64/freeze.ll (+64-30)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
index fe8419301b306..187ef00eebd3c 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
@@ -239,6 +239,30 @@ def G_USDOT : AArch64GenericInstruction {
let hasSideEffects = 0;
}
+def G_UHADD : AArch64GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, type1:$src2);
+ let hasSideEffects = 0;
+}
+
+def G_URHADD : AArch64GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, type1:$src2);
+ let hasSideEffects = 0;
+}
+
+def G_SHADD : AArch64GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, type1:$src2);
+ let hasSideEffects = 0;
+}
+
+def G_SRHADD : AArch64GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, type1:$src2);
+ let hasSideEffects = 0;
+}
+
// Generic instruction for the BSP pseudo. It is expanded into BSP, which
// expands into BSL/BIT/BIF after register allocation.
def G_BSP : AArch64GenericInstruction {
@@ -286,6 +310,11 @@ def : GINodeEquiv<G_UDOT, AArch64udot>;
def : GINodeEquiv<G_SDOT, AArch64sdot>;
def : GINodeEquiv<G_USDOT, AArch64usdot>;
+def : GINodeEquiv<G_UHADD, avgflooru>;
+def : GINodeEquiv<G_URHADD, avgceilu>;
+def : GINodeEquiv<G_SHADD, avgfloors>;
+def : GINodeEquiv<G_SRHADD, avgceils>;
+
def : GINodeEquiv<G_EXTRACT_VECTOR_ELT, vector_extract>;
def : GINodeEquiv<G_AARCH64_PREFETCH, AArch64Prefetch>;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 05a431312472e..d2abdd0662b60 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -1825,6 +1825,14 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
return LowerBinOp(TargetOpcode::G_ABDS);
case Intrinsic::aarch64_neon_uabd:
return LowerBinOp(TargetOpcode::G_ABDU);
+ case Intrinsic::aarch64_neon_uhadd:
+ return LowerBinOp(AArch64::G_UHADD);
+ case Intrinsic::aarch64_neon_urhadd:
+ return LowerBinOp(AArch64::G_URHADD);
+ case Intrinsic::aarch64_neon_shadd:
+ return LowerBinOp(AArch64::G_SHADD);
+ case Intrinsic::aarch64_neon_srhadd:
+ return LowerBinOp(AArch64::G_SRHADD);
case Intrinsic::aarch64_neon_abs: {
// Lower the intrinsic to G_ABS.
MIB.buildInstr(TargetOpcode::G_ABS, {MI.getOperand(0)}, {MI.getOperand(2)});
diff --git a/llvm/test/CodeGen/AArch64/aarch64-known-bits-hadd.ll b/llvm/test/CodeGen/AArch64/aarch64-known-bits-hadd.ll
index f900f0209a108..a6fbaf01c5476 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-known-bits-hadd.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-known-bits-hadd.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
-; RUN: llc -mtriple=aarch64 < %s | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc < %s -mtriple=aarch64 -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI
declare <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16>, <8 x i16>)
declare <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16>, <8 x i16>)
@@ -7,11 +8,20 @@ declare <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16>, <8 x i16>)
declare <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16>, <8 x i16>)
define <8 x i16> @haddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
-; CHECK-LABEL: haddu_zext:
-; CHECK: // %bb.0:
-; CHECK-NEXT: uhadd v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: haddu_zext:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: uhadd v0.8b, v0.8b, v1.8b
+; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: haddu_zext:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
+; CHECK-GI-NEXT: mvni v2.8h, #254, lsl #8
+; CHECK-GI-NEXT: uhadd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: ret
%x0 = zext <8 x i8> %a0 to <8 x i16>
%x1 = zext <8 x i8> %a1 to <8 x i16>
%hadd = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
@@ -20,11 +30,20 @@ define <8 x i16> @haddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
}
define <8 x i16> @rhaddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
-; CHECK-LABEL: rhaddu_zext:
-; CHECK: // %bb.0:
-; CHECK-NEXT: urhadd v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: rhaddu_zext:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: urhadd v0.8b, v0.8b, v1.8b
+; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: rhaddu_zext:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
+; CHECK-GI-NEXT: mvni v2.8h, #254, lsl #8
+; CHECK-GI-NEXT: urhadd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: ret
%x0 = zext <8 x i8> %a0 to <8 x i16>
%x1 = zext <8 x i8> %a1 to <8 x i16>
%hadd = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
@@ -33,11 +52,20 @@ define <8 x i16> @rhaddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
}
define <8 x i16> @hadds_zext(<8 x i8> %a0, <8 x i8> %a1) {
-; CHECK-LABEL: hadds_zext:
-; CHECK: // %bb.0:
-; CHECK-NEXT: uhadd v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: hadds_zext:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: uhadd v0.8b, v0.8b, v1.8b
+; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: hadds_zext:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
+; CHECK-GI-NEXT: mvni v2.8h, #254, lsl #8
+; CHECK-GI-NEXT: shadd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: ret
%x0 = zext <8 x i8> %a0 to <8 x i16>
%x1 = zext <8 x i8> %a1 to <8 x i16>
%hadd = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
@@ -46,12 +74,21 @@ define <8 x i16> @hadds_zext(<8 x i8> %a0, <8 x i8> %a1) {
}
define <8 x i16> @shaddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
-; CHECK-LABEL: shaddu_zext:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-NEXT: ushll v1.8h, v1.8b, #0
-; CHECK-NEXT: srhadd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: shaddu_zext:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT: ushll v1.8h, v1.8b, #0
+; CHECK-SD-NEXT: srhadd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: shaddu_zext:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
+; CHECK-GI-NEXT: mvni v2.8h, #254, lsl #8
+; CHECK-GI-NEXT: srhadd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: ret
%x0 = zext <8 x i8> %a0 to <8 x i16>
%x1 = zext <8 x i8> %a1 to <8 x i16>
%hadd = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
@@ -62,13 +99,22 @@ define <8 x i16> @shaddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
; ; negative tests
define <8 x i16> @haddu_sext(<8 x i8> %a0, <8 x i8> %a1) {
-; CHECK-LABEL: haddu_sext:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-NEXT: sshll v1.8h, v1.8b, #0
-; CHECK-NEXT: uhadd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: bic v0.8h, #254, lsl #8
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: haddu_sext:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT: sshll v1.8h, v1.8b, #0
+; CHECK-SD-NEXT: uhadd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT: bic v0.8h, #254, lsl #8
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: haddu_sext:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: sshll v1.8h, v1.8b, #0
+; CHECK-GI-NEXT: mvni v2.8h, #254, lsl #8
+; CHECK-GI-NEXT: uhadd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: ret
%x0 = sext <8 x i8> %a0 to <8 x i16>
%x1 = sext <8 x i8> %a1 to <8 x i16>
%hadd = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
@@ -77,13 +123,22 @@ define <8 x i16> @haddu_sext(<8 x i8> %a0, <8 x i8> %a1) {
}
define <8 x i16> @urhadd_sext(<8 x i8> %a0, <8 x i8> %a1) {
-; CHECK-LABEL: urhadd_sext:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-NEXT: sshll v1.8h, v1.8b, #0
-; CHECK-NEXT: urhadd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: bic v0.8h, #254, lsl #8
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: urhadd_sext:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT: sshll v1.8h, v1.8b, #0
+; CHECK-SD-NEXT: urhadd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT: bic v0.8h, #254, lsl #8
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: urhadd_sext:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: sshll v1.8h, v1.8b, #0
+; CHECK-GI-NEXT: mvni v2.8h, #254, lsl #8
+; CHECK-GI-NEXT: urhadd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: ret
%x0 = sext <8 x i8> %a0 to <8 x i16>
%x1 = sext <8 x i8> %a1 to <8 x i16>
%hadd = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
@@ -92,12 +147,21 @@ define <8 x i16> @urhadd_sext(<8 x i8> %a0, <8 x i8> %a1) {
}
define <8 x i16> @hadds_sext(<8 x i8> %a0, <8 x i8> %a1) {
-; CHECK-LABEL: hadds_sext:
-; CHECK: // %bb.0:
-; CHECK-NEXT: shadd v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-NEXT: bic v0.8h, #254, lsl #8
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: hadds_sext:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: shadd v0.8b, v0.8b, v1.8b
+; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT: bic v0.8h, #254, lsl #8
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: hadds_sext:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: sshll v1.8h, v1.8b, #0
+; CHECK-GI-NEXT: mvni v2.8h, #254, lsl #8
+; CHECK-GI-NEXT: shadd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: ret
%x0 = sext <8 x i8> %a0 to <8 x i16>
%x1 = sext <8 x i8> %a1 to <8 x i16>
%hadd = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
@@ -106,15 +170,26 @@ define <8 x i16> @hadds_sext(<8 x i8> %a0, <8 x i8> %a1) {
}
define <8 x i16> @shaddu_sext(<8 x i8> %a0, <8 x i8> %a1) {
-; CHECK-LABEL: shaddu_sext:
-; CHECK: // %bb.0:
-; CHECK-NEXT: srhadd v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-NEXT: bic v0.8h, #254, lsl #8
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: shaddu_sext:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: srhadd v0.8b, v0.8b, v1.8b
+; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT: bic v0.8h, #254, lsl #8
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: shaddu_sext:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: sshll v1.8h, v1.8b, #0
+; CHECK-GI-NEXT: mvni v2.8h, #254, lsl #8
+; CHECK-GI-NEXT: srhadd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: ret
%x0 = sext <8 x i8> %a0 to <8 x i16>
%x1 = sext <8 x i8> %a1 to <8 x i16>
%hadd = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
%res = and <8 x i16> %hadd, <i16 511, i16 511, i16 511, i16 511, i16 511, i16 511, i16 511, i16 511>
ret <8 x i16> %res
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}
diff --git a/llvm/test/CodeGen/AArch64/freeze.ll b/llvm/test/CodeGen/AArch64/freeze.ll
index fb909fec90434..136ac8b0a2aa1 100644
--- a/llvm/test/CodeGen/AArch64/freeze.ll
+++ b/llvm/test/CodeGen/AArch64/freeze.ll
@@ -3,10 +3,6 @@
; RUN: llc -mtriple=aarch64-unknown-linux-gnu -global-isel -global-isel-abort=2 2>&1 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-GI
; CHECK-GI: warning: Instruction selection used fallback path for freeze_v2i8
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for freeze_uhadd
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for freeze_urhadd
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for freeze_shadd
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for freeze_srhadd
%struct.T = type { i32, i32 }
@@ -435,13 +431,23 @@ define <8 x i16> @freeze_abds(<8 x i16> %a, <8 x i16> %b) {
}
define <8 x i16> @freeze_uhadd(<8 x i16> %a0, <8 x i16> %a1) {
-; CHECK-LABEL: freeze_uhadd:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v2.8h, #15
-; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
-; CHECK-NEXT: and v1.16b, v1.16b, v2.16b
-; CHECK-NEXT: uhadd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: freeze_uhadd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v2.8h, #15
+; CHECK-SD-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-SD-NEXT: and v1.16b, v1.16b, v2.16b
+; CHECK-SD-NEXT: uhadd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: freeze_uhadd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v2.8h, #15
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: and v1.16b, v1.16b, v2.16b
+; CHECK-GI-NEXT: movi v2.8h, #31
+; CHECK-GI-NEXT: uhadd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: ret
%m0 = and <8 x i16> %a0, splat (i16 15)
%m1 = and <8 x i16> %a1, splat (i16 15)
%avg = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %m0, <8 x i16> %m1)
@@ -451,13 +457,23 @@ define <8 x i16> @freeze_uhadd(<8 x i16> %a0, <8 x i16> %a1) {
}
define <8 x i16> @freeze_urhadd(<8 x i16> %a0, <8 x i16> %a1) {
-; CHECK-LABEL: freeze_urhadd:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v2.8h, #15
-; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
-; CHECK-NEXT: and v1.16b, v1.16b, v2.16b
-; CHECK-NEXT: urhadd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: freeze_urhadd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v2.8h, #15
+; CHECK-SD-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-SD-NEXT: and v1.16b, v1.16b, v2.16b
+; CHECK-SD-NEXT: urhadd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: freeze_urhadd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v2.8h, #15
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: and v1.16b, v1.16b, v2.16b
+; CHECK-GI-NEXT: movi v2.8h, #31
+; CHECK-GI-NEXT: urhadd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: ret
%m0 = and <8 x i16> %a0, splat (i16 15)
%m1 = and <8 x i16> %a1, splat (i16 15)
%avg = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %m0, <8 x i16> %m1)
@@ -467,12 +483,21 @@ define <8 x i16> @freeze_urhadd(<8 x i16> %a0, <8 x i16> %a1) {
}
define <8 x i16> @freeze_shadd(<8 x i8> %a0, <8 x i16> %a1) {
-; CHECK-LABEL: freeze_shadd:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-NEXT: sshr v1.8h, v1.8h, #8
-; CHECK-NEXT: shadd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: freeze_shadd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT: sshr v1.8h, v1.8h, #8
+; CHECK-SD-NEXT: shadd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: freeze_shadd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: sshr v1.8h, v1.8h, #8
+; CHECK-GI-NEXT: shadd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: shl v0.8h, v0.8h, #8
+; CHECK-GI-NEXT: sshr v0.8h, v0.8h, #8
+; CHECK-GI-NEXT: ret
%x0 = sext <8 x i8> %a0 to <8 x i16>
%x1 = ashr <8 x i16> %a1, splat (i16 8)
%avg = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
@@ -483,12 +508,21 @@ define <8 x i16> @freeze_shadd(<8 x i8> %a0, <8 x i16> %a1) {
}
define <8 x i16> @freeze_srhadd(<8 x i8> %a0, <8 x i16> %a1) {
-; CHECK-LABEL: freeze_srhadd:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-NEXT: sshr v1.8h, v1.8h, #8
-; CHECK-NEXT: srhadd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: freeze_srhadd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT: sshr v1.8h, v1.8h, #8
+; CHECK-SD-NEXT: srhadd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: freeze_srhadd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: sshr v1.8h, v1.8h, #8
+; CHECK-GI-NEXT: srhadd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: shl v0.8h, v0.8h, #8
+; CHECK-GI-NEXT: sshr v0.8h, v0.8h, #8
+; CHECK-GI-NEXT: ret
%x0 = sext <8 x i8> %a0 to <8 x i16>
%x1 = ashr <8 x i16> %a1, splat (i16 8)
%avg = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
``````````
</details>
https://github.com/llvm/llvm-project/pull/163985