[llvm] [DAG] Fold mismatched widened avg idioms to narrow form (#147946) (PR #163366)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 15 02:07:18 PDT 2025
================
@@ -2,6 +2,87 @@
; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+sve | FileCheck %s -check-prefixes=CHECK,SVE
; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+sve2 | FileCheck %s -check-prefixes=CHECK,SVE2
+define <8 x i8> @srhadd_v8i8_trunc(<8 x i8> %s0, <8 x i8> %s1) {
+; CHECK-LABEL: srhadd_v8i8_trunc:
+; CHECK: // %bb.0:
+; CHECK-NEXT: srhadd v0.8b, v0.8b, v1.8b
+; CHECK-NEXT: ret
+ %s0s = sext <8 x i8> %s0 to <8 x i16>
+ %s1s = sext <8 x i8> %s1 to <8 x i16>
+ %s = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %s0s, <8 x i16> %s1s)
+ %s2 = trunc <8 x i16> %s to <8 x i8>
+ ret <8 x i8> %s2
+}
+
+define <4 x i16> @srhadd_v4i16_trunc(<4 x i16> %s0, <4 x i16> %s1) {
+; CHECK-LABEL: srhadd_v4i16_trunc:
+; CHECK: // %bb.0:
+; CHECK-NEXT: srhadd v0.4h, v0.4h, v1.4h
+; CHECK-NEXT: ret
+ %s0s = sext <4 x i16> %s0 to <4 x i32>
+ %s1s = sext <4 x i16> %s1 to <4 x i32>
+ %s = call <4 x i32> @llvm.aarch64.neon.urhadd.v4i32(<4 x i32> %s0s, <4 x i32> %s1s)
+ %s2 = trunc <4 x i32> %s to <4 x i16>
+ ret <4 x i16> %s2
+}
+
+define <2 x i32> @srhadd_v2i32_trunc(<2 x i32> %s0, <2 x i32> %s1) {
+; CHECK-LABEL: srhadd_v2i32_trunc:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sshll v0.2d, v0.2s, #0
+; CHECK-NEXT: sshll v1.2d, v1.2s, #0
+; CHECK-NEXT: eor v2.16b, v0.16b, v1.16b
+; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: ushr v1.2d, v2.2d, #1
+; CHECK-NEXT: sub v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: xtn v0.2s, v0.2d
+; CHECK-NEXT: ret
+ %s0s = sext <2 x i32> %s0 to <2 x i64>
+ %s1s = sext <2 x i32> %s1 to <2 x i64>
+ %s = call <2 x i64> @llvm.aarch64.neon.urhadd.v2i64(<2 x i64> %s0s, <2 x i64> %s1s)
+ %s2 = trunc <2 x i64> %s to <2 x i32>
+ ret <2 x i32> %s2
+}
+
+define <8 x i8> @urhadd_v8i8_trunc(<8 x i8> %s0, <8 x i8> %s1) {
+; CHECK-LABEL: urhadd_v8i8_trunc:
+; CHECK: // %bb.0:
+; CHECK-NEXT: urhadd v0.8b, v0.8b, v1.8b
+; CHECK-NEXT: ret
+ %s0s = zext <8 x i8> %s0 to <8 x i16>
+ %s1s = zext <8 x i8> %s1 to <8 x i16>
+ %s = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %s0s, <8 x i16> %s1s)
+ %s2 = trunc <8 x i16> %s to <8 x i8>
+ ret <8 x i8> %s2
+}
+
+define <4 x i16> @urhadd_v4i16_trunc(<4 x i16> %s0, <4 x i16> %s1) {
+; CHECK-LABEL: urhadd_v4i16_trunc:
+; CHECK: // %bb.0:
+; CHECK-NEXT: urhadd v0.4h, v0.4h, v1.4h
+; CHECK-NEXT: ret
+ %s0s = zext <4 x i16> %s0 to <4 x i32>
+ %s1s = zext <4 x i16> %s1 to <4 x i32>
+ %s = call <4 x i32> @llvm.aarch64.neon.srhadd.v4i32(<4 x i32> %s0s, <4 x i32> %s1s)
+ %s2 = trunc <4 x i32> %s to <4 x i16>
+ ret <4 x i16> %s2
+}
+
+define <2 x i32> @urhadd_v2i32_trunc(<2 x i32> %s0, <2 x i32> %s1) {
+; CHECK-LABEL: urhadd_v2i32_trunc:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z2.d, #1 // =0x1
+; CHECK-NEXT: uaddl v0.2d, v0.2s, v1.2s
+; CHECK-NEXT: add v0.2d, v0.2d, v2.2d
+; CHECK-NEXT: shrn v0.2s, v0.2d, #1
+; CHECK-NEXT: ret
+ %s0s = zext <2 x i32> %s0 to <2 x i64>
+ %s1s = zext <2 x i32> %s1 to <2 x i64>
+ %s = call <2 x i64> @llvm.aarch64.neon.srhadd.v2i64(<2 x i64> %s0s, <2 x i64> %s1s)
+ %s2 = trunc <2 x i64> %s to <2 x i32>
+ ret <2 x i32> %s2
+}
----------------
RKSimon wrote:
These should probably be added to arm64-vhadd.ll - not an SVE file
https://github.com/llvm/llvm-project/pull/163366
More information about the llvm-commits mailing list