[llvm] [DAG] Fold trunc(avg(x, y)) for avgceil/floor u/s nodes if they have sufficient leading zero/sign bits (PR #152273)

Wed Aug 6 06:54:59 PDT 2025

================
@@ -0,0 +1,43 @@
+; RUN: llc -mtriple=aarch64-- -O2 -mattr=+neon < %s | FileCheck %s
+
+; CHECK-LABEL: test_avgceil_u
+; CHECK: uhadd v0.8b, v0.8b, v1.8b
+define <8 x i8> @test_avgceil_u(<8 x i16> %a, <8 x i16> %b) {
+  %ta = trunc <8 x i16> %a to <8 x i8>
+  %tb = trunc <8 x i16> %b to <8 x i8>
+  %res = call <8 x i8> @llvm.aarch64.neon.uhadd.v8i8(<8 x i8> %ta, <8 x i8> %tb)
+  ret <8 x i8> %res
+}
----------------
RKSimon wrote:

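These don't look right - the trunc is applied to the operands before the avg, so there is no trunc(avg(x, y)) pattern for the new fold to match. You might have to do something like this, where the avg is performed at the wider type on inputs with known leading zero bits and the result is truncated afterwards: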
```
define <8 x i8> @test_avgceil_u(<8 x i16> %a, <8 x i8> %b) { 
  %ta = and <8 x i16> %a, splat (i16 15)
  %tb = zext <8 x i8> %b to <8 x i16>
  %avg = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %ta, <8 x i16> %tb)
  %res = trunc <8 x i16> %avg to <8 x i8>
  ret <8 x i8> %res
}
```

https://github.com/llvm/llvm-project/pull/152273