[llvm] [DAG] SelectionDAG::canCreateUndefOrPoison - Mark AVGFLOORS and AVGCEILS as safe (PR #148191)

Thu Jul 17 03:48:17 PDT 2025

https://github.com/aabhinavg1 updated https://github.com/llvm/llvm-project/pull/148191

>From ae9706985032659bcd1a3b15d36ea0ff9d3d6053 Mon Sep 17 00:00:00 2001
From: aabhinavg1 <tiwariabhinavak at gmail.com>
Date: Fri, 11 Jul 2025 15:56:53 +0530
Subject: [PATCH] [DAG] SelectionDAG::canCreateUndefOrPoison - Mark AVGFLOORS
 and AVGCEILS as safe

This patch updates `SelectionDAG::canCreateUndefOrPoison` to indicate that
`ISD::AVGFLOORS`, `ISD::AVGFLOORU`, `ISD::AVGCEILS` and `ISD::AVGCEILU` do not
introduce poison or undef values.

| Opcode       | Operation Type    | Alive2 Proof Link |
|--------------|-------------------|--------------------|
| `AVGFLOORS`  | Signed Floor Avg  | [Alive2 Link](https://alive2.llvm.org/ce/z/Dwy8a5) |
| `AVGCEILS`   | Signed Ceil Avg   | [Alive2 Link](https://alive2.llvm.org/ce/z/_JKF8A) |
| `AVGFLOORU`  | Unsigned Floor Avg| [Alive2 Link](https://alive2.llvm.org/ce/z/2-B6RM) |
| `AVGCEILU`   | Unsigned Ceil Avg | [Alive2 Link](https://alive2.llvm.org/ce/z/t5WZ6K) |

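These patterns are safe because the `i8` operands are first extended to `i32`
(`sext` for the signed variants, `zext` for the unsigned ones), so the
addition is performed in the wider type and cannot overflow. The averaged
result always fits in the original width, so truncating it back to `i8` loses
no information.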

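Adds `llvm/test/CodeGen/AArch64/neon-hadd-freeze.ll`, which checks that a
`freeze` of the result does not block instruction selection of `uhadd`,
`urhadd`, `shadd` and `srhadd`.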
---
 .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp |  4 +
 llvm/test/CodeGen/AArch64/neon-hadd-freeze.ll | 99 +++++++++++++++++++
 2 files changed, 103 insertions(+)
 create mode 100644 llvm/test/CodeGen/AArch64/neon-hadd-freeze.ll

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index c1356239ad206..78f00809e3862 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5542,6 +5542,10 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
   case ISD::UADDSAT:
   case ISD::SSUBSAT:
   case ISD::USUBSAT:
+  case ISD::AVGFLOORS:
+  case ISD::AVGFLOORU:
+  case ISD::AVGCEILS:
+  case ISD::AVGCEILU:
   case ISD::MULHU:
   case ISD::MULHS:
   case ISD::SMIN:
diff --git a/llvm/test/CodeGen/AArch64/neon-hadd-freeze.ll b/llvm/test/CodeGen/AArch64/neon-hadd-freeze.ll
new file mode 100644
index 0000000000000..9e51e5e0dab1e
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/neon-hadd-freeze.ll
@@ -0,0 +1,99 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+
+; RUN: llc < %s -march=arm64 -mattr=+neon | FileCheck %s
+; Test that the presence of 'freeze' does not block instruction selection of:
+; - uhadd
+; - urhadd
+; - shadd
+; - srhadd
+
+declare <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16>, <8 x i16>)
+declare <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16>, <8 x i16>)
+declare <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16>, <8 x i16>)
+declare <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16>, <8 x i16>)
+
+;===---------------------------------------------------------------------===;
+; Test: freeze does not block uhadd instruction selection
+;===---------------------------------------------------------------------===;
+
+define <8 x i16> @uhadd_freeze(<8 x i16> %a0, <8 x i16> %a1) {
+; CHECK-LABEL: uhadd_freeze:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v2.8h, #15
+; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
+; CHECK-NEXT:    and v1.16b, v1.16b, v2.16b
+; CHECK-NEXT:    movi v2.8h, #31
+; CHECK-NEXT:    uhadd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
+; CHECK-NEXT:    ret
+  %m0 = and <8 x i16> %a0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+  %m1 = and <8 x i16> %a1, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+  %avg = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %m0, <8 x i16> %m1)
+  %frozen = freeze <8 x i16> %avg
+  %masked = and <8 x i16> %frozen, <i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31>
+  ret <8 x i16> %masked
+}
+
+;===---------------------------------------------------------------------===;
+; Test: freeze does not block urhadd instruction selection
+;===---------------------------------------------------------------------===;
+
+define <8 x i16> @urhadd_freeze(<8 x i16> %a0, <8 x i16> %a1) {
+; CHECK-LABEL: urhadd_freeze:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v2.8h, #15
+; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
+; CHECK-NEXT:    and v1.16b, v1.16b, v2.16b
+; CHECK-NEXT:    movi v2.8h, #31
+; CHECK-NEXT:    urhadd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
+; CHECK-NEXT:    ret
+  %m0 = and <8 x i16> %a0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+  %m1 = and <8 x i16> %a1, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+  %avg = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %m0, <8 x i16> %m1)
+  %frozen = freeze <8 x i16> %avg
+  %masked = and <8 x i16> %frozen, <i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31>
+  ret <8 x i16> %masked
+}
+
+;===---------------------------------------------------------------------===;
+; Test: freeze does not block shadd instruction selection
+;===---------------------------------------------------------------------===;
+
+define <8 x i16> @shadd_freeze(<8 x i16> %a0, <8 x i16> %a1) {
+; CHECK-LABEL: shadd_freeze:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bic v0.8h, #15
+; CHECK-NEXT:    bic v1.8h, #15
+; CHECK-NEXT:    movi v2.8h, #63
+; CHECK-NEXT:    shadd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
+; CHECK-NEXT:    ret
+  %m0 = and <8 x i16> %a0, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+  %m1 = and <8 x i16> %a1, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+  %avg = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %m0, <8 x i16> %m1)
+  %frozen = freeze <8 x i16> %avg
+  %masked = and <8 x i16> %frozen, <i16 63, i16 63, i16 63, i16 63, i16 63, i16 63, i16 63, i16 63>
+  ret <8 x i16> %masked
+}
+
+;===---------------------------------------------------------------------===;
+; Test: freeze does not block srhadd instruction selection
+;===---------------------------------------------------------------------===;
+
+define <8 x i16> @srhadd_freeze(<8 x i16> %a0, <8 x i16> %a1) {
+; CHECK-LABEL: srhadd_freeze:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bic v0.8h, #15
+; CHECK-NEXT:    bic v1.8h, #15
+; CHECK-NEXT:    movi v2.8h, #63
+; CHECK-NEXT:    srhadd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
+; CHECK-NEXT:    ret
+  %m0 = and <8 x i16> %a0, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+  %m1 = and <8 x i16> %a1, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+  %avg = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %m0, <8 x i16> %m1)
+  %frozen = freeze <8 x i16> %avg
+  %masked = and <8 x i16> %frozen, <i16 63, i16 63, i16 63, i16 63, i16 63, i16 63, i16 63, i16 63>
+  ret <8 x i16> %masked
+}