[llvm] [DAG] SelectionDAG::canCreateUndefOrPoison - Mark AVGFLOORS and AVGCEILS as safe (PR #148191)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 17 03:48:17 PDT 2025
https://github.com/aabhinavg1 updated https://github.com/llvm/llvm-project/pull/148191
>From ae9706985032659bcd1a3b15d36ea0ff9d3d6053 Mon Sep 17 00:00:00 2001
From: aabhinavg1 <tiwariabhinavak at gmail.com>
Date: Fri, 11 Jul 2025 15:56:53 +0530
Subject: [PATCH] [DAG] SelectionDAG::canCreateUndefOrPoison - Mark AVGFLOORS
and AVGCEILS as safe
This patch updates `SelectionDAG::canCreateUndefOrPoison` to indicate that
`ISD::AVGFLOORS`, `ISD::AVGFLOORU`, `ISD::AVGCEILS` and `ISD::AVGCEILU` do not
introduce poison or undef values.
| Opcode | Operation Type | Alive2 Proof Link |
|--------------|-------------------|--------------------|
| `AVGFLOORS` | Signed Floor Avg | [Alive2 Link](https://alive2.llvm.org/ce/z/Dwy8a5) |
| `AVGCEILS` | Signed Ceil Avg | [Alive2 Link](https://alive2.llvm.org/ce/z/_JKF8A) |
| `AVGFLOORU` | Unsigned Floor Avg | [Alive2 Link](https://alive2.llvm.org/ce/z/2-B6RM) |
| `AVGCEILU` | Unsigned Ceil Avg | [Alive2 Link](https://alive2.llvm.org/ce/z/t5WZ6K) |
These patterns are safe because the `i8` operands are first extended to `i32`
(via `sext` in the signed case), so the intermediate sum cannot overflow. The
arithmetic is performed in the wider type, and the result is then truncated
back to `i8`.
Includes test coverage to ensure correctness.
---
.../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 4 +
llvm/test/CodeGen/AArch64/neon-hadd-freeze.ll | 99 +++++++++++++++++++
2 files changed, 103 insertions(+)
create mode 100644 llvm/test/CodeGen/AArch64/neon-hadd-freeze.ll
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index c1356239ad206..78f00809e3862 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5542,6 +5542,10 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
case ISD::UADDSAT:
case ISD::SSUBSAT:
case ISD::USUBSAT:
+ case ISD::AVGFLOORS:
+ case ISD::AVGFLOORU:
+ case ISD::AVGCEILS:
+ case ISD::AVGCEILU:
case ISD::MULHU:
case ISD::MULHS:
case ISD::SMIN:
diff --git a/llvm/test/CodeGen/AArch64/neon-hadd-freeze.ll b/llvm/test/CodeGen/AArch64/neon-hadd-freeze.ll
new file mode 100644
index 0000000000000..9e51e5e0dab1e
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/neon-hadd-freeze.ll
@@ -0,0 +1,99 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+
+; RUN: llc < %s -march=arm64 -mattr=+neon | FileCheck %s
+; Test that the presence of 'freeze' does not block instruction selection of:
+; - uhadd
+; - urhadd
+; - shadd
+; - srhadd
+
+declare <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16>, <8 x i16>)
+declare <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16>, <8 x i16>)
+declare <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16>, <8 x i16>)
+declare <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16>, <8 x i16>)
+
+;===---------------------------------------------------------------------===;
+; Test: freeze does not block uhadd instruction selection
+;===---------------------------------------------------------------------===;
+
+define <8 x i16> @uhadd_freeze(<8 x i16> %a0, <8 x i16> %a1) {
+; CHECK-LABEL: uhadd_freeze:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v2.8h, #15
+; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-NEXT: and v1.16b, v1.16b, v2.16b
+; CHECK-NEXT: movi v2.8h, #31
+; CHECK-NEXT: uhadd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-NEXT: ret
+ %m0 = and <8 x i16> %a0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+ %m1 = and <8 x i16> %a1, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+ %avg = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %m0, <8 x i16> %m1)
+ %frozen = freeze <8 x i16> %avg
+ %masked = and <8 x i16> %frozen, <i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31>
+ ret <8 x i16> %masked
+}
+
+;===---------------------------------------------------------------------===;
+; Test: freeze does not block urhadd instruction selection
+;===---------------------------------------------------------------------===;
+
+define <8 x i16> @urhadd_freeze(<8 x i16> %a0, <8 x i16> %a1) {
+; CHECK-LABEL: urhadd_freeze:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v2.8h, #15
+; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-NEXT: and v1.16b, v1.16b, v2.16b
+; CHECK-NEXT: movi v2.8h, #31
+; CHECK-NEXT: urhadd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-NEXT: ret
+ %m0 = and <8 x i16> %a0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+ %m1 = and <8 x i16> %a1, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+ %avg = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %m0, <8 x i16> %m1)
+ %frozen = freeze <8 x i16> %avg
+ %masked = and <8 x i16> %frozen, <i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31>
+ ret <8 x i16> %masked
+}
+
+;===---------------------------------------------------------------------===;
+; Test: freeze does not block shadd instruction selection
+;===---------------------------------------------------------------------===;
+
+define <8 x i16> @shadd_freeze(<8 x i16> %a0, <8 x i16> %a1) {
+; CHECK-LABEL: shadd_freeze:
+; CHECK: // %bb.0:
+; CHECK-NEXT: bic v0.8h, #15
+; CHECK-NEXT: bic v1.8h, #15
+; CHECK-NEXT: movi v2.8h, #63
+; CHECK-NEXT: shadd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-NEXT: ret
+ %m0 = and <8 x i16> %a0, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+ %m1 = and <8 x i16> %a1, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+ %avg = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %m0, <8 x i16> %m1)
+ %frozen = freeze <8 x i16> %avg
+ %masked = and <8 x i16> %frozen, <i16 63, i16 63, i16 63, i16 63, i16 63, i16 63, i16 63, i16 63>
+ ret <8 x i16> %masked
+}
+
+;===---------------------------------------------------------------------===;
+; Test: freeze does not block srhadd instruction selection
+;===---------------------------------------------------------------------===;
+
+define <8 x i16> @srhadd_freeze(<8 x i16> %a0, <8 x i16> %a1) {
+; CHECK-LABEL: srhadd_freeze:
+; CHECK: // %bb.0:
+; CHECK-NEXT: bic v0.8h, #15
+; CHECK-NEXT: bic v1.8h, #15
+; CHECK-NEXT: movi v2.8h, #63
+; CHECK-NEXT: srhadd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-NEXT: ret
+ %m0 = and <8 x i16> %a0, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+ %m1 = and <8 x i16> %a1, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+ %avg = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %m0, <8 x i16> %m1)
+ %frozen = freeze <8 x i16> %avg
+ %masked = and <8 x i16> %frozen, <i16 63, i16 63, i16 63, i16 63, i16 63, i16 63, i16 63, i16 63>
+ ret <8 x i16> %masked
+}
More information about the llvm-commits
mailing list