[llvm] [DAG] Add TRUNCATE_SSAT_S/U and TRUNCATE_USAT_U to canCreateUndefOrPoison and computeKnownBits (#152143) (PR #168809)

Jerry Dang via llvm-commits llvm-commits at lists.llvm.org
Wed Nov 26 19:11:08 PST 2025


================
@@ -0,0 +1,82 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64 | FileCheck %s
+
+;; ============================================================================
+;; Tests for canCreateUndefOrPoison = false
+;; These verify that freeze operations are correctly eliminated
+;; ============================================================================
+
+; TRUNCATE_SSAT_S: No saturation path
+define i1 @sqxtn_no_sat_with_freeze(<4 x i32> %x) {
+; CHECK-LABEL: sqxtn_no_sat_with_freeze:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.4s, #100
+; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
+; CHECK-NEXT:    sqxtn v0.4h, v0.4s
+; CHECK-NEXT:    umov w8, v0.h[0]
+; CHECK-NEXT:    and w8, w8, #0xfffc
+; CHECK-NEXT:    cmp w8, #200
+; CHECK-NEXT:    cset w0, gt
+; CHECK-NEXT:    ret
+  %masked = and <4 x i32> %x, <i32 100, i32 100, i32 100, i32 100>
+  %trunc = call <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32> %masked)
+  %freeze = freeze <4 x i16> %trunc
+  %extract = extractelement <4 x i16> %freeze, i32 0
+  ; Input is [0,100], so result > 200 is always false
+  %cmp = icmp sgt i16 %extract, 200
+  ret i1 %cmp
+}
+
+; TRUNCATE_SSAT_S: Test specific known bits
+define i16 @sqxtn_known_bits(<4 x i32> %x) {
+; CHECK-LABEL: sqxtn_known_bits:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w0, wzr
+; CHECK-NEXT:    ret
+  ; Input: [0, 32512] fits in i16 without saturation
+  %masked = and <4 x i32> %x, <i32 32512, i32 32512, i32 32512, i32 32512>
+  %trunc = call <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32> %masked)
+  %freeze = freeze <4 x i16> %trunc
+  %extract = extractelement <4 x i16> %freeze, i32 0
+  ; Mask to the lower 7 bits - the input mask is 0x7F00, so with KnownBits
+  ; through sqxtn the low 8 bits are known to be 0 and this AND folds to 0
+  %and = and i16 %extract, 127
+  ret i16 %and
+}
+
+;; ============================================================================
----------------
kuroyukiasuna wrote:

I was able to create working tests for TRUNCATE_SSAT_U/TRUNCATE_USAT_U with values like 0x7F00 that do not trigger the MOVI immediate encoding optimization.

We probably want to add `computeKnownBits` support for `AArch64ISD::MOVIedit` and related nodes. That would enable constant propagation through them and unblock optimizations like constant folding, which would in turn make use of our changes in SelectionDAG.

https://github.com/llvm/llvm-project/pull/168809


More information about the llvm-commits mailing list