[llvm] [AArch64][SVE] Combine UXT[BHW] intrinsics to AND. (PR #137956)
Ricardo Jesus via llvm-commits
llvm-commits at lists.llvm.org
Thu May 1 06:30:59 PDT 2025
https://github.com/rj-jesus updated https://github.com/llvm/llvm-project/pull/137956
>From ed0ee9aba8f4e13d97cf606a683c7e5c13a5b77c Mon Sep 17 00:00:00 2001
From: Ricardo Jesus <rjj at nvidia.com>
Date: Wed, 30 Apr 2025 02:21:38 -0700
Subject: [PATCH 1/6] Precommit tests.
---
.../InstCombine/AArch64/sve-intrinsic-uxt.ll | 342 ++++++++++++++++++
1 file changed, 342 insertions(+)
create mode 100644 llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-uxt.ll
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-uxt.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-uxt.ll
new file mode 100644
index 0000000000000..e194d6346e64e
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-uxt.ll
@@ -0,0 +1,342 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=instcombine < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+define <vscale x 2 x i64> @uxtb_z_64(<vscale x 2 x i64> %0) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @uxtb_z_64(
+; CHECK-SAME: <vscale x 2 x i64> [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxtb.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[TMP0]])
+; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]]
+;
+ %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxtb.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> %0)
+ ret <vscale x 2 x i64> %2
+}
+
+define <vscale x 2 x i64> @uxtb_m_64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @uxtb_m_64(
+; CHECK-SAME: <vscale x 2 x i64> [[TMP0:%.*]], <vscale x 2 x i64> [[TMP1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxtb.nxv2i64(<vscale x 2 x i64> [[TMP1]], <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[TMP0]])
+; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP3]]
+;
+ %3 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxtb.nxv2i64(<vscale x 2 x i64> %1, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> %0)
+ ret <vscale x 2 x i64> %3
+}
+
+define <vscale x 2 x i64> @uxtb_x_64(<vscale x 16 x i1> %0, <vscale x 2 x i64> %1) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @uxtb_x_64(
+; CHECK-SAME: <vscale x 16 x i1> [[TMP0:%.*]], <vscale x 2 x i64> [[TMP1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[TMP0]])
+; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxtb.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> [[TMP3]], <vscale x 2 x i64> [[TMP1]])
+; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP4]]
+;
+ %3 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %0)
+ %4 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxtb.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %3, <vscale x 2 x i64> %1)
+ ret <vscale x 2 x i64> %4
+}
+
+define <vscale x 2 x i64> @uxtb_z_64_no_ptrue(<vscale x 16 x i1> %0, <vscale x 2 x i64> %1) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @uxtb_z_64_no_ptrue(
+; CHECK-SAME: <vscale x 16 x i1> [[TMP0:%.*]], <vscale x 2 x i64> [[TMP1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[TMP0]])
+; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxtb.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> [[TMP3]], <vscale x 2 x i64> [[TMP1]])
+; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP4]]
+;
+ %3 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %0)
+ %4 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxtb.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> %3, <vscale x 2 x i64> %1)
+ ret <vscale x 2 x i64> %4
+}
+
+define <vscale x 2 x i64> @uxtb_m_64_no_ptrue(<vscale x 16 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @uxtb_m_64_no_ptrue(
+; CHECK-SAME: <vscale x 16 x i1> [[TMP0:%.*]], <vscale x 2 x i64> [[TMP1:%.*]], <vscale x 2 x i64> [[TMP2:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[TMP0]])
+; CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxtb.nxv2i64(<vscale x 2 x i64> [[TMP2]], <vscale x 2 x i1> [[TMP4]], <vscale x 2 x i64> [[TMP1]])
+; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP5]]
+;
+ %4 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %0)
+ %5 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxtb.nxv2i64(<vscale x 2 x i64> %2, <vscale x 2 x i1> %4, <vscale x 2 x i64> %1)
+ ret <vscale x 2 x i64> %5
+}
+
+define <vscale x 4 x i32> @uxtb_z_32(<vscale x 4 x i32> %0) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @uxtb_z_32(
+; CHECK-SAME: <vscale x 4 x i32> [[TMP0:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uxtb.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[TMP0]])
+; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]]
+;
+ %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uxtb.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> %0)
+ ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @uxtb_m_32(<vscale x 4 x i32> %0, <vscale x 4 x i32> %1) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @uxtb_m_32(
+; CHECK-SAME: <vscale x 4 x i32> [[TMP0:%.*]], <vscale x 4 x i32> [[TMP1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uxtb.nxv4i32(<vscale x 4 x i32> [[TMP1]], <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[TMP0]])
+; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP3]]
+;
+ %3 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uxtb.nxv4i32(<vscale x 4 x i32> %1, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> %0)
+ ret <vscale x 4 x i32> %3
+}
+
+define <vscale x 4 x i32> @uxtb_x_32(<vscale x 16 x i1> %0, <vscale x 4 x i32> %1) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @uxtb_x_32(
+; CHECK-SAME: <vscale x 16 x i1> [[TMP0:%.*]], <vscale x 4 x i32> [[TMP1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[TMP0]])
+; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uxtb.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> [[TMP3]], <vscale x 4 x i32> [[TMP1]])
+; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP4]]
+;
+ %3 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %0)
+ %4 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uxtb.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %3, <vscale x 4 x i32> %1)
+ ret <vscale x 4 x i32> %4
+}
+
+define <vscale x 4 x i32> @uxtb_z_32_no_ptrue(<vscale x 16 x i1> %0, <vscale x 4 x i32> %1) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @uxtb_z_32_no_ptrue(
+; CHECK-SAME: <vscale x 16 x i1> [[TMP0:%.*]], <vscale x 4 x i32> [[TMP1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[TMP0]])
+; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uxtb.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> [[TMP3]], <vscale x 4 x i32> [[TMP1]])
+; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP4]]
+;
+ %3 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %0)
+ %4 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uxtb.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> %3, <vscale x 4 x i32> %1)
+ ret <vscale x 4 x i32> %4
+}
+
+define <vscale x 4 x i32> @uxtb_m_32_no_ptrue(<vscale x 16 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @uxtb_m_32_no_ptrue(
+; CHECK-SAME: <vscale x 16 x i1> [[TMP0:%.*]], <vscale x 4 x i32> [[TMP1:%.*]], <vscale x 4 x i32> [[TMP2:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[TMP0]])
+; CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uxtb.nxv4i32(<vscale x 4 x i32> [[TMP2]], <vscale x 4 x i1> [[TMP4]], <vscale x 4 x i32> [[TMP1]])
+; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP5]]
+;
+ %4 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %0)
+ %5 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uxtb.nxv4i32(<vscale x 4 x i32> %2, <vscale x 4 x i1> %4, <vscale x 4 x i32> %1)
+ ret <vscale x 4 x i32> %5
+}
+
+define <vscale x 8 x i16> @uxtb_z_16(<vscale x 8 x i16> %0) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @uxtb_z_16(
+; CHECK-SAME: <vscale x 8 x i16> [[TMP0:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uxtb.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[TMP0]])
+; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]]
+;
+ %2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uxtb.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> %0)
+ ret <vscale x 8 x i16> %2
+}
+
+define <vscale x 8 x i16> @uxtb_m_16(<vscale x 8 x i16> %0, <vscale x 8 x i16> %1) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @uxtb_m_16(
+; CHECK-SAME: <vscale x 8 x i16> [[TMP0:%.*]], <vscale x 8 x i16> [[TMP1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uxtb.nxv8i16(<vscale x 8 x i16> [[TMP1]], <vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[TMP0]])
+; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP3]]
+;
+ %3 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uxtb.nxv8i16(<vscale x 8 x i16> %1, <vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> %0)
+ ret <vscale x 8 x i16> %3
+}
+
+define <vscale x 8 x i16> @uxtb_x_16(<vscale x 16 x i1> %0, <vscale x 8 x i16> %1) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @uxtb_x_16(
+; CHECK-SAME: <vscale x 16 x i1> [[TMP0:%.*]], <vscale x 8 x i16> [[TMP1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[TMP0]])
+; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uxtb.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> [[TMP3]], <vscale x 8 x i16> [[TMP1]])
+; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP4]]
+;
+ %3 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %0)
+ %4 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uxtb.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %3, <vscale x 8 x i16> %1)
+ ret <vscale x 8 x i16> %4
+}
+
+define <vscale x 8 x i16> @uxtb_z_16_no_ptrue(<vscale x 16 x i1> %0, <vscale x 8 x i16> %1) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @uxtb_z_16_no_ptrue(
+; CHECK-SAME: <vscale x 16 x i1> [[TMP0:%.*]], <vscale x 8 x i16> [[TMP1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[TMP0]])
+; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uxtb.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> [[TMP3]], <vscale x 8 x i16> [[TMP1]])
+; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP4]]
+;
+ %3 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %0)
+ %4 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uxtb.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> %3, <vscale x 8 x i16> %1)
+ ret <vscale x 8 x i16> %4
+}
+
+define <vscale x 8 x i16> @uxtb_m_16_no_ptrue(<vscale x 16 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @uxtb_m_16_no_ptrue(
+; CHECK-SAME: <vscale x 16 x i1> [[TMP0:%.*]], <vscale x 8 x i16> [[TMP1:%.*]], <vscale x 8 x i16> [[TMP2:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[TMP0]])
+; CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uxtb.nxv8i16(<vscale x 8 x i16> [[TMP2]], <vscale x 8 x i1> [[TMP4]], <vscale x 8 x i16> [[TMP1]])
+; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP5]]
+;
+ %4 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %0)
+ %5 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uxtb.nxv8i16(<vscale x 8 x i16> %2, <vscale x 8 x i1> %4, <vscale x 8 x i16> %1)
+ ret <vscale x 8 x i16> %5
+}
+
+define <vscale x 2 x i64> @uxth_z_64(<vscale x 2 x i64> %0) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @uxth_z_64(
+; CHECK-SAME: <vscale x 2 x i64> [[TMP0:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxth.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[TMP0]])
+; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]]
+;
+ %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxth.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> %0)
+ ret <vscale x 2 x i64> %2
+}
+
+define <vscale x 2 x i64> @uxth_m_64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @uxth_m_64(
+; CHECK-SAME: <vscale x 2 x i64> [[TMP0:%.*]], <vscale x 2 x i64> [[TMP1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxth.nxv2i64(<vscale x 2 x i64> [[TMP1]], <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[TMP0]])
+; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP3]]
+;
+ %3 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxth.nxv2i64(<vscale x 2 x i64> %1, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> %0)
+ ret <vscale x 2 x i64> %3
+}
+
+define <vscale x 2 x i64> @uxth_x_64(<vscale x 16 x i1> %0, <vscale x 2 x i64> %1) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @uxth_x_64(
+; CHECK-SAME: <vscale x 16 x i1> [[TMP0:%.*]], <vscale x 2 x i64> [[TMP1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[TMP0]])
+; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxth.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> [[TMP3]], <vscale x 2 x i64> [[TMP1]])
+; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP4]]
+;
+ %3 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %0)
+ %4 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxth.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %3, <vscale x 2 x i64> %1)
+ ret <vscale x 2 x i64> %4
+}
+
+define <vscale x 2 x i64> @uxth_z_64_no_ptrue(<vscale x 16 x i1> %0, <vscale x 2 x i64> %1) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @uxth_z_64_no_ptrue(
+; CHECK-SAME: <vscale x 16 x i1> [[TMP0:%.*]], <vscale x 2 x i64> [[TMP1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[TMP0]])
+; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxth.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> [[TMP3]], <vscale x 2 x i64> [[TMP1]])
+; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP4]]
+;
+ %3 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %0)
+ %4 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxth.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> %3, <vscale x 2 x i64> %1)
+ ret <vscale x 2 x i64> %4
+}
+
+define <vscale x 2 x i64> @uxth_m_64_no_ptrue(<vscale x 16 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @uxth_m_64_no_ptrue(
+; CHECK-SAME: <vscale x 16 x i1> [[TMP0:%.*]], <vscale x 2 x i64> [[TMP1:%.*]], <vscale x 2 x i64> [[TMP2:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[TMP0]])
+; CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxth.nxv2i64(<vscale x 2 x i64> [[TMP2]], <vscale x 2 x i1> [[TMP4]], <vscale x 2 x i64> [[TMP1]])
+; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP5]]
+;
+ %4 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %0)
+ %5 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxth.nxv2i64(<vscale x 2 x i64> %2, <vscale x 2 x i1> %4, <vscale x 2 x i64> %1)
+ ret <vscale x 2 x i64> %5
+}
+
+define <vscale x 4 x i32> @uxth_z_32(<vscale x 4 x i32> %0) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @uxth_z_32(
+; CHECK-SAME: <vscale x 4 x i32> [[TMP0:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uxth.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[TMP0]])
+; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]]
+;
+ %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uxth.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> %0)
+ ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @uxth_m_32(<vscale x 4 x i32> %0, <vscale x 4 x i32> %1) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @uxth_m_32(
+; CHECK-SAME: <vscale x 4 x i32> [[TMP0:%.*]], <vscale x 4 x i32> [[TMP1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uxth.nxv4i32(<vscale x 4 x i32> [[TMP1]], <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[TMP0]])
+; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP3]]
+;
+ %3 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uxth.nxv4i32(<vscale x 4 x i32> %1, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> %0)
+ ret <vscale x 4 x i32> %3
+}
+
+define <vscale x 4 x i32> @uxth_x_32(<vscale x 16 x i1> %0, <vscale x 4 x i32> %1) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @uxth_x_32(
+; CHECK-SAME: <vscale x 16 x i1> [[TMP0:%.*]], <vscale x 4 x i32> [[TMP1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[TMP0]])
+; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uxth.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> [[TMP3]], <vscale x 4 x i32> [[TMP1]])
+; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP4]]
+;
+ %3 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %0)
+ %4 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uxth.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %3, <vscale x 4 x i32> %1)
+ ret <vscale x 4 x i32> %4
+}
+
+define <vscale x 4 x i32> @uxth_z_32_no_ptrue(<vscale x 16 x i1> %0, <vscale x 4 x i32> %1) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @uxth_z_32_no_ptrue(
+; CHECK-SAME: <vscale x 16 x i1> [[TMP0:%.*]], <vscale x 4 x i32> [[TMP1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[TMP0]])
+; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uxth.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> [[TMP3]], <vscale x 4 x i32> [[TMP1]])
+; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP4]]
+;
+ %3 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %0)
+ %4 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uxth.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> %3, <vscale x 4 x i32> %1)
+ ret <vscale x 4 x i32> %4
+}
+
+define <vscale x 4 x i32> @uxth_m_32_no_ptrue(<vscale x 16 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @uxth_m_32_no_ptrue(
+; CHECK-SAME: <vscale x 16 x i1> [[TMP0:%.*]], <vscale x 4 x i32> [[TMP1:%.*]], <vscale x 4 x i32> [[TMP2:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[TMP0]])
+; CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uxth.nxv4i32(<vscale x 4 x i32> [[TMP2]], <vscale x 4 x i1> [[TMP4]], <vscale x 4 x i32> [[TMP1]])
+; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP5]]
+;
+ %4 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %0)
+ %5 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uxth.nxv4i32(<vscale x 4 x i32> %2, <vscale x 4 x i1> %4, <vscale x 4 x i32> %1)
+ ret <vscale x 4 x i32> %5
+}
+
+define <vscale x 2 x i64> @uxtw_z_64(<vscale x 2 x i64> %0) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @uxtw_z_64(
+; CHECK-SAME: <vscale x 2 x i64> [[TMP0:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[TMP0]])
+; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]]
+;
+ %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> %0)
+ ret <vscale x 2 x i64> %2
+}
+
+define <vscale x 2 x i64> @uxtw_m_64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @uxtw_m_64(
+; CHECK-SAME: <vscale x 2 x i64> [[TMP0:%.*]], <vscale x 2 x i64> [[TMP1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> [[TMP1]], <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[TMP0]])
+; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP3]]
+;
+ %3 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> %1, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> %0)
+ ret <vscale x 2 x i64> %3
+}
+
+define <vscale x 2 x i64> @uxtw_x_64(<vscale x 16 x i1> %0, <vscale x 2 x i64> %1) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @uxtw_x_64(
+; CHECK-SAME: <vscale x 16 x i1> [[TMP0:%.*]], <vscale x 2 x i64> [[TMP1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[TMP0]])
+; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> [[TMP3]], <vscale x 2 x i64> [[TMP1]])
+; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP4]]
+;
+ %3 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %0)
+ %4 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %3, <vscale x 2 x i64> %1)
+ ret <vscale x 2 x i64> %4
+}
+
+define <vscale x 2 x i64> @uxtw_z_64_no_ptrue(<vscale x 16 x i1> %0, <vscale x 2 x i64> %1) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @uxtw_z_64_no_ptrue(
+; CHECK-SAME: <vscale x 16 x i1> [[TMP0:%.*]], <vscale x 2 x i64> [[TMP1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[TMP0]])
+; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> [[TMP3]], <vscale x 2 x i64> [[TMP1]])
+; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP4]]
+;
+ %3 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %0)
+ %4 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> %3, <vscale x 2 x i64> %1)
+ ret <vscale x 2 x i64> %4
+}
+
+define <vscale x 2 x i64> @uxtw_m_64_no_ptrue(<vscale x 16 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @uxtw_m_64_no_ptrue(
+; CHECK-SAME: <vscale x 16 x i1> [[TMP0:%.*]], <vscale x 2 x i64> [[TMP1:%.*]], <vscale x 2 x i64> [[TMP2:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[TMP0]])
+; CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> [[TMP2]], <vscale x 2 x i1> [[TMP4]], <vscale x 2 x i64> [[TMP1]])
+; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP5]]
+;
+ %4 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %0)
+ %5 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> %2, <vscale x 2 x i1> %4, <vscale x 2 x i64> %1)
+ ret <vscale x 2 x i64> %5
+}
+
+attributes #0 = { "target-features"="+sve" }
>From a381043a2888e28a9eb59d2df8860467e1d6b201 Mon Sep 17 00:00:00 2001
From: Ricardo Jesus <rjj at nvidia.com>
Date: Wed, 30 Apr 2025 01:02:57 -0700
Subject: [PATCH 2/6] [AArch64][SVE] Combine UXT[BHW] intrinsics to AND.
Currently, we lower UXT[BHW] intrinsics into the corresponding UXT*
instruction. However, when the governing predicate is all-true or the
passthrough is undef (e.g. in the case of "don't care" merging), we
can lower them into AND immediate instructions instead.
For example:
```cpp
svuint64_t foo_z(svuint64_t x) {
return svextb_z(svptrue_b64(), x);
}
```
Currently:
```
foo_z:
ptrue p0.d
movi v1.2d, #0000000000000000
uxtb z0.d, p0/m, z0.d
ret
```
Becomes:
```
foo_z:
and z0.d, z0.d, #0xff
ret
```
We do this combine early, in InstCombine, in case it unblocks other
simplifications.
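At the IR level, the transform simply replaces the intrinsic call with an AND
against a splat of the low-bits mask. A minimal sketch for the all-true uxtb
case on nxv2i64 (mirroring the uxtb_z_64 test below; the value names %x and %r
are illustrative):
```llvm
; Before: zeroing uxtb with an all-true governing predicate.
%r = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxtb.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> %x)

; After: an AND with the low 8 bits set (255 for uxtb; 65535 for uxth, 4294967295 for uxtw).
%r = and <vscale x 2 x i64> %x, splat (i64 255)
```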
---
.../AArch64/AArch64TargetTransformInfo.cpp | 26 ++++++++++++
.../InstCombine/AArch64/sve-intrinsic-uxt.ll | 42 ++++++++-----------
2 files changed, 44 insertions(+), 24 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 594f1bff5c458..e9050d184f0f7 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -2640,6 +2640,26 @@ static std::optional<Instruction *> instCombinePTrue(InstCombiner &IC,
return std::nullopt;
}
+static std::optional<Instruction *> instCombineSVEUxt(InstCombiner &IC,
+ IntrinsicInst &II,
+ unsigned NumBits) {
+ Value *Passthru = II.getOperand(0);
+ Value *Pg = II.getOperand(1);
+ Value *Op = II.getOperand(2);
+
+ // Convert UXT[BHW] to AND.
+ if (isa<UndefValue>(Passthru) || isAllActivePredicate(Pg)) {
+ auto *Ty = cast<VectorType>(II.getType());
+ auto MaskValue = APInt::getLowBitsSet(Ty->getScalarSizeInBits(), NumBits);
+ auto *Mask = ConstantVector::getSplat(
+ Ty->getElementCount(),
+ ConstantInt::get(Ty->getElementType(), MaskValue));
+ return IC.replaceInstUsesWith(II, IC.Builder.CreateAnd(Op, Mask));
+ }
+
+ return std::nullopt;
+}
+
std::optional<Instruction *>
AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
IntrinsicInst &II) const {
@@ -2745,6 +2765,12 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
return instCombineSVEInsr(IC, II);
case Intrinsic::aarch64_sve_ptrue:
return instCombinePTrue(IC, II);
+ case Intrinsic::aarch64_sve_uxtb:
+ return instCombineSVEUxt(IC, II, 8);
+ case Intrinsic::aarch64_sve_uxth:
+ return instCombineSVEUxt(IC, II, 16);
+ case Intrinsic::aarch64_sve_uxtw:
+ return instCombineSVEUxt(IC, II, 32);
}
return std::nullopt;
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-uxt.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-uxt.ll
index e194d6346e64e..86986b510aa27 100644
--- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-uxt.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-uxt.ll
@@ -6,7 +6,7 @@ target triple = "aarch64-unknown-linux-gnu"
define <vscale x 2 x i64> @uxtb_z_64(<vscale x 2 x i64> %0) #0 {
; CHECK-LABEL: define <vscale x 2 x i64> @uxtb_z_64(
; CHECK-SAME: <vscale x 2 x i64> [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxtb.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[TMP0]])
+; CHECK-NEXT: [[TMP2:%.*]] = and <vscale x 2 x i64> [[TMP0]], splat (i64 255)
; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]]
;
%2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxtb.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> %0)
@@ -16,7 +16,7 @@ define <vscale x 2 x i64> @uxtb_z_64(<vscale x 2 x i64> %0) #0 {
define <vscale x 2 x i64> @uxtb_m_64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1) #0 {
; CHECK-LABEL: define <vscale x 2 x i64> @uxtb_m_64(
; CHECK-SAME: <vscale x 2 x i64> [[TMP0:%.*]], <vscale x 2 x i64> [[TMP1:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxtb.nxv2i64(<vscale x 2 x i64> [[TMP1]], <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[TMP0]])
+; CHECK-NEXT: [[TMP3:%.*]] = and <vscale x 2 x i64> [[TMP0]], splat (i64 255)
; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP3]]
;
%3 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxtb.nxv2i64(<vscale x 2 x i64> %1, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> %0)
@@ -26,8 +26,7 @@ define <vscale x 2 x i64> @uxtb_m_64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %
define <vscale x 2 x i64> @uxtb_x_64(<vscale x 16 x i1> %0, <vscale x 2 x i64> %1) #0 {
; CHECK-LABEL: define <vscale x 2 x i64> @uxtb_x_64(
; CHECK-SAME: <vscale x 16 x i1> [[TMP0:%.*]], <vscale x 2 x i64> [[TMP1:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[TMP0]])
-; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxtb.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> [[TMP3]], <vscale x 2 x i64> [[TMP1]])
+; CHECK-NEXT: [[TMP4:%.*]] = and <vscale x 2 x i64> [[TMP1]], splat (i64 255)
; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP4]]
;
%3 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %0)
@@ -62,7 +61,7 @@ define <vscale x 2 x i64> @uxtb_m_64_no_ptrue(<vscale x 16 x i1> %0, <vscale x 2
define <vscale x 4 x i32> @uxtb_z_32(<vscale x 4 x i32> %0) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @uxtb_z_32(
; CHECK-SAME: <vscale x 4 x i32> [[TMP0:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uxtb.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[TMP0]])
+; CHECK-NEXT: [[TMP2:%.*]] = and <vscale x 4 x i32> [[TMP0]], splat (i32 255)
; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]]
;
%2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uxtb.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> %0)
@@ -72,7 +71,7 @@ define <vscale x 4 x i32> @uxtb_z_32(<vscale x 4 x i32> %0) #0 {
define <vscale x 4 x i32> @uxtb_m_32(<vscale x 4 x i32> %0, <vscale x 4 x i32> %1) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @uxtb_m_32(
; CHECK-SAME: <vscale x 4 x i32> [[TMP0:%.*]], <vscale x 4 x i32> [[TMP1:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uxtb.nxv4i32(<vscale x 4 x i32> [[TMP1]], <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[TMP0]])
+; CHECK-NEXT: [[TMP3:%.*]] = and <vscale x 4 x i32> [[TMP0]], splat (i32 255)
; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP3]]
;
%3 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uxtb.nxv4i32(<vscale x 4 x i32> %1, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> %0)
@@ -82,8 +81,7 @@ define <vscale x 4 x i32> @uxtb_m_32(<vscale x 4 x i32> %0, <vscale x 4 x i32> %
define <vscale x 4 x i32> @uxtb_x_32(<vscale x 16 x i1> %0, <vscale x 4 x i32> %1) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @uxtb_x_32(
; CHECK-SAME: <vscale x 16 x i1> [[TMP0:%.*]], <vscale x 4 x i32> [[TMP1:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[TMP0]])
-; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uxtb.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> [[TMP3]], <vscale x 4 x i32> [[TMP1]])
+; CHECK-NEXT: [[TMP4:%.*]] = and <vscale x 4 x i32> [[TMP1]], splat (i32 255)
; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP4]]
;
%3 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %0)
@@ -118,7 +116,7 @@ define <vscale x 4 x i32> @uxtb_m_32_no_ptrue(<vscale x 16 x i1> %0, <vscale x 4
define <vscale x 8 x i16> @uxtb_z_16(<vscale x 8 x i16> %0) #0 {
; CHECK-LABEL: define <vscale x 8 x i16> @uxtb_z_16(
; CHECK-SAME: <vscale x 8 x i16> [[TMP0:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uxtb.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[TMP0]])
+; CHECK-NEXT: [[TMP2:%.*]] = and <vscale x 8 x i16> [[TMP0]], splat (i16 255)
; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]]
;
%2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uxtb.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> %0)
@@ -128,7 +126,7 @@ define <vscale x 8 x i16> @uxtb_z_16(<vscale x 8 x i16> %0) #0 {
define <vscale x 8 x i16> @uxtb_m_16(<vscale x 8 x i16> %0, <vscale x 8 x i16> %1) #0 {
; CHECK-LABEL: define <vscale x 8 x i16> @uxtb_m_16(
; CHECK-SAME: <vscale x 8 x i16> [[TMP0:%.*]], <vscale x 8 x i16> [[TMP1:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uxtb.nxv8i16(<vscale x 8 x i16> [[TMP1]], <vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[TMP0]])
+; CHECK-NEXT: [[TMP3:%.*]] = and <vscale x 8 x i16> [[TMP0]], splat (i16 255)
; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP3]]
;
%3 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uxtb.nxv8i16(<vscale x 8 x i16> %1, <vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> %0)
@@ -138,8 +136,7 @@ define <vscale x 8 x i16> @uxtb_m_16(<vscale x 8 x i16> %0, <vscale x 8 x i16> %
define <vscale x 8 x i16> @uxtb_x_16(<vscale x 16 x i1> %0, <vscale x 8 x i16> %1) #0 {
; CHECK-LABEL: define <vscale x 8 x i16> @uxtb_x_16(
; CHECK-SAME: <vscale x 16 x i1> [[TMP0:%.*]], <vscale x 8 x i16> [[TMP1:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[TMP0]])
-; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uxtb.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> [[TMP3]], <vscale x 8 x i16> [[TMP1]])
+; CHECK-NEXT: [[TMP4:%.*]] = and <vscale x 8 x i16> [[TMP1]], splat (i16 255)
; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP4]]
;
%3 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %0)
@@ -174,7 +171,7 @@ define <vscale x 8 x i16> @uxtb_m_16_no_ptrue(<vscale x 16 x i1> %0, <vscale x 8
define <vscale x 2 x i64> @uxth_z_64(<vscale x 2 x i64> %0) #0 {
; CHECK-LABEL: define <vscale x 2 x i64> @uxth_z_64(
; CHECK-SAME: <vscale x 2 x i64> [[TMP0:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxth.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[TMP0]])
+; CHECK-NEXT: [[TMP2:%.*]] = and <vscale x 2 x i64> [[TMP0]], splat (i64 65535)
; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]]
;
%2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxth.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> %0)
@@ -184,7 +181,7 @@ define <vscale x 2 x i64> @uxth_z_64(<vscale x 2 x i64> %0) #0 {
define <vscale x 2 x i64> @uxth_m_64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1) #0 {
; CHECK-LABEL: define <vscale x 2 x i64> @uxth_m_64(
; CHECK-SAME: <vscale x 2 x i64> [[TMP0:%.*]], <vscale x 2 x i64> [[TMP1:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxth.nxv2i64(<vscale x 2 x i64> [[TMP1]], <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[TMP0]])
+; CHECK-NEXT: [[TMP3:%.*]] = and <vscale x 2 x i64> [[TMP0]], splat (i64 65535)
; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP3]]
;
%3 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxth.nxv2i64(<vscale x 2 x i64> %1, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> %0)
@@ -194,8 +191,7 @@ define <vscale x 2 x i64> @uxth_m_64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %
define <vscale x 2 x i64> @uxth_x_64(<vscale x 16 x i1> %0, <vscale x 2 x i64> %1) #0 {
; CHECK-LABEL: define <vscale x 2 x i64> @uxth_x_64(
; CHECK-SAME: <vscale x 16 x i1> [[TMP0:%.*]], <vscale x 2 x i64> [[TMP1:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[TMP0]])
-; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxth.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> [[TMP3]], <vscale x 2 x i64> [[TMP1]])
+; CHECK-NEXT: [[TMP4:%.*]] = and <vscale x 2 x i64> [[TMP1]], splat (i64 65535)
; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP4]]
;
%3 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %0)
@@ -230,7 +226,7 @@ define <vscale x 2 x i64> @uxth_m_64_no_ptrue(<vscale x 16 x i1> %0, <vscale x 2
define <vscale x 4 x i32> @uxth_z_32(<vscale x 4 x i32> %0) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @uxth_z_32(
; CHECK-SAME: <vscale x 4 x i32> [[TMP0:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uxth.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[TMP0]])
+; CHECK-NEXT: [[TMP2:%.*]] = and <vscale x 4 x i32> [[TMP0]], splat (i32 65535)
; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]]
;
%2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uxth.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> %0)
@@ -240,7 +236,7 @@ define <vscale x 4 x i32> @uxth_z_32(<vscale x 4 x i32> %0) #0 {
define <vscale x 4 x i32> @uxth_m_32(<vscale x 4 x i32> %0, <vscale x 4 x i32> %1) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @uxth_m_32(
; CHECK-SAME: <vscale x 4 x i32> [[TMP0:%.*]], <vscale x 4 x i32> [[TMP1:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uxth.nxv4i32(<vscale x 4 x i32> [[TMP1]], <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[TMP0]])
+; CHECK-NEXT: [[TMP3:%.*]] = and <vscale x 4 x i32> [[TMP0]], splat (i32 65535)
; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP3]]
;
%3 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uxth.nxv4i32(<vscale x 4 x i32> %1, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> %0)
@@ -250,8 +246,7 @@ define <vscale x 4 x i32> @uxth_m_32(<vscale x 4 x i32> %0, <vscale x 4 x i32> %
define <vscale x 4 x i32> @uxth_x_32(<vscale x 16 x i1> %0, <vscale x 4 x i32> %1) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @uxth_x_32(
; CHECK-SAME: <vscale x 16 x i1> [[TMP0:%.*]], <vscale x 4 x i32> [[TMP1:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[TMP0]])
-; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uxth.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> [[TMP3]], <vscale x 4 x i32> [[TMP1]])
+; CHECK-NEXT: [[TMP4:%.*]] = and <vscale x 4 x i32> [[TMP1]], splat (i32 65535)
; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP4]]
;
%3 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %0)
@@ -286,7 +281,7 @@ define <vscale x 4 x i32> @uxth_m_32_no_ptrue(<vscale x 16 x i1> %0, <vscale x 4
define <vscale x 2 x i64> @uxtw_z_64(<vscale x 2 x i64> %0) #0 {
; CHECK-LABEL: define <vscale x 2 x i64> @uxtw_z_64(
; CHECK-SAME: <vscale x 2 x i64> [[TMP0:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[TMP0]])
+; CHECK-NEXT: [[TMP2:%.*]] = and <vscale x 2 x i64> [[TMP0]], splat (i64 4294967295)
; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]]
;
%2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> %0)
@@ -296,7 +291,7 @@ define <vscale x 2 x i64> @uxtw_z_64(<vscale x 2 x i64> %0) #0 {
define <vscale x 2 x i64> @uxtw_m_64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1) #0 {
; CHECK-LABEL: define <vscale x 2 x i64> @uxtw_m_64(
; CHECK-SAME: <vscale x 2 x i64> [[TMP0:%.*]], <vscale x 2 x i64> [[TMP1:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> [[TMP1]], <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[TMP0]])
+; CHECK-NEXT: [[TMP3:%.*]] = and <vscale x 2 x i64> [[TMP0]], splat (i64 4294967295)
; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP3]]
;
%3 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> %1, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> %0)
@@ -306,8 +301,7 @@ define <vscale x 2 x i64> @uxtw_m_64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %
define <vscale x 2 x i64> @uxtw_x_64(<vscale x 16 x i1> %0, <vscale x 2 x i64> %1) #0 {
; CHECK-LABEL: define <vscale x 2 x i64> @uxtw_x_64(
; CHECK-SAME: <vscale x 16 x i1> [[TMP0:%.*]], <vscale x 2 x i64> [[TMP1:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[TMP0]])
-; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> [[TMP3]], <vscale x 2 x i64> [[TMP1]])
+; CHECK-NEXT: [[TMP4:%.*]] = and <vscale x 2 x i64> [[TMP1]], splat (i64 4294967295)
; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP4]]
;
%3 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %0)
>From 89ce3d8bb0c9fb5a9d1dd8b89be9a6d9806ac155 Mon Sep 17 00:00:00 2001
From: Ricardo Jesus <rjj at nvidia.com>
Date: Wed, 30 Apr 2025 08:13:49 -0700
Subject: [PATCH 3/6] Emit Intrinsic::aarch64_sve_and_u rather than stock and.
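With this change, the combine emits the predicated, undef-passthrough SVE AND
intrinsic rather than a plain IR and, so the governing predicate is preserved
in the IR. A sketch of the all-true uxtb case, based on the updated test
checks below (value names illustrative):
```llvm
; The uxtb intrinsic is now rewritten to the predicated and.u intrinsic.
%r = call <vscale x 2 x i64> @llvm.aarch64.sve.and.u.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> %x, <vscale x 2 x i64> splat (i64 255))
```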
---
.../AArch64/AArch64TargetTransformInfo.cpp | 4 +-
.../InstCombine/AArch64/sve-intrinsic-uxt.ll | 42 +++++++++++--------
2 files changed, 27 insertions(+), 19 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index e9050d184f0f7..cd75811a1bad5 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -2654,7 +2654,9 @@ static std::optional<Instruction *> instCombineSVEUxt(InstCombiner &IC,
auto *Mask = ConstantVector::getSplat(
Ty->getElementCount(),
ConstantInt::get(Ty->getElementType(), MaskValue));
- return IC.replaceInstUsesWith(II, IC.Builder.CreateAnd(Op, Mask));
+ auto *And = IC.Builder.CreateIntrinsic(Intrinsic::aarch64_sve_and_u, {Ty},
+ {Pg, Op, Mask});
+ return IC.replaceInstUsesWith(II, And);
}
return std::nullopt;
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-uxt.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-uxt.ll
index 86986b510aa27..755e11e231382 100644
--- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-uxt.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-uxt.ll
@@ -6,7 +6,7 @@ target triple = "aarch64-unknown-linux-gnu"
define <vscale x 2 x i64> @uxtb_z_64(<vscale x 2 x i64> %0) #0 {
; CHECK-LABEL: define <vscale x 2 x i64> @uxtb_z_64(
; CHECK-SAME: <vscale x 2 x i64> [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT: [[TMP2:%.*]] = and <vscale x 2 x i64> [[TMP0]], splat (i64 255)
+; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.and.u.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> splat (i64 255))
; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]]
;
%2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxtb.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> %0)
@@ -16,7 +16,7 @@ define <vscale x 2 x i64> @uxtb_z_64(<vscale x 2 x i64> %0) #0 {
define <vscale x 2 x i64> @uxtb_m_64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1) #0 {
; CHECK-LABEL: define <vscale x 2 x i64> @uxtb_m_64(
; CHECK-SAME: <vscale x 2 x i64> [[TMP0:%.*]], <vscale x 2 x i64> [[TMP1:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP3:%.*]] = and <vscale x 2 x i64> [[TMP0]], splat (i64 255)
+; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.and.u.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> splat (i64 255))
; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP3]]
;
%3 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxtb.nxv2i64(<vscale x 2 x i64> %1, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> %0)
@@ -26,7 +26,8 @@ define <vscale x 2 x i64> @uxtb_m_64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %
define <vscale x 2 x i64> @uxtb_x_64(<vscale x 16 x i1> %0, <vscale x 2 x i64> %1) #0 {
; CHECK-LABEL: define <vscale x 2 x i64> @uxtb_x_64(
; CHECK-SAME: <vscale x 16 x i1> [[TMP0:%.*]], <vscale x 2 x i64> [[TMP1:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP4:%.*]] = and <vscale x 2 x i64> [[TMP1]], splat (i64 255)
+; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[TMP0]])
+; CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.and.u.nxv2i64(<vscale x 2 x i1> [[TMP3]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> splat (i64 255))
; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP4]]
;
%3 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %0)
@@ -61,7 +62,7 @@ define <vscale x 2 x i64> @uxtb_m_64_no_ptrue(<vscale x 16 x i1> %0, <vscale x 2
define <vscale x 4 x i32> @uxtb_z_32(<vscale x 4 x i32> %0) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @uxtb_z_32(
; CHECK-SAME: <vscale x 4 x i32> [[TMP0:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP2:%.*]] = and <vscale x 4 x i32> [[TMP0]], splat (i32 255)
+; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.and.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> splat (i32 255))
; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]]
;
%2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uxtb.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> %0)
@@ -71,7 +72,7 @@ define <vscale x 4 x i32> @uxtb_z_32(<vscale x 4 x i32> %0) #0 {
define <vscale x 4 x i32> @uxtb_m_32(<vscale x 4 x i32> %0, <vscale x 4 x i32> %1) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @uxtb_m_32(
; CHECK-SAME: <vscale x 4 x i32> [[TMP0:%.*]], <vscale x 4 x i32> [[TMP1:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP3:%.*]] = and <vscale x 4 x i32> [[TMP0]], splat (i32 255)
+; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.and.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> splat (i32 255))
; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP3]]
;
%3 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uxtb.nxv4i32(<vscale x 4 x i32> %1, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> %0)
@@ -81,7 +82,8 @@ define <vscale x 4 x i32> @uxtb_m_32(<vscale x 4 x i32> %0, <vscale x 4 x i32> %
define <vscale x 4 x i32> @uxtb_x_32(<vscale x 16 x i1> %0, <vscale x 4 x i32> %1) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @uxtb_x_32(
; CHECK-SAME: <vscale x 16 x i1> [[TMP0:%.*]], <vscale x 4 x i32> [[TMP1:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP4:%.*]] = and <vscale x 4 x i32> [[TMP1]], splat (i32 255)
+; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[TMP0]])
+; CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.and.u.nxv4i32(<vscale x 4 x i1> [[TMP3]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> splat (i32 255))
; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP4]]
;
%3 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %0)
@@ -116,7 +118,7 @@ define <vscale x 4 x i32> @uxtb_m_32_no_ptrue(<vscale x 16 x i1> %0, <vscale x 4
define <vscale x 8 x i16> @uxtb_z_16(<vscale x 8 x i16> %0) #0 {
; CHECK-LABEL: define <vscale x 8 x i16> @uxtb_z_16(
; CHECK-SAME: <vscale x 8 x i16> [[TMP0:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP2:%.*]] = and <vscale x 8 x i16> [[TMP0]], splat (i16 255)
+; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.and.u.nxv8i16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> splat (i16 255))
; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]]
;
%2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uxtb.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> %0)
@@ -126,7 +128,7 @@ define <vscale x 8 x i16> @uxtb_z_16(<vscale x 8 x i16> %0) #0 {
define <vscale x 8 x i16> @uxtb_m_16(<vscale x 8 x i16> %0, <vscale x 8 x i16> %1) #0 {
; CHECK-LABEL: define <vscale x 8 x i16> @uxtb_m_16(
; CHECK-SAME: <vscale x 8 x i16> [[TMP0:%.*]], <vscale x 8 x i16> [[TMP1:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP3:%.*]] = and <vscale x 8 x i16> [[TMP0]], splat (i16 255)
+; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.and.u.nxv8i16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> splat (i16 255))
; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP3]]
;
%3 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uxtb.nxv8i16(<vscale x 8 x i16> %1, <vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> %0)
@@ -136,7 +138,8 @@ define <vscale x 8 x i16> @uxtb_m_16(<vscale x 8 x i16> %0, <vscale x 8 x i16> %
define <vscale x 8 x i16> @uxtb_x_16(<vscale x 16 x i1> %0, <vscale x 8 x i16> %1) #0 {
; CHECK-LABEL: define <vscale x 8 x i16> @uxtb_x_16(
; CHECK-SAME: <vscale x 16 x i1> [[TMP0:%.*]], <vscale x 8 x i16> [[TMP1:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP4:%.*]] = and <vscale x 8 x i16> [[TMP1]], splat (i16 255)
+; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[TMP0]])
+; CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.and.u.nxv8i16(<vscale x 8 x i1> [[TMP3]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> splat (i16 255))
; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP4]]
;
%3 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %0)
@@ -171,7 +174,7 @@ define <vscale x 8 x i16> @uxtb_m_16_no_ptrue(<vscale x 16 x i1> %0, <vscale x 8
define <vscale x 2 x i64> @uxth_z_64(<vscale x 2 x i64> %0) #0 {
; CHECK-LABEL: define <vscale x 2 x i64> @uxth_z_64(
; CHECK-SAME: <vscale x 2 x i64> [[TMP0:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP2:%.*]] = and <vscale x 2 x i64> [[TMP0]], splat (i64 65535)
+; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.and.u.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> splat (i64 65535))
; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]]
;
%2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxth.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> %0)
@@ -181,7 +184,7 @@ define <vscale x 2 x i64> @uxth_z_64(<vscale x 2 x i64> %0) #0 {
define <vscale x 2 x i64> @uxth_m_64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1) #0 {
; CHECK-LABEL: define <vscale x 2 x i64> @uxth_m_64(
; CHECK-SAME: <vscale x 2 x i64> [[TMP0:%.*]], <vscale x 2 x i64> [[TMP1:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP3:%.*]] = and <vscale x 2 x i64> [[TMP0]], splat (i64 65535)
+; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.and.u.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> splat (i64 65535))
; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP3]]
;
%3 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxth.nxv2i64(<vscale x 2 x i64> %1, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> %0)
@@ -191,7 +194,8 @@ define <vscale x 2 x i64> @uxth_m_64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %
define <vscale x 2 x i64> @uxth_x_64(<vscale x 16 x i1> %0, <vscale x 2 x i64> %1) #0 {
; CHECK-LABEL: define <vscale x 2 x i64> @uxth_x_64(
; CHECK-SAME: <vscale x 16 x i1> [[TMP0:%.*]], <vscale x 2 x i64> [[TMP1:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP4:%.*]] = and <vscale x 2 x i64> [[TMP1]], splat (i64 65535)
+; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[TMP0]])
+; CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.and.u.nxv2i64(<vscale x 2 x i1> [[TMP3]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> splat (i64 65535))
; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP4]]
;
%3 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %0)
@@ -226,7 +230,7 @@ define <vscale x 2 x i64> @uxth_m_64_no_ptrue(<vscale x 16 x i1> %0, <vscale x 2
define <vscale x 4 x i32> @uxth_z_32(<vscale x 4 x i32> %0) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @uxth_z_32(
; CHECK-SAME: <vscale x 4 x i32> [[TMP0:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP2:%.*]] = and <vscale x 4 x i32> [[TMP0]], splat (i32 65535)
+; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.and.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> splat (i32 65535))
; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]]
;
%2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uxth.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> %0)
@@ -236,7 +240,7 @@ define <vscale x 4 x i32> @uxth_z_32(<vscale x 4 x i32> %0) #0 {
define <vscale x 4 x i32> @uxth_m_32(<vscale x 4 x i32> %0, <vscale x 4 x i32> %1) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @uxth_m_32(
; CHECK-SAME: <vscale x 4 x i32> [[TMP0:%.*]], <vscale x 4 x i32> [[TMP1:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP3:%.*]] = and <vscale x 4 x i32> [[TMP0]], splat (i32 65535)
+; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.and.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> splat (i32 65535))
; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP3]]
;
%3 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uxth.nxv4i32(<vscale x 4 x i32> %1, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> %0)
@@ -246,7 +250,8 @@ define <vscale x 4 x i32> @uxth_m_32(<vscale x 4 x i32> %0, <vscale x 4 x i32> %
define <vscale x 4 x i32> @uxth_x_32(<vscale x 16 x i1> %0, <vscale x 4 x i32> %1) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @uxth_x_32(
; CHECK-SAME: <vscale x 16 x i1> [[TMP0:%.*]], <vscale x 4 x i32> [[TMP1:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP4:%.*]] = and <vscale x 4 x i32> [[TMP1]], splat (i32 65535)
+; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[TMP0]])
+; CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.and.u.nxv4i32(<vscale x 4 x i1> [[TMP3]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> splat (i32 65535))
; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP4]]
;
%3 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %0)
@@ -281,7 +286,7 @@ define <vscale x 4 x i32> @uxth_m_32_no_ptrue(<vscale x 16 x i1> %0, <vscale x 4
define <vscale x 2 x i64> @uxtw_z_64(<vscale x 2 x i64> %0) #0 {
; CHECK-LABEL: define <vscale x 2 x i64> @uxtw_z_64(
; CHECK-SAME: <vscale x 2 x i64> [[TMP0:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP2:%.*]] = and <vscale x 2 x i64> [[TMP0]], splat (i64 4294967295)
+; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.and.u.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> splat (i64 4294967295))
; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]]
;
%2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> %0)
@@ -291,7 +296,7 @@ define <vscale x 2 x i64> @uxtw_z_64(<vscale x 2 x i64> %0) #0 {
define <vscale x 2 x i64> @uxtw_m_64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1) #0 {
; CHECK-LABEL: define <vscale x 2 x i64> @uxtw_m_64(
; CHECK-SAME: <vscale x 2 x i64> [[TMP0:%.*]], <vscale x 2 x i64> [[TMP1:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP3:%.*]] = and <vscale x 2 x i64> [[TMP0]], splat (i64 4294967295)
+; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.and.u.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> splat (i64 4294967295))
; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP3]]
;
%3 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> %1, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> %0)
@@ -301,7 +306,8 @@ define <vscale x 2 x i64> @uxtw_m_64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %
define <vscale x 2 x i64> @uxtw_x_64(<vscale x 16 x i1> %0, <vscale x 2 x i64> %1) #0 {
; CHECK-LABEL: define <vscale x 2 x i64> @uxtw_x_64(
; CHECK-SAME: <vscale x 16 x i1> [[TMP0:%.*]], <vscale x 2 x i64> [[TMP1:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP4:%.*]] = and <vscale x 2 x i64> [[TMP1]], splat (i64 4294967295)
+; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[TMP0]])
+; CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.and.u.nxv2i64(<vscale x 2 x i1> [[TMP3]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> splat (i64 4294967295))
; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP4]]
;
%3 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %0)
>From f0b3424cddd23224165e556bf7aced828172a180 Mon Sep 17 00:00:00 2001
From: Ricardo Jesus <rjj at nvidia.com>
Date: Thu, 1 May 2025 06:08:26 -0700
Subject: [PATCH 4/6] Simplify splat creation.
---
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index cd75811a1bad5..bab47a14d380c 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -2651,9 +2651,7 @@ static std::optional<Instruction *> instCombineSVEUxt(InstCombiner &IC,
if (isa<UndefValue>(Passthru) || isAllActivePredicate(Pg)) {
auto *Ty = cast<VectorType>(II.getType());
auto MaskValue = APInt::getLowBitsSet(Ty->getScalarSizeInBits(), NumBits);
- auto *Mask = ConstantVector::getSplat(
- Ty->getElementCount(),
- ConstantInt::get(Ty->getElementType(), MaskValue));
+ auto *Mask = ConstantInt::get(Ty, MaskValue);
auto *And = IC.Builder.CreateIntrinsic(Intrinsic::aarch64_sve_and_u, {Ty},
{Pg, Op, Mask});
return IC.replaceInstUsesWith(II, And);
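(Not part of the patch; sketch for review context only.) The simplification above relies on ConstantInt::get returning a splat constant when handed a vector type, which makes the explicit ConstantVector::getSplat call redundant. A minimal C++ sketch of the post-patch combine logic, with the helper name and parameters being illustrative rather than the exact code in AArch64TargetTransformInfo.cpp:

#include "llvm/ADT/APInt.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicsAArch64.h"

// Sketch: emit the and_u equivalent of a UXT[BHW] intrinsic once we know the
// passthru is undef or the predicate is all-active.  NumBits is 8/16/32 for
// uxtb/uxth/uxtw respectively.
static llvm::Value *buildUxtAsAndU(llvm::IRBuilderBase &Builder,
                                   llvm::VectorType *Ty, llvm::Value *Pg,
                                   llvm::Value *Op, unsigned NumBits) {
  using namespace llvm;
  // Low-bit mask for the source element width, e.g. 0xff for uxtb.
  APInt MaskValue = APInt::getLowBitsSet(Ty->getScalarSizeInBits(), NumBits);
  // ConstantInt::get on a vector type yields a splat of the scalar value,
  // equivalent to ConstantVector::getSplat(Ty->getElementCount(), ...).
  Constant *Mask = ConstantInt::get(Ty, MaskValue);
  // Emit the predicated AND with undefined inactive lanes.
  return Builder.CreateIntrinsic(Intrinsic::aarch64_sve_and_u, {Ty},
                                 {Pg, Op, Mask});
}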
>From 34f4dc062f0ec0027ad6ec232684b1111d1971f5 Mon Sep 17 00:00:00 2001
From: Ricardo Jesus <rjj at nvidia.com>
Date: Thu, 1 May 2025 06:14:17 -0700
Subject: [PATCH 5/6] Remove zeroing tests.
---
.../InstCombine/AArch64/sve-intrinsic-uxt.ll | 132 ------------------
1 file changed, 132 deletions(-)
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-uxt.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-uxt.ll
index 755e11e231382..4b4810f94811c 100644
--- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-uxt.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-uxt.ll
@@ -3,16 +3,6 @@
target triple = "aarch64-unknown-linux-gnu"
-define <vscale x 2 x i64> @uxtb_z_64(<vscale x 2 x i64> %0) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @uxtb_z_64(
-; CHECK-SAME: <vscale x 2 x i64> [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.and.u.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> splat (i64 255))
-; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]]
-;
- %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxtb.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> %0)
- ret <vscale x 2 x i64> %2
-}
-
define <vscale x 2 x i64> @uxtb_m_64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1) #0 {
; CHECK-LABEL: define <vscale x 2 x i64> @uxtb_m_64(
; CHECK-SAME: <vscale x 2 x i64> [[TMP0:%.*]], <vscale x 2 x i64> [[TMP1:%.*]]) #[[ATTR0]] {
@@ -35,18 +25,6 @@ define <vscale x 2 x i64> @uxtb_x_64(<vscale x 16 x i1> %0, <vscale x 2 x i64> %
ret <vscale x 2 x i64> %4
}
-define <vscale x 2 x i64> @uxtb_z_64_no_ptrue(<vscale x 16 x i1> %0, <vscale x 2 x i64> %1) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @uxtb_z_64_no_ptrue(
-; CHECK-SAME: <vscale x 16 x i1> [[TMP0:%.*]], <vscale x 2 x i64> [[TMP1:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[TMP0]])
-; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxtb.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> [[TMP3]], <vscale x 2 x i64> [[TMP1]])
-; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP4]]
-;
- %3 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %0)
- %4 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxtb.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> %3, <vscale x 2 x i64> %1)
- ret <vscale x 2 x i64> %4
-}
-
define <vscale x 2 x i64> @uxtb_m_64_no_ptrue(<vscale x 16 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2) #0 {
; CHECK-LABEL: define <vscale x 2 x i64> @uxtb_m_64_no_ptrue(
; CHECK-SAME: <vscale x 16 x i1> [[TMP0:%.*]], <vscale x 2 x i64> [[TMP1:%.*]], <vscale x 2 x i64> [[TMP2:%.*]]) #[[ATTR0]] {
@@ -59,16 +37,6 @@ define <vscale x 2 x i64> @uxtb_m_64_no_ptrue(<vscale x 16 x i1> %0, <vscale x 2
ret <vscale x 2 x i64> %5
}
-define <vscale x 4 x i32> @uxtb_z_32(<vscale x 4 x i32> %0) #0 {
-; CHECK-LABEL: define <vscale x 4 x i32> @uxtb_z_32(
-; CHECK-SAME: <vscale x 4 x i32> [[TMP0:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.and.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> splat (i32 255))
-; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]]
-;
- %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uxtb.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> %0)
- ret <vscale x 4 x i32> %2
-}
-
define <vscale x 4 x i32> @uxtb_m_32(<vscale x 4 x i32> %0, <vscale x 4 x i32> %1) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @uxtb_m_32(
; CHECK-SAME: <vscale x 4 x i32> [[TMP0:%.*]], <vscale x 4 x i32> [[TMP1:%.*]]) #[[ATTR0]] {
@@ -91,18 +59,6 @@ define <vscale x 4 x i32> @uxtb_x_32(<vscale x 16 x i1> %0, <vscale x 4 x i32> %
ret <vscale x 4 x i32> %4
}
-define <vscale x 4 x i32> @uxtb_z_32_no_ptrue(<vscale x 16 x i1> %0, <vscale x 4 x i32> %1) #0 {
-; CHECK-LABEL: define <vscale x 4 x i32> @uxtb_z_32_no_ptrue(
-; CHECK-SAME: <vscale x 16 x i1> [[TMP0:%.*]], <vscale x 4 x i32> [[TMP1:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[TMP0]])
-; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uxtb.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> [[TMP3]], <vscale x 4 x i32> [[TMP1]])
-; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP4]]
-;
- %3 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %0)
- %4 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uxtb.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> %3, <vscale x 4 x i32> %1)
- ret <vscale x 4 x i32> %4
-}
-
define <vscale x 4 x i32> @uxtb_m_32_no_ptrue(<vscale x 16 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @uxtb_m_32_no_ptrue(
; CHECK-SAME: <vscale x 16 x i1> [[TMP0:%.*]], <vscale x 4 x i32> [[TMP1:%.*]], <vscale x 4 x i32> [[TMP2:%.*]]) #[[ATTR0]] {
@@ -115,16 +71,6 @@ define <vscale x 4 x i32> @uxtb_m_32_no_ptrue(<vscale x 16 x i1> %0, <vscale x 4
ret <vscale x 4 x i32> %5
}
-define <vscale x 8 x i16> @uxtb_z_16(<vscale x 8 x i16> %0) #0 {
-; CHECK-LABEL: define <vscale x 8 x i16> @uxtb_z_16(
-; CHECK-SAME: <vscale x 8 x i16> [[TMP0:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.and.u.nxv8i16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> splat (i16 255))
-; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]]
-;
- %2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uxtb.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> %0)
- ret <vscale x 8 x i16> %2
-}
-
define <vscale x 8 x i16> @uxtb_m_16(<vscale x 8 x i16> %0, <vscale x 8 x i16> %1) #0 {
; CHECK-LABEL: define <vscale x 8 x i16> @uxtb_m_16(
; CHECK-SAME: <vscale x 8 x i16> [[TMP0:%.*]], <vscale x 8 x i16> [[TMP1:%.*]]) #[[ATTR0]] {
@@ -147,18 +93,6 @@ define <vscale x 8 x i16> @uxtb_x_16(<vscale x 16 x i1> %0, <vscale x 8 x i16> %
ret <vscale x 8 x i16> %4
}
-define <vscale x 8 x i16> @uxtb_z_16_no_ptrue(<vscale x 16 x i1> %0, <vscale x 8 x i16> %1) #0 {
-; CHECK-LABEL: define <vscale x 8 x i16> @uxtb_z_16_no_ptrue(
-; CHECK-SAME: <vscale x 16 x i1> [[TMP0:%.*]], <vscale x 8 x i16> [[TMP1:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[TMP0]])
-; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uxtb.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> [[TMP3]], <vscale x 8 x i16> [[TMP1]])
-; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP4]]
-;
- %3 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %0)
- %4 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uxtb.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> %3, <vscale x 8 x i16> %1)
- ret <vscale x 8 x i16> %4
-}
-
define <vscale x 8 x i16> @uxtb_m_16_no_ptrue(<vscale x 16 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2) #0 {
; CHECK-LABEL: define <vscale x 8 x i16> @uxtb_m_16_no_ptrue(
; CHECK-SAME: <vscale x 16 x i1> [[TMP0:%.*]], <vscale x 8 x i16> [[TMP1:%.*]], <vscale x 8 x i16> [[TMP2:%.*]]) #[[ATTR0]] {
@@ -171,16 +105,6 @@ define <vscale x 8 x i16> @uxtb_m_16_no_ptrue(<vscale x 16 x i1> %0, <vscale x 8
ret <vscale x 8 x i16> %5
}
-define <vscale x 2 x i64> @uxth_z_64(<vscale x 2 x i64> %0) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @uxth_z_64(
-; CHECK-SAME: <vscale x 2 x i64> [[TMP0:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.and.u.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> splat (i64 65535))
-; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]]
-;
- %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxth.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> %0)
- ret <vscale x 2 x i64> %2
-}
-
define <vscale x 2 x i64> @uxth_m_64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1) #0 {
; CHECK-LABEL: define <vscale x 2 x i64> @uxth_m_64(
; CHECK-SAME: <vscale x 2 x i64> [[TMP0:%.*]], <vscale x 2 x i64> [[TMP1:%.*]]) #[[ATTR0]] {
@@ -203,18 +127,6 @@ define <vscale x 2 x i64> @uxth_x_64(<vscale x 16 x i1> %0, <vscale x 2 x i64> %
ret <vscale x 2 x i64> %4
}
-define <vscale x 2 x i64> @uxth_z_64_no_ptrue(<vscale x 16 x i1> %0, <vscale x 2 x i64> %1) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @uxth_z_64_no_ptrue(
-; CHECK-SAME: <vscale x 16 x i1> [[TMP0:%.*]], <vscale x 2 x i64> [[TMP1:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[TMP0]])
-; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxth.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> [[TMP3]], <vscale x 2 x i64> [[TMP1]])
-; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP4]]
-;
- %3 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %0)
- %4 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxth.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> %3, <vscale x 2 x i64> %1)
- ret <vscale x 2 x i64> %4
-}
-
define <vscale x 2 x i64> @uxth_m_64_no_ptrue(<vscale x 16 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2) #0 {
; CHECK-LABEL: define <vscale x 2 x i64> @uxth_m_64_no_ptrue(
; CHECK-SAME: <vscale x 16 x i1> [[TMP0:%.*]], <vscale x 2 x i64> [[TMP1:%.*]], <vscale x 2 x i64> [[TMP2:%.*]]) #[[ATTR0]] {
@@ -227,16 +139,6 @@ define <vscale x 2 x i64> @uxth_m_64_no_ptrue(<vscale x 16 x i1> %0, <vscale x 2
ret <vscale x 2 x i64> %5
}
-define <vscale x 4 x i32> @uxth_z_32(<vscale x 4 x i32> %0) #0 {
-; CHECK-LABEL: define <vscale x 4 x i32> @uxth_z_32(
-; CHECK-SAME: <vscale x 4 x i32> [[TMP0:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.and.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> splat (i32 65535))
-; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]]
-;
- %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uxth.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> %0)
- ret <vscale x 4 x i32> %2
-}
-
define <vscale x 4 x i32> @uxth_m_32(<vscale x 4 x i32> %0, <vscale x 4 x i32> %1) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @uxth_m_32(
; CHECK-SAME: <vscale x 4 x i32> [[TMP0:%.*]], <vscale x 4 x i32> [[TMP1:%.*]]) #[[ATTR0]] {
@@ -259,18 +161,6 @@ define <vscale x 4 x i32> @uxth_x_32(<vscale x 16 x i1> %0, <vscale x 4 x i32> %
ret <vscale x 4 x i32> %4
}
-define <vscale x 4 x i32> @uxth_z_32_no_ptrue(<vscale x 16 x i1> %0, <vscale x 4 x i32> %1) #0 {
-; CHECK-LABEL: define <vscale x 4 x i32> @uxth_z_32_no_ptrue(
-; CHECK-SAME: <vscale x 16 x i1> [[TMP0:%.*]], <vscale x 4 x i32> [[TMP1:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[TMP0]])
-; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uxth.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> [[TMP3]], <vscale x 4 x i32> [[TMP1]])
-; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP4]]
-;
- %3 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %0)
- %4 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uxth.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> %3, <vscale x 4 x i32> %1)
- ret <vscale x 4 x i32> %4
-}
-
define <vscale x 4 x i32> @uxth_m_32_no_ptrue(<vscale x 16 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @uxth_m_32_no_ptrue(
; CHECK-SAME: <vscale x 16 x i1> [[TMP0:%.*]], <vscale x 4 x i32> [[TMP1:%.*]], <vscale x 4 x i32> [[TMP2:%.*]]) #[[ATTR0]] {
@@ -283,16 +173,6 @@ define <vscale x 4 x i32> @uxth_m_32_no_ptrue(<vscale x 16 x i1> %0, <vscale x 4
ret <vscale x 4 x i32> %5
}
-define <vscale x 2 x i64> @uxtw_z_64(<vscale x 2 x i64> %0) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @uxtw_z_64(
-; CHECK-SAME: <vscale x 2 x i64> [[TMP0:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.and.u.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> splat (i64 4294967295))
-; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]]
-;
- %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> %0)
- ret <vscale x 2 x i64> %2
-}
-
define <vscale x 2 x i64> @uxtw_m_64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1) #0 {
; CHECK-LABEL: define <vscale x 2 x i64> @uxtw_m_64(
; CHECK-SAME: <vscale x 2 x i64> [[TMP0:%.*]], <vscale x 2 x i64> [[TMP1:%.*]]) #[[ATTR0]] {
@@ -315,18 +195,6 @@ define <vscale x 2 x i64> @uxtw_x_64(<vscale x 16 x i1> %0, <vscale x 2 x i64> %
ret <vscale x 2 x i64> %4
}
-define <vscale x 2 x i64> @uxtw_z_64_no_ptrue(<vscale x 16 x i1> %0, <vscale x 2 x i64> %1) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @uxtw_z_64_no_ptrue(
-; CHECK-SAME: <vscale x 16 x i1> [[TMP0:%.*]], <vscale x 2 x i64> [[TMP1:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[TMP0]])
-; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> [[TMP3]], <vscale x 2 x i64> [[TMP1]])
-; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP4]]
-;
- %3 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %0)
- %4 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> %3, <vscale x 2 x i64> %1)
- ret <vscale x 2 x i64> %4
-}
-
define <vscale x 2 x i64> @uxtw_m_64_no_ptrue(<vscale x 16 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2) #0 {
; CHECK-LABEL: define <vscale x 2 x i64> @uxtw_m_64_no_ptrue(
; CHECK-SAME: <vscale x 16 x i1> [[TMP0:%.*]], <vscale x 2 x i64> [[TMP1:%.*]], <vscale x 2 x i64> [[TMP2:%.*]]) #[[ATTR0]] {
>From dcf54e2215cde8037474f7dc71a8aec766de8760 Mon Sep 17 00:00:00 2001
From: Ricardo Jesus <rjj at nvidia.com>
Date: Thu, 1 May 2025 06:26:41 -0700
Subject: [PATCH 6/6] Remove redundant tests.
---
.../InstCombine/AArch64/sve-intrinsic-uxt.ll | 129 ++----------------
1 file changed, 9 insertions(+), 120 deletions(-)
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-uxt.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-uxt.ll
index 4b4810f94811c..5d2f68650c3b5 100644
--- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-uxt.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-uxt.ll
@@ -3,6 +3,8 @@
target triple = "aarch64-unknown-linux-gnu"
+; Test that we combine uxtb to and_u for all-active predicates.
+
define <vscale x 2 x i64> @uxtb_m_64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1) #0 {
; CHECK-LABEL: define <vscale x 2 x i64> @uxtb_m_64(
; CHECK-SAME: <vscale x 2 x i64> [[TMP0:%.*]], <vscale x 2 x i64> [[TMP1:%.*]]) #[[ATTR0]] {
@@ -13,6 +15,8 @@ define <vscale x 2 x i64> @uxtb_m_64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %
ret <vscale x 2 x i64> %3
}
+; Test that we combine uxtb to and_u for undef ("unknown") passthrough.
+
define <vscale x 2 x i64> @uxtb_x_64(<vscale x 16 x i1> %0, <vscale x 2 x i64> %1) #0 {
; CHECK-LABEL: define <vscale x 2 x i64> @uxtb_x_64(
; CHECK-SAME: <vscale x 16 x i1> [[TMP0:%.*]], <vscale x 2 x i64> [[TMP1:%.*]]) #[[ATTR0]] {
@@ -25,6 +29,8 @@ define <vscale x 2 x i64> @uxtb_x_64(<vscale x 16 x i1> %0, <vscale x 2 x i64> %
ret <vscale x 2 x i64> %4
}
+; Negative test - ensure we don't combine when the passthrough is non-undef and the predicate is not all-active.
+
define <vscale x 2 x i64> @uxtb_m_64_no_ptrue(<vscale x 16 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2) #0 {
; CHECK-LABEL: define <vscale x 2 x i64> @uxtb_m_64_no_ptrue(
; CHECK-SAME: <vscale x 16 x i1> [[TMP0:%.*]], <vscale x 2 x i64> [[TMP1:%.*]], <vscale x 2 x i64> [[TMP2:%.*]]) #[[ATTR0]] {
@@ -37,6 +43,9 @@ define <vscale x 2 x i64> @uxtb_m_64_no_ptrue(<vscale x 16 x i1> %0, <vscale x 2
ret <vscale x 2 x i64> %5
}
+; For the remaining uxt* intrinsics and types, test that we combine them to the
+; appropriate and_u variant with a suitable mask.
+
define <vscale x 4 x i32> @uxtb_m_32(<vscale x 4 x i32> %0, <vscale x 4 x i32> %1) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @uxtb_m_32(
; CHECK-SAME: <vscale x 4 x i32> [[TMP0:%.*]], <vscale x 4 x i32> [[TMP1:%.*]]) #[[ATTR0]] {
@@ -47,30 +56,6 @@ define <vscale x 4 x i32> @uxtb_m_32(<vscale x 4 x i32> %0, <vscale x 4 x i32> %
ret <vscale x 4 x i32> %3
}
-define <vscale x 4 x i32> @uxtb_x_32(<vscale x 16 x i1> %0, <vscale x 4 x i32> %1) #0 {
-; CHECK-LABEL: define <vscale x 4 x i32> @uxtb_x_32(
-; CHECK-SAME: <vscale x 16 x i1> [[TMP0:%.*]], <vscale x 4 x i32> [[TMP1:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[TMP0]])
-; CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.and.u.nxv4i32(<vscale x 4 x i1> [[TMP3]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> splat (i32 255))
-; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP4]]
-;
- %3 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %0)
- %4 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uxtb.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %3, <vscale x 4 x i32> %1)
- ret <vscale x 4 x i32> %4
-}
-
-define <vscale x 4 x i32> @uxtb_m_32_no_ptrue(<vscale x 16 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2) #0 {
-; CHECK-LABEL: define <vscale x 4 x i32> @uxtb_m_32_no_ptrue(
-; CHECK-SAME: <vscale x 16 x i1> [[TMP0:%.*]], <vscale x 4 x i32> [[TMP1:%.*]], <vscale x 4 x i32> [[TMP2:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[TMP0]])
-; CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uxtb.nxv4i32(<vscale x 4 x i32> [[TMP2]], <vscale x 4 x i1> [[TMP4]], <vscale x 4 x i32> [[TMP1]])
-; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP5]]
-;
- %4 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %0)
- %5 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uxtb.nxv4i32(<vscale x 4 x i32> %2, <vscale x 4 x i1> %4, <vscale x 4 x i32> %1)
- ret <vscale x 4 x i32> %5
-}
-
define <vscale x 8 x i16> @uxtb_m_16(<vscale x 8 x i16> %0, <vscale x 8 x i16> %1) #0 {
; CHECK-LABEL: define <vscale x 8 x i16> @uxtb_m_16(
; CHECK-SAME: <vscale x 8 x i16> [[TMP0:%.*]], <vscale x 8 x i16> [[TMP1:%.*]]) #[[ATTR0]] {
@@ -81,30 +66,6 @@ define <vscale x 8 x i16> @uxtb_m_16(<vscale x 8 x i16> %0, <vscale x 8 x i16> %
ret <vscale x 8 x i16> %3
}
-define <vscale x 8 x i16> @uxtb_x_16(<vscale x 16 x i1> %0, <vscale x 8 x i16> %1) #0 {
-; CHECK-LABEL: define <vscale x 8 x i16> @uxtb_x_16(
-; CHECK-SAME: <vscale x 16 x i1> [[TMP0:%.*]], <vscale x 8 x i16> [[TMP1:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[TMP0]])
-; CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.and.u.nxv8i16(<vscale x 8 x i1> [[TMP3]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> splat (i16 255))
-; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP4]]
-;
- %3 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %0)
- %4 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uxtb.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %3, <vscale x 8 x i16> %1)
- ret <vscale x 8 x i16> %4
-}
-
-define <vscale x 8 x i16> @uxtb_m_16_no_ptrue(<vscale x 16 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2) #0 {
-; CHECK-LABEL: define <vscale x 8 x i16> @uxtb_m_16_no_ptrue(
-; CHECK-SAME: <vscale x 16 x i1> [[TMP0:%.*]], <vscale x 8 x i16> [[TMP1:%.*]], <vscale x 8 x i16> [[TMP2:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[TMP0]])
-; CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uxtb.nxv8i16(<vscale x 8 x i16> [[TMP2]], <vscale x 8 x i1> [[TMP4]], <vscale x 8 x i16> [[TMP1]])
-; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP5]]
-;
- %4 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %0)
- %5 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uxtb.nxv8i16(<vscale x 8 x i16> %2, <vscale x 8 x i1> %4, <vscale x 8 x i16> %1)
- ret <vscale x 8 x i16> %5
-}
-
define <vscale x 2 x i64> @uxth_m_64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1) #0 {
; CHECK-LABEL: define <vscale x 2 x i64> @uxth_m_64(
; CHECK-SAME: <vscale x 2 x i64> [[TMP0:%.*]], <vscale x 2 x i64> [[TMP1:%.*]]) #[[ATTR0]] {
@@ -115,30 +76,6 @@ define <vscale x 2 x i64> @uxth_m_64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %
ret <vscale x 2 x i64> %3
}
-define <vscale x 2 x i64> @uxth_x_64(<vscale x 16 x i1> %0, <vscale x 2 x i64> %1) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @uxth_x_64(
-; CHECK-SAME: <vscale x 16 x i1> [[TMP0:%.*]], <vscale x 2 x i64> [[TMP1:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[TMP0]])
-; CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.and.u.nxv2i64(<vscale x 2 x i1> [[TMP3]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> splat (i64 65535))
-; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP4]]
-;
- %3 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %0)
- %4 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxth.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %3, <vscale x 2 x i64> %1)
- ret <vscale x 2 x i64> %4
-}
-
-define <vscale x 2 x i64> @uxth_m_64_no_ptrue(<vscale x 16 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @uxth_m_64_no_ptrue(
-; CHECK-SAME: <vscale x 16 x i1> [[TMP0:%.*]], <vscale x 2 x i64> [[TMP1:%.*]], <vscale x 2 x i64> [[TMP2:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[TMP0]])
-; CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxth.nxv2i64(<vscale x 2 x i64> [[TMP2]], <vscale x 2 x i1> [[TMP4]], <vscale x 2 x i64> [[TMP1]])
-; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP5]]
-;
- %4 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %0)
- %5 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxth.nxv2i64(<vscale x 2 x i64> %2, <vscale x 2 x i1> %4, <vscale x 2 x i64> %1)
- ret <vscale x 2 x i64> %5
-}
-
define <vscale x 4 x i32> @uxth_m_32(<vscale x 4 x i32> %0, <vscale x 4 x i32> %1) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @uxth_m_32(
; CHECK-SAME: <vscale x 4 x i32> [[TMP0:%.*]], <vscale x 4 x i32> [[TMP1:%.*]]) #[[ATTR0]] {
@@ -149,30 +86,6 @@ define <vscale x 4 x i32> @uxth_m_32(<vscale x 4 x i32> %0, <vscale x 4 x i32> %
ret <vscale x 4 x i32> %3
}
-define <vscale x 4 x i32> @uxth_x_32(<vscale x 16 x i1> %0, <vscale x 4 x i32> %1) #0 {
-; CHECK-LABEL: define <vscale x 4 x i32> @uxth_x_32(
-; CHECK-SAME: <vscale x 16 x i1> [[TMP0:%.*]], <vscale x 4 x i32> [[TMP1:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[TMP0]])
-; CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.and.u.nxv4i32(<vscale x 4 x i1> [[TMP3]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> splat (i32 65535))
-; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP4]]
-;
- %3 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %0)
- %4 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uxth.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %3, <vscale x 4 x i32> %1)
- ret <vscale x 4 x i32> %4
-}
-
-define <vscale x 4 x i32> @uxth_m_32_no_ptrue(<vscale x 16 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2) #0 {
-; CHECK-LABEL: define <vscale x 4 x i32> @uxth_m_32_no_ptrue(
-; CHECK-SAME: <vscale x 16 x i1> [[TMP0:%.*]], <vscale x 4 x i32> [[TMP1:%.*]], <vscale x 4 x i32> [[TMP2:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[TMP0]])
-; CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uxth.nxv4i32(<vscale x 4 x i32> [[TMP2]], <vscale x 4 x i1> [[TMP4]], <vscale x 4 x i32> [[TMP1]])
-; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP5]]
-;
- %4 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %0)
- %5 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uxth.nxv4i32(<vscale x 4 x i32> %2, <vscale x 4 x i1> %4, <vscale x 4 x i32> %1)
- ret <vscale x 4 x i32> %5
-}
-
define <vscale x 2 x i64> @uxtw_m_64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1) #0 {
; CHECK-LABEL: define <vscale x 2 x i64> @uxtw_m_64(
; CHECK-SAME: <vscale x 2 x i64> [[TMP0:%.*]], <vscale x 2 x i64> [[TMP1:%.*]]) #[[ATTR0]] {
@@ -183,28 +96,4 @@ define <vscale x 2 x i64> @uxtw_m_64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %
ret <vscale x 2 x i64> %3
}
-define <vscale x 2 x i64> @uxtw_x_64(<vscale x 16 x i1> %0, <vscale x 2 x i64> %1) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @uxtw_x_64(
-; CHECK-SAME: <vscale x 16 x i1> [[TMP0:%.*]], <vscale x 2 x i64> [[TMP1:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[TMP0]])
-; CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.and.u.nxv2i64(<vscale x 2 x i1> [[TMP3]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> splat (i64 4294967295))
-; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP4]]
-;
- %3 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %0)
- %4 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %3, <vscale x 2 x i64> %1)
- ret <vscale x 2 x i64> %4
-}
-
-define <vscale x 2 x i64> @uxtw_m_64_no_ptrue(<vscale x 16 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @uxtw_m_64_no_ptrue(
-; CHECK-SAME: <vscale x 16 x i1> [[TMP0:%.*]], <vscale x 2 x i64> [[TMP1:%.*]], <vscale x 2 x i64> [[TMP2:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[TMP0]])
-; CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> [[TMP2]], <vscale x 2 x i1> [[TMP4]], <vscale x 2 x i64> [[TMP1]])
-; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP5]]
-;
- %4 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %0)
- %5 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> %2, <vscale x 2 x i1> %4, <vscale x 2 x i64> %1)
- ret <vscale x 2 x i64> %5
-}
-
attributes #0 = { "target-features"="+sve" }