[llvm] cbebace - [AArch64] Add UQXTN2 patterns
David Green via llvm-commits
llvm-commits at lists.llvm.org
Sun Jul 21 04:57:36 PDT 2024
Author: David Green
Date: 2024-07-21T12:57:31+01:00
New Revision: cbebacef5e12c985ca32c894bcfddec9adb2fc85
URL: https://github.com/llvm/llvm-project/commit/cbebacef5e12c985ca32c894bcfddec9adb2fc85
DIFF: https://github.com/llvm/llvm-project/commit/cbebacef5e12c985ca32c894bcfddec9adb2fc85.diff
LOG: [AArch64] Add UQXTN2 patterns
Similar to the existing UQXTN and SQXTN2 patterns, we can generate a UQXTN2
from concat(Vd, trunc(umin(X, 255))), and likewise from concat(Vd, trunc(umin(X, 65535))).
Added:
Modified:
llvm/lib/Target/AArch64/AArch64InstrInfo.td
llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll
llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
llvm/test/CodeGen/AArch64/qmovn.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index fe4589f4fbdae..643bcc33f9201 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -5363,6 +5363,17 @@ def : Pat<(v4i16 (trunc (smax (smin (v4i32 V128:$Vn), (v4i32 VImm7FFF)),
(v4i32 VImm8000)))),
(SQXTNv4i16 V128:$Vn)>;
+// concat_vectors(Vd, trunc(umin(X, 255))) -> UQXTN2(Vd, Vn)
+def : Pat<(v16i8 (concat_vectors
+ (v8i8 V64:$Vd),
+ (v8i8 (trunc (umin (v8i16 V128:$Vn), (v8i16 VImmFF)))))),
+ (UQXTNv16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
+// concat_vectors(Vd, trunc(umin(X, 65535))) -> UQXTN2(Vd, Vn)
+def : Pat<(v8i16 (concat_vectors
+ (v4i16 V64:$Vd),
+ (v4i16 (trunc (umin (v4i32 V128:$Vn), (v4i32 VImmFFFF)))))),
+ (UQXTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
+
// concat_vectors(Vd, trunc(smin(smax Vm, -128), 127) ~> SQXTN2(Vd, Vn)
// with reversed min/max
def : Pat<(v16i8 (concat_vectors
diff --git a/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll b/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll
index 4e8bfcd9d7516..0138bef9c3845 100644
--- a/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll
@@ -283,14 +283,12 @@ entry:
define <8 x i16> @utesth_f16i16(<8 x half> %x) {
; CHECK-CVT-LABEL: utesth_f16i16:
; CHECK-CVT: // %bb.0: // %entry
-; CHECK-CVT-NEXT: fcvtl2 v2.4s, v0.8h
-; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT: movi v1.2d, #0x00ffff0000ffff
-; CHECK-CVT-NEXT: fcvtzu v2.4s, v2.4s
-; CHECK-CVT-NEXT: fcvtzu v0.4s, v0.4s
-; CHECK-CVT-NEXT: umin v2.4s, v2.4s, v1.4s
-; CHECK-CVT-NEXT: umin v0.4s, v0.4s, v1.4s
-; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v2.8h
+; CHECK-CVT-NEXT: fcvtl v1.4s, v0.4h
+; CHECK-CVT-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-CVT-NEXT: fcvtzu v1.4s, v1.4s
+; CHECK-CVT-NEXT: fcvtzu v2.4s, v0.4s
+; CHECK-CVT-NEXT: uqxtn v0.4h, v1.4s
+; CHECK-CVT-NEXT: uqxtn2 v0.8h, v2.4s
; CHECK-CVT-NEXT: ret
;
; CHECK-FP16-LABEL: utesth_f16i16:
@@ -308,14 +306,12 @@ entry:
define <8 x i16> @ustest_f16i16(<8 x half> %x) {
; CHECK-CVT-LABEL: ustest_f16i16:
; CHECK-CVT: // %bb.0: // %entry
-; CHECK-CVT-NEXT: fcvtl2 v2.4s, v0.8h
-; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT: movi v1.2d, #0x00ffff0000ffff
-; CHECK-CVT-NEXT: fcvtzu v2.4s, v2.4s
-; CHECK-CVT-NEXT: fcvtzu v0.4s, v0.4s
-; CHECK-CVT-NEXT: umin v2.4s, v2.4s, v1.4s
-; CHECK-CVT-NEXT: umin v0.4s, v0.4s, v1.4s
-; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v2.8h
+; CHECK-CVT-NEXT: fcvtl v1.4s, v0.4h
+; CHECK-CVT-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-CVT-NEXT: fcvtzu v1.4s, v1.4s
+; CHECK-CVT-NEXT: fcvtzu v2.4s, v0.4s
+; CHECK-CVT-NEXT: uqxtn v0.4h, v1.4s
+; CHECK-CVT-NEXT: uqxtn2 v0.8h, v2.4s
; CHECK-CVT-NEXT: ret
;
; CHECK-FP16-LABEL: ustest_f16i16:
@@ -909,14 +905,12 @@ entry:
define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) {
; CHECK-CVT-LABEL: utesth_f16i16_mm:
; CHECK-CVT: // %bb.0: // %entry
-; CHECK-CVT-NEXT: fcvtl2 v2.4s, v0.8h
-; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT: movi v1.2d, #0x00ffff0000ffff
-; CHECK-CVT-NEXT: fcvtzu v2.4s, v2.4s
-; CHECK-CVT-NEXT: fcvtzu v0.4s, v0.4s
-; CHECK-CVT-NEXT: umin v2.4s, v2.4s, v1.4s
-; CHECK-CVT-NEXT: umin v0.4s, v0.4s, v1.4s
-; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v2.8h
+; CHECK-CVT-NEXT: fcvtl v1.4s, v0.4h
+; CHECK-CVT-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-CVT-NEXT: fcvtzu v1.4s, v1.4s
+; CHECK-CVT-NEXT: fcvtzu v2.4s, v0.4s
+; CHECK-CVT-NEXT: uqxtn v0.4h, v1.4s
+; CHECK-CVT-NEXT: uqxtn2 v0.8h, v2.4s
; CHECK-CVT-NEXT: ret
;
; CHECK-FP16-LABEL: utesth_f16i16_mm:
@@ -933,14 +927,12 @@ entry:
define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) {
; CHECK-CVT-LABEL: ustest_f16i16_mm:
; CHECK-CVT: // %bb.0: // %entry
-; CHECK-CVT-NEXT: fcvtl2 v2.4s, v0.8h
-; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT: movi v1.2d, #0x00ffff0000ffff
-; CHECK-CVT-NEXT: fcvtzu v2.4s, v2.4s
-; CHECK-CVT-NEXT: fcvtzu v0.4s, v0.4s
-; CHECK-CVT-NEXT: umin v2.4s, v2.4s, v1.4s
-; CHECK-CVT-NEXT: umin v0.4s, v0.4s, v1.4s
-; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v2.8h
+; CHECK-CVT-NEXT: fcvtl v1.4s, v0.4h
+; CHECK-CVT-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-CVT-NEXT: fcvtzu v1.4s, v1.4s
+; CHECK-CVT-NEXT: fcvtzu v2.4s, v0.4s
+; CHECK-CVT-NEXT: uqxtn v0.4h, v1.4s
+; CHECK-CVT-NEXT: uqxtn2 v0.8h, v2.4s
; CHECK-CVT-NEXT: ret
;
; CHECK-FP16-LABEL: ustest_f16i16_mm:
diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
index 16e04070b6543..b03d145d1408d 100644
--- a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
@@ -2509,12 +2509,10 @@ define <16 x i8> @test_unsigned_v16f32_v16i8(<16 x float> %f) {
define <8 x i16> @test_unsigned_v8f32_v8i16(<8 x float> %f) {
; CHECK-LABEL: test_unsigned_v8f32_v8i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: movi v2.2d, #0x00ffff0000ffff
-; CHECK-NEXT: fcvtzu v1.4s, v1.4s
; CHECK-NEXT: fcvtzu v0.4s, v0.4s
-; CHECK-NEXT: umin v1.4s, v1.4s, v2.4s
-; CHECK-NEXT: umin v0.4s, v0.4s, v2.4s
-; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: fcvtzu v1.4s, v1.4s
+; CHECK-NEXT: uqxtn v0.4h, v0.4s
+; CHECK-NEXT: uqxtn2 v0.8h, v1.4s
; CHECK-NEXT: ret
%x = call <8 x i16> @llvm.fptoui.sat.v8f32.v8i16(<8 x float> %f)
ret <8 x i16> %x
@@ -2523,17 +2521,14 @@ define <8 x i16> @test_unsigned_v8f32_v8i16(<8 x float> %f) {
define <16 x i16> @test_unsigned_v16f32_v16i16(<16 x float> %f) {
; CHECK-LABEL: test_unsigned_v16f32_v16i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: movi v4.2d, #0x00ffff0000ffff
-; CHECK-NEXT: fcvtzu v1.4s, v1.4s
; CHECK-NEXT: fcvtzu v0.4s, v0.4s
-; CHECK-NEXT: fcvtzu v3.4s, v3.4s
; CHECK-NEXT: fcvtzu v2.4s, v2.4s
-; CHECK-NEXT: umin v1.4s, v1.4s, v4.4s
-; CHECK-NEXT: umin v0.4s, v0.4s, v4.4s
-; CHECK-NEXT: umin v3.4s, v3.4s, v4.4s
-; CHECK-NEXT: umin v2.4s, v2.4s, v4.4s
-; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: uzp1 v1.8h, v2.8h, v3.8h
+; CHECK-NEXT: fcvtzu v4.4s, v1.4s
+; CHECK-NEXT: uqxtn v0.4h, v0.4s
+; CHECK-NEXT: uqxtn v1.4h, v2.4s
+; CHECK-NEXT: fcvtzu v2.4s, v3.4s
+; CHECK-NEXT: uqxtn2 v0.8h, v4.4s
+; CHECK-NEXT: uqxtn2 v1.8h, v2.4s
; CHECK-NEXT: ret
%x = call <16 x i16> @llvm.fptoui.sat.v16f32.v16i16(<16 x float> %f)
ret <16 x i16> %x
@@ -2632,12 +2627,10 @@ define <16 x i8> @test_unsigned_v16f16_v16i8(<16 x half> %f) {
;
; CHECK-FP16-LABEL: test_unsigned_v16f16_v16i8:
; CHECK-FP16: // %bb.0:
-; CHECK-FP16-NEXT: movi v2.2d, #0xff00ff00ff00ff
-; CHECK-FP16-NEXT: fcvtzu v1.8h, v1.8h
; CHECK-FP16-NEXT: fcvtzu v0.8h, v0.8h
-; CHECK-FP16-NEXT: umin v1.8h, v1.8h, v2.8h
-; CHECK-FP16-NEXT: umin v0.8h, v0.8h, v2.8h
-; CHECK-FP16-NEXT: uzp1 v0.16b, v0.16b, v1.16b
+; CHECK-FP16-NEXT: fcvtzu v1.8h, v1.8h
+; CHECK-FP16-NEXT: uqxtn v0.8b, v0.8h
+; CHECK-FP16-NEXT: uqxtn2 v0.16b, v1.8h
; CHECK-FP16-NEXT: ret
%x = call <16 x i8> @llvm.fptoui.sat.v16f16.v16i8(<16 x half> %f)
ret <16 x i8> %x
diff --git a/llvm/test/CodeGen/AArch64/qmovn.ll b/llvm/test/CodeGen/AArch64/qmovn.ll
index dbdf9c58f8aba..35c172adbad3d 100644
--- a/llvm/test/CodeGen/AArch64/qmovn.ll
+++ b/llvm/test/CodeGen/AArch64/qmovn.ll
@@ -250,10 +250,8 @@ entry:
define <16 x i8> @unsigned_v8i16_to_v16i8(<8 x i8> %x, <8 x i16> %y) {
; CHECK-LABEL: unsigned_v8i16_to_v16i8:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: movi v2.2d, #0xff00ff00ff00ff
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: umin v1.8h, v1.8h, v2.8h
-; CHECK-NEXT: xtn2 v0.16b, v1.8h
+; CHECK-NEXT: uqxtn2 v0.16b, v1.8h
; CHECK-NEXT: ret
entry:
%min = call <8 x i16> @llvm.umin.v8i16(<8 x i16> %y, <8 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>)
@@ -265,10 +263,8 @@ entry:
define <8 x i16> @unsigned_v4i32_to_v8i16(<4 x i16> %x, <4 x i32> %y) {
; CHECK-LABEL: unsigned_v4i32_to_v8i16:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: movi v2.2d, #0x00ffff0000ffff
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: umin v1.4s, v1.4s, v2.4s
-; CHECK-NEXT: xtn2 v0.8h, v1.4s
+; CHECK-NEXT: uqxtn2 v0.8h, v1.4s
; CHECK-NEXT: ret
entry:
%min = call <4 x i32> @llvm.umin.v4i32(<4 x i32> %y, <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>)
More information about the llvm-commits
mailing list