[llvm] fbc329e - [AArch64] Add S/UQXTRN tablegen patterns.
David Green via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 2 23:57:31 PDT 2021
Author: David Green
Date: 2021-07-03T07:57:19+01:00
New Revision: fbc329efbdba3edad78a1651148b340bcc7bf032
URL: https://github.com/llvm/llvm-project/commit/fbc329efbdba3edad78a1651148b340bcc7bf032
DIFF: https://github.com/llvm/llvm-project/commit/fbc329efbdba3edad78a1651148b340bcc7bf032.diff
LOG: [AArch64] Add S/UQXTRN tablegen patterns.
This adds simple patterns for signed and unsigned saturating extract
narrow instructions. They combine a min/max/truncate into a single
instruction, provided that the immediates on the min/max are correct
for the saturation type. This is handled entirely in tablegen with some
extra patterns.
v2i64->v2i32 is not handled here as the min/max nodes are not legal,
making the lowering quite different.
Differential Revision: https://reviews.llvm.org/D103263
Added:
Modified:
llvm/lib/Target/AArch64/AArch64InstrInfo.td
llvm/test/CodeGen/AArch64/qmovn.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 7802144fb2c98..b921a6c4b884a 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -4224,6 +4224,37 @@ defm : SIMDVectorLShiftLongBySizeBHSPats<anyext>;
defm : SIMDVectorLShiftLongBySizeBHSPats<zext>;
defm : SIMDVectorLShiftLongBySizeBHSPats<sext>;
+// Constant vector values, used in the S/UQXTN patterns below.
+def VImmFF: PatLeaf<(AArch64NvCast (v2i64 (AArch64movi_edit (i32 85))))>;
+def VImmFFFF: PatLeaf<(AArch64NvCast (v2i64 (AArch64movi_edit (i32 51))))>;
+def VImm7F: PatLeaf<(AArch64movi_shift (i32 127), (i32 0))>;
+def VImm80: PatLeaf<(AArch64mvni_shift (i32 127), (i32 0))>;
+def VImm7FFF: PatLeaf<(AArch64movi_msl (i32 127), (i32 264))>;
+def VImm8000: PatLeaf<(AArch64mvni_msl (i32 127), (i32 264))>;
+
+// trunc(umin(X, 255)) -> UQXTN v8i8
+def : Pat<(v8i8 (trunc (umin (v8i16 V128:$Vn), (v8i16 VImmFF)))),
+ (UQXTNv8i8 V128:$Vn)>;
+// trunc(umin(X, 65535)) -> UQXTN v4i16
+def : Pat<(v4i16 (trunc (umin (v4i32 V128:$Vn), (v4i32 VImmFFFF)))),
+ (UQXTNv4i16 V128:$Vn)>;
+// trunc(smin(smax(X, -128), 127)) -> SQXTN
+// with reversed min/max
+def : Pat<(v8i8 (trunc (smin (smax (v8i16 V128:$Vn), (v8i16 VImm80)),
+ (v8i16 VImm7F)))),
+ (SQXTNv8i8 V128:$Vn)>;
+def : Pat<(v8i8 (trunc (smax (smin (v8i16 V128:$Vn), (v8i16 VImm7F)),
+ (v8i16 VImm80)))),
+ (SQXTNv8i8 V128:$Vn)>;
+// trunc(smin(smax(X, -32768), 32767)) -> SQXTN
+// with reversed min/max
+def : Pat<(v4i16 (trunc (smin (smax (v4i32 V128:$Vn), (v4i32 VImm8000)),
+ (v4i32 VImm7FFF)))),
+ (SQXTNv4i16 V128:$Vn)>;
+def : Pat<(v4i16 (trunc (smax (smin (v4i32 V128:$Vn), (v4i32 VImm7FFF)),
+ (v4i32 VImm8000)))),
+ (SQXTNv4i16 V128:$Vn)>;
+
//===----------------------------------------------------------------------===//
// Advanced SIMD three vector instructions.
//===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/AArch64/qmovn.ll b/llvm/test/CodeGen/AArch64/qmovn.ll
index 515f4d5bd114c..400cb0912ffb8 100644
--- a/llvm/test/CodeGen/AArch64/qmovn.ll
+++ b/llvm/test/CodeGen/AArch64/qmovn.ll
@@ -4,11 +4,7 @@
define <4 x i16> @vqmovni32_smaxmin(<4 x i32> %s0) {
; CHECK-LABEL: vqmovni32_smaxmin:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: movi v1.4s, #127, msl #8
-; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: mvni v1.4s, #127, msl #8
-; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: xtn v0.4h, v0.4s
+; CHECK-NEXT: sqxtn v0.4h, v0.4s
; CHECK-NEXT: ret
entry:
%c1 = icmp slt <4 x i32> %s0, <i32 32767, i32 32767, i32 32767, i32 32767>
@@ -22,11 +18,7 @@ entry:
define <4 x i16> @vqmovni32_sminmax(<4 x i32> %s0) {
; CHECK-LABEL: vqmovni32_sminmax:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mvni v1.4s, #127, msl #8
-; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: movi v1.4s, #127, msl #8
-; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: xtn v0.4h, v0.4s
+; CHECK-NEXT: sqxtn v0.4h, v0.4s
; CHECK-NEXT: ret
entry:
%c1 = icmp sgt <4 x i32> %s0, <i32 -32768, i32 -32768, i32 -32768, i32 -32768>
@@ -40,9 +32,7 @@ entry:
define <4 x i16> @vqmovni32_umaxmin(<4 x i32> %s0) {
; CHECK-LABEL: vqmovni32_umaxmin:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff
-; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: xtn v0.4h, v0.4s
+; CHECK-NEXT: uqxtn v0.4h, v0.4s
; CHECK-NEXT: ret
entry:
%c1 = icmp ult <4 x i32> %s0, <i32 65535, i32 65535, i32 65535, i32 65535>
@@ -54,11 +44,7 @@ entry:
define <8 x i8> @vqmovni16_smaxmin(<8 x i16> %s0) {
; CHECK-LABEL: vqmovni16_smaxmin:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: movi v1.8h, #127
-; CHECK-NEXT: smin v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: mvni v1.8h, #127
-; CHECK-NEXT: smax v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: xtn v0.8b, v0.8h
+; CHECK-NEXT: sqxtn v0.8b, v0.8h
; CHECK-NEXT: ret
entry:
%c1 = icmp slt <8 x i16> %s0, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
@@ -72,11 +58,7 @@ entry:
define <8 x i8> @vqmovni16_sminmax(<8 x i16> %s0) {
; CHECK-LABEL: vqmovni16_sminmax:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mvni v1.8h, #127
-; CHECK-NEXT: smax v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: movi v1.8h, #127
-; CHECK-NEXT: smin v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: xtn v0.8b, v0.8h
+; CHECK-NEXT: sqxtn v0.8b, v0.8h
; CHECK-NEXT: ret
entry:
%c1 = icmp sgt <8 x i16> %s0, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
@@ -90,9 +72,7 @@ entry:
define <8 x i8> @vqmovni16_umaxmin(<8 x i16> %s0) {
; CHECK-LABEL: vqmovni16_umaxmin:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: movi v1.2d, #0xff00ff00ff00ff
-; CHECK-NEXT: umin v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: xtn v0.8b, v0.8h
+; CHECK-NEXT: uqxtn v0.8b, v0.8h
; CHECK-NEXT: ret
entry:
%c1 = icmp ult <8 x i16> %s0, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
More information about the llvm-commits
mailing list