[llvm] fbc329e - [AArch64] Add S/UQXTRN tablegen patterns.

Fri Jul 2 23:57:31 PDT 2021

Author: David Green
Date: 2021-07-03T07:57:19+01:00
New Revision: fbc329efbdba3edad78a1651148b340bcc7bf032

URL: https://github.com/llvm/llvm-project/commit/fbc329efbdba3edad78a1651148b340bcc7bf032
DIFF: https://github.com/llvm/llvm-project/commit/fbc329efbdba3edad78a1651148b340bcc7bf032.diff

LOG: [AArch64] Add S/UQXTRN tablegen patterns.

This adds simple patterns for the signed and unsigned saturating extract
narrow instructions (SQXTN/UQXTN). They combine a min/max/truncate into a
single instruction, provided that the immediates on the min/max are correct
for the saturation type. This is handled entirely in tablegen with some
extra patterns.

v2i64->v2i32 is not handled here as the min/max nodes are not legal,
making the lowering quite different.

Differential Revision: https://reviews.llvm.org/D103263

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64InstrInfo.td
    llvm/test/CodeGen/AArch64/qmovn.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 7802144fb2c98..b921a6c4b884a 100644

--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -4224,6 +4224,37 @@ defm : SIMDVectorLShiftLongBySizeBHSPats<anyext>;
 defm : SIMDVectorLShiftLongBySizeBHSPats<zext>;
 defm : SIMDVectorLShiftLongBySizeBHSPats<sext>;
 
+// Constant vector values, used in the S/UQXTN patterns below.
+def VImmFF:   PatLeaf<(AArch64NvCast (v2i64 (AArch64movi_edit (i32 85))))>;
+def VImmFFFF: PatLeaf<(AArch64NvCast (v2i64 (AArch64movi_edit (i32 51))))>;
+def VImm7F:   PatLeaf<(AArch64movi_shift (i32 127), (i32 0))>;
+def VImm80:   PatLeaf<(AArch64mvni_shift (i32 127), (i32 0))>;
+def VImm7FFF: PatLeaf<(AArch64movi_msl (i32 127), (i32 264))>;
+def VImm8000: PatLeaf<(AArch64mvni_msl (i32 127), (i32 264))>;
+
+// trunc(umin(X, 255)) -> UQXTN v8i8
+def : Pat<(v8i8 (trunc (umin (v8i16 V128:$Vn), (v8i16 VImmFF)))),
+          (UQXTNv8i8 V128:$Vn)>;
+// trunc(umin(X, 65535)) -> UQXTN v4i16
+def : Pat<(v4i16 (trunc (umin (v4i32 V128:$Vn), (v4i32 VImmFFFF)))),
+          (UQXTNv4i16 V128:$Vn)>;
+// trunc(smin(smax(X, -128), 127)) -> SQXTN v8i8
+//  and the same with the min/max order reversed
+def : Pat<(v8i8 (trunc (smin (smax (v8i16 V128:$Vn), (v8i16 VImm80)),
+                             (v8i16 VImm7F)))),
+          (SQXTNv8i8 V128:$Vn)>;
+def : Pat<(v8i8 (trunc (smax (smin (v8i16 V128:$Vn), (v8i16 VImm7F)),
+                             (v8i16 VImm80)))),
+          (SQXTNv8i8 V128:$Vn)>;
+// trunc(smin(smax(X, -32768), 32767)) -> SQXTN v4i16
+//  and the same with the min/max order reversed
+def : Pat<(v4i16 (trunc (smin (smax (v4i32 V128:$Vn), (v4i32 VImm8000)),
+                              (v4i32 VImm7FFF)))),
+          (SQXTNv4i16 V128:$Vn)>;
+def : Pat<(v4i16 (trunc (smax (smin (v4i32 V128:$Vn), (v4i32 VImm7FFF)),
+                              (v4i32 VImm8000)))),
+          (SQXTNv4i16 V128:$Vn)>;
+
 //===----------------------------------------------------------------------===//
 // Advanced SIMD three vector instructions.
 //===----------------------------------------------------------------------===//

diff  --git a/llvm/test/CodeGen/AArch64/qmovn.ll b/llvm/test/CodeGen/AArch64/qmovn.ll
index 515f4d5bd114c..400cb0912ffb8 100644
--- a/llvm/test/CodeGen/AArch64/qmovn.ll
+++ b/llvm/test/CodeGen/AArch64/qmovn.ll
@@ -4,11 +4,7 @@
 define <4 x i16> @vqmovni32_smaxmin(<4 x i32> %s0) {
 ; CHECK-LABEL: vqmovni32_smaxmin:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    movi v1.4s, #127, msl #8
-; CHECK-NEXT:    smin v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    mvni v1.4s, #127, msl #8
-; CHECK-NEXT:    smax v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    xtn v0.4h, v0.4s
+; CHECK-NEXT:    sqxtn v0.4h, v0.4s
 ; CHECK-NEXT:    ret
 entry:
   %c1 = icmp slt <4 x i32> %s0, <i32 32767, i32 32767, i32 32767, i32 32767>
@@ -22,11 +18,7 @@ entry:
 define <4 x i16> @vqmovni32_sminmax(<4 x i32> %s0) {
 ; CHECK-LABEL: vqmovni32_sminmax:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mvni v1.4s, #127, msl #8
-; CHECK-NEXT:    smax v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    movi v1.4s, #127, msl #8
-; CHECK-NEXT:    smin v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    xtn v0.4h, v0.4s
+; CHECK-NEXT:    sqxtn v0.4h, v0.4s
 ; CHECK-NEXT:    ret
 entry:
   %c1 = icmp sgt <4 x i32> %s0, <i32 -32768, i32 -32768, i32 -32768, i32 -32768>
@@ -40,9 +32,7 @@ entry:
 define <4 x i16> @vqmovni32_umaxmin(<4 x i32> %s0) {
 ; CHECK-LABEL: vqmovni32_umaxmin:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    movi v1.2d, #0x00ffff0000ffff
-; CHECK-NEXT:    umin v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    xtn v0.4h, v0.4s
+; CHECK-NEXT:    uqxtn v0.4h, v0.4s
 ; CHECK-NEXT:    ret
 entry:
   %c1 = icmp ult <4 x i32> %s0, <i32 65535, i32 65535, i32 65535, i32 65535>
@@ -54,11 +44,7 @@ entry:
 define <8 x i8> @vqmovni16_smaxmin(<8 x i16> %s0) {
 ; CHECK-LABEL: vqmovni16_smaxmin:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    movi v1.8h, #127
-; CHECK-NEXT:    smin v0.8h, v0.8h, v1.8h
-; CHECK-NEXT:    mvni v1.8h, #127
-; CHECK-NEXT:    smax v0.8h, v0.8h, v1.8h
-; CHECK-NEXT:    xtn v0.8b, v0.8h
+; CHECK-NEXT:    sqxtn v0.8b, v0.8h
 ; CHECK-NEXT:    ret
 entry:
   %c1 = icmp slt <8 x i16> %s0, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
@@ -72,11 +58,7 @@ entry:
 define <8 x i8> @vqmovni16_sminmax(<8 x i16> %s0) {
 ; CHECK-LABEL: vqmovni16_sminmax:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mvni v1.8h, #127
-; CHECK-NEXT:    smax v0.8h, v0.8h, v1.8h
-; CHECK-NEXT:    movi v1.8h, #127
-; CHECK-NEXT:    smin v0.8h, v0.8h, v1.8h
-; CHECK-NEXT:    xtn v0.8b, v0.8h
+; CHECK-NEXT:    sqxtn v0.8b, v0.8h
 ; CHECK-NEXT:    ret
 entry:
   %c1 = icmp sgt <8 x i16> %s0, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
@@ -90,9 +72,7 @@ entry:
 define <8 x i8> @vqmovni16_umaxmin(<8 x i16> %s0) {
 ; CHECK-LABEL: vqmovni16_umaxmin:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    movi v1.2d, #0xff00ff00ff00ff
-; CHECK-NEXT:    umin v0.8h, v0.8h, v1.8h
-; CHECK-NEXT:    xtn v0.8b, v0.8h
+; CHECK-NEXT:    uqxtn v0.8b, v0.8h
 ; CHECK-NEXT:    ret
 entry:
   %c1 = icmp ult <8 x i16> %s0, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>