[llvm] 0b2aae4 - [AArch64] Zero extended extract_vector_elt pattern

David Green via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 15 07:56:35 PDT 2021


Author: David Green
Date: 2021-03-15T14:56:20Z
New Revision: 0b2aae42e5ea16a746d91a2945bf1e399fe485e3

URL: https://github.com/llvm/llvm-project/commit/0b2aae42e5ea16a746d91a2945bf1e399fe485e3
DIFF: https://github.com/llvm/llvm-project/commit/0b2aae42e5ea16a746d91a2945bf1e399fe485e3.diff

LOG: [AArch64] Zero extended extract_vector_elt pattern

This adds patterns for i64 zext_inreg(i32 extract_vector_elt X),
producing a single UMOVvi8 or UMOVvi16 instruction that is already
expected to clear the top bits. The exact pattern that this matches is
and(anyext(vector_extract X, lane), 0xff) for i8 lanes (or 0xffff for
i16 lanes), similar to the sext patterns higher up in the same file.

Differential Revision: https://reviews.llvm.org/D98599

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64InstrInfo.td
    llvm/test/CodeGen/AArch64/build-vector-extract.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 89c7234485f5..26a88aa597f3 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -5329,6 +5329,13 @@ def : Pat<(and (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),
                (i32 0xffff)),
           (i32 (UMOVvi16 V128:$Rn, VectorIndexH:$idx))>;
 
+def : Pat<(i64 (and (i64 (anyext (i32 (vector_extract (v16i8 V128:$Rn),
+            VectorIndexB:$idx)))), (i64 0xff))),
+          (SUBREG_TO_REG (i64 0), (i32 (UMOVvi8 V128:$Rn, VectorIndexB:$idx)), sub_32)>;
+def : Pat<(i64 (and (i64 (anyext (i32 (vector_extract (v8i16 V128:$Rn),
+            VectorIndexH:$idx)))), (i64 0xffff))),
+          (SUBREG_TO_REG (i64 0), (i32 (UMOVvi16 V128:$Rn, VectorIndexH:$idx)), sub_32)>;
+
 defm INS : SIMDIns;
 
 def : Pat<(v16i8 (scalar_to_vector GPR32:$Rn)),

diff  --git a/llvm/test/CodeGen/AArch64/build-vector-extract.ll b/llvm/test/CodeGen/AArch64/build-vector-extract.ll
index b57148f2a927..2c263d7f5b75 100644
--- a/llvm/test/CodeGen/AArch64/build-vector-extract.ll
+++ b/llvm/test/CodeGen/AArch64/build-vector-extract.ll
@@ -208,7 +208,6 @@ define <2 x i64> @extract0_i16_zext_insert0_i64_undef(<8 x i16> %x) {
 ; CHECK-LABEL: extract0_i16_zext_insert0_i64_undef:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    umov w8, v0.h[0]
-; CHECK-NEXT:    and x8, x8, #0xffff
 ; CHECK-NEXT:    fmov d0, x8
 ; CHECK-NEXT:    ret
   %e = extractelement <8 x i16> %x, i32 0
@@ -221,7 +220,6 @@ define <2 x i64> @extract0_i16_zext_insert0_i64_zero(<8 x i16> %x) {
 ; CHECK-LABEL: extract0_i16_zext_insert0_i64_zero:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    umov w8, v0.h[0]
-; CHECK-NEXT:    and x8, x8, #0xffff
 ; CHECK-NEXT:    movi v0.2d, #0000000000000000
 ; CHECK-NEXT:    mov v0.d[0], x8
 ; CHECK-NEXT:    ret
@@ -235,7 +233,6 @@ define <2 x i64> @extract1_i16_zext_insert0_i64_undef(<8 x i16> %x) {
 ; CHECK-LABEL: extract1_i16_zext_insert0_i64_undef:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    umov w8, v0.h[1]
-; CHECK-NEXT:    and x8, x8, #0xffff
 ; CHECK-NEXT:    fmov d0, x8
 ; CHECK-NEXT:    ret
   %e = extractelement <8 x i16> %x, i32 1
@@ -248,7 +245,6 @@ define <2 x i64> @extract1_i16_zext_insert0_i64_zero(<8 x i16> %x) {
 ; CHECK-LABEL: extract1_i16_zext_insert0_i64_zero:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    umov w8, v0.h[1]
-; CHECK-NEXT:    and x8, x8, #0xffff
 ; CHECK-NEXT:    movi v0.2d, #0000000000000000
 ; CHECK-NEXT:    mov v0.d[0], x8
 ; CHECK-NEXT:    ret
@@ -262,7 +258,6 @@ define <2 x i64> @extract2_i16_zext_insert0_i64_undef(<8 x i16> %x) {
 ; CHECK-LABEL: extract2_i16_zext_insert0_i64_undef:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    umov w8, v0.h[2]
-; CHECK-NEXT:    and x8, x8, #0xffff
 ; CHECK-NEXT:    fmov d0, x8
 ; CHECK-NEXT:    ret
   %e = extractelement <8 x i16> %x, i32 2
@@ -275,7 +270,6 @@ define <2 x i64> @extract2_i16_zext_insert0_i64_zero(<8 x i16> %x) {
 ; CHECK-LABEL: extract2_i16_zext_insert0_i64_zero:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    umov w8, v0.h[2]
-; CHECK-NEXT:    and x8, x8, #0xffff
 ; CHECK-NEXT:    movi v0.2d, #0000000000000000
 ; CHECK-NEXT:    mov v0.d[0], x8
 ; CHECK-NEXT:    ret
@@ -289,7 +283,6 @@ define <2 x i64> @extract3_i16_zext_insert0_i64_undef(<8 x i16> %x) {
 ; CHECK-LABEL: extract3_i16_zext_insert0_i64_undef:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    umov w8, v0.h[3]
-; CHECK-NEXT:    and x8, x8, #0xffff
 ; CHECK-NEXT:    fmov d0, x8
 ; CHECK-NEXT:    ret
   %e = extractelement <8 x i16> %x, i32 3
@@ -302,7 +295,6 @@ define <2 x i64> @extract3_i16_zext_insert0_i64_zero(<8 x i16> %x) {
 ; CHECK-LABEL: extract3_i16_zext_insert0_i64_zero:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    umov w8, v0.h[3]
-; CHECK-NEXT:    and x8, x8, #0xffff
 ; CHECK-NEXT:    movi v0.2d, #0000000000000000
 ; CHECK-NEXT:    mov v0.d[0], x8
 ; CHECK-NEXT:    ret
@@ -316,7 +308,6 @@ define <2 x i64> @extract0_i16_zext_insert1_i64_undef(<8 x i16> %x) {
 ; CHECK-LABEL: extract0_i16_zext_insert1_i64_undef:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    umov w8, v0.h[0]
-; CHECK-NEXT:    and x8, x8, #0xffff
 ; CHECK-NEXT:    dup v0.2d, x8
 ; CHECK-NEXT:    ret
   %e = extractelement <8 x i16> %x, i32 0
@@ -329,7 +320,6 @@ define <2 x i64> @extract0_i16_zext_insert1_i64_zero(<8 x i16> %x) {
 ; CHECK-LABEL: extract0_i16_zext_insert1_i64_zero:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    umov w8, v0.h[0]
-; CHECK-NEXT:    and x8, x8, #0xffff
 ; CHECK-NEXT:    movi v0.2d, #0000000000000000
 ; CHECK-NEXT:    mov v0.d[1], x8
 ; CHECK-NEXT:    ret
@@ -343,7 +333,6 @@ define <2 x i64> @extract1_i16_zext_insert1_i64_undef(<8 x i16> %x) {
 ; CHECK-LABEL: extract1_i16_zext_insert1_i64_undef:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    umov w8, v0.h[1]
-; CHECK-NEXT:    and x8, x8, #0xffff
 ; CHECK-NEXT:    dup v0.2d, x8
 ; CHECK-NEXT:    ret
   %e = extractelement <8 x i16> %x, i32 1
@@ -356,7 +345,6 @@ define <2 x i64> @extract1_i16_zext_insert1_i64_zero(<8 x i16> %x) {
 ; CHECK-LABEL: extract1_i16_zext_insert1_i64_zero:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    umov w8, v0.h[1]
-; CHECK-NEXT:    and x8, x8, #0xffff
 ; CHECK-NEXT:    movi v0.2d, #0000000000000000
 ; CHECK-NEXT:    mov v0.d[1], x8
 ; CHECK-NEXT:    ret
@@ -370,7 +358,6 @@ define <2 x i64> @extract2_i16_zext_insert1_i64_undef(<8 x i16> %x) {
 ; CHECK-LABEL: extract2_i16_zext_insert1_i64_undef:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    umov w8, v0.h[2]
-; CHECK-NEXT:    and x8, x8, #0xffff
 ; CHECK-NEXT:    dup v0.2d, x8
 ; CHECK-NEXT:    ret
   %e = extractelement <8 x i16> %x, i32 2
@@ -383,7 +370,6 @@ define <2 x i64> @extract2_i16_zext_insert1_i64_zero(<8 x i16> %x) {
 ; CHECK-LABEL: extract2_i16_zext_insert1_i64_zero:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    umov w8, v0.h[2]
-; CHECK-NEXT:    and x8, x8, #0xffff
 ; CHECK-NEXT:    movi v0.2d, #0000000000000000
 ; CHECK-NEXT:    mov v0.d[1], x8
 ; CHECK-NEXT:    ret
@@ -397,7 +383,6 @@ define <2 x i64> @extract3_i16_zext_insert1_i64_undef(<8 x i16> %x) {
 ; CHECK-LABEL: extract3_i16_zext_insert1_i64_undef:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    umov w8, v0.h[3]
-; CHECK-NEXT:    and x8, x8, #0xffff
 ; CHECK-NEXT:    dup v0.2d, x8
 ; CHECK-NEXT:    ret
   %e = extractelement <8 x i16> %x, i32 3
@@ -410,7 +395,6 @@ define <2 x i64> @extract3_i16_zext_insert1_i64_zero(<8 x i16> %x) {
 ; CHECK-LABEL: extract3_i16_zext_insert1_i64_zero:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    umov w8, v0.h[3]
-; CHECK-NEXT:    and x8, x8, #0xffff
 ; CHECK-NEXT:    movi v0.2d, #0000000000000000
 ; CHECK-NEXT:    mov v0.d[1], x8
 ; CHECK-NEXT:    ret
@@ -426,7 +410,6 @@ define <2 x i64> @extract0_i8_zext_insert0_i64_undef(<16 x i8> %x) {
 ; CHECK-LABEL: extract0_i8_zext_insert0_i64_undef:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    umov w8, v0.b[0]
-; CHECK-NEXT:    and x8, x8, #0xff
 ; CHECK-NEXT:    fmov d0, x8
 ; CHECK-NEXT:    ret
   %e = extractelement <16 x i8> %x, i32 0
@@ -439,7 +422,6 @@ define <2 x i64> @extract0_i8_zext_insert0_i64_zero(<16 x i8> %x) {
 ; CHECK-LABEL: extract0_i8_zext_insert0_i64_zero:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    umov w8, v0.b[0]
-; CHECK-NEXT:    and x8, x8, #0xff
 ; CHECK-NEXT:    movi v0.2d, #0000000000000000
 ; CHECK-NEXT:    mov v0.d[0], x8
 ; CHECK-NEXT:    ret
@@ -453,7 +435,6 @@ define <2 x i64> @extract1_i8_zext_insert0_i64_undef(<16 x i8> %x) {
 ; CHECK-LABEL: extract1_i8_zext_insert0_i64_undef:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    umov w8, v0.b[1]
-; CHECK-NEXT:    and x8, x8, #0xff
 ; CHECK-NEXT:    fmov d0, x8
 ; CHECK-NEXT:    ret
   %e = extractelement <16 x i8> %x, i32 1
@@ -466,7 +447,6 @@ define <2 x i64> @extract1_i8_zext_insert0_i64_zero(<16 x i8> %x) {
 ; CHECK-LABEL: extract1_i8_zext_insert0_i64_zero:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    umov w8, v0.b[1]
-; CHECK-NEXT:    and x8, x8, #0xff
 ; CHECK-NEXT:    movi v0.2d, #0000000000000000
 ; CHECK-NEXT:    mov v0.d[0], x8
 ; CHECK-NEXT:    ret
@@ -480,7 +460,6 @@ define <2 x i64> @extract2_i8_zext_insert0_i64_undef(<16 x i8> %x) {
 ; CHECK-LABEL: extract2_i8_zext_insert0_i64_undef:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    umov w8, v0.b[2]
-; CHECK-NEXT:    and x8, x8, #0xff
 ; CHECK-NEXT:    fmov d0, x8
 ; CHECK-NEXT:    ret
   %e = extractelement <16 x i8> %x, i32 2
@@ -493,7 +472,6 @@ define <2 x i64> @extract2_i8_zext_insert0_i64_zero(<16 x i8> %x) {
 ; CHECK-LABEL: extract2_i8_zext_insert0_i64_zero:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    umov w8, v0.b[2]
-; CHECK-NEXT:    and x8, x8, #0xff
 ; CHECK-NEXT:    movi v0.2d, #0000000000000000
 ; CHECK-NEXT:    mov v0.d[0], x8
 ; CHECK-NEXT:    ret
@@ -507,7 +485,6 @@ define <2 x i64> @extract3_i8_zext_insert0_i64_undef(<16 x i8> %x) {
 ; CHECK-LABEL: extract3_i8_zext_insert0_i64_undef:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    umov w8, v0.b[3]
-; CHECK-NEXT:    and x8, x8, #0xff
 ; CHECK-NEXT:    fmov d0, x8
 ; CHECK-NEXT:    ret
   %e = extractelement <16 x i8> %x, i32 3
@@ -520,7 +497,6 @@ define <2 x i64> @extract3_i8_zext_insert0_i64_zero(<16 x i8> %x) {
 ; CHECK-LABEL: extract3_i8_zext_insert0_i64_zero:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    umov w8, v0.b[3]
-; CHECK-NEXT:    and x8, x8, #0xff
 ; CHECK-NEXT:    movi v0.2d, #0000000000000000
 ; CHECK-NEXT:    mov v0.d[0], x8
 ; CHECK-NEXT:    ret
@@ -534,7 +510,6 @@ define <2 x i64> @extract0_i8_zext_insert1_i64_undef(<16 x i8> %x) {
 ; CHECK-LABEL: extract0_i8_zext_insert1_i64_undef:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    umov w8, v0.b[0]
-; CHECK-NEXT:    and x8, x8, #0xff
 ; CHECK-NEXT:    dup v0.2d, x8
 ; CHECK-NEXT:    ret
   %e = extractelement <16 x i8> %x, i32 0
@@ -547,7 +522,6 @@ define <2 x i64> @extract0_i8_zext_insert1_i64_zero(<16 x i8> %x) {
 ; CHECK-LABEL: extract0_i8_zext_insert1_i64_zero:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    umov w8, v0.b[0]
-; CHECK-NEXT:    and x8, x8, #0xff
 ; CHECK-NEXT:    movi v0.2d, #0000000000000000
 ; CHECK-NEXT:    mov v0.d[1], x8
 ; CHECK-NEXT:    ret
@@ -561,7 +535,6 @@ define <2 x i64> @extract1_i8_zext_insert1_i64_undef(<16 x i8> %x) {
 ; CHECK-LABEL: extract1_i8_zext_insert1_i64_undef:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    umov w8, v0.b[1]
-; CHECK-NEXT:    and x8, x8, #0xff
 ; CHECK-NEXT:    dup v0.2d, x8
 ; CHECK-NEXT:    ret
   %e = extractelement <16 x i8> %x, i32 1
@@ -574,7 +547,6 @@ define <2 x i64> @extract1_i8_zext_insert1_i64_zero(<16 x i8> %x) {
 ; CHECK-LABEL: extract1_i8_zext_insert1_i64_zero:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    umov w8, v0.b[1]
-; CHECK-NEXT:    and x8, x8, #0xff
 ; CHECK-NEXT:    movi v0.2d, #0000000000000000
 ; CHECK-NEXT:    mov v0.d[1], x8
 ; CHECK-NEXT:    ret
@@ -588,7 +560,6 @@ define <2 x i64> @extract2_i8_zext_insert1_i64_undef(<16 x i8> %x) {
 ; CHECK-LABEL: extract2_i8_zext_insert1_i64_undef:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    umov w8, v0.b[2]
-; CHECK-NEXT:    and x8, x8, #0xff
 ; CHECK-NEXT:    dup v0.2d, x8
 ; CHECK-NEXT:    ret
   %e = extractelement <16 x i8> %x, i32 2
@@ -601,7 +572,6 @@ define <2 x i64> @extract2_i8_zext_insert1_i64_zero(<16 x i8> %x) {
 ; CHECK-LABEL: extract2_i8_zext_insert1_i64_zero:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    umov w8, v0.b[2]
-; CHECK-NEXT:    and x8, x8, #0xff
 ; CHECK-NEXT:    movi v0.2d, #0000000000000000
 ; CHECK-NEXT:    mov v0.d[1], x8
 ; CHECK-NEXT:    ret
@@ -615,7 +585,6 @@ define <2 x i64> @extract3_i8_zext_insert1_i64_undef(<16 x i8> %x) {
 ; CHECK-LABEL: extract3_i8_zext_insert1_i64_undef:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    umov w8, v0.b[3]
-; CHECK-NEXT:    and x8, x8, #0xff
 ; CHECK-NEXT:    dup v0.2d, x8
 ; CHECK-NEXT:    ret
   %e = extractelement <16 x i8> %x, i32 3
@@ -628,7 +597,6 @@ define <2 x i64> @extract3_i8_zext_insert1_i64_zero(<16 x i8> %x) {
 ; CHECK-LABEL: extract3_i8_zext_insert1_i64_zero:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    umov w8, v0.b[3]
-; CHECK-NEXT:    and x8, x8, #0xff
 ; CHECK-NEXT:    movi v0.2d, #0000000000000000
 ; CHECK-NEXT:    mov v0.d[1], x8
 ; CHECK-NEXT:    ret


        


More information about the llvm-commits mailing list