[llvm] r256176 - [AArch64] Add additional extract-extend patterns for smov

Mon Dec 21 10:31:25 PST 2015

Author: mssimpso
Date: Mon Dec 21 12:31:25 2015
New Revision: 256176

URL: http://llvm.org/viewvc/llvm-project?rev=256176&view=rev
Log:
[AArch64] Add additional extract-extend patterns for smov

This patch adds to the target description two additional patterns for matching
extract-extend operations to SMOV. The patterns catch the v16i8-to-i64 and
v8i16-to-i64 cases. The existing patterns miss these cases because the
extracted elements must first be legalized to i32, resulting in any_extend
nodes.

This was originally implemented as a DAG combine (r255895), but was reverted
due to failing out-of-tree tests.

Modified:
    llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td
    llvm/trunk/test/CodeGen/AArch64/arm64-neon-copy.ll

Modified: llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td?rev=256176&r1=256175&r2=256176&view=diff
==============================================================================

--- llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td Mon Dec 21 12:31:25 2015
@@ -3806,6 +3806,13 @@ def : Pat<(sext_inreg (vector_extract (v
 def : Pat<(sext (i32 (vector_extract (v4i32 V128:$Rn), VectorIndexS:$idx))),
           (i64 (SMOVvi32to64 V128:$Rn, VectorIndexS:$idx))>;
 
+def : Pat<(sext_inreg (i64 (anyext (i32 (vector_extract (v16i8 V128:$Rn),
+            VectorIndexB:$idx)))), i8),
+          (i64 (SMOVvi8to64 V128:$Rn, VectorIndexB:$idx))>;
+def : Pat<(sext_inreg (i64 (anyext (i32 (vector_extract (v8i16 V128:$Rn),
+            VectorIndexH:$idx)))), i16),
+          (i64 (SMOVvi16to64 V128:$Rn, VectorIndexH:$idx))>;
+
 // Extracting i8 or i16 elements will have the zero-extend transformed to
 // an 'and' mask by type legalization since neither i8 nor i16 are legal types
 // for AArch64. Match these patterns here since UMOV already zeroes out the high

Modified: llvm/trunk/test/CodeGen/AArch64/arm64-neon-copy.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-neon-copy.ll?rev=256176&r1=256175&r2=256176&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/arm64-neon-copy.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/arm64-neon-copy.ll Mon Dec 21 12:31:25 2015
@@ -320,21 +320,20 @@ define i32 @smovw8h(<8 x i16> %tmp1) {
   ret i32 %tmp5
 }
 
-define i32 @smovx16b(<16 x i8> %tmp1) {
+define i64 @smovx16b(<16 x i8> %tmp1) {
 ; CHECK-LABEL: smovx16b:
-; CHECK: smov {{[xw][0-9]+}}, {{v[0-9]+}}.b[8]
+; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.b[8]
   %tmp3 = extractelement <16 x i8> %tmp1, i32 8
-  %tmp4 = sext i8 %tmp3 to i32
-  %tmp5 = add i32 %tmp4, %tmp4
-  ret i32 %tmp5
+  %tmp4 = sext i8 %tmp3 to i64
+  ret i64 %tmp4
 }
 
-define i32 @smovx8h(<8 x i16> %tmp1) {
+define i64 @smovx8h(<8 x i16> %tmp1) {
 ; CHECK-LABEL: smovx8h:
-; CHECK: smov {{[xw][0-9]+}}, {{v[0-9]+}}.h[2]
+; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.h[2]
   %tmp3 = extractelement <8 x i16> %tmp1, i32 2
-  %tmp4 = sext i16 %tmp3 to i32
-  ret i32 %tmp4
+  %tmp4 = sext i16 %tmp3 to i64
+  ret i64 %tmp4
 }
 
 define i64 @smovx4s(<4 x i32> %tmp1) {