[llvm] r256176 - [AArch64] Add additional extract-extend patterns for smov
Matthew Simpson via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 21 10:31:25 PST 2015
Author: mssimpso
Date: Mon Dec 21 12:31:25 2015
New Revision: 256176
URL: http://llvm.org/viewvc/llvm-project?rev=256176&view=rev
Log:
[AArch64] Add additional extract-extend patterns for smov
This patch adds to the target description two additional patterns for matching
extract-extend operations to SMOV. The patterns catch the v16i8-to-i64 and
v8i16-to-i64 cases. The existing patterns miss these cases because the
extracted elements must first be legalized to i32, resulting in any_extend
nodes.
This was originally implemented as a DAG combine (r255895), but was reverted
due to failing out-of-tree tests.
Modified:
llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td
llvm/trunk/test/CodeGen/AArch64/arm64-neon-copy.ll
Modified: llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td?rev=256176&r1=256175&r2=256176&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td Mon Dec 21 12:31:25 2015
@@ -3806,6 +3806,13 @@ def : Pat<(sext_inreg (vector_extract (v
def : Pat<(sext (i32 (vector_extract (v4i32 V128:$Rn), VectorIndexS:$idx))),
(i64 (SMOVvi32to64 V128:$Rn, VectorIndexS:$idx))>;
+def : Pat<(sext_inreg (i64 (anyext (i32 (vector_extract (v16i8 V128:$Rn),
+ VectorIndexB:$idx)))), i8),
+ (i64 (SMOVvi8to64 V128:$Rn, VectorIndexB:$idx))>;
+def : Pat<(sext_inreg (i64 (anyext (i32 (vector_extract (v8i16 V128:$Rn),
+ VectorIndexH:$idx)))), i16),
+ (i64 (SMOVvi16to64 V128:$Rn, VectorIndexH:$idx))>;
+
// Extracting i8 or i16 elements will have the zero-extend transformed to
// an 'and' mask by type legalization since neither i8 nor i16 are legal types
// for AArch64. Match these patterns here since UMOV already zeroes out the high
Modified: llvm/trunk/test/CodeGen/AArch64/arm64-neon-copy.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-neon-copy.ll?rev=256176&r1=256175&r2=256176&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/arm64-neon-copy.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/arm64-neon-copy.ll Mon Dec 21 12:31:25 2015
@@ -320,21 +320,20 @@ define i32 @smovw8h(<8 x i16> %tmp1) {
ret i32 %tmp5
}
-define i32 @smovx16b(<16 x i8> %tmp1) {
+define i64 @smovx16b(<16 x i8> %tmp1) {
; CHECK-LABEL: smovx16b:
-; CHECK: smov {{[xw][0-9]+}}, {{v[0-9]+}}.b[8]
+; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.b[8]
%tmp3 = extractelement <16 x i8> %tmp1, i32 8
- %tmp4 = sext i8 %tmp3 to i32
- %tmp5 = add i32 %tmp4, %tmp4
- ret i32 %tmp5
+ %tmp4 = sext i8 %tmp3 to i64
+ ret i64 %tmp4
}
-define i32 @smovx8h(<8 x i16> %tmp1) {
+define i64 @smovx8h(<8 x i16> %tmp1) {
; CHECK-LABEL: smovx8h:
-; CHECK: smov {{[xw][0-9]+}}, {{v[0-9]+}}.h[2]
+; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.h[2]
%tmp3 = extractelement <8 x i16> %tmp1, i32 2
- %tmp4 = sext i16 %tmp3 to i32
- ret i32 %tmp4
+ %tmp4 = sext i16 %tmp3 to i64
+ ret i64 %tmp4
}
define i64 @smovx4s(<4 x i32> %tmp1) {
More information about the llvm-commits
mailing list