[llvm-commits] [llvm] r117475 - in /llvm/trunk: lib/Target/ARM/ARMInstrFormats.td lib/Target/ARM/ARMInstrNEON.td test/MC/ARM/neon-dup-encoding.ll
Owen Anderson
resistor at mac.com
Wed Oct 27 12:25:54 PDT 2010
Author: resistor
Date: Wed Oct 27 14:25:54 2010
New Revision: 117475
URL: http://llvm.org/viewvc/llvm-project?rev=117475&view=rev
Log:
Provide correct NEON encodings for vdup.
Added:
llvm/trunk/test/MC/ARM/neon-dup-encoding.ll
Modified:
llvm/trunk/lib/Target/ARM/ARMInstrFormats.td
llvm/trunk/lib/Target/ARM/ARMInstrNEON.td
Modified: llvm/trunk/lib/Target/ARM/ARMInstrFormats.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrFormats.td?rev=117475&r1=117474&r2=117475&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMInstrFormats.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMInstrFormats.td Wed Oct 27 14:25:54 2010
@@ -1867,6 +1867,15 @@
let AsmString = !strconcat(opc, "${p}", ".", dt, "\t", asm);
let Pattern = pattern;
list<Predicate> Predicates = [HasNEON];
+
+ bits<5> Vd;
+ bits<4> Rt;
+ bits<4> p;
+
+ let Inst{31-28} = p{3-0};
+ let Inst{7} = Vd{4};
+ let Inst{19-16} = Vd{3-0};
+ let Inst{15-12} = Rt{3-0};
}
class NVGetLane<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
dag oops, dag iops, InstrItinClass itin,
@@ -1895,6 +1904,15 @@
let Inst{11-7} = 0b11000;
let Inst{6} = op6;
let Inst{4} = 0;
+
+ bits<5> Vd;
+ bits<5> Vm;
+ bits<4> lane;
+
+ let Inst{22} = Vd{4};
+ let Inst{15-12} = Vd{3-0};
+ let Inst{5} = Vm{4};
+ let Inst{3-0} = Vm{3-0};
}
// NEONFPPat - Same as Pat<>, but requires that the compiler be using NEON
Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrNEON.td?rev=117475&r1=117474&r2=117475&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMInstrNEON.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMInstrNEON.td Wed Oct 27 14:25:54 2010
@@ -3679,14 +3679,30 @@
// Inst{19-16} is partially specified depending on the element size.
-def VDUPLN8d : VDUPLND<{?,?,?,1}, "vdup", "8", v8i8>;
-def VDUPLN16d : VDUPLND<{?,?,1,0}, "vdup", "16", v4i16>;
-def VDUPLN32d : VDUPLND<{?,1,0,0}, "vdup", "32", v2i32>;
-def VDUPLNfd : VDUPLND<{?,1,0,0}, "vdup", "32", v2f32>;
-def VDUPLN8q : VDUPLNQ<{?,?,?,1}, "vdup", "8", v16i8, v8i8>;
-def VDUPLN16q : VDUPLNQ<{?,?,1,0}, "vdup", "16", v8i16, v4i16>;
-def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32>;
-def VDUPLNfq : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4f32, v2f32>;
+def VDUPLN8d : VDUPLND<{?,?,?,1}, "vdup", "8", v8i8> {
+ let Inst{19-17} = lane{2-0};
+}
+def VDUPLN16d : VDUPLND<{?,?,1,0}, "vdup", "16", v4i16> {
+ let Inst{19-18} = lane{1-0};
+}
+def VDUPLN32d : VDUPLND<{?,1,0,0}, "vdup", "32", v2i32> {
+ let Inst{19} = lane{0};
+}
+def VDUPLNfd : VDUPLND<{?,1,0,0}, "vdup", "32", v2f32> {
+ let Inst{19} = lane{0};
+}
+def VDUPLN8q : VDUPLNQ<{?,?,?,1}, "vdup", "8", v16i8, v8i8> {
+ let Inst{19-17} = lane{2-0};
+}
+def VDUPLN16q : VDUPLNQ<{?,?,1,0}, "vdup", "16", v8i16, v4i16> {
+ let Inst{19-18} = lane{1-0};
+}
+def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32> {
+ let Inst{19} = lane{0};
+}
+def VDUPLNfq : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4f32, v2f32> {
+ let Inst{19} = lane{0};
+}
def : Pat<(v16i8 (NEONvduplane (v16i8 QPR:$src), imm:$lane)),
(v16i8 (VDUPLN8q (v8i8 (EXTRACT_SUBREG QPR:$src,
Added: llvm/trunk/test/MC/ARM/neon-dup-encoding.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/ARM/neon-dup-encoding.ll?rev=117475&view=auto
==============================================================================
--- llvm/trunk/test/MC/ARM/neon-dup-encoding.ll (added)
+++ llvm/trunk/test/MC/ARM/neon-dup-encoding.ll Wed Oct 27 14:25:54 2010
@@ -0,0 +1,115 @@
+; RUN: llc -show-mc-encoding -march=arm -mcpu=cortex-a8 -mattr=+neon < %s | FileCheck %s
+
+define <8 x i8> @v_dup8(i8 %A) nounwind {
+; CHECK: vdup.8 d16, r0 @ encoding: [0x90,0x0b,0xc0,0xee]
+ %tmp1 = insertelement <8 x i8> zeroinitializer, i8 %A, i32 0
+ %tmp2 = insertelement <8 x i8> %tmp1, i8 %A, i32 1
+ %tmp3 = insertelement <8 x i8> %tmp2, i8 %A, i32 2
+ %tmp4 = insertelement <8 x i8> %tmp3, i8 %A, i32 3
+ %tmp5 = insertelement <8 x i8> %tmp4, i8 %A, i32 4
+ %tmp6 = insertelement <8 x i8> %tmp5, i8 %A, i32 5
+ %tmp7 = insertelement <8 x i8> %tmp6, i8 %A, i32 6
+ %tmp8 = insertelement <8 x i8> %tmp7, i8 %A, i32 7
+ ret <8 x i8> %tmp8
+}
+
+define <4 x i16> @v_dup16(i16 %A) nounwind {
+; CHECK: vdup.16 d16, r0 @ encoding: [0xb0,0x0b,0x80,0xee]
+ %tmp1 = insertelement <4 x i16> zeroinitializer, i16 %A, i32 0
+ %tmp2 = insertelement <4 x i16> %tmp1, i16 %A, i32 1
+ %tmp3 = insertelement <4 x i16> %tmp2, i16 %A, i32 2
+ %tmp4 = insertelement <4 x i16> %tmp3, i16 %A, i32 3
+ ret <4 x i16> %tmp4
+}
+
+define <2 x i32> @v_dup32(i32 %A) nounwind {
+; CHECK: vdup.32 d16, r0 @ encoding: [0x90,0x0b,0x80,0xee]
+ %tmp1 = insertelement <2 x i32> zeroinitializer, i32 %A, i32 0
+ %tmp2 = insertelement <2 x i32> %tmp1, i32 %A, i32 1
+ ret <2 x i32> %tmp2
+}
+
+define <16 x i8> @v_dupQ8(i8 %A) nounwind {
+; CHECK: vdup.8 q8, r0 @ encoding: [0x90,0x0b,0xe0,0xee]
+ %tmp1 = insertelement <16 x i8> zeroinitializer, i8 %A, i32 0
+ %tmp2 = insertelement <16 x i8> %tmp1, i8 %A, i32 1
+ %tmp3 = insertelement <16 x i8> %tmp2, i8 %A, i32 2
+ %tmp4 = insertelement <16 x i8> %tmp3, i8 %A, i32 3
+ %tmp5 = insertelement <16 x i8> %tmp4, i8 %A, i32 4
+ %tmp6 = insertelement <16 x i8> %tmp5, i8 %A, i32 5
+ %tmp7 = insertelement <16 x i8> %tmp6, i8 %A, i32 6
+ %tmp8 = insertelement <16 x i8> %tmp7, i8 %A, i32 7
+ %tmp9 = insertelement <16 x i8> %tmp8, i8 %A, i32 8
+ %tmp10 = insertelement <16 x i8> %tmp9, i8 %A, i32 9
+ %tmp11 = insertelement <16 x i8> %tmp10, i8 %A, i32 10
+ %tmp12 = insertelement <16 x i8> %tmp11, i8 %A, i32 11
+ %tmp13 = insertelement <16 x i8> %tmp12, i8 %A, i32 12
+ %tmp14 = insertelement <16 x i8> %tmp13, i8 %A, i32 13
+ %tmp15 = insertelement <16 x i8> %tmp14, i8 %A, i32 14
+ %tmp16 = insertelement <16 x i8> %tmp15, i8 %A, i32 15
+ ret <16 x i8> %tmp16
+}
+
+define <8 x i16> @v_dupQ16(i16 %A) nounwind {
+; CHECK: vdup.16 q8, r0 @ encoding: [0xb0,0x0b,0xa0,0xee]
+ %tmp1 = insertelement <8 x i16> zeroinitializer, i16 %A, i32 0
+ %tmp2 = insertelement <8 x i16> %tmp1, i16 %A, i32 1
+ %tmp3 = insertelement <8 x i16> %tmp2, i16 %A, i32 2
+ %tmp4 = insertelement <8 x i16> %tmp3, i16 %A, i32 3
+ %tmp5 = insertelement <8 x i16> %tmp4, i16 %A, i32 4
+ %tmp6 = insertelement <8 x i16> %tmp5, i16 %A, i32 5
+ %tmp7 = insertelement <8 x i16> %tmp6, i16 %A, i32 6
+ %tmp8 = insertelement <8 x i16> %tmp7, i16 %A, i32 7
+ ret <8 x i16> %tmp8
+}
+
+define <4 x i32> @v_dupQ32(i32 %A) nounwind {
+; CHECK: vdup.32 q8, r0 @ encoding: [0x90,0x0b,0xa0,0xee]
+ %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %A, i32 0
+ %tmp2 = insertelement <4 x i32> %tmp1, i32 %A, i32 1
+ %tmp3 = insertelement <4 x i32> %tmp2, i32 %A, i32 2
+ %tmp4 = insertelement <4 x i32> %tmp3, i32 %A, i32 3
+ ret <4 x i32> %tmp4
+}
+
+define <8 x i8> @vduplane8(<8 x i8>* %A) nounwind {
+ %tmp1 = load <8 x i8>* %A
+; CHECK: vdup.8 d16, d16[1] @ encoding: [0x20,0x0c,0xf3,0xf3]
+ %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vduplane16(<4 x i16>* %A) nounwind {
+ %tmp1 = load <4 x i16>* %A
+; CHECK: vdup.16 d16, d16[1] @ encoding: [0x20,0x0c,0xf6,0xf3]
+ %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vduplane32(<2 x i32>* %A) nounwind {
+ %tmp1 = load <2 x i32>* %A
+; CHECK: vdup.32 d16, d16[1] @ encoding: [0x20,0x0c,0xfc,0xf3]
+ %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> < i32 1, i32 1 >
+ ret <2 x i32> %tmp2
+}
+
+define <16 x i8> @vduplaneQ8(<8 x i8>* %A) nounwind {
+ %tmp1 = load <8 x i8>* %A
+; CHECK: vdup.8 q8, d16[1] @ encoding: [0x60,0x0c,0xf3,0xf3]
+ %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <16 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
+ ret <16 x i8> %tmp2
+}
+
+define <8 x i16> @vduplaneQ16(<4 x i16>* %A) nounwind {
+ %tmp1 = load <4 x i16>* %A
+; CHECK: vdup.16 q8, d16[1] @ encoding: [0x60,0x0c,0xf6,0xf3]
+ %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vduplaneQ32(<2 x i32>* %A) nounwind {
+ %tmp1 = load <2 x i32>* %A
+; CHECK: vdup.32 q8, d16[1] @ encoding: [0x60,0x0c,0xfc,0xf3]
+ %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
+ ret <4 x i32> %tmp2
+}
More information about the llvm-commits
mailing list