[llvm-commits] [llvm] r83667 - /llvm/trunk/test/CodeGen/ARM/
Bob Wilson
bob.wilson at apple.com
Fri Oct 9 13:20:55 PDT 2009
Author: bwilson
Date: Fri Oct 9 15:20:54 2009
New Revision: 83667
URL: http://llvm.org/viewvc/llvm-project?rev=83667&view=rev
Log:
Merge a bunch of NEON tests into larger files so they run faster.
Added:
llvm/trunk/test/CodeGen/ARM/vbits.ll
llvm/trunk/test/CodeGen/ARM/vminmax.ll
llvm/trunk/test/CodeGen/ARM/vpminmax.ll
llvm/trunk/test/CodeGen/ARM/vqdmul.ll
llvm/trunk/test/CodeGen/ARM/vrec.ll
Removed:
llvm/trunk/test/CodeGen/ARM/vabal.ll
llvm/trunk/test/CodeGen/ARM/vabdl.ll
llvm/trunk/test/CodeGen/ARM/vacge.ll
llvm/trunk/test/CodeGen/ARM/vacgt.ll
llvm/trunk/test/CodeGen/ARM/vaddhn.ll
llvm/trunk/test/CodeGen/ARM/vaddl.ll
llvm/trunk/test/CodeGen/ARM/vaddw.ll
llvm/trunk/test/CodeGen/ARM/vand.ll
llvm/trunk/test/CodeGen/ARM/vbic.ll
llvm/trunk/test/CodeGen/ARM/vcls.ll
llvm/trunk/test/CodeGen/ARM/vclz.ll
llvm/trunk/test/CodeGen/ARM/vcvt_n.ll
llvm/trunk/test/CodeGen/ARM/vdup_lane.ll
llvm/trunk/test/CodeGen/ARM/veor.ll
llvm/trunk/test/CodeGen/ARM/vget_lane2.ll
llvm/trunk/test/CodeGen/ARM/vmax.ll
llvm/trunk/test/CodeGen/ARM/vmin.ll
llvm/trunk/test/CodeGen/ARM/vmlal.ll
llvm/trunk/test/CodeGen/ARM/vmlal_lane.ll
llvm/trunk/test/CodeGen/ARM/vmlsl.ll
llvm/trunk/test/CodeGen/ARM/vmlsl_lane.ll
llvm/trunk/test/CodeGen/ARM/vmovl.ll
llvm/trunk/test/CodeGen/ARM/vmovn.ll
llvm/trunk/test/CodeGen/ARM/vmul_lane.ll
llvm/trunk/test/CodeGen/ARM/vmull.ll
llvm/trunk/test/CodeGen/ARM/vmull_lane.ll
llvm/trunk/test/CodeGen/ARM/vmvn.ll
llvm/trunk/test/CodeGen/ARM/vorn.ll
llvm/trunk/test/CodeGen/ARM/vorr.ll
llvm/trunk/test/CodeGen/ARM/vpaddl.ll
llvm/trunk/test/CodeGen/ARM/vpmax.ll
llvm/trunk/test/CodeGen/ARM/vpmin.ll
llvm/trunk/test/CodeGen/ARM/vqRdmulh_lane.ll
llvm/trunk/test/CodeGen/ARM/vqabs.ll
llvm/trunk/test/CodeGen/ARM/vqdmlal.ll
llvm/trunk/test/CodeGen/ARM/vqdmlal_lanes.ll
llvm/trunk/test/CodeGen/ARM/vqdmlsl.ll
llvm/trunk/test/CodeGen/ARM/vqdmlsl_lanes.ll
llvm/trunk/test/CodeGen/ARM/vqdmulh.ll
llvm/trunk/test/CodeGen/ARM/vqdmulh_lane.ll
llvm/trunk/test/CodeGen/ARM/vqdmull.ll
llvm/trunk/test/CodeGen/ARM/vqdmull_lane.ll
llvm/trunk/test/CodeGen/ARM/vqmovn.ll
llvm/trunk/test/CodeGen/ARM/vqneg.ll
llvm/trunk/test/CodeGen/ARM/vqrshl.ll
llvm/trunk/test/CodeGen/ARM/vqrshrn.ll
llvm/trunk/test/CodeGen/ARM/vraddhn.ll
llvm/trunk/test/CodeGen/ARM/vrecpe.ll
llvm/trunk/test/CodeGen/ARM/vrecps.ll
llvm/trunk/test/CodeGen/ARM/vrhadd.ll
llvm/trunk/test/CodeGen/ARM/vrshl.ll
llvm/trunk/test/CodeGen/ARM/vrshrn.ll
llvm/trunk/test/CodeGen/ARM/vrsqrte.ll
llvm/trunk/test/CodeGen/ARM/vrsqrts.ll
llvm/trunk/test/CodeGen/ARM/vrsubhn.ll
llvm/trunk/test/CodeGen/ARM/vset_lane.ll
llvm/trunk/test/CodeGen/ARM/vshift_split.ll
llvm/trunk/test/CodeGen/ARM/vsubhn.ll
llvm/trunk/test/CodeGen/ARM/vsubl.ll
llvm/trunk/test/CodeGen/ARM/vsubw.ll
llvm/trunk/test/CodeGen/ARM/vtst.ll
Modified:
llvm/trunk/test/CodeGen/ARM/vaba.ll
llvm/trunk/test/CodeGen/ARM/vabd.ll
llvm/trunk/test/CodeGen/ARM/vabs.ll
llvm/trunk/test/CodeGen/ARM/vadd.ll
llvm/trunk/test/CodeGen/ARM/vcge.ll
llvm/trunk/test/CodeGen/ARM/vcgt.ll
llvm/trunk/test/CodeGen/ARM/vcnt.ll
llvm/trunk/test/CodeGen/ARM/vcvt.ll
llvm/trunk/test/CodeGen/ARM/vdup.ll
llvm/trunk/test/CodeGen/ARM/vget_lane.ll
llvm/trunk/test/CodeGen/ARM/vhadd.ll
llvm/trunk/test/CodeGen/ARM/vmla.ll
llvm/trunk/test/CodeGen/ARM/vmls.ll
llvm/trunk/test/CodeGen/ARM/vmov.ll
llvm/trunk/test/CodeGen/ARM/vmul.ll
llvm/trunk/test/CodeGen/ARM/vneg.ll
llvm/trunk/test/CodeGen/ARM/vpadd.ll
llvm/trunk/test/CodeGen/ARM/vqshl.ll
llvm/trunk/test/CodeGen/ARM/vqshrn.ll
llvm/trunk/test/CodeGen/ARM/vshift.ll
llvm/trunk/test/CodeGen/ARM/vshl.ll
llvm/trunk/test/CodeGen/ARM/vshrn.ll
llvm/trunk/test/CodeGen/ARM/vsub.ll
Modified: llvm/trunk/test/CodeGen/ARM/vaba.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vaba.ll?rev=83667&r1=83666&r2=83667&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vaba.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vaba.ll Fri Oct 9 15:20:54 2009
@@ -135,3 +135,71 @@
declare <16 x i8> @llvm.arm.neon.vabau.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.arm.neon.vabau.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vabau.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone
+
+define <8 x i16> @vabals8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
+;CHECK: vabals8:
+;CHECK: vabal.s8
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = load <8 x i8>* %C
+ %tmp4 = call <8 x i16> @llvm.arm.neon.vabals.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3)
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @vabals16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+;CHECK: vabals16:
+;CHECK: vabal.s16
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = load <4 x i16>* %C
+ %tmp4 = call <4 x i32> @llvm.arm.neon.vabals.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @vabals32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+;CHECK: vabals32:
+;CHECK: vabal.s32
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = load <2 x i32>* %C
+ %tmp4 = call <2 x i64> @llvm.arm.neon.vabals.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
+ ret <2 x i64> %tmp4
+}
+
+define <8 x i16> @vabalu8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
+;CHECK: vabalu8:
+;CHECK: vabal.u8
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = load <8 x i8>* %C
+ %tmp4 = call <8 x i16> @llvm.arm.neon.vabalu.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3)
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @vabalu16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+;CHECK: vabalu16:
+;CHECK: vabal.u16
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = load <4 x i16>* %C
+ %tmp4 = call <4 x i32> @llvm.arm.neon.vabalu.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @vabalu32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+;CHECK: vabalu32:
+;CHECK: vabal.u32
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = load <2 x i32>* %C
+ %tmp4 = call <2 x i64> @llvm.arm.neon.vabalu.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
+ ret <2 x i64> %tmp4
+}
+
+declare <8 x i16> @llvm.arm.neon.vabals.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vabals.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vabals.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vabalu.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vabalu.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vabalu.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
Removed: llvm/trunk/test/CodeGen/ARM/vabal.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vabal.ll?rev=83666&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vabal.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vabal.ll (removed)
@@ -1,69 +0,0 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
-
-define <8 x i16> @vabals8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
-;CHECK: vabals8:
-;CHECK: vabal.s8
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = load <8 x i8>* %C
- %tmp4 = call <8 x i16> @llvm.arm.neon.vabals.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3)
- ret <8 x i16> %tmp4
-}
-
-define <4 x i32> @vabals16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
-;CHECK: vabals16:
-;CHECK: vabal.s16
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = load <4 x i16>* %C
- %tmp4 = call <4 x i32> @llvm.arm.neon.vabals.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
- ret <4 x i32> %tmp4
-}
-
-define <2 x i64> @vabals32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
-;CHECK: vabals32:
-;CHECK: vabal.s32
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = load <2 x i32>* %C
- %tmp4 = call <2 x i64> @llvm.arm.neon.vabals.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
- ret <2 x i64> %tmp4
-}
-
-define <8 x i16> @vabalu8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
-;CHECK: vabalu8:
-;CHECK: vabal.u8
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = load <8 x i8>* %C
- %tmp4 = call <8 x i16> @llvm.arm.neon.vabalu.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3)
- ret <8 x i16> %tmp4
-}
-
-define <4 x i32> @vabalu16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
-;CHECK: vabalu16:
-;CHECK: vabal.u16
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = load <4 x i16>* %C
- %tmp4 = call <4 x i32> @llvm.arm.neon.vabalu.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
- ret <4 x i32> %tmp4
-}
-
-define <2 x i64> @vabalu32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
-;CHECK: vabalu32:
-;CHECK: vabal.u32
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = load <2 x i32>* %C
- %tmp4 = call <2 x i64> @llvm.arm.neon.vabalu.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
- ret <2 x i64> %tmp4
-}
-
-declare <8 x i16> @llvm.arm.neon.vabals.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vabals.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i64> @llvm.arm.neon.vabals.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
-
-declare <8 x i16> @llvm.arm.neon.vabalu.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vabalu.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i64> @llvm.arm.neon.vabalu.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
Modified: llvm/trunk/test/CodeGen/ARM/vabd.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vabd.ll?rev=83667&r1=83666&r2=83667&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vabd.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vabd.ll Fri Oct 9 15:20:54 2009
@@ -145,3 +145,65 @@
declare <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <4 x float> @llvm.arm.neon.vabds.v4f32(<4 x float>, <4 x float>) nounwind readnone
+
+define <8 x i16> @vabdls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vabdls8:
+;CHECK: vabdl.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vabdls.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vabdls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vabdls16:
+;CHECK: vabdl.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vabdls.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vabdls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vabdls32:
+;CHECK: vabdl.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vabdls.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+define <8 x i16> @vabdlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vabdlu8:
+;CHECK: vabdl.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vabdlu.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vabdlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vabdlu16:
+;CHECK: vabdl.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vabdlu.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vabdlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vabdlu32:
+;CHECK: vabdl.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vabdlu.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+declare <8 x i16> @llvm.arm.neon.vabdls.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vabdls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vabdls.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vabdlu.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vabdlu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vabdlu.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
Removed: llvm/trunk/test/CodeGen/ARM/vabdl.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vabdl.ll?rev=83666&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vabdl.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vabdl.ll (removed)
@@ -1,63 +0,0 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
-
-define <8 x i16> @vabdls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vabdls8:
-;CHECK: vabdl.s8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = call <8 x i16> @llvm.arm.neon.vabdls.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
- ret <8 x i16> %tmp3
-}
-
-define <4 x i32> @vabdls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vabdls16:
-;CHECK: vabdl.s16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i32> @llvm.arm.neon.vabdls.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
- ret <4 x i32> %tmp3
-}
-
-define <2 x i64> @vabdls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vabdls32:
-;CHECK: vabdl.s32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i64> @llvm.arm.neon.vabdls.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
- ret <2 x i64> %tmp3
-}
-
-define <8 x i16> @vabdlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vabdlu8:
-;CHECK: vabdl.u8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = call <8 x i16> @llvm.arm.neon.vabdlu.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
- ret <8 x i16> %tmp3
-}
-
-define <4 x i32> @vabdlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vabdlu16:
-;CHECK: vabdl.u16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i32> @llvm.arm.neon.vabdlu.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
- ret <4 x i32> %tmp3
-}
-
-define <2 x i64> @vabdlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vabdlu32:
-;CHECK: vabdl.u32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i64> @llvm.arm.neon.vabdlu.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
- ret <2 x i64> %tmp3
-}
-
-declare <8 x i16> @llvm.arm.neon.vabdls.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vabdls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i64> @llvm.arm.neon.vabdls.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
-
-declare <8 x i16> @llvm.arm.neon.vabdlu.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vabdlu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i64> @llvm.arm.neon.vabdlu.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
Modified: llvm/trunk/test/CodeGen/ARM/vabs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vabs.ll?rev=83667&r1=83666&r2=83667&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vabs.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vabs.ll Fri Oct 9 15:20:54 2009
@@ -74,3 +74,58 @@
declare <4 x i32> @llvm.arm.neon.vabs.v4i32(<4 x i32>) nounwind readnone
declare <4 x float> @llvm.arm.neon.vabs.v4f32(<4 x float>) nounwind readnone
+define <8 x i8> @vqabss8(<8 x i8>* %A) nounwind {
+;CHECK: vqabss8:
+;CHECK: vqabs.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vqabs.v8i8(<8 x i8> %tmp1)
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vqabss16(<4 x i16>* %A) nounwind {
+;CHECK: vqabss16:
+;CHECK: vqabs.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vqabs.v4i16(<4 x i16> %tmp1)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vqabss32(<2 x i32>* %A) nounwind {
+;CHECK: vqabss32:
+;CHECK: vqabs.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vqabs.v2i32(<2 x i32> %tmp1)
+ ret <2 x i32> %tmp2
+}
+
+define <16 x i8> @vqabsQs8(<16 x i8>* %A) nounwind {
+;CHECK: vqabsQs8:
+;CHECK: vqabs.s8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = call <16 x i8> @llvm.arm.neon.vqabs.v16i8(<16 x i8> %tmp1)
+ ret <16 x i8> %tmp2
+}
+
+define <8 x i16> @vqabsQs16(<8 x i16>* %A) nounwind {
+;CHECK: vqabsQs16:
+;CHECK: vqabs.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <8 x i16> @llvm.arm.neon.vqabs.v8i16(<8 x i16> %tmp1)
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vqabsQs32(<4 x i32>* %A) nounwind {
+;CHECK: vqabsQs32:
+;CHECK: vqabs.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i32> @llvm.arm.neon.vqabs.v4i32(<4 x i32> %tmp1)
+ ret <4 x i32> %tmp2
+}
+
+declare <8 x i8> @llvm.arm.neon.vqabs.v8i8(<8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vqabs.v4i16(<4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vqabs.v2i32(<2 x i32>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vqabs.v16i8(<16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vqabs.v8i16(<8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vqabs.v4i32(<4 x i32>) nounwind readnone
Removed: llvm/trunk/test/CodeGen/ARM/vacge.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vacge.ll?rev=83666&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vacge.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vacge.ll (removed)
@@ -1,22 +0,0 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
-
-define <2 x i32> @vacgef32(<2 x float>* %A, <2 x float>* %B) nounwind {
-;CHECK: vacgef32:
-;CHECK: vacge.f32
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
- %tmp3 = call <2 x i32> @llvm.arm.neon.vacged(<2 x float> %tmp1, <2 x float> %tmp2)
- ret <2 x i32> %tmp3
-}
-
-define <4 x i32> @vacgeQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
-;CHECK: vacgeQf32:
-;CHECK: vacge.f32
- %tmp1 = load <4 x float>* %A
- %tmp2 = load <4 x float>* %B
- %tmp3 = call <4 x i32> @llvm.arm.neon.vacgeq(<4 x float> %tmp1, <4 x float> %tmp2)
- ret <4 x i32> %tmp3
-}
-
-declare <2 x i32> @llvm.arm.neon.vacged(<2 x float>, <2 x float>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vacgeq(<4 x float>, <4 x float>) nounwind readnone
Removed: llvm/trunk/test/CodeGen/ARM/vacgt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vacgt.ll?rev=83666&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vacgt.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vacgt.ll (removed)
@@ -1,22 +0,0 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
-
-define <2 x i32> @vacgtf32(<2 x float>* %A, <2 x float>* %B) nounwind {
-;CHECK: vacgtf32:
-;CHECK: vacgt.f32
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
- %tmp3 = call <2 x i32> @llvm.arm.neon.vacgtd(<2 x float> %tmp1, <2 x float> %tmp2)
- ret <2 x i32> %tmp3
-}
-
-define <4 x i32> @vacgtQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
-;CHECK: vacgtQf32:
-;CHECK: vacgt.f32
- %tmp1 = load <4 x float>* %A
- %tmp2 = load <4 x float>* %B
- %tmp3 = call <4 x i32> @llvm.arm.neon.vacgtq(<4 x float> %tmp1, <4 x float> %tmp2)
- ret <4 x i32> %tmp3
-}
-
-declare <2 x i32> @llvm.arm.neon.vacgtd(<2 x float>, <2 x float>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vacgtq(<4 x float>, <4 x float>) nounwind readnone
Modified: llvm/trunk/test/CodeGen/ARM/vadd.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vadd.ll?rev=83667&r1=83666&r2=83667&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vadd.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vadd.ll Fri Oct 9 15:20:54 2009
@@ -89,3 +89,189 @@
%tmp3 = add <4 x float> %tmp1, %tmp2
ret <4 x float> %tmp3
}
+
+define <8 x i8> @vaddhni16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vaddhni16:
+;CHECK: vaddhn.i16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vaddhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vaddhni32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vaddhni32:
+;CHECK: vaddhn.i32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vaddhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vaddhni64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vaddhni64:
+;CHECK: vaddhn.i64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vaddhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+declare <8 x i8> @llvm.arm.neon.vaddhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vaddhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vaddhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i8> @vraddhni16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vraddhni16:
+;CHECK: vraddhn.i16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vraddhni32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vraddhni32:
+;CHECK: vraddhn.i32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vraddhni64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vraddhni64:
+;CHECK: vraddhn.i64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+declare <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @vaddls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vaddls8:
+;CHECK: vaddl.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vaddls.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vaddls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vaddls16:
+;CHECK: vaddl.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vaddls.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vaddls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vaddls32:
+;CHECK: vaddl.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vaddls.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+define <8 x i16> @vaddlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vaddlu8:
+;CHECK: vaddl.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vaddlu.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vaddlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vaddlu16:
+;CHECK: vaddl.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vaddlu.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vaddlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vaddlu32:
+;CHECK: vaddl.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vaddlu.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+declare <8 x i16> @llvm.arm.neon.vaddls.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vaddls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vaddls.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vaddlu.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vaddlu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vaddlu.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
+
+define <8 x i16> @vaddws8(<8 x i16>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vaddws8:
+;CHECK: vaddw.s8
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vaddws.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vaddws16(<4 x i32>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vaddws16:
+;CHECK: vaddw.s16
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vaddws.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vaddws32(<2 x i64>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vaddws32:
+;CHECK: vaddw.s32
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vaddws.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+define <8 x i16> @vaddwu8(<8 x i16>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vaddwu8:
+;CHECK: vaddw.u8
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vaddwu.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vaddwu16(<4 x i32>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vaddwu16:
+;CHECK: vaddw.u16
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vaddwu.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vaddwu32(<2 x i64>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vaddwu32:
+;CHECK: vaddw.u32
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vaddwu.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+declare <8 x i16> @llvm.arm.neon.vaddws.v8i16(<8 x i16>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vaddws.v4i32(<4 x i32>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vaddws.v2i64(<2 x i64>, <2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vaddwu.v8i16(<8 x i16>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vaddwu.v4i32(<4 x i32>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vaddwu.v2i64(<2 x i64>, <2 x i32>) nounwind readnone
Removed: llvm/trunk/test/CodeGen/ARM/vaddhn.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vaddhn.ll?rev=83666&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vaddhn.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vaddhn.ll (removed)
@@ -1,32 +0,0 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
-
-define <8 x i8> @vaddhni16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: vaddhni16:
-;CHECK: vaddhn.i16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = call <8 x i8> @llvm.arm.neon.vaddhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2)
- ret <8 x i8> %tmp3
-}
-
-define <4 x i16> @vaddhni32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: vaddhni32:
-;CHECK: vaddhn.i32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %tmp3 = call <4 x i16> @llvm.arm.neon.vaddhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2)
- ret <4 x i16> %tmp3
-}
-
-define <2 x i32> @vaddhni64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK: vaddhni64:
-;CHECK: vaddhn.i64
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
- %tmp3 = call <2 x i32> @llvm.arm.neon.vaddhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2)
- ret <2 x i32> %tmp3
-}
-
-declare <8 x i8> @llvm.arm.neon.vaddhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
-declare <4 x i16> @llvm.arm.neon.vaddhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
-declare <2 x i32> @llvm.arm.neon.vaddhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
Removed: llvm/trunk/test/CodeGen/ARM/vaddl.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vaddl.ll?rev=83666&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vaddl.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vaddl.ll (removed)
@@ -1,63 +0,0 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
-
-define <8 x i16> @vaddls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vaddls8:
-;CHECK: vaddl.s8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = call <8 x i16> @llvm.arm.neon.vaddls.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
- ret <8 x i16> %tmp3
-}
-
-define <4 x i32> @vaddls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vaddls16:
-;CHECK: vaddl.s16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i32> @llvm.arm.neon.vaddls.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
- ret <4 x i32> %tmp3
-}
-
-define <2 x i64> @vaddls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vaddls32:
-;CHECK: vaddl.s32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i64> @llvm.arm.neon.vaddls.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
- ret <2 x i64> %tmp3
-}
-
-define <8 x i16> @vaddlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vaddlu8:
-;CHECK: vaddl.u8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = call <8 x i16> @llvm.arm.neon.vaddlu.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
- ret <8 x i16> %tmp3
-}
-
-define <4 x i32> @vaddlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vaddlu16:
-;CHECK: vaddl.u16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i32> @llvm.arm.neon.vaddlu.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
- ret <4 x i32> %tmp3
-}
-
-define <2 x i64> @vaddlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vaddlu32:
-;CHECK: vaddl.u32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i64> @llvm.arm.neon.vaddlu.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
- ret <2 x i64> %tmp3
-}
-
-declare <8 x i16> @llvm.arm.neon.vaddls.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vaddls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i64> @llvm.arm.neon.vaddls.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
-
-declare <8 x i16> @llvm.arm.neon.vaddlu.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vaddlu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i64> @llvm.arm.neon.vaddlu.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
Removed: llvm/trunk/test/CodeGen/ARM/vaddw.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vaddw.ll?rev=83666&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vaddw.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vaddw.ll (removed)
@@ -1,63 +0,0 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
-
-define <8 x i16> @vaddws8(<8 x i16>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vaddws8:
-;CHECK: vaddw.s8
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = call <8 x i16> @llvm.arm.neon.vaddws.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2)
- ret <8 x i16> %tmp3
-}
-
-define <4 x i32> @vaddws16(<4 x i32>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vaddws16:
-;CHECK: vaddw.s16
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i32> @llvm.arm.neon.vaddws.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2)
- ret <4 x i32> %tmp3
-}
-
-define <2 x i64> @vaddws32(<2 x i64>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vaddws32:
-;CHECK: vaddw.s32
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i64> @llvm.arm.neon.vaddws.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2)
- ret <2 x i64> %tmp3
-}
-
-define <8 x i16> @vaddwu8(<8 x i16>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vaddwu8:
-;CHECK: vaddw.u8
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = call <8 x i16> @llvm.arm.neon.vaddwu.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2)
- ret <8 x i16> %tmp3
-}
-
-define <4 x i32> @vaddwu16(<4 x i32>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vaddwu16:
-;CHECK: vaddw.u16
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i32> @llvm.arm.neon.vaddwu.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2)
- ret <4 x i32> %tmp3
-}
-
-define <2 x i64> @vaddwu32(<2 x i64>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vaddwu32:
-;CHECK: vaddw.u32
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i64> @llvm.arm.neon.vaddwu.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2)
- ret <2 x i64> %tmp3
-}
-
-declare <8 x i16> @llvm.arm.neon.vaddws.v8i16(<8 x i16>, <8 x i8>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vaddws.v4i32(<4 x i32>, <4 x i16>) nounwind readnone
-declare <2 x i64> @llvm.arm.neon.vaddws.v2i64(<2 x i64>, <2 x i32>) nounwind readnone
-
-declare <8 x i16> @llvm.arm.neon.vaddwu.v8i16(<8 x i16>, <8 x i8>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vaddwu.v4i32(<4 x i32>, <4 x i16>) nounwind readnone
-declare <2 x i64> @llvm.arm.neon.vaddwu.v2i64(<2 x i64>, <2 x i32>) nounwind readnone
Removed: llvm/trunk/test/CodeGen/ARM/vand.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vand.ll?rev=83666&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vand.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vand.ll (removed)
@@ -1,73 +0,0 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
-
-define <8 x i8> @v_andi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: v_andi8:
-;CHECK: vand
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = and <8 x i8> %tmp1, %tmp2
- ret <8 x i8> %tmp3
-}
-
-define <4 x i16> @v_andi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: v_andi16:
-;CHECK: vand
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = and <4 x i16> %tmp1, %tmp2
- ret <4 x i16> %tmp3
-}
-
-define <2 x i32> @v_andi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: v_andi32:
-;CHECK: vand
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = and <2 x i32> %tmp1, %tmp2
- ret <2 x i32> %tmp3
-}
-
-define <1 x i64> @v_andi64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
-;CHECK: v_andi64:
-;CHECK: vand
- %tmp1 = load <1 x i64>* %A
- %tmp2 = load <1 x i64>* %B
- %tmp3 = and <1 x i64> %tmp1, %tmp2
- ret <1 x i64> %tmp3
-}
-
-define <16 x i8> @v_andQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK: v_andQi8:
-;CHECK: vand
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
- %tmp3 = and <16 x i8> %tmp1, %tmp2
- ret <16 x i8> %tmp3
-}
-
-define <8 x i16> @v_andQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: v_andQi16:
-;CHECK: vand
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = and <8 x i16> %tmp1, %tmp2
- ret <8 x i16> %tmp3
-}
-
-define <4 x i32> @v_andQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: v_andQi32:
-;CHECK: vand
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %tmp3 = and <4 x i32> %tmp1, %tmp2
- ret <4 x i32> %tmp3
-}
-
-define <2 x i64> @v_andQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK: v_andQi64:
-;CHECK: vand
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
- %tmp3 = and <2 x i64> %tmp1, %tmp2
- ret <2 x i64> %tmp3
-}
Removed: llvm/trunk/test/CodeGen/ARM/vbic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vbic.ll?rev=83666&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vbic.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vbic.ll (removed)
@@ -1,81 +0,0 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
-
-define <8 x i8> @v_bici8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: v_bici8:
-;CHECK: vbic
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = xor <8 x i8> %tmp2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
- %tmp4 = and <8 x i8> %tmp1, %tmp3
- ret <8 x i8> %tmp4
-}
-
-define <4 x i16> @v_bici16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: v_bici16:
-;CHECK: vbic
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = xor <4 x i16> %tmp2, < i16 -1, i16 -1, i16 -1, i16 -1 >
- %tmp4 = and <4 x i16> %tmp1, %tmp3
- ret <4 x i16> %tmp4
-}
-
-define <2 x i32> @v_bici32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: v_bici32:
-;CHECK: vbic
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = xor <2 x i32> %tmp2, < i32 -1, i32 -1 >
- %tmp4 = and <2 x i32> %tmp1, %tmp3
- ret <2 x i32> %tmp4
-}
-
-define <1 x i64> @v_bici64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
-;CHECK: v_bici64:
-;CHECK: vbic
- %tmp1 = load <1 x i64>* %A
- %tmp2 = load <1 x i64>* %B
- %tmp3 = xor <1 x i64> %tmp2, < i64 -1 >
- %tmp4 = and <1 x i64> %tmp1, %tmp3
- ret <1 x i64> %tmp4
-}
-
-define <16 x i8> @v_bicQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK: v_bicQi8:
-;CHECK: vbic
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
- %tmp3 = xor <16 x i8> %tmp2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
- %tmp4 = and <16 x i8> %tmp1, %tmp3
- ret <16 x i8> %tmp4
-}
-
-define <8 x i16> @v_bicQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: v_bicQi16:
-;CHECK: vbic
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = xor <8 x i16> %tmp2, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 >
- %tmp4 = and <8 x i16> %tmp1, %tmp3
- ret <8 x i16> %tmp4
-}
-
-define <4 x i32> @v_bicQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: v_bicQi32:
-;CHECK: vbic
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %tmp3 = xor <4 x i32> %tmp2, < i32 -1, i32 -1, i32 -1, i32 -1 >
- %tmp4 = and <4 x i32> %tmp1, %tmp3
- ret <4 x i32> %tmp4
-}
-
-define <2 x i64> @v_bicQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK: v_bicQi64:
-;CHECK: vbic
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
- %tmp3 = xor <2 x i64> %tmp2, < i64 -1, i64 -1 >
- %tmp4 = and <2 x i64> %tmp1, %tmp3
- ret <2 x i64> %tmp4
-}
Added: llvm/trunk/test/CodeGen/ARM/vbits.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vbits.ll?rev=83667&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vbits.ll (added)
+++ llvm/trunk/test/CodeGen/ARM/vbits.ll Fri Oct 9 15:20:54 2009
@@ -0,0 +1,507 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <8 x i8> @v_andi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: v_andi8:
+;CHECK: vand
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = and <8 x i8> %tmp1, %tmp2
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @v_andi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: v_andi16:
+;CHECK: vand
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = and <4 x i16> %tmp1, %tmp2
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @v_andi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: v_andi32:
+;CHECK: vand
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = and <2 x i32> %tmp1, %tmp2
+ ret <2 x i32> %tmp3
+}
+
+define <1 x i64> @v_andi64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: v_andi64:
+;CHECK: vand
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = load <1 x i64>* %B
+ %tmp3 = and <1 x i64> %tmp1, %tmp2
+ ret <1 x i64> %tmp3
+}
+
+define <16 x i8> @v_andQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: v_andQi8:
+;CHECK: vand
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = and <16 x i8> %tmp1, %tmp2
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @v_andQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: v_andQi16:
+;CHECK: vand
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = and <8 x i16> %tmp1, %tmp2
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @v_andQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: v_andQi32:
+;CHECK: vand
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = and <4 x i32> %tmp1, %tmp2
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @v_andQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: v_andQi64:
+;CHECK: vand
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = and <2 x i64> %tmp1, %tmp2
+ ret <2 x i64> %tmp3
+}
+
+define <8 x i8> @v_bici8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: v_bici8:
+;CHECK: vbic
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = xor <8 x i8> %tmp2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
+ %tmp4 = and <8 x i8> %tmp1, %tmp3
+ ret <8 x i8> %tmp4
+}
+
+define <4 x i16> @v_bici16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: v_bici16:
+;CHECK: vbic
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = xor <4 x i16> %tmp2, < i16 -1, i16 -1, i16 -1, i16 -1 >
+ %tmp4 = and <4 x i16> %tmp1, %tmp3
+ ret <4 x i16> %tmp4
+}
+
+define <2 x i32> @v_bici32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: v_bici32:
+;CHECK: vbic
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = xor <2 x i32> %tmp2, < i32 -1, i32 -1 >
+ %tmp4 = and <2 x i32> %tmp1, %tmp3
+ ret <2 x i32> %tmp4
+}
+
+define <1 x i64> @v_bici64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: v_bici64:
+;CHECK: vbic
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = load <1 x i64>* %B
+ %tmp3 = xor <1 x i64> %tmp2, < i64 -1 >
+ %tmp4 = and <1 x i64> %tmp1, %tmp3
+ ret <1 x i64> %tmp4
+}
+
+define <16 x i8> @v_bicQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: v_bicQi8:
+;CHECK: vbic
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = xor <16 x i8> %tmp2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
+ %tmp4 = and <16 x i8> %tmp1, %tmp3
+ ret <16 x i8> %tmp4
+}
+
+define <8 x i16> @v_bicQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: v_bicQi16:
+;CHECK: vbic
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = xor <8 x i16> %tmp2, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 >
+ %tmp4 = and <8 x i16> %tmp1, %tmp3
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @v_bicQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: v_bicQi32:
+;CHECK: vbic
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = xor <4 x i32> %tmp2, < i32 -1, i32 -1, i32 -1, i32 -1 >
+ %tmp4 = and <4 x i32> %tmp1, %tmp3
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @v_bicQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: v_bicQi64:
+;CHECK: vbic
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = xor <2 x i64> %tmp2, < i64 -1, i64 -1 >
+ %tmp4 = and <2 x i64> %tmp1, %tmp3
+ ret <2 x i64> %tmp4
+}
+
+define <8 x i8> @v_eori8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: v_eori8:
+;CHECK: veor
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = xor <8 x i8> %tmp1, %tmp2
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @v_eori16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: v_eori16:
+;CHECK: veor
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = xor <4 x i16> %tmp1, %tmp2
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @v_eori32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: v_eori32:
+;CHECK: veor
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = xor <2 x i32> %tmp1, %tmp2
+ ret <2 x i32> %tmp3
+}
+
+define <1 x i64> @v_eori64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: v_eori64:
+;CHECK: veor
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = load <1 x i64>* %B
+ %tmp3 = xor <1 x i64> %tmp1, %tmp2
+ ret <1 x i64> %tmp3
+}
+
+define <16 x i8> @v_eorQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: v_eorQi8:
+;CHECK: veor
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = xor <16 x i8> %tmp1, %tmp2
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @v_eorQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: v_eorQi16:
+;CHECK: veor
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = xor <8 x i16> %tmp1, %tmp2
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @v_eorQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: v_eorQi32:
+;CHECK: veor
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = xor <4 x i32> %tmp1, %tmp2
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @v_eorQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: v_eorQi64:
+;CHECK: veor
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = xor <2 x i64> %tmp1, %tmp2
+ ret <2 x i64> %tmp3
+}
+
+define <8 x i8> @v_mvni8(<8 x i8>* %A) nounwind {
+;CHECK: v_mvni8:
+;CHECK: vmvn
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = xor <8 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @v_mvni16(<4 x i16>* %A) nounwind {
+;CHECK: v_mvni16:
+;CHECK: vmvn
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = xor <4 x i16> %tmp1, < i16 -1, i16 -1, i16 -1, i16 -1 >
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @v_mvni32(<2 x i32>* %A) nounwind {
+;CHECK: v_mvni32:
+;CHECK: vmvn
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = xor <2 x i32> %tmp1, < i32 -1, i32 -1 >
+ ret <2 x i32> %tmp2
+}
+
+define <1 x i64> @v_mvni64(<1 x i64>* %A) nounwind {
+;CHECK: v_mvni64:
+;CHECK: vmvn
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = xor <1 x i64> %tmp1, < i64 -1 >
+ ret <1 x i64> %tmp2
+}
+
+define <16 x i8> @v_mvnQi8(<16 x i8>* %A) nounwind {
+;CHECK: v_mvnQi8:
+;CHECK: vmvn
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = xor <16 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
+ ret <16 x i8> %tmp2
+}
+
+define <8 x i16> @v_mvnQi16(<8 x i16>* %A) nounwind {
+;CHECK: v_mvnQi16:
+;CHECK: vmvn
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = xor <8 x i16> %tmp1, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 >
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @v_mvnQi32(<4 x i32>* %A) nounwind {
+;CHECK: v_mvnQi32:
+;CHECK: vmvn
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = xor <4 x i32> %tmp1, < i32 -1, i32 -1, i32 -1, i32 -1 >
+ ret <4 x i32> %tmp2
+}
+
+define <2 x i64> @v_mvnQi64(<2 x i64>* %A) nounwind {
+;CHECK: v_mvnQi64:
+;CHECK: vmvn
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = xor <2 x i64> %tmp1, < i64 -1, i64 -1 >
+ ret <2 x i64> %tmp2
+}
+
+define <8 x i8> @v_orri8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: v_orri8:
+;CHECK: vorr
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = or <8 x i8> %tmp1, %tmp2
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @v_orri16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: v_orri16:
+;CHECK: vorr
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = or <4 x i16> %tmp1, %tmp2
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @v_orri32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: v_orri32:
+;CHECK: vorr
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = or <2 x i32> %tmp1, %tmp2
+ ret <2 x i32> %tmp3
+}
+
+define <1 x i64> @v_orri64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: v_orri64:
+;CHECK: vorr
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = load <1 x i64>* %B
+ %tmp3 = or <1 x i64> %tmp1, %tmp2
+ ret <1 x i64> %tmp3
+}
+
+define <16 x i8> @v_orrQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: v_orrQi8:
+;CHECK: vorr
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = or <16 x i8> %tmp1, %tmp2
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @v_orrQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: v_orrQi16:
+;CHECK: vorr
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = or <8 x i16> %tmp1, %tmp2
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @v_orrQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: v_orrQi32:
+;CHECK: vorr
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = or <4 x i32> %tmp1, %tmp2
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @v_orrQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: v_orrQi64:
+;CHECK: vorr
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = or <2 x i64> %tmp1, %tmp2
+ ret <2 x i64> %tmp3
+}
+
+define <8 x i8> @v_orni8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: v_orni8:
+;CHECK: vorn
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = xor <8 x i8> %tmp2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
+ %tmp4 = or <8 x i8> %tmp1, %tmp3
+ ret <8 x i8> %tmp4
+}
+
+define <4 x i16> @v_orni16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: v_orni16:
+;CHECK: vorn
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = xor <4 x i16> %tmp2, < i16 -1, i16 -1, i16 -1, i16 -1 >
+ %tmp4 = or <4 x i16> %tmp1, %tmp3
+ ret <4 x i16> %tmp4
+}
+
+define <2 x i32> @v_orni32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: v_orni32:
+;CHECK: vorn
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = xor <2 x i32> %tmp2, < i32 -1, i32 -1 >
+ %tmp4 = or <2 x i32> %tmp1, %tmp3
+ ret <2 x i32> %tmp4
+}
+
+define <1 x i64> @v_orni64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: v_orni64:
+;CHECK: vorn
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = load <1 x i64>* %B
+ %tmp3 = xor <1 x i64> %tmp2, < i64 -1 >
+ %tmp4 = or <1 x i64> %tmp1, %tmp3
+ ret <1 x i64> %tmp4
+}
+
+define <16 x i8> @v_ornQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: v_ornQi8:
+;CHECK: vorn
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = xor <16 x i8> %tmp2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
+ %tmp4 = or <16 x i8> %tmp1, %tmp3
+ ret <16 x i8> %tmp4
+}
+
+define <8 x i16> @v_ornQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: v_ornQi16:
+;CHECK: vorn
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = xor <8 x i16> %tmp2, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 >
+ %tmp4 = or <8 x i16> %tmp1, %tmp3
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @v_ornQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: v_ornQi32:
+;CHECK: vorn
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = xor <4 x i32> %tmp2, < i32 -1, i32 -1, i32 -1, i32 -1 >
+ %tmp4 = or <4 x i32> %tmp1, %tmp3
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @v_ornQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: v_ornQi64:
+;CHECK: vorn
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = xor <2 x i64> %tmp2, < i64 -1, i64 -1 >
+ %tmp4 = or <2 x i64> %tmp1, %tmp3
+ ret <2 x i64> %tmp4
+}
+
+define <8 x i8> @vtsti8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vtsti8:
+;CHECK: vtst.i8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = and <8 x i8> %tmp1, %tmp2
+ %tmp4 = icmp ne <8 x i8> %tmp3, zeroinitializer
+ %tmp5 = sext <8 x i1> %tmp4 to <8 x i8>
+ ret <8 x i8> %tmp5
+}
+
+define <4 x i16> @vtsti16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vtsti16:
+;CHECK: vtst.i16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = and <4 x i16> %tmp1, %tmp2
+ %tmp4 = icmp ne <4 x i16> %tmp3, zeroinitializer
+ %tmp5 = sext <4 x i1> %tmp4 to <4 x i16>
+ ret <4 x i16> %tmp5
+}
+
+define <2 x i32> @vtsti32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vtsti32:
+;CHECK: vtst.i32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = and <2 x i32> %tmp1, %tmp2
+ %tmp4 = icmp ne <2 x i32> %tmp3, zeroinitializer
+ %tmp5 = sext <2 x i1> %tmp4 to <2 x i32>
+ ret <2 x i32> %tmp5
+}
+
+define <16 x i8> @vtstQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vtstQi8:
+;CHECK: vtst.i8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = and <16 x i8> %tmp1, %tmp2
+ %tmp4 = icmp ne <16 x i8> %tmp3, zeroinitializer
+ %tmp5 = sext <16 x i1> %tmp4 to <16 x i8>
+ ret <16 x i8> %tmp5
+}
+
+define <8 x i16> @vtstQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vtstQi16:
+;CHECK: vtst.i16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = and <8 x i16> %tmp1, %tmp2
+ %tmp4 = icmp ne <8 x i16> %tmp3, zeroinitializer
+ %tmp5 = sext <8 x i1> %tmp4 to <8 x i16>
+ ret <8 x i16> %tmp5
+}
+
+define <4 x i32> @vtstQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vtstQi32:
+;CHECK: vtst.i32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = and <4 x i32> %tmp1, %tmp2
+ %tmp4 = icmp ne <4 x i32> %tmp3, zeroinitializer
+ %tmp5 = sext <4 x i1> %tmp4 to <4 x i32>
+ ret <4 x i32> %tmp5
+}
Modified: llvm/trunk/test/CodeGen/ARM/vcge.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vcge.ll?rev=83667&r1=83666&r2=83667&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vcge.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vcge.ll Fri Oct 9 15:20:54 2009
@@ -139,3 +139,24 @@
%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
}
+
+define <2 x i32> @vacgef32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vacgef32:
+;CHECK: vacge.f32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vacged(<2 x float> %tmp1, <2 x float> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <4 x i32> @vacgeQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vacgeQf32:
+;CHECK: vacge.f32
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = load <4 x float>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vacgeq(<4 x float> %tmp1, <4 x float> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+declare <2 x i32> @llvm.arm.neon.vacged(<2 x float>, <2 x float>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vacgeq(<4 x float>, <4 x float>) nounwind readnone
Modified: llvm/trunk/test/CodeGen/ARM/vcgt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vcgt.ll?rev=83667&r1=83666&r2=83667&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vcgt.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vcgt.ll Fri Oct 9 15:20:54 2009
@@ -139,3 +139,24 @@
%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
}
+
+define <2 x i32> @vacgtf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vacgtf32:
+;CHECK: vacgt.f32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vacgtd(<2 x float> %tmp1, <2 x float> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <4 x i32> @vacgtQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vacgtQf32:
+;CHECK: vacgt.f32
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = load <4 x float>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vacgtq(<4 x float> %tmp1, <4 x float> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+declare <2 x i32> @llvm.arm.neon.vacgtd(<2 x float>, <2 x float>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vacgtq(<4 x float>, <4 x float>) nounwind readnone
Removed: llvm/trunk/test/CodeGen/ARM/vcls.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vcls.ll?rev=83666&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vcls.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vcls.ll (removed)
@@ -1,57 +0,0 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
-
-define <8 x i8> @vclss8(<8 x i8>* %A) nounwind {
-;CHECK: vclss8:
-;CHECK: vcls.s8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = call <8 x i8> @llvm.arm.neon.vcls.v8i8(<8 x i8> %tmp1)
- ret <8 x i8> %tmp2
-}
-
-define <4 x i16> @vclss16(<4 x i16>* %A) nounwind {
-;CHECK: vclss16:
-;CHECK: vcls.s16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = call <4 x i16> @llvm.arm.neon.vcls.v4i16(<4 x i16> %tmp1)
- ret <4 x i16> %tmp2
-}
-
-define <2 x i32> @vclss32(<2 x i32>* %A) nounwind {
-;CHECK: vclss32:
-;CHECK: vcls.s32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = call <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32> %tmp1)
- ret <2 x i32> %tmp2
-}
-
-define <16 x i8> @vclsQs8(<16 x i8>* %A) nounwind {
-;CHECK: vclsQs8:
-;CHECK: vcls.s8
- %tmp1 = load <16 x i8>* %A
- %tmp2 = call <16 x i8> @llvm.arm.neon.vcls.v16i8(<16 x i8> %tmp1)
- ret <16 x i8> %tmp2
-}
-
-define <8 x i16> @vclsQs16(<8 x i16>* %A) nounwind {
-;CHECK: vclsQs16:
-;CHECK: vcls.s16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = call <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16> %tmp1)
- ret <8 x i16> %tmp2
-}
-
-define <4 x i32> @vclsQs32(<4 x i32>* %A) nounwind {
-;CHECK: vclsQs32:
-;CHECK: vcls.s32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = call <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32> %tmp1)
- ret <4 x i32> %tmp2
-}
-
-declare <8 x i8> @llvm.arm.neon.vcls.v8i8(<8 x i8>) nounwind readnone
-declare <4 x i16> @llvm.arm.neon.vcls.v4i16(<4 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32>) nounwind readnone
-
-declare <16 x i8> @llvm.arm.neon.vcls.v16i8(<16 x i8>) nounwind readnone
-declare <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32>) nounwind readnone
Removed: llvm/trunk/test/CodeGen/ARM/vclz.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vclz.ll?rev=83666&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vclz.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vclz.ll (removed)
@@ -1,57 +0,0 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
-
-define <8 x i8> @vclz8(<8 x i8>* %A) nounwind {
-;CHECK: vclz8:
-;CHECK: vclz.i8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = call <8 x i8> @llvm.arm.neon.vclz.v8i8(<8 x i8> %tmp1)
- ret <8 x i8> %tmp2
-}
-
-define <4 x i16> @vclz16(<4 x i16>* %A) nounwind {
-;CHECK: vclz16:
-;CHECK: vclz.i16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = call <4 x i16> @llvm.arm.neon.vclz.v4i16(<4 x i16> %tmp1)
- ret <4 x i16> %tmp2
-}
-
-define <2 x i32> @vclz32(<2 x i32>* %A) nounwind {
-;CHECK: vclz32:
-;CHECK: vclz.i32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = call <2 x i32> @llvm.arm.neon.vclz.v2i32(<2 x i32> %tmp1)
- ret <2 x i32> %tmp2
-}
-
-define <16 x i8> @vclzQ8(<16 x i8>* %A) nounwind {
-;CHECK: vclzQ8:
-;CHECK: vclz.i8
- %tmp1 = load <16 x i8>* %A
- %tmp2 = call <16 x i8> @llvm.arm.neon.vclz.v16i8(<16 x i8> %tmp1)
- ret <16 x i8> %tmp2
-}
-
-define <8 x i16> @vclzQ16(<8 x i16>* %A) nounwind {
-;CHECK: vclzQ16:
-;CHECK: vclz.i16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = call <8 x i16> @llvm.arm.neon.vclz.v8i16(<8 x i16> %tmp1)
- ret <8 x i16> %tmp2
-}
-
-define <4 x i32> @vclzQ32(<4 x i32>* %A) nounwind {
-;CHECK: vclzQ32:
-;CHECK: vclz.i32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = call <4 x i32> @llvm.arm.neon.vclz.v4i32(<4 x i32> %tmp1)
- ret <4 x i32> %tmp2
-}
-
-declare <8 x i8> @llvm.arm.neon.vclz.v8i8(<8 x i8>) nounwind readnone
-declare <4 x i16> @llvm.arm.neon.vclz.v4i16(<4 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm.neon.vclz.v2i32(<2 x i32>) nounwind readnone
-
-declare <16 x i8> @llvm.arm.neon.vclz.v16i8(<16 x i8>) nounwind readnone
-declare <8 x i16> @llvm.arm.neon.vclz.v8i16(<8 x i16>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vclz.v4i32(<4 x i32>) nounwind readnone
Modified: llvm/trunk/test/CodeGen/ARM/vcnt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vcnt.ll?rev=83667&r1=83666&r2=83667&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vcnt.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vcnt.ll Fri Oct 9 15:20:54 2009
@@ -18,3 +18,115 @@
declare <8 x i8> @llvm.arm.neon.vcnt.v8i8(<8 x i8>) nounwind readnone
declare <16 x i8> @llvm.arm.neon.vcnt.v16i8(<16 x i8>) nounwind readnone
+
+define <8 x i8> @vclz8(<8 x i8>* %A) nounwind {
+;CHECK: vclz8:
+;CHECK: vclz.i8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vclz.v8i8(<8 x i8> %tmp1)
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vclz16(<4 x i16>* %A) nounwind {
+;CHECK: vclz16:
+;CHECK: vclz.i16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vclz.v4i16(<4 x i16> %tmp1)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vclz32(<2 x i32>* %A) nounwind {
+;CHECK: vclz32:
+;CHECK: vclz.i32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vclz.v2i32(<2 x i32> %tmp1)
+ ret <2 x i32> %tmp2
+}
+
+define <16 x i8> @vclzQ8(<16 x i8>* %A) nounwind {
+;CHECK: vclzQ8:
+;CHECK: vclz.i8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = call <16 x i8> @llvm.arm.neon.vclz.v16i8(<16 x i8> %tmp1)
+ ret <16 x i8> %tmp2
+}
+
+define <8 x i16> @vclzQ16(<8 x i16>* %A) nounwind {
+;CHECK: vclzQ16:
+;CHECK: vclz.i16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <8 x i16> @llvm.arm.neon.vclz.v8i16(<8 x i16> %tmp1)
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vclzQ32(<4 x i32>* %A) nounwind {
+;CHECK: vclzQ32:
+;CHECK: vclz.i32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i32> @llvm.arm.neon.vclz.v4i32(<4 x i32> %tmp1)
+ ret <4 x i32> %tmp2
+}
+
+declare <8 x i8> @llvm.arm.neon.vclz.v8i8(<8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vclz.v4i16(<4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vclz.v2i32(<2 x i32>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vclz.v16i8(<16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vclz.v8i16(<8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vclz.v4i32(<4 x i32>) nounwind readnone
+
+define <8 x i8> @vclss8(<8 x i8>* %A) nounwind {
+;CHECK: vclss8:
+;CHECK: vcls.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vcls.v8i8(<8 x i8> %tmp1)
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vclss16(<4 x i16>* %A) nounwind {
+;CHECK: vclss16:
+;CHECK: vcls.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vcls.v4i16(<4 x i16> %tmp1)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vclss32(<2 x i32>* %A) nounwind {
+;CHECK: vclss32:
+;CHECK: vcls.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32> %tmp1)
+ ret <2 x i32> %tmp2
+}
+
+define <16 x i8> @vclsQs8(<16 x i8>* %A) nounwind {
+;CHECK: vclsQs8:
+;CHECK: vcls.s8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = call <16 x i8> @llvm.arm.neon.vcls.v16i8(<16 x i8> %tmp1)
+ ret <16 x i8> %tmp2
+}
+
+define <8 x i16> @vclsQs16(<8 x i16>* %A) nounwind {
+;CHECK: vclsQs16:
+;CHECK: vcls.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16> %tmp1)
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vclsQs32(<4 x i32>* %A) nounwind {
+;CHECK: vclsQs32:
+;CHECK: vcls.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32> %tmp1)
+ ret <4 x i32> %tmp2
+}
+
+declare <8 x i8> @llvm.arm.neon.vcls.v8i8(<8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vcls.v4i16(<4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vcls.v16i8(<16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32>) nounwind readnone
Modified: llvm/trunk/test/CodeGen/ARM/vcvt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vcvt.ll?rev=83667&r1=83666&r2=83667&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vcvt.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vcvt.ll Fri Oct 9 15:20:54 2009
@@ -63,3 +63,78 @@
%tmp2 = uitofp <4 x i32> %tmp1 to <4 x float>
ret <4 x float> %tmp2
}
+
+define <2 x i32> @vcvt_n_f32tos32(<2 x float>* %A) nounwind {
+;CHECK: vcvt_n_f32tos32:
+;CHECK: vcvt.s32.f32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float> %tmp1, i32 1)
+ ret <2 x i32> %tmp2
+}
+
+define <2 x i32> @vcvt_n_f32tou32(<2 x float>* %A) nounwind {
+;CHECK: vcvt_n_f32tou32:
+;CHECK: vcvt.u32.f32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float> %tmp1, i32 1)
+ ret <2 x i32> %tmp2
+}
+
+define <2 x float> @vcvt_n_s32tof32(<2 x i32>* %A) nounwind {
+;CHECK: vcvt_n_s32tof32:
+;CHECK: vcvt.f32.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = call <2 x float> @llvm.arm.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32> %tmp1, i32 1)
+ ret <2 x float> %tmp2
+}
+
+define <2 x float> @vcvt_n_u32tof32(<2 x i32>* %A) nounwind {
+;CHECK: vcvt_n_u32tof32:
+;CHECK: vcvt.f32.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = call <2 x float> @llvm.arm.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> %tmp1, i32 1)
+ ret <2 x float> %tmp2
+}
+
+declare <2 x i32> @llvm.arm.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float>, i32) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float>, i32) nounwind readnone
+declare <2 x float> @llvm.arm.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32>, i32) nounwind readnone
+declare <2 x float> @llvm.arm.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32>, i32) nounwind readnone
+
+define <4 x i32> @vcvtQ_n_f32tos32(<4 x float>* %A) nounwind {
+;CHECK: vcvtQ_n_f32tos32:
+;CHECK: vcvt.s32.f32
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = call <4 x i32> @llvm.arm.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float> %tmp1, i32 1)
+ ret <4 x i32> %tmp2
+}
+
+define <4 x i32> @vcvtQ_n_f32tou32(<4 x float>* %A) nounwind {
+;CHECK: vcvtQ_n_f32tou32:
+;CHECK: vcvt.u32.f32
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = call <4 x i32> @llvm.arm.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float> %tmp1, i32 1)
+ ret <4 x i32> %tmp2
+}
+
+define <4 x float> @vcvtQ_n_s32tof32(<4 x i32>* %A) nounwind {
+;CHECK: vcvtQ_n_s32tof32:
+;CHECK: vcvt.f32.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x float> @llvm.arm.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32> %tmp1, i32 1)
+ ret <4 x float> %tmp2
+}
+
+define <4 x float> @vcvtQ_n_u32tof32(<4 x i32>* %A) nounwind {
+;CHECK: vcvtQ_n_u32tof32:
+;CHECK: vcvt.f32.u32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x float> @llvm.arm.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32> %tmp1, i32 1)
+ ret <4 x float> %tmp2
+}
+
+declare <4 x i32> @llvm.arm.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float>, i32) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float>, i32) nounwind readnone
+declare <4 x float> @llvm.arm.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32>, i32) nounwind readnone
+declare <4 x float> @llvm.arm.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32>, i32) nounwind readnone
+
Removed: llvm/trunk/test/CodeGen/ARM/vcvt_n.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vcvt_n.ll?rev=83666&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vcvt_n.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vcvt_n.ll (removed)
@@ -1,76 +0,0 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
-
-define <2 x i32> @vcvt_f32tos32(<2 x float>* %A) nounwind {
-;CHECK: vcvt_f32tos32:
-;CHECK: vcvt.s32.f32
- %tmp1 = load <2 x float>* %A
- %tmp2 = call <2 x i32> @llvm.arm.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float> %tmp1, i32 1)
- ret <2 x i32> %tmp2
-}
-
-define <2 x i32> @vcvt_f32tou32(<2 x float>* %A) nounwind {
-;CHECK: vcvt_f32tou32:
-;CHECK: vcvt.u32.f32
- %tmp1 = load <2 x float>* %A
- %tmp2 = call <2 x i32> @llvm.arm.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float> %tmp1, i32 1)
- ret <2 x i32> %tmp2
-}
-
-define <2 x float> @vcvt_s32tof32(<2 x i32>* %A) nounwind {
-;CHECK: vcvt_s32tof32:
-;CHECK: vcvt.f32.s32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = call <2 x float> @llvm.arm.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32> %tmp1, i32 1)
- ret <2 x float> %tmp2
-}
-
-define <2 x float> @vcvt_u32tof32(<2 x i32>* %A) nounwind {
-;CHECK: vcvt_u32tof32:
-;CHECK: vcvt.f32.u32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = call <2 x float> @llvm.arm.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> %tmp1, i32 1)
- ret <2 x float> %tmp2
-}
-
-declare <2 x i32> @llvm.arm.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float>, i32) nounwind readnone
-declare <2 x i32> @llvm.arm.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float>, i32) nounwind readnone
-declare <2 x float> @llvm.arm.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32>, i32) nounwind readnone
-declare <2 x float> @llvm.arm.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32>, i32) nounwind readnone
-
-define <4 x i32> @vcvtQ_f32tos32(<4 x float>* %A) nounwind {
-;CHECK: vcvtQ_f32tos32:
-;CHECK: vcvt.s32.f32
- %tmp1 = load <4 x float>* %A
- %tmp2 = call <4 x i32> @llvm.arm.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float> %tmp1, i32 1)
- ret <4 x i32> %tmp2
-}
-
-define <4 x i32> @vcvtQ_f32tou32(<4 x float>* %A) nounwind {
-;CHECK: vcvtQ_f32tou32:
-;CHECK: vcvt.u32.f32
- %tmp1 = load <4 x float>* %A
- %tmp2 = call <4 x i32> @llvm.arm.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float> %tmp1, i32 1)
- ret <4 x i32> %tmp2
-}
-
-define <4 x float> @vcvtQ_s32tof32(<4 x i32>* %A) nounwind {
-;CHECK: vcvtQ_s32tof32:
-;CHECK: vcvt.f32.s32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = call <4 x float> @llvm.arm.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32> %tmp1, i32 1)
- ret <4 x float> %tmp2
-}
-
-define <4 x float> @vcvtQ_u32tof32(<4 x i32>* %A) nounwind {
-;CHECK: vcvtQ_u32tof32:
-;CHECK: vcvt.f32.u32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = call <4 x float> @llvm.arm.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32> %tmp1, i32 1)
- ret <4 x float> %tmp2
-}
-
-declare <4 x i32> @llvm.arm.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float>, i32) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float>, i32) nounwind readnone
-declare <4 x float> @llvm.arm.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32>, i32) nounwind readnone
-declare <4 x float> @llvm.arm.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32>, i32) nounwind readnone
-
Modified: llvm/trunk/test/CodeGen/ARM/vdup.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vdup.ll?rev=83667&r1=83666&r2=83667&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vdup.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vdup.ll Fri Oct 9 15:20:54 2009
@@ -179,3 +179,91 @@
%tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> zeroinitializer
ret <4 x float> %tmp2
}
+
+define <8 x i8> @vduplane8(<8 x i8>* %A) nounwind {
+;CHECK: vduplane8:
+;CHECK: vdup.8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vduplane16(<4 x i16>* %A) nounwind {
+;CHECK: vduplane16:
+;CHECK: vdup.16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vduplane32(<2 x i32>* %A) nounwind {
+;CHECK: vduplane32:
+;CHECK: vdup.32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> < i32 1, i32 1 >
+ ret <2 x i32> %tmp2
+}
+
+define <2 x float> @vduplanefloat(<2 x float>* %A) nounwind {
+;CHECK: vduplanefloat:
+;CHECK: vdup.32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> < i32 1, i32 1 >
+ ret <2 x float> %tmp2
+}
+
+define <16 x i8> @vduplaneQ8(<8 x i8>* %A) nounwind {
+;CHECK: vduplaneQ8:
+;CHECK: vdup.8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <16 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
+ ret <16 x i8> %tmp2
+}
+
+define <8 x i16> @vduplaneQ16(<4 x i16>* %A) nounwind {
+;CHECK: vduplaneQ16:
+;CHECK: vdup.16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vduplaneQ32(<2 x i32>* %A) nounwind {
+;CHECK: vduplaneQ32:
+;CHECK: vdup.32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
+ ret <4 x i32> %tmp2
+}
+
+define <4 x float> @vduplaneQfloat(<2 x float>* %A) nounwind {
+;CHECK: vduplaneQfloat:
+;CHECK: vdup.32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
+ ret <4 x float> %tmp2
+}
+
+define arm_apcscc <2 x i64> @foo(<2 x i64> %arg0_int64x1_t) nounwind readnone {
+entry:
+ %0 = shufflevector <2 x i64> %arg0_int64x1_t, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
+ ret <2 x i64> %0
+}
+
+define arm_apcscc <2 x i64> @bar(<2 x i64> %arg0_int64x1_t) nounwind readnone {
+entry:
+ %0 = shufflevector <2 x i64> %arg0_int64x1_t, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ ret <2 x i64> %0
+}
+
+define arm_apcscc <2 x double> @baz(<2 x double> %arg0_int64x1_t) nounwind readnone {
+entry:
+ %0 = shufflevector <2 x double> %arg0_int64x1_t, <2 x double> undef, <2 x i32> <i32 1, i32 1>
+ ret <2 x double> %0
+}
+
+define arm_apcscc <2 x double> @qux(<2 x double> %arg0_int64x1_t) nounwind readnone {
+entry:
+ %0 = shufflevector <2 x double> %arg0_int64x1_t, <2 x double> undef, <2 x i32> <i32 0, i32 0>
+ ret <2 x double> %0
+}
Removed: llvm/trunk/test/CodeGen/ARM/vdup_lane.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vdup_lane.ll?rev=83666&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vdup_lane.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vdup_lane.ll (removed)
@@ -1,89 +0,0 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
-
-define <8 x i8> @vduplane8(<8 x i8>* %A) nounwind {
-;CHECK: vduplane8:
-;CHECK: vdup.8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
- ret <8 x i8> %tmp2
-}
-
-define <4 x i16> @vduplane16(<4 x i16>* %A) nounwind {
-;CHECK: vduplane16:
-;CHECK: vdup.16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
- ret <4 x i16> %tmp2
-}
-
-define <2 x i32> @vduplane32(<2 x i32>* %A) nounwind {
-;CHECK: vduplane32:
-;CHECK: vdup.32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> < i32 1, i32 1 >
- ret <2 x i32> %tmp2
-}
-
-define <2 x float> @vduplanefloat(<2 x float>* %A) nounwind {
-;CHECK: vduplanefloat:
-;CHECK: vdup.32
- %tmp1 = load <2 x float>* %A
- %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> < i32 1, i32 1 >
- ret <2 x float> %tmp2
-}
-
-define <16 x i8> @vduplaneQ8(<8 x i8>* %A) nounwind {
-;CHECK: vduplaneQ8:
-;CHECK: vdup.8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <16 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
- ret <16 x i8> %tmp2
-}
-
-define <8 x i16> @vduplaneQ16(<4 x i16>* %A) nounwind {
-;CHECK: vduplaneQ16:
-;CHECK: vdup.16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
- ret <8 x i16> %tmp2
-}
-
-define <4 x i32> @vduplaneQ32(<2 x i32>* %A) nounwind {
-;CHECK: vduplaneQ32:
-;CHECK: vdup.32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
- ret <4 x i32> %tmp2
-}
-
-define <4 x float> @vduplaneQfloat(<2 x float>* %A) nounwind {
-;CHECK: vduplaneQfloat:
-;CHECK: vdup.32
- %tmp1 = load <2 x float>* %A
- %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
- ret <4 x float> %tmp2
-}
-
-define arm_apcscc <2 x i64> @foo(<2 x i64> %arg0_int64x1_t) nounwind readnone {
-entry:
- %0 = shufflevector <2 x i64> %arg0_int64x1_t, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
- ret <2 x i64> %0
-}
-
-define arm_apcscc <2 x i64> @bar(<2 x i64> %arg0_int64x1_t) nounwind readnone {
-entry:
- %0 = shufflevector <2 x i64> %arg0_int64x1_t, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
- ret <2 x i64> %0
-}
-
-define arm_apcscc <2 x double> @baz(<2 x double> %arg0_int64x1_t) nounwind readnone {
-entry:
- %0 = shufflevector <2 x double> %arg0_int64x1_t, <2 x double> undef, <2 x i32> <i32 1, i32 1>
- ret <2 x double> %0
-}
-
-define arm_apcscc <2 x double> @qux(<2 x double> %arg0_int64x1_t) nounwind readnone {
-entry:
- %0 = shufflevector <2 x double> %arg0_int64x1_t, <2 x double> undef, <2 x i32> <i32 0, i32 0>
- ret <2 x double> %0
-}
Removed: llvm/trunk/test/CodeGen/ARM/veor.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/veor.ll?rev=83666&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/veor.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/veor.ll (removed)
@@ -1,73 +0,0 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
-
-define <8 x i8> @v_eori8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: v_eori8:
-;CHECK: veor
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = xor <8 x i8> %tmp1, %tmp2
- ret <8 x i8> %tmp3
-}
-
-define <4 x i16> @v_eori16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: v_eori16:
-;CHECK: veor
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = xor <4 x i16> %tmp1, %tmp2
- ret <4 x i16> %tmp3
-}
-
-define <2 x i32> @v_eori32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: v_eori32:
-;CHECK: veor
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = xor <2 x i32> %tmp1, %tmp2
- ret <2 x i32> %tmp3
-}
-
-define <1 x i64> @v_eori64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
-;CHECK: v_eori64:
-;CHECK: veor
- %tmp1 = load <1 x i64>* %A
- %tmp2 = load <1 x i64>* %B
- %tmp3 = xor <1 x i64> %tmp1, %tmp2
- ret <1 x i64> %tmp3
-}
-
-define <16 x i8> @v_eorQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK: v_eorQi8:
-;CHECK: veor
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
- %tmp3 = xor <16 x i8> %tmp1, %tmp2
- ret <16 x i8> %tmp3
-}
-
-define <8 x i16> @v_eorQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: v_eorQi16:
-;CHECK: veor
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = xor <8 x i16> %tmp1, %tmp2
- ret <8 x i16> %tmp3
-}
-
-define <4 x i32> @v_eorQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: v_eorQi32:
-;CHECK: veor
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %tmp3 = xor <4 x i32> %tmp1, %tmp2
- ret <4 x i32> %tmp3
-}
-
-define <2 x i64> @v_eorQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK: v_eorQi64:
-;CHECK: veor
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
- %tmp3 = xor <2 x i64> %tmp1, %tmp2
- ret <2 x i64> %tmp3
-}
Modified: llvm/trunk/test/CodeGen/ARM/vget_lane.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vget_lane.ll?rev=83667&r1=83666&r2=83667&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vget_lane.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vget_lane.ll Fri Oct 9 15:20:54 2009
@@ -1,4 +1,6 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc < %s -mattr=+neon | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32"
+target triple = "thumbv7-elf"
define i32 @vget_lanes8(<8 x i8>* %A) nounwind {
;CHECK: vget_lanes8:
@@ -91,3 +93,120 @@
%tmp3 = extractelement <4 x i32> %tmp2, i32 1
ret i32 %tmp3
}
+
+define arm_aapcs_vfpcc void @test_vget_laneu16() nounwind {
+entry:
+; CHECK: vmov.u16 r0, d0[1]
+ %arg0_uint16x4_t = alloca <4 x i16> ; <<4 x i16>*> [#uses=1]
+ %out_uint16_t = alloca i16 ; <i16*> [#uses=1]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ %0 = load <4 x i16>* %arg0_uint16x4_t, align 8 ; <<4 x i16>> [#uses=1]
+ %1 = extractelement <4 x i16> %0, i32 1 ; <i16> [#uses=1]
+ store i16 %1, i16* %out_uint16_t, align 2
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
+
+define arm_aapcs_vfpcc void @test_vget_laneu8() nounwind {
+entry:
+; CHECK: vmov.u8 r0, d0[1]
+ %arg0_uint8x8_t = alloca <8 x i8> ; <<8 x i8>*> [#uses=1]
+ %out_uint8_t = alloca i8 ; <i8*> [#uses=1]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ %0 = load <8 x i8>* %arg0_uint8x8_t, align 8 ; <<8 x i8>> [#uses=1]
+ %1 = extractelement <8 x i8> %0, i32 1 ; <i8> [#uses=1]
+ store i8 %1, i8* %out_uint8_t, align 1
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
+
+define arm_aapcs_vfpcc void @test_vgetQ_laneu16() nounwind {
+entry:
+; CHECK: vmov.u16 r0, d0[1]
+ %arg0_uint16x8_t = alloca <8 x i16> ; <<8 x i16>*> [#uses=1]
+ %out_uint16_t = alloca i16 ; <i16*> [#uses=1]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ %0 = load <8 x i16>* %arg0_uint16x8_t, align 16 ; <<8 x i16>> [#uses=1]
+ %1 = extractelement <8 x i16> %0, i32 1 ; <i16> [#uses=1]
+ store i16 %1, i16* %out_uint16_t, align 2
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
+
+define arm_aapcs_vfpcc void @test_vgetQ_laneu8() nounwind {
+entry:
+; CHECK: vmov.u8 r0, d0[1]
+ %arg0_uint8x16_t = alloca <16 x i8> ; <<16 x i8>*> [#uses=1]
+ %out_uint8_t = alloca i8 ; <i8*> [#uses=1]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ %0 = load <16 x i8>* %arg0_uint8x16_t, align 16 ; <<16 x i8>> [#uses=1]
+ %1 = extractelement <16 x i8> %0, i32 1 ; <i8> [#uses=1]
+ store i8 %1, i8* %out_uint8_t, align 1
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
+
+define <8 x i8> @vset_lane8(<8 x i8>* %A, i8 %B) nounwind {
+;CHECK: vset_lane8:
+;CHECK: vmov.8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = insertelement <8 x i8> %tmp1, i8 %B, i32 1
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vset_lane16(<4 x i16>* %A, i16 %B) nounwind {
+;CHECK: vset_lane16:
+;CHECK: vmov.16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = insertelement <4 x i16> %tmp1, i16 %B, i32 1
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vset_lane32(<2 x i32>* %A, i32 %B) nounwind {
+;CHECK: vset_lane32:
+;CHECK: vmov.32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = insertelement <2 x i32> %tmp1, i32 %B, i32 1
+ ret <2 x i32> %tmp2
+}
+
+define <16 x i8> @vsetQ_lane8(<16 x i8>* %A, i8 %B) nounwind {
+;CHECK: vsetQ_lane8:
+;CHECK: vmov.8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = insertelement <16 x i8> %tmp1, i8 %B, i32 1
+ ret <16 x i8> %tmp2
+}
+
+define <8 x i16> @vsetQ_lane16(<8 x i16>* %A, i16 %B) nounwind {
+;CHECK: vsetQ_lane16:
+;CHECK: vmov.16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = insertelement <8 x i16> %tmp1, i16 %B, i32 1
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vsetQ_lane32(<4 x i32>* %A, i32 %B) nounwind {
+;CHECK: vsetQ_lane32:
+;CHECK: vmov.32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = insertelement <4 x i32> %tmp1, i32 %B, i32 1
+ ret <4 x i32> %tmp2
+}
+
+define arm_aapcs_vfpcc <2 x float> @test_vset_lanef32(float %arg0_float32_t, <2 x float> %arg1_float32x2_t) nounwind {
+;CHECK: test_vset_lanef32:
+;CHECK: fcpys
+;CHECK: fcpys
+entry:
+ %0 = insertelement <2 x float> %arg1_float32x2_t, float %arg0_float32_t, i32 1 ; <<2 x float>> [#uses=1]
+ ret <2 x float> %0
+}
Removed: llvm/trunk/test/CodeGen/ARM/vget_lane2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vget_lane2.ll?rev=83666&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vget_lane2.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vget_lane2.ll (removed)
@@ -1,63 +0,0 @@
-; RUN: llc < %s -mattr=+neon | FileCheck %s
-target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32"
-target triple = "thumbv7-elf"
-
-define arm_aapcs_vfpcc void @test_vget_laneu16() nounwind {
-entry:
-; CHECK: vmov.u16 r0, d0[1]
- %arg0_uint16x4_t = alloca <4 x i16> ; <<4 x i16>*> [#uses=1]
- %out_uint16_t = alloca i16 ; <i16*> [#uses=1]
- %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
- %0 = load <4 x i16>* %arg0_uint16x4_t, align 8 ; <<4 x i16>> [#uses=1]
- %1 = extractelement <4 x i16> %0, i32 1 ; <i16> [#uses=1]
- store i16 %1, i16* %out_uint16_t, align 2
- br label %return
-
-return: ; preds = %entry
- ret void
-}
-
-define arm_aapcs_vfpcc void @test_vget_laneu8() nounwind {
-entry:
-; CHECK: vmov.u8 r0, d0[1]
- %arg0_uint8x8_t = alloca <8 x i8> ; <<8 x i8>*> [#uses=1]
- %out_uint8_t = alloca i8 ; <i8*> [#uses=1]
- %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
- %0 = load <8 x i8>* %arg0_uint8x8_t, align 8 ; <<8 x i8>> [#uses=1]
- %1 = extractelement <8 x i8> %0, i32 1 ; <i8> [#uses=1]
- store i8 %1, i8* %out_uint8_t, align 1
- br label %return
-
-return: ; preds = %entry
- ret void
-}
-
-define arm_aapcs_vfpcc void @test_vgetQ_laneu16() nounwind {
-entry:
-; CHECK: vmov.u16 r0, d0[1]
- %arg0_uint16x8_t = alloca <8 x i16> ; <<8 x i16>*> [#uses=1]
- %out_uint16_t = alloca i16 ; <i16*> [#uses=1]
- %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
- %0 = load <8 x i16>* %arg0_uint16x8_t, align 16 ; <<8 x i16>> [#uses=1]
- %1 = extractelement <8 x i16> %0, i32 1 ; <i16> [#uses=1]
- store i16 %1, i16* %out_uint16_t, align 2
- br label %return
-
-return: ; preds = %entry
- ret void
-}
-
-define arm_aapcs_vfpcc void @test_vgetQ_laneu8() nounwind {
-entry:
-; CHECK: vmov.u8 r0, d0[1]
- %arg0_uint8x16_t = alloca <16 x i8> ; <<16 x i8>*> [#uses=1]
- %out_uint8_t = alloca i8 ; <i8*> [#uses=1]
- %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
- %0 = load <16 x i8>* %arg0_uint8x16_t, align 16 ; <<16 x i8>> [#uses=1]
- %1 = extractelement <16 x i8> %0, i32 1 ; <i8> [#uses=1]
- store i8 %1, i8* %out_uint8_t, align 1
- br label %return
-
-return: ; preds = %entry
- ret void
-}
Modified: llvm/trunk/test/CodeGen/ARM/vhadd.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vhadd.ll?rev=83667&r1=83666&r2=83667&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vhadd.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vhadd.ll Fri Oct 9 15:20:54 2009
@@ -123,3 +123,127 @@
declare <16 x i8> @llvm.arm.neon.vhaddu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.arm.neon.vhaddu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vhaddu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <8 x i8> @vrhadds8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vrhadds8:
+;CHECK: vrhadd.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vrhadds.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vrhadds16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vrhadds16:
+;CHECK: vrhadd.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vrhadds.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vrhadds32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vrhadds32:
+;CHECK: vrhadd.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vrhadds.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <8 x i8> @vrhaddu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vrhaddu8:
+;CHECK: vrhadd.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vrhaddu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vrhaddu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vrhaddu16:
+;CHECK: vrhadd.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vrhaddu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vrhaddu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vrhaddu32:
+;CHECK: vrhadd.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vrhaddu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <16 x i8> @vrhaddQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vrhaddQs8:
+;CHECK: vrhadd.s8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.arm.neon.vrhadds.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vrhaddQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vrhaddQs16:
+;CHECK: vrhadd.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vrhadds.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vrhaddQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vrhaddQs32:
+;CHECK: vrhadd.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vrhadds.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <16 x i8> @vrhaddQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vrhaddQu8:
+;CHECK: vrhadd.u8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.arm.neon.vrhaddu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vrhaddQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vrhaddQu16:
+;CHECK: vrhadd.u16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vrhaddu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vrhaddQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vrhaddQu32:
+;CHECK: vrhadd.u32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vrhaddu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+declare <8 x i8> @llvm.arm.neon.vrhadds.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vrhadds.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vrhadds.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i8> @llvm.arm.neon.vrhaddu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vrhaddu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vrhaddu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vrhadds.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vrhadds.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vrhadds.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vrhaddu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vrhaddu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vrhaddu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
Removed: llvm/trunk/test/CodeGen/ARM/vmax.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vmax.ll?rev=83666&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vmax.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vmax.ll (removed)
@@ -1,147 +0,0 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
-
-define <8 x i8> @vmaxs8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vmaxs8:
-;CHECK: vmax.s8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = call <8 x i8> @llvm.arm.neon.vmaxs.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
- ret <8 x i8> %tmp3
-}
-
-define <4 x i16> @vmaxs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vmaxs16:
-;CHECK: vmax.s16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i16> @llvm.arm.neon.vmaxs.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
- ret <4 x i16> %tmp3
-}
-
-define <2 x i32> @vmaxs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vmaxs32:
-;CHECK: vmax.s32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
- ret <2 x i32> %tmp3
-}
-
-define <8 x i8> @vmaxu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vmaxu8:
-;CHECK: vmax.u8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = call <8 x i8> @llvm.arm.neon.vmaxu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
- ret <8 x i8> %tmp3
-}
-
-define <4 x i16> @vmaxu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vmaxu16:
-;CHECK: vmax.u16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i16> @llvm.arm.neon.vmaxu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
- ret <4 x i16> %tmp3
-}
-
-define <2 x i32> @vmaxu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vmaxu32:
-;CHECK: vmax.u32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
- ret <2 x i32> %tmp3
-}
-
-define <2 x float> @vmaxf32(<2 x float>* %A, <2 x float>* %B) nounwind {
-;CHECK: vmaxf32:
-;CHECK: vmax.f32
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
- %tmp3 = call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
- ret <2 x float> %tmp3
-}
-
-define <16 x i8> @vmaxQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK: vmaxQs8:
-;CHECK: vmax.s8
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
- %tmp3 = call <16 x i8> @llvm.arm.neon.vmaxs.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
- ret <16 x i8> %tmp3
-}
-
-define <8 x i16> @vmaxQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: vmaxQs16:
-;CHECK: vmax.s16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = call <8 x i16> @llvm.arm.neon.vmaxs.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
- ret <8 x i16> %tmp3
-}
-
-define <4 x i32> @vmaxQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: vmaxQs32:
-;CHECK: vmax.s32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %tmp3 = call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
- ret <4 x i32> %tmp3
-}
-
-define <16 x i8> @vmaxQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK: vmaxQu8:
-;CHECK: vmax.u8
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
- %tmp3 = call <16 x i8> @llvm.arm.neon.vmaxu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
- ret <16 x i8> %tmp3
-}
-
-define <8 x i16> @vmaxQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: vmaxQu16:
-;CHECK: vmax.u16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = call <8 x i16> @llvm.arm.neon.vmaxu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
- ret <8 x i16> %tmp3
-}
-
-define <4 x i32> @vmaxQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: vmaxQu32:
-;CHECK: vmax.u32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %tmp3 = call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
- ret <4 x i32> %tmp3
-}
-
-define <4 x float> @vmaxQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
-;CHECK: vmaxQf32:
-;CHECK: vmax.f32
- %tmp1 = load <4 x float>* %A
- %tmp2 = load <4 x float>* %B
- %tmp3 = call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
- ret <4 x float> %tmp3
-}
-
-declare <8 x i8> @llvm.arm.neon.vmaxs.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i16> @llvm.arm.neon.vmaxs.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-
-declare <8 x i8> @llvm.arm.neon.vmaxu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i16> @llvm.arm.neon.vmaxu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-
-declare <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float>, <2 x float>) nounwind readnone
-
-declare <16 x i8> @llvm.arm.neon.vmaxs.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
-declare <8 x i16> @llvm.arm.neon.vmaxs.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
-
-declare <16 x i8> @llvm.arm.neon.vmaxu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
-declare <8 x i16> @llvm.arm.neon.vmaxu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
-
-declare <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float>, <4 x float>) nounwind readnone
Removed: llvm/trunk/test/CodeGen/ARM/vmin.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vmin.ll?rev=83666&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vmin.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vmin.ll (removed)
@@ -1,147 +0,0 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
-
-define <8 x i8> @vmins8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vmins8:
-;CHECK: vmin.s8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = call <8 x i8> @llvm.arm.neon.vmins.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
- ret <8 x i8> %tmp3
-}
-
-define <4 x i16> @vmins16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vmins16:
-;CHECK: vmin.s16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i16> @llvm.arm.neon.vmins.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
- ret <4 x i16> %tmp3
-}
-
-define <2 x i32> @vmins32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vmins32:
-;CHECK: vmin.s32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
- ret <2 x i32> %tmp3
-}
-
-define <8 x i8> @vminu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vminu8:
-;CHECK: vmin.u8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = call <8 x i8> @llvm.arm.neon.vminu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
- ret <8 x i8> %tmp3
-}
-
-define <4 x i16> @vminu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vminu16:
-;CHECK: vmin.u16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i16> @llvm.arm.neon.vminu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
- ret <4 x i16> %tmp3
-}
-
-define <2 x i32> @vminu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vminu32:
-;CHECK: vmin.u32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
- ret <2 x i32> %tmp3
-}
-
-define <2 x float> @vminf32(<2 x float>* %A, <2 x float>* %B) nounwind {
-;CHECK: vminf32:
-;CHECK: vmin.f32
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
- %tmp3 = call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
- ret <2 x float> %tmp3
-}
-
-define <16 x i8> @vminQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK: vminQs8:
-;CHECK: vmin.s8
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
- %tmp3 = call <16 x i8> @llvm.arm.neon.vmins.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
- ret <16 x i8> %tmp3
-}
-
-define <8 x i16> @vminQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: vminQs16:
-;CHECK: vmin.s16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = call <8 x i16> @llvm.arm.neon.vmins.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
- ret <8 x i16> %tmp3
-}
-
-define <4 x i32> @vminQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: vminQs32:
-;CHECK: vmin.s32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %tmp3 = call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
- ret <4 x i32> %tmp3
-}
-
-define <16 x i8> @vminQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK: vminQu8:
-;CHECK: vmin.u8
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
- %tmp3 = call <16 x i8> @llvm.arm.neon.vminu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
- ret <16 x i8> %tmp3
-}
-
-define <8 x i16> @vminQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: vminQu16:
-;CHECK: vmin.u16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = call <8 x i16> @llvm.arm.neon.vminu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
- ret <8 x i16> %tmp3
-}
-
-define <4 x i32> @vminQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: vminQu32:
-;CHECK: vmin.u32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %tmp3 = call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
- ret <4 x i32> %tmp3
-}
-
-define <4 x float> @vminQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
-;CHECK: vminQf32:
-;CHECK: vmin.f32
- %tmp1 = load <4 x float>* %A
- %tmp2 = load <4 x float>* %B
- %tmp3 = call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
- ret <4 x float> %tmp3
-}
-
-declare <8 x i8> @llvm.arm.neon.vmins.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i16> @llvm.arm.neon.vmins.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-
-declare <8 x i8> @llvm.arm.neon.vminu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i16> @llvm.arm.neon.vminu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-
-declare <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float>, <2 x float>) nounwind readnone
-
-declare <16 x i8> @llvm.arm.neon.vmins.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
-declare <8 x i16> @llvm.arm.neon.vmins.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
-
-declare <16 x i8> @llvm.arm.neon.vminu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
-declare <8 x i16> @llvm.arm.neon.vminu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
-
-declare <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float>, <4 x float>) nounwind readnone
Added: llvm/trunk/test/CodeGen/ARM/vminmax.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vminmax.ll?rev=83667&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vminmax.ll (added)
+++ llvm/trunk/test/CodeGen/ARM/vminmax.ll Fri Oct 9 15:20:54 2009
@@ -0,0 +1,293 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <8 x i8> @vmins8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vmins8:
+;CHECK: vmin.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vmins.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vmins16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vmins16:
+;CHECK: vmin.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vmins.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vmins32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vmins32:
+;CHECK: vmin.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <8 x i8> @vminu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vminu8:
+;CHECK: vmin.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vminu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vminu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vminu16:
+;CHECK: vmin.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vminu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vminu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vminu32:
+;CHECK: vmin.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <2 x float> @vminf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vminf32:
+;CHECK: vmin.f32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+ ret <2 x float> %tmp3
+}
+
+define <16 x i8> @vminQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vminQs8:
+;CHECK: vmin.s8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.arm.neon.vmins.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vminQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vminQs16:
+;CHECK: vmin.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vmins.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vminQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vminQs32:
+;CHECK: vmin.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <16 x i8> @vminQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vminQu8:
+;CHECK: vmin.u8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.arm.neon.vminu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vminQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vminQu16:
+;CHECK: vmin.u16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vminu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vminQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vminQu32:
+;CHECK: vmin.u32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <4 x float> @vminQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vminQf32:
+;CHECK: vmin.f32
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = load <4 x float>* %B
+ %tmp3 = call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
+ ret <4 x float> %tmp3
+}
+
+declare <8 x i8> @llvm.arm.neon.vmins.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vmins.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i8> @llvm.arm.neon.vminu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vminu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float>, <2 x float>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vmins.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vmins.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vminu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vminu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+
+declare <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float>, <4 x float>) nounwind readnone
+
+define <8 x i8> @vmaxs8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vmaxs8:
+;CHECK: vmax.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vmaxs.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vmaxs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vmaxs16:
+;CHECK: vmax.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vmaxs.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vmaxs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vmaxs32:
+;CHECK: vmax.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <8 x i8> @vmaxu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vmaxu8:
+;CHECK: vmax.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vmaxu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vmaxu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vmaxu16:
+;CHECK: vmax.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vmaxu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vmaxu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vmaxu32:
+;CHECK: vmax.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <2 x float> @vmaxf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vmaxf32:
+;CHECK: vmax.f32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+ ret <2 x float> %tmp3
+}
+
+define <16 x i8> @vmaxQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vmaxQs8:
+;CHECK: vmax.s8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.arm.neon.vmaxs.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vmaxQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vmaxQs16:
+;CHECK: vmax.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vmaxs.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vmaxQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vmaxQs32:
+;CHECK: vmax.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <16 x i8> @vmaxQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vmaxQu8:
+;CHECK: vmax.u8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.arm.neon.vmaxu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vmaxQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vmaxQu16:
+;CHECK: vmax.u16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vmaxu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vmaxQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vmaxQu32:
+;CHECK: vmax.u32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <4 x float> @vmaxQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vmaxQf32:
+;CHECK: vmax.f32
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = load <4 x float>* %B
+ %tmp3 = call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
+ ret <4 x float> %tmp3
+}
+
+declare <8 x i8> @llvm.arm.neon.vmaxs.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vmaxs.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i8> @llvm.arm.neon.vmaxu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vmaxu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float>, <2 x float>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vmaxs.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vmaxs.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vmaxu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vmaxu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+
+declare <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float>, <4 x float>) nounwind readnone
Modified: llvm/trunk/test/CodeGen/ARM/vmla.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vmla.ll?rev=83667&r1=83666&r2=83667&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vmla.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vmla.ll Fri Oct 9 15:20:54 2009
@@ -87,3 +87,107 @@
%tmp5 = add <4 x float> %tmp1, %tmp4
ret <4 x float> %tmp5
}
+
+define <8 x i16> @vmlals8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
+;CHECK: vmlals8:
+;CHECK: vmlal.s8
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = load <8 x i8>* %C
+ %tmp4 = call <8 x i16> @llvm.arm.neon.vmlals.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3)
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @vmlals16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+;CHECK: vmlals16:
+;CHECK: vmlal.s16
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = load <4 x i16>* %C
+ %tmp4 = call <4 x i32> @llvm.arm.neon.vmlals.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @vmlals32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+;CHECK: vmlals32:
+;CHECK: vmlal.s32
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = load <2 x i32>* %C
+ %tmp4 = call <2 x i64> @llvm.arm.neon.vmlals.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
+ ret <2 x i64> %tmp4
+}
+
+define <8 x i16> @vmlalu8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
+;CHECK: vmlalu8:
+;CHECK: vmlal.u8
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = load <8 x i8>* %C
+ %tmp4 = call <8 x i16> @llvm.arm.neon.vmlalu.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3)
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @vmlalu16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+;CHECK: vmlalu16:
+;CHECK: vmlal.u16
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = load <4 x i16>* %C
+ %tmp4 = call <4 x i32> @llvm.arm.neon.vmlalu.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @vmlalu32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+;CHECK: vmlalu32:
+;CHECK: vmlal.u32
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = load <2 x i32>* %C
+ %tmp4 = call <2 x i64> @llvm.arm.neon.vmlalu.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
+ ret <2 x i64> %tmp4
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vmlal_lanes16(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %arg2_int16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vmlal_lanes16
+; CHECK: vmlal.s16 q0, d2, d3[1]
+ %0 = shufflevector <4 x i16> %arg2_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
+ %1 = tail call <4 x i32> @llvm.arm.neon.vmlals.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %1
+}
+
+define arm_aapcs_vfpcc <2 x i64> @test_vmlal_lanes32(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %arg2_int32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vmlal_lanes32
+; CHECK: vmlal.s32 q0, d2, d3[1]
+ %0 = shufflevector <2 x i32> %arg2_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
+ %1 = tail call <2 x i64> @llvm.arm.neon.vmlals.v2i64(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %1
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vmlal_laneu16(<4 x i32> %arg0_uint32x4_t, <4 x i16> %arg1_uint16x4_t, <4 x i16> %arg2_uint16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vmlal_laneu16
+; CHECK: vmlal.u16 q0, d2, d3[1]
+ %0 = shufflevector <4 x i16> %arg2_uint16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
+ %1 = tail call <4 x i32> @llvm.arm.neon.vmlalu.v4i32(<4 x i32> %arg0_uint32x4_t, <4 x i16> %arg1_uint16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %1
+}
+
+define arm_aapcs_vfpcc <2 x i64> @test_vmlal_laneu32(<2 x i64> %arg0_uint64x2_t, <2 x i32> %arg1_uint32x2_t, <2 x i32> %arg2_uint32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vmlal_laneu32
+; CHECK: vmlal.u32 q0, d2, d3[1]
+ %0 = shufflevector <2 x i32> %arg2_uint32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
+ %1 = tail call <2 x i64> @llvm.arm.neon.vmlalu.v2i64(<2 x i64> %arg0_uint64x2_t, <2 x i32> %arg1_uint32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %1
+}
+
+declare <8 x i16> @llvm.arm.neon.vmlals.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vmlals.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vmlals.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vmlalu.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vmlalu.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vmlalu.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
Removed: llvm/trunk/test/CodeGen/ARM/vmlal.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vmlal.ll?rev=83666&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vmlal.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vmlal.ll (removed)
@@ -1,69 +0,0 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
-
-define <8 x i16> @vmlals8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
-;CHECK: vmlals8:
-;CHECK: vmlal.s8
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = load <8 x i8>* %C
- %tmp4 = call <8 x i16> @llvm.arm.neon.vmlals.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3)
- ret <8 x i16> %tmp4
-}
-
-define <4 x i32> @vmlals16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
-;CHECK: vmlals16:
-;CHECK: vmlal.s16
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = load <4 x i16>* %C
- %tmp4 = call <4 x i32> @llvm.arm.neon.vmlals.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
- ret <4 x i32> %tmp4
-}
-
-define <2 x i64> @vmlals32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
-;CHECK: vmlals32:
-;CHECK: vmlal.s32
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = load <2 x i32>* %C
- %tmp4 = call <2 x i64> @llvm.arm.neon.vmlals.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
- ret <2 x i64> %tmp4
-}
-
-define <8 x i16> @vmlalu8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
-;CHECK: vmlalu8:
-;CHECK: vmlal.u8
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = load <8 x i8>* %C
- %tmp4 = call <8 x i16> @llvm.arm.neon.vmlalu.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3)
- ret <8 x i16> %tmp4
-}
-
-define <4 x i32> @vmlalu16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
-;CHECK: vmlalu16:
-;CHECK: vmlal.u16
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = load <4 x i16>* %C
- %tmp4 = call <4 x i32> @llvm.arm.neon.vmlalu.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
- ret <4 x i32> %tmp4
-}
-
-define <2 x i64> @vmlalu32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
-;CHECK: vmlalu32:
-;CHECK: vmlal.u32
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = load <2 x i32>* %C
- %tmp4 = call <2 x i64> @llvm.arm.neon.vmlalu.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
- ret <2 x i64> %tmp4
-}
-
-declare <8 x i16> @llvm.arm.neon.vmlals.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vmlals.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i64> @llvm.arm.neon.vmlals.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
-
-declare <8 x i16> @llvm.arm.neon.vmlalu.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vmlalu.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i64> @llvm.arm.neon.vmlalu.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
Removed: llvm/trunk/test/CodeGen/ARM/vmlal_lane.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vmlal_lane.ll?rev=83666&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vmlal_lane.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vmlal_lane.ll (removed)
@@ -1,47 +0,0 @@
-; RUN: llc -mattr=+neon < %s | FileCheck %s
-target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32"
-target triple = "thumbv7-elf"
-
-define arm_aapcs_vfpcc <4 x i32> @test_vmlal_lanes16(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %arg2_int16x4_t) nounwind readnone {
-entry:
-; CHECK: test_vmlal_lanes16
-; CHECK: vmlal.s16 q0, d2, d3[1]
- %0 = shufflevector <4 x i16> %arg2_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
- %1 = tail call <4 x i32> @llvm.arm.neon.vmlals.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
- ret <4 x i32> %1
-}
-
-declare <4 x i32> @llvm.arm.neon.vmlals.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
-
-define arm_aapcs_vfpcc <2 x i64> @test_vmlal_lanes32(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %arg2_int32x2_t) nounwind readnone {
-entry:
-; CHECK: test_vmlal_lanes32
-; CHECK: vmlal.s32 q0, d2, d3[1]
- %0 = shufflevector <2 x i32> %arg2_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
- %1 = tail call <2 x i64> @llvm.arm.neon.vmlals.v2i64(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
- ret <2 x i64> %1
-}
-
-declare <2 x i64> @llvm.arm.neon.vmlals.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
-
-define arm_aapcs_vfpcc <4 x i32> @test_vmlal_laneu16(<4 x i32> %arg0_uint32x4_t, <4 x i16> %arg1_uint16x4_t, <4 x i16> %arg2_uint16x4_t) nounwind readnone {
-entry:
-; CHECK: test_vmlal_laneu16
-; CHECK: vmlal.u16 q0, d2, d3[1]
- %0 = shufflevector <4 x i16> %arg2_uint16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
- %1 = tail call <4 x i32> @llvm.arm.neon.vmlalu.v4i32(<4 x i32> %arg0_uint32x4_t, <4 x i16> %arg1_uint16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
- ret <4 x i32> %1
-}
-
-declare <4 x i32> @llvm.arm.neon.vmlalu.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
-
-define arm_aapcs_vfpcc <2 x i64> @test_vmlal_laneu32(<2 x i64> %arg0_uint64x2_t, <2 x i32> %arg1_uint32x2_t, <2 x i32> %arg2_uint32x2_t) nounwind readnone {
-entry:
-; CHECK: test_vmlal_laneu32
-; CHECK: vmlal.u32 q0, d2, d3[1]
- %0 = shufflevector <2 x i32> %arg2_uint32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
- %1 = tail call <2 x i64> @llvm.arm.neon.vmlalu.v2i64(<2 x i64> %arg0_uint64x2_t, <2 x i32> %arg1_uint32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
- ret <2 x i64> %1
-}
-
-declare <2 x i64> @llvm.arm.neon.vmlalu.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
Modified: llvm/trunk/test/CodeGen/ARM/vmls.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vmls.ll?rev=83667&r1=83666&r2=83667&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vmls.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vmls.ll Fri Oct 9 15:20:54 2009
@@ -87,3 +87,107 @@
%tmp5 = sub <4 x float> %tmp1, %tmp4
ret <4 x float> %tmp5
}
+
+define <8 x i16> @vmlsls8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
+;CHECK: vmlsls8:
+;CHECK: vmlsl.s8
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = load <8 x i8>* %C
+ %tmp4 = call <8 x i16> @llvm.arm.neon.vmlsls.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3)
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @vmlsls16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+;CHECK: vmlsls16:
+;CHECK: vmlsl.s16
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = load <4 x i16>* %C
+ %tmp4 = call <4 x i32> @llvm.arm.neon.vmlsls.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @vmlsls32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+;CHECK: vmlsls32:
+;CHECK: vmlsl.s32
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = load <2 x i32>* %C
+ %tmp4 = call <2 x i64> @llvm.arm.neon.vmlsls.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
+ ret <2 x i64> %tmp4
+}
+
+define <8 x i16> @vmlslu8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
+;CHECK: vmlslu8:
+;CHECK: vmlsl.u8
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = load <8 x i8>* %C
+ %tmp4 = call <8 x i16> @llvm.arm.neon.vmlslu.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3)
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @vmlslu16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+;CHECK: vmlslu16:
+;CHECK: vmlsl.u16
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = load <4 x i16>* %C
+ %tmp4 = call <4 x i32> @llvm.arm.neon.vmlslu.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @vmlslu32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+;CHECK: vmlslu32:
+;CHECK: vmlsl.u32
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = load <2 x i32>* %C
+ %tmp4 = call <2 x i64> @llvm.arm.neon.vmlslu.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
+ ret <2 x i64> %tmp4
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vmlsl_lanes16(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %arg2_int16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vmlsl_lanes16
+; CHECK: vmlsl.s16 q0, d2, d3[1]
+ %0 = shufflevector <4 x i16> %arg2_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
+ %1 = tail call <4 x i32> @llvm.arm.neon.vmlsls.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %1
+}
+
+define arm_aapcs_vfpcc <2 x i64> @test_vmlsl_lanes32(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %arg2_int32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vmlsl_lanes32
+; CHECK: vmlsl.s32 q0, d2, d3[1]
+ %0 = shufflevector <2 x i32> %arg2_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
+ %1 = tail call <2 x i64> @llvm.arm.neon.vmlsls.v2i64(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %1
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vmlsl_laneu16(<4 x i32> %arg0_uint32x4_t, <4 x i16> %arg1_uint16x4_t, <4 x i16> %arg2_uint16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vmlsl_laneu16
+; CHECK: vmlsl.u16 q0, d2, d3[1]
+ %0 = shufflevector <4 x i16> %arg2_uint16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
+ %1 = tail call <4 x i32> @llvm.arm.neon.vmlslu.v4i32(<4 x i32> %arg0_uint32x4_t, <4 x i16> %arg1_uint16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %1
+}
+
+define arm_aapcs_vfpcc <2 x i64> @test_vmlsl_laneu32(<2 x i64> %arg0_uint64x2_t, <2 x i32> %arg1_uint32x2_t, <2 x i32> %arg2_uint32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vmlsl_laneu32
+; CHECK: vmlsl.u32 q0, d2, d3[1]
+ %0 = shufflevector <2 x i32> %arg2_uint32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
+ %1 = tail call <2 x i64> @llvm.arm.neon.vmlslu.v2i64(<2 x i64> %arg0_uint64x2_t, <2 x i32> %arg1_uint32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %1
+}
+
+declare <8 x i16> @llvm.arm.neon.vmlsls.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vmlsls.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vmlsls.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vmlslu.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vmlslu.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vmlslu.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
Removed: llvm/trunk/test/CodeGen/ARM/vmlsl.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vmlsl.ll?rev=83666&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vmlsl.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vmlsl.ll (removed)
@@ -1,69 +0,0 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
-
-define <8 x i16> @vmlsls8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
-;CHECK: vmlsls8:
-;CHECK: vmlsl.s8
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = load <8 x i8>* %C
- %tmp4 = call <8 x i16> @llvm.arm.neon.vmlsls.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3)
- ret <8 x i16> %tmp4
-}
-
-define <4 x i32> @vmlsls16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
-;CHECK: vmlsls16:
-;CHECK: vmlsl.s16
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = load <4 x i16>* %C
- %tmp4 = call <4 x i32> @llvm.arm.neon.vmlsls.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
- ret <4 x i32> %tmp4
-}
-
-define <2 x i64> @vmlsls32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
-;CHECK: vmlsls32:
-;CHECK: vmlsl.s32
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = load <2 x i32>* %C
- %tmp4 = call <2 x i64> @llvm.arm.neon.vmlsls.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
- ret <2 x i64> %tmp4
-}
-
-define <8 x i16> @vmlslu8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
-;CHECK: vmlslu8:
-;CHECK: vmlsl.u8
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = load <8 x i8>* %C
- %tmp4 = call <8 x i16> @llvm.arm.neon.vmlslu.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3)
- ret <8 x i16> %tmp4
-}
-
-define <4 x i32> @vmlslu16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
-;CHECK: vmlslu16:
-;CHECK: vmlsl.u16
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = load <4 x i16>* %C
- %tmp4 = call <4 x i32> @llvm.arm.neon.vmlslu.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
- ret <4 x i32> %tmp4
-}
-
-define <2 x i64> @vmlslu32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
-;CHECK: vmlslu32:
-;CHECK: vmlsl.u32
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = load <2 x i32>* %C
- %tmp4 = call <2 x i64> @llvm.arm.neon.vmlslu.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
- ret <2 x i64> %tmp4
-}
-
-declare <8 x i16> @llvm.arm.neon.vmlsls.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vmlsls.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i64> @llvm.arm.neon.vmlsls.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
-
-declare <8 x i16> @llvm.arm.neon.vmlslu.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vmlslu.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i64> @llvm.arm.neon.vmlslu.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
Removed: llvm/trunk/test/CodeGen/ARM/vmlsl_lane.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vmlsl_lane.ll?rev=83666&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vmlsl_lane.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vmlsl_lane.ll (removed)
@@ -1,47 +0,0 @@
-; RUN: llc -mattr=+neon < %s | FileCheck %s
-target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32"
-target triple = "thumbv7-elf"
-
-define arm_aapcs_vfpcc <4 x i32> @test_vmlsl_lanes16(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %arg2_int16x4_t) nounwind readnone {
-entry:
-; CHECK: test_vmlsl_lanes16
-; CHECK: vmlsl.s16 q0, d2, d3[1]
- %0 = shufflevector <4 x i16> %arg2_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
- %1 = tail call <4 x i32> @llvm.arm.neon.vmlsls.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
- ret <4 x i32> %1
-}
-
-declare <4 x i32> @llvm.arm.neon.vmlsls.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
-
-define arm_aapcs_vfpcc <2 x i64> @test_vmlsl_lanes32(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %arg2_int32x2_t) nounwind readnone {
-entry:
-; CHECK: test_vmlsl_lanes32
-; CHECK: vmlsl.s32 q0, d2, d3[1]
- %0 = shufflevector <2 x i32> %arg2_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
- %1 = tail call <2 x i64> @llvm.arm.neon.vmlsls.v2i64(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
- ret <2 x i64> %1
-}
-
-declare <2 x i64> @llvm.arm.neon.vmlsls.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
-
-define arm_aapcs_vfpcc <4 x i32> @test_vmlsl_laneu16(<4 x i32> %arg0_uint32x4_t, <4 x i16> %arg1_uint16x4_t, <4 x i16> %arg2_uint16x4_t) nounwind readnone {
-entry:
-; CHECK: test_vmlsl_laneu16
-; CHECK: vmlsl.u16 q0, d2, d3[1]
- %0 = shufflevector <4 x i16> %arg2_uint16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
- %1 = tail call <4 x i32> @llvm.arm.neon.vmlslu.v4i32(<4 x i32> %arg0_uint32x4_t, <4 x i16> %arg1_uint16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
- ret <4 x i32> %1
-}
-
-declare <4 x i32> @llvm.arm.neon.vmlslu.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
-
-define arm_aapcs_vfpcc <2 x i64> @test_vmlsl_laneu32(<2 x i64> %arg0_uint64x2_t, <2 x i32> %arg1_uint32x2_t, <2 x i32> %arg2_uint32x2_t) nounwind readnone {
-entry:
-; CHECK: test_vmlsl_laneu32
-; CHECK: vmlsl.u32 q0, d2, d3[1]
- %0 = shufflevector <2 x i32> %arg2_uint32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
- %1 = tail call <2 x i64> @llvm.arm.neon.vmlslu.v2i64(<2 x i64> %arg0_uint64x2_t, <2 x i32> %arg1_uint32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
- ret <2 x i64> %1
-}
-
-declare <2 x i64> @llvm.arm.neon.vmlslu.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
Modified: llvm/trunk/test/CodeGen/ARM/vmov.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vmov.ll?rev=83667&r1=83666&r2=83667&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vmov.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vmov.ll Fri Oct 9 15:20:54 2009
@@ -133,3 +133,171 @@
;CHECK: vmov.i64
ret <2 x i64> < i64 18374687574888349695, i64 18374687574888349695 >
}
+
+define <8 x i16> @vmovls8(<8 x i8>* %A) nounwind {
+;CHECK: vmovls8:
+;CHECK: vmovl.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = call <8 x i16> @llvm.arm.neon.vmovls.v8i16(<8 x i8> %tmp1)
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vmovls16(<4 x i16>* %A) nounwind {
+;CHECK: vmovls16:
+;CHECK: vmovl.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = call <4 x i32> @llvm.arm.neon.vmovls.v4i32(<4 x i16> %tmp1)
+ ret <4 x i32> %tmp2
+}
+
+define <2 x i64> @vmovls32(<2 x i32>* %A) nounwind {
+;CHECK: vmovls32:
+;CHECK: vmovl.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = call <2 x i64> @llvm.arm.neon.vmovls.v2i64(<2 x i32> %tmp1)
+ ret <2 x i64> %tmp2
+}
+
+define <8 x i16> @vmovlu8(<8 x i8>* %A) nounwind {
+;CHECK: vmovlu8:
+;CHECK: vmovl.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = call <8 x i16> @llvm.arm.neon.vmovlu.v8i16(<8 x i8> %tmp1)
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vmovlu16(<4 x i16>* %A) nounwind {
+;CHECK: vmovlu16:
+;CHECK: vmovl.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = call <4 x i32> @llvm.arm.neon.vmovlu.v4i32(<4 x i16> %tmp1)
+ ret <4 x i32> %tmp2
+}
+
+define <2 x i64> @vmovlu32(<2 x i32>* %A) nounwind {
+;CHECK: vmovlu32:
+;CHECK: vmovl.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = call <2 x i64> @llvm.arm.neon.vmovlu.v2i64(<2 x i32> %tmp1)
+ ret <2 x i64> %tmp2
+}
+
+declare <8 x i16> @llvm.arm.neon.vmovls.v8i16(<8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vmovls.v4i32(<4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vmovls.v2i64(<2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vmovlu.v8i16(<8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vmovlu.v4i32(<4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vmovlu.v2i64(<2 x i32>) nounwind readnone
+
+define <8 x i8> @vmovni16(<8 x i16>* %A) nounwind {
+;CHECK: vmovni16:
+;CHECK: vmovn.i16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vmovn.v8i8(<8 x i16> %tmp1)
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vmovni32(<4 x i32>* %A) nounwind {
+;CHECK: vmovni32:
+;CHECK: vmovn.i32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vmovn.v4i16(<4 x i32> %tmp1)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vmovni64(<2 x i64>* %A) nounwind {
+;CHECK: vmovni64:
+;CHECK: vmovn.i64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vmovn.v2i32(<2 x i64> %tmp1)
+ ret <2 x i32> %tmp2
+}
+
+declare <8 x i8> @llvm.arm.neon.vmovn.v8i8(<8 x i16>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vmovn.v4i16(<4 x i32>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vmovn.v2i32(<2 x i64>) nounwind readnone
+
+define <8 x i8> @vqmovns16(<8 x i16>* %A) nounwind {
+;CHECK: vqmovns16:
+;CHECK: vqmovn.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vqmovns.v8i8(<8 x i16> %tmp1)
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vqmovns32(<4 x i32>* %A) nounwind {
+;CHECK: vqmovns32:
+;CHECK: vqmovn.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vqmovns.v4i16(<4 x i32> %tmp1)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vqmovns64(<2 x i64>* %A) nounwind {
+;CHECK: vqmovns64:
+;CHECK: vqmovn.s64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vqmovns.v2i32(<2 x i64> %tmp1)
+ ret <2 x i32> %tmp2
+}
+
+define <8 x i8> @vqmovnu16(<8 x i16>* %A) nounwind {
+;CHECK: vqmovnu16:
+;CHECK: vqmovn.u16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16> %tmp1)
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vqmovnu32(<4 x i32>* %A) nounwind {
+;CHECK: vqmovnu32:
+;CHECK: vqmovn.u32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vqmovnu.v4i16(<4 x i32> %tmp1)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vqmovnu64(<2 x i64>* %A) nounwind {
+;CHECK: vqmovnu64:
+;CHECK: vqmovn.u64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vqmovnu.v2i32(<2 x i64> %tmp1)
+ ret <2 x i32> %tmp2
+}
+
+define <8 x i8> @vqmovuns16(<8 x i16>* %A) nounwind {
+;CHECK: vqmovuns16:
+;CHECK: vqmovun.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vqmovnsu.v8i8(<8 x i16> %tmp1)
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vqmovuns32(<4 x i32>* %A) nounwind {
+;CHECK: vqmovuns32:
+;CHECK: vqmovun.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vqmovnsu.v4i16(<4 x i32> %tmp1)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vqmovuns64(<2 x i64>* %A) nounwind {
+;CHECK: vqmovuns64:
+;CHECK: vqmovun.s64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vqmovnsu.v2i32(<2 x i64> %tmp1)
+ ret <2 x i32> %tmp2
+}
+
+declare <8 x i8> @llvm.arm.neon.vqmovns.v8i8(<8 x i16>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vqmovns.v4i16(<4 x i32>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vqmovns.v2i32(<2 x i64>) nounwind readnone
+
+declare <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vqmovnu.v4i16(<4 x i32>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vqmovnu.v2i32(<2 x i64>) nounwind readnone
+
+declare <8 x i8> @llvm.arm.neon.vqmovnsu.v8i8(<8 x i16>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vqmovnsu.v4i16(<4 x i32>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vqmovnsu.v2i32(<2 x i64>) nounwind readnone
Removed: llvm/trunk/test/CodeGen/ARM/vmovl.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vmovl.ll?rev=83666&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vmovl.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vmovl.ll (removed)
@@ -1,57 +0,0 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
-
-define <8 x i16> @vmovls8(<8 x i8>* %A) nounwind {
-;CHECK: vmovls8:
-;CHECK: vmovl.s8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = call <8 x i16> @llvm.arm.neon.vmovls.v8i16(<8 x i8> %tmp1)
- ret <8 x i16> %tmp2
-}
-
-define <4 x i32> @vmovls16(<4 x i16>* %A) nounwind {
-;CHECK: vmovls16:
-;CHECK: vmovl.s16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = call <4 x i32> @llvm.arm.neon.vmovls.v4i32(<4 x i16> %tmp1)
- ret <4 x i32> %tmp2
-}
-
-define <2 x i64> @vmovls32(<2 x i32>* %A) nounwind {
-;CHECK: vmovls32:
-;CHECK: vmovl.s32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = call <2 x i64> @llvm.arm.neon.vmovls.v2i64(<2 x i32> %tmp1)
- ret <2 x i64> %tmp2
-}
-
-define <8 x i16> @vmovlu8(<8 x i8>* %A) nounwind {
-;CHECK: vmovlu8:
-;CHECK: vmovl.u8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = call <8 x i16> @llvm.arm.neon.vmovlu.v8i16(<8 x i8> %tmp1)
- ret <8 x i16> %tmp2
-}
-
-define <4 x i32> @vmovlu16(<4 x i16>* %A) nounwind {
-;CHECK: vmovlu16:
-;CHECK: vmovl.u16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = call <4 x i32> @llvm.arm.neon.vmovlu.v4i32(<4 x i16> %tmp1)
- ret <4 x i32> %tmp2
-}
-
-define <2 x i64> @vmovlu32(<2 x i32>* %A) nounwind {
-;CHECK: vmovlu32:
-;CHECK: vmovl.u32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = call <2 x i64> @llvm.arm.neon.vmovlu.v2i64(<2 x i32> %tmp1)
- ret <2 x i64> %tmp2
-}
-
-declare <8 x i16> @llvm.arm.neon.vmovls.v8i16(<8 x i8>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vmovls.v4i32(<4 x i16>) nounwind readnone
-declare <2 x i64> @llvm.arm.neon.vmovls.v2i64(<2 x i32>) nounwind readnone
-
-declare <8 x i16> @llvm.arm.neon.vmovlu.v8i16(<8 x i8>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vmovlu.v4i32(<4 x i16>) nounwind readnone
-declare <2 x i64> @llvm.arm.neon.vmovlu.v2i64(<2 x i32>) nounwind readnone
Removed: llvm/trunk/test/CodeGen/ARM/vmovn.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vmovn.ll?rev=83666&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vmovn.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vmovn.ll (removed)
@@ -1,29 +0,0 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
-
-define <8 x i8> @vmovni16(<8 x i16>* %A) nounwind {
-;CHECK: vmovni16:
-;CHECK: vmovn.i16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = call <8 x i8> @llvm.arm.neon.vmovn.v8i8(<8 x i16> %tmp1)
- ret <8 x i8> %tmp2
-}
-
-define <4 x i16> @vmovni32(<4 x i32>* %A) nounwind {
-;CHECK: vmovni32:
-;CHECK: vmovn.i32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = call <4 x i16> @llvm.arm.neon.vmovn.v4i16(<4 x i32> %tmp1)
- ret <4 x i16> %tmp2
-}
-
-define <2 x i32> @vmovni64(<2 x i64>* %A) nounwind {
-;CHECK: vmovni64:
-;CHECK: vmovn.i64
- %tmp1 = load <2 x i64>* %A
- %tmp2 = call <2 x i32> @llvm.arm.neon.vmovn.v2i32(<2 x i64> %tmp1)
- ret <2 x i32> %tmp2
-}
-
-declare <8 x i8> @llvm.arm.neon.vmovn.v8i8(<8 x i16>) nounwind readnone
-declare <4 x i16> @llvm.arm.neon.vmovn.v4i16(<4 x i32>) nounwind readnone
-declare <2 x i32> @llvm.arm.neon.vmovn.v2i32(<2 x i64>) nounwind readnone
Modified: llvm/trunk/test/CodeGen/ARM/vmul.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vmul.ll?rev=83667&r1=83666&r2=83667&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vmul.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vmul.ll Fri Oct 9 15:20:54 2009
@@ -92,3 +92,166 @@
declare <8 x i8> @llvm.arm.neon.vmulp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <16 x i8> @llvm.arm.neon.vmulp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+
+define arm_aapcs_vfpcc <2 x float> @test_vmul_lanef32(<2 x float> %arg0_float32x2_t, <2 x float> %arg1_float32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vmul_lanef32:
+; CHECK: vmul.f32 d0, d0, d1[0]
+ %0 = shufflevector <2 x float> %arg1_float32x2_t, <2 x float> undef, <2 x i32> zeroinitializer ; <<2 x float>> [#uses=1]
+ %1 = fmul <2 x float> %0, %arg0_float32x2_t ; <<2 x float>> [#uses=1]
+ ret <2 x float> %1
+}
+
+define arm_aapcs_vfpcc <4 x i16> @test_vmul_lanes16(<4 x i16> %arg0_int16x4_t, <4 x i16> %arg1_int16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vmul_lanes16:
+; CHECK: vmul.i16 d0, d0, d1[1]
+ %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses$
+ %1 = mul <4 x i16> %0, %arg0_int16x4_t ; <<4 x i16>> [#uses=1]
+ ret <4 x i16> %1
+}
+
+define arm_aapcs_vfpcc <2 x i32> @test_vmul_lanes32(<2 x i32> %arg0_int32x2_t, <2 x i32> %arg1_int32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vmul_lanes32:
+; CHECK: vmul.i32 d0, d0, d1[1]
+ %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
+ %1 = mul <2 x i32> %0, %arg0_int32x2_t ; <<2 x i32>> [#uses=1]
+ ret <2 x i32> %1
+}
+
+define arm_aapcs_vfpcc <4 x float> @test_vmulQ_lanef32(<4 x float> %arg0_float32x4_t, <2 x float> %arg1_float32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vmulQ_lanef32:
+; CHECK: vmul.f32 q0, q0, d2[1]
+ %0 = shufflevector <2 x float> %arg1_float32x2_t, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x float>$
+ %1 = fmul <4 x float> %0, %arg0_float32x4_t ; <<4 x float>> [#uses=1]
+ ret <4 x float> %1
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vmulQ_lanes16(<8 x i16> %arg0_int16x8_t, <4 x i16> %arg1_int16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vmulQ_lanes16:
+; CHECK: vmul.i16 q0, q0, d2[1]
+ %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %1 = mul <8 x i16> %0, %arg0_int16x8_t ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %1
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vmulQ_lanes32(<4 x i32> %arg0_int32x4_t, <2 x i32> %arg1_int32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vmulQ_lanes32:
+; CHECK: vmul.i32 q0, q0, d2[1]
+ %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i32>> [#uses$
+ %1 = mul <4 x i32> %0, %arg0_int32x4_t ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %1
+}
+
+define <8 x i16> @vmulls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vmulls8:
+;CHECK: vmull.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vmulls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vmulls16:
+;CHECK: vmull.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vmulls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vmulls32:
+;CHECK: vmull.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+define <8 x i16> @vmullu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vmullu8:
+;CHECK: vmull.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vmullu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vmullu16:
+;CHECK: vmull.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vmullu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vmullu32:
+;CHECK: vmull.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+define <8 x i16> @vmullp8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vmullp8:
+;CHECK: vmull.p8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vmullp.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vmull_lanes16(<4 x i16> %arg0_int16x4_t, <4 x i16> %arg1_int16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vmull_lanes16
+; CHECK: vmull.s16 q0, d0, d1[1]
+ %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
+ %1 = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %arg0_int16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %1
+}
+
+define arm_aapcs_vfpcc <2 x i64> @test_vmull_lanes32(<2 x i32> %arg0_int32x2_t, <2 x i32> %arg1_int32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vmull_lanes32
+; CHECK: vmull.s32 q0, d0, d1[1]
+ %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
+ %1 = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %arg0_int32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %1
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vmull_laneu16(<4 x i16> %arg0_uint16x4_t, <4 x i16> %arg1_uint16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vmull_laneu16
+; CHECK: vmull.u16 q0, d0, d1[1]
+ %0 = shufflevector <4 x i16> %arg1_uint16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
+ %1 = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %arg0_uint16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %1
+}
+
+define arm_aapcs_vfpcc <2 x i64> @test_vmull_laneu32(<2 x i32> %arg0_uint32x2_t, <2 x i32> %arg1_uint32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vmull_laneu32
+; CHECK: vmull.u32 q0, d0, d1[1]
+ %0 = shufflevector <2 x i32> %arg1_uint32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
+ %1 = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %arg0_uint32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %1
+}
+
+declare <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vmullp.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
Removed: llvm/trunk/test/CodeGen/ARM/vmul_lane.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vmul_lane.ll?rev=83666&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vmul_lane.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vmul_lane.ll (removed)
@@ -1,57 +0,0 @@
-; RUN: llc -mattr=+neon < %s | FileCheck %s
-target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32"
-target triple = "thumbv7-elf"
-
-define arm_aapcs_vfpcc <2 x float> @test_vmul_lanef32(<2 x float> %arg0_float32x2_t, <2 x float> %arg1_float32x2_t) nounwind readnone {
-entry:
-; CHECK: test_vmul_lanef32:
-; CHECK: vmul.f32 d0, d0, d1[0]
- %0 = shufflevector <2 x float> %arg1_float32x2_t, <2 x float> undef, <2 x i32> zeroinitializer ; <<2 x float>> [#uses=1]
- %1 = fmul <2 x float> %0, %arg0_float32x2_t ; <<2 x float>> [#uses=1]
- ret <2 x float> %1
-}
-
-define arm_aapcs_vfpcc <4 x i16> @test_vmul_lanes16(<4 x i16> %arg0_int16x4_t, <4 x i16> %arg1_int16x4_t) nounwind readnone {
-entry:
-; CHECK: test_vmul_lanes16:
-; CHECK: vmul.i16 d0, d0, d1[1]
- %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses$
- %1 = mul <4 x i16> %0, %arg0_int16x4_t ; <<4 x i16>> [#uses=1]
- ret <4 x i16> %1
-}
-
-define arm_aapcs_vfpcc <2 x i32> @test_vmul_lanes32(<2 x i32> %arg0_int32x2_t, <2 x i32> %arg1_int32x2_t) nounwind readnone {
-entry:
-; CHECK: test_vmul_lanes32:
-; CHECK: vmul.i32 d0, d0, d1[1]
- %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
- %1 = mul <2 x i32> %0, %arg0_int32x2_t ; <<2 x i32>> [#uses=1]
- ret <2 x i32> %1
-}
-
-define arm_aapcs_vfpcc <4 x float> @test_vmulQ_lanef32(<4 x float> %arg0_float32x4_t, <2 x float> %arg1_float32x2_t) nounwind readnone {
-entry:
-; CHECK: test_vmulQ_lanef32:
-; CHECK: vmul.f32 q0, q0, d2[1]
- %0 = shufflevector <2 x float> %arg1_float32x2_t, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x float>$
- %1 = fmul <4 x float> %0, %arg0_float32x4_t ; <<4 x float>> [#uses=1]
- ret <4 x float> %1
-}
-
-define arm_aapcs_vfpcc <8 x i16> @test_vmulQ_lanes16(<8 x i16> %arg0_int16x8_t, <4 x i16> %arg1_int16x4_t) nounwind readnone {
-entry:
-; CHECK: test_vmulQ_lanes16:
-; CHECK: vmul.i16 q0, q0, d2[1]
- %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
- %1 = mul <8 x i16> %0, %arg0_int16x8_t ; <<8 x i16>> [#uses=1]
- ret <8 x i16> %1
-}
-
-define arm_aapcs_vfpcc <4 x i32> @test_vmulQ_lanes32(<4 x i32> %arg0_int32x4_t, <2 x i32> %arg1_int32x2_t) nounwind readnone {
-entry:
-; CHECK: test_vmulQ_lanes32:
-; CHECK: vmul.i32 q0, q0, d2[1]
- %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i32>> [#uses$
- %1 = mul <4 x i32> %0, %arg0_int32x4_t ; <<4 x i32>> [#uses=1]
- ret <4 x i32> %1
-}
Removed: llvm/trunk/test/CodeGen/ARM/vmull.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vmull.ll?rev=83666&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vmull.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vmull.ll (removed)
@@ -1,74 +0,0 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
-
-define <8 x i16> @vmulls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vmulls8:
-;CHECK: vmull.s8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
- ret <8 x i16> %tmp3
-}
-
-define <4 x i32> @vmulls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vmulls16:
-;CHECK: vmull.s16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
- ret <4 x i32> %tmp3
-}
-
-define <2 x i64> @vmulls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vmulls32:
-;CHECK: vmull.s32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
- ret <2 x i64> %tmp3
-}
-
-define <8 x i16> @vmullu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vmullu8:
-;CHECK: vmull.u8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
- ret <8 x i16> %tmp3
-}
-
-define <4 x i32> @vmullu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vmullu16:
-;CHECK: vmull.u16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
- ret <4 x i32> %tmp3
-}
-
-define <2 x i64> @vmullu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vmullu32:
-;CHECK: vmull.u32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
- ret <2 x i64> %tmp3
-}
-
-define <8 x i16> @vmullp8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vmullp8:
-;CHECK: vmull.p8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = call <8 x i16> @llvm.arm.neon.vmullp.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
- ret <8 x i16> %tmp3
-}
-
-declare <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
-
-declare <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
-
-declare <8 x i16> @llvm.arm.neon.vmullp.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
Removed: llvm/trunk/test/CodeGen/ARM/vmull_lane.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vmull_lane.ll?rev=83666&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vmull_lane.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vmull_lane.ll (removed)
@@ -1,47 +0,0 @@
-; RUN: llc -mattr=+neon < %s | FileCheck %s
-target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32"
-target triple = "thumbv7-elf"
-
-define arm_aapcs_vfpcc <4 x i32> @test_vmull_lanes16(<4 x i16> %arg0_int16x4_t, <4 x i16> %arg1_int16x4_t) nounwind readnone {
-entry:
-; CHECK: test_vmull_lanes16
-; CHECK: vmull.s16 q0, d0, d1[1]
- %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
- %1 = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %arg0_int16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
- ret <4 x i32> %1
-}
-
-declare <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
-
-define arm_aapcs_vfpcc <2 x i64> @test_vmull_lanes32(<2 x i32> %arg0_int32x2_t, <2 x i32> %arg1_int32x2_t) nounwind readnone {
-entry:
-; CHECK: test_vmull_lanes32
-; CHECK: vmull.s32 q0, d0, d1[1]
- %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
- %1 = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %arg0_int32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
- ret <2 x i64> %1
-}
-
-declare <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
-
-define arm_aapcs_vfpcc <4 x i32> @test_vmull_laneu16(<4 x i16> %arg0_uint16x4_t, <4 x i16> %arg1_uint16x4_t) nounwind readnone {
-entry:
-; CHECK: test_vmull_laneu16
-; CHECK: vmull.u16 q0, d0, d1[1]
- %0 = shufflevector <4 x i16> %arg1_uint16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
- %1 = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %arg0_uint16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
- ret <4 x i32> %1
-}
-
-declare <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
-
-define arm_aapcs_vfpcc <2 x i64> @test_vmull_laneu32(<2 x i32> %arg0_uint32x2_t, <2 x i32> %arg1_uint32x2_t) nounwind readnone {
-entry:
-; CHECK: test_vmull_laneu32
-; CHECK: vmull.u32 q0, d0, d1[1]
- %0 = shufflevector <2 x i32> %arg1_uint32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
- %1 = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %arg0_uint32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
- ret <2 x i64> %1
-}
-
-declare <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
Removed: llvm/trunk/test/CodeGen/ARM/vmvn.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vmvn.ll?rev=83666&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vmvn.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vmvn.ll (removed)
@@ -1,65 +0,0 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
-
-define <8 x i8> @v_mvni8(<8 x i8>* %A) nounwind {
-;CHECK: v_mvni8:
-;CHECK: vmvn
- %tmp1 = load <8 x i8>* %A
- %tmp2 = xor <8 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
- ret <8 x i8> %tmp2
-}
-
-define <4 x i16> @v_mvni16(<4 x i16>* %A) nounwind {
-;CHECK: v_mvni16:
-;CHECK: vmvn
- %tmp1 = load <4 x i16>* %A
- %tmp2 = xor <4 x i16> %tmp1, < i16 -1, i16 -1, i16 -1, i16 -1 >
- ret <4 x i16> %tmp2
-}
-
-define <2 x i32> @v_mvni32(<2 x i32>* %A) nounwind {
-;CHECK: v_mvni32:
-;CHECK: vmvn
- %tmp1 = load <2 x i32>* %A
- %tmp2 = xor <2 x i32> %tmp1, < i32 -1, i32 -1 >
- ret <2 x i32> %tmp2
-}
-
-define <1 x i64> @v_mvni64(<1 x i64>* %A) nounwind {
-;CHECK: v_mvni64:
-;CHECK: vmvn
- %tmp1 = load <1 x i64>* %A
- %tmp2 = xor <1 x i64> %tmp1, < i64 -1 >
- ret <1 x i64> %tmp2
-}
-
-define <16 x i8> @v_mvnQi8(<16 x i8>* %A) nounwind {
-;CHECK: v_mvnQi8:
-;CHECK: vmvn
- %tmp1 = load <16 x i8>* %A
- %tmp2 = xor <16 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
- ret <16 x i8> %tmp2
-}
-
-define <8 x i16> @v_mvnQi16(<8 x i16>* %A) nounwind {
-;CHECK: v_mvnQi16:
-;CHECK: vmvn
- %tmp1 = load <8 x i16>* %A
- %tmp2 = xor <8 x i16> %tmp1, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 >
- ret <8 x i16> %tmp2
-}
-
-define <4 x i32> @v_mvnQi32(<4 x i32>* %A) nounwind {
-;CHECK: v_mvnQi32:
-;CHECK: vmvn
- %tmp1 = load <4 x i32>* %A
- %tmp2 = xor <4 x i32> %tmp1, < i32 -1, i32 -1, i32 -1, i32 -1 >
- ret <4 x i32> %tmp2
-}
-
-define <2 x i64> @v_mvnQi64(<2 x i64>* %A) nounwind {
-;CHECK: v_mvnQi64:
-;CHECK: vmvn
- %tmp1 = load <2 x i64>* %A
- %tmp2 = xor <2 x i64> %tmp1, < i64 -1, i64 -1 >
- ret <2 x i64> %tmp2
-}
Modified: llvm/trunk/test/CodeGen/ARM/vneg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vneg.ll?rev=83667&r1=83666&r2=83667&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vneg.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vneg.ll Fri Oct 9 15:20:54 2009
@@ -63,3 +63,59 @@
%tmp2 = sub <4 x float> < float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00 >, %tmp1
ret <4 x float> %tmp2
}
+
+define <8 x i8> @vqnegs8(<8 x i8>* %A) nounwind {
+;CHECK: vqnegs8:
+;CHECK: vqneg.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vqneg.v8i8(<8 x i8> %tmp1)
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vqnegs16(<4 x i16>* %A) nounwind {
+;CHECK: vqnegs16:
+;CHECK: vqneg.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vqneg.v4i16(<4 x i16> %tmp1)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vqnegs32(<2 x i32>* %A) nounwind {
+;CHECK: vqnegs32:
+;CHECK: vqneg.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vqneg.v2i32(<2 x i32> %tmp1)
+ ret <2 x i32> %tmp2
+}
+
+define <16 x i8> @vqnegQs8(<16 x i8>* %A) nounwind {
+;CHECK: vqnegQs8:
+;CHECK: vqneg.s8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = call <16 x i8> @llvm.arm.neon.vqneg.v16i8(<16 x i8> %tmp1)
+ ret <16 x i8> %tmp2
+}
+
+define <8 x i16> @vqnegQs16(<8 x i16>* %A) nounwind {
+;CHECK: vqnegQs16:
+;CHECK: vqneg.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <8 x i16> @llvm.arm.neon.vqneg.v8i16(<8 x i16> %tmp1)
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vqnegQs32(<4 x i32>* %A) nounwind {
+;CHECK: vqnegQs32:
+;CHECK: vqneg.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i32> @llvm.arm.neon.vqneg.v4i32(<4 x i32> %tmp1)
+ ret <4 x i32> %tmp2
+}
+
+declare <8 x i8> @llvm.arm.neon.vqneg.v8i8(<8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vqneg.v4i16(<4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vqneg.v2i32(<2 x i32>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vqneg.v16i8(<16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vqneg.v8i16(<8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vqneg.v4i32(<4 x i32>) nounwind readnone
Removed: llvm/trunk/test/CodeGen/ARM/vorn.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vorn.ll?rev=83666&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vorn.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vorn.ll (removed)
@@ -1,81 +0,0 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
-
-define <8 x i8> @v_orni8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: v_orni8:
-;CHECK: vorn
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = xor <8 x i8> %tmp2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
- %tmp4 = or <8 x i8> %tmp1, %tmp3
- ret <8 x i8> %tmp4
-}
-
-define <4 x i16> @v_orni16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: v_orni16:
-;CHECK: vorn
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = xor <4 x i16> %tmp2, < i16 -1, i16 -1, i16 -1, i16 -1 >
- %tmp4 = or <4 x i16> %tmp1, %tmp3
- ret <4 x i16> %tmp4
-}
-
-define <2 x i32> @v_orni32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: v_orni32:
-;CHECK: vorn
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = xor <2 x i32> %tmp2, < i32 -1, i32 -1 >
- %tmp4 = or <2 x i32> %tmp1, %tmp3
- ret <2 x i32> %tmp4
-}
-
-define <1 x i64> @v_orni64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
-;CHECK: v_orni64:
-;CHECK: vorn
- %tmp1 = load <1 x i64>* %A
- %tmp2 = load <1 x i64>* %B
- %tmp3 = xor <1 x i64> %tmp2, < i64 -1 >
- %tmp4 = or <1 x i64> %tmp1, %tmp3
- ret <1 x i64> %tmp4
-}
-
-define <16 x i8> @v_ornQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK: v_ornQi8:
-;CHECK: vorn
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
- %tmp3 = xor <16 x i8> %tmp2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
- %tmp4 = or <16 x i8> %tmp1, %tmp3
- ret <16 x i8> %tmp4
-}
-
-define <8 x i16> @v_ornQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: v_ornQi16:
-;CHECK: vorn
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = xor <8 x i16> %tmp2, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 >
- %tmp4 = or <8 x i16> %tmp1, %tmp3
- ret <8 x i16> %tmp4
-}
-
-define <4 x i32> @v_ornQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: v_ornQi32:
-;CHECK: vorn
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %tmp3 = xor <4 x i32> %tmp2, < i32 -1, i32 -1, i32 -1, i32 -1 >
- %tmp4 = or <4 x i32> %tmp1, %tmp3
- ret <4 x i32> %tmp4
-}
-
-define <2 x i64> @v_ornQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK: v_ornQi64:
-;CHECK: vorn
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
- %tmp3 = xor <2 x i64> %tmp2, < i64 -1, i64 -1 >
- %tmp4 = or <2 x i64> %tmp1, %tmp3
- ret <2 x i64> %tmp4
-}
Removed: llvm/trunk/test/CodeGen/ARM/vorr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vorr.ll?rev=83666&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vorr.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vorr.ll (removed)
@@ -1,73 +0,0 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
-
-define <8 x i8> @v_orri8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: v_orri8:
-;CHECK: vorr
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = or <8 x i8> %tmp1, %tmp2
- ret <8 x i8> %tmp3
-}
-
-define <4 x i16> @v_orri16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: v_orri16:
-;CHECK: vorr
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = or <4 x i16> %tmp1, %tmp2
- ret <4 x i16> %tmp3
-}
-
-define <2 x i32> @v_orri32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: v_orri32:
-;CHECK: vorr
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = or <2 x i32> %tmp1, %tmp2
- ret <2 x i32> %tmp3
-}
-
-define <1 x i64> @v_orri64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
-;CHECK: v_orri64:
-;CHECK: vorr
- %tmp1 = load <1 x i64>* %A
- %tmp2 = load <1 x i64>* %B
- %tmp3 = or <1 x i64> %tmp1, %tmp2
- ret <1 x i64> %tmp3
-}
-
-define <16 x i8> @v_orrQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK: v_orrQi8:
-;CHECK: vorr
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
- %tmp3 = or <16 x i8> %tmp1, %tmp2
- ret <16 x i8> %tmp3
-}
-
-define <8 x i16> @v_orrQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: v_orrQi16:
-;CHECK: vorr
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = or <8 x i16> %tmp1, %tmp2
- ret <8 x i16> %tmp3
-}
-
-define <4 x i32> @v_orrQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: v_orrQi32:
-;CHECK: vorr
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %tmp3 = or <4 x i32> %tmp1, %tmp2
- ret <4 x i32> %tmp3
-}
-
-define <2 x i64> @v_orrQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK: v_orrQi64:
-;CHECK: vorr
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
- %tmp3 = or <2 x i64> %tmp1, %tmp2
- ret <2 x i64> %tmp3
-}
Modified: llvm/trunk/test/CodeGen/ARM/vpadd.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vpadd.ll?rev=83667&r1=83666&r2=83667&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vpadd.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vpadd.ll Fri Oct 9 15:20:54 2009
@@ -41,3 +41,115 @@
declare <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float>, <2 x float>) nounwind readnone
+
+define <4 x i16> @vpaddls8(<8 x i8>* %A) nounwind {
+;CHECK: vpaddls8:
+;CHECK: vpaddl.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vpaddls.v4i16.v8i8(<8 x i8> %tmp1)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vpaddls16(<4 x i16>* %A) nounwind {
+;CHECK: vpaddls16:
+;CHECK: vpaddl.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vpaddls.v2i32.v4i16(<4 x i16> %tmp1)
+ ret <2 x i32> %tmp2
+}
+
+define <1 x i64> @vpaddls32(<2 x i32>* %A) nounwind {
+;CHECK: vpaddls32:
+;CHECK: vpaddl.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = call <1 x i64> @llvm.arm.neon.vpaddls.v1i64.v2i32(<2 x i32> %tmp1)
+ ret <1 x i64> %tmp2
+}
+
+define <4 x i16> @vpaddlu8(<8 x i8>* %A) nounwind {
+;CHECK: vpaddlu8:
+;CHECK: vpaddl.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vpaddlu.v4i16.v8i8(<8 x i8> %tmp1)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vpaddlu16(<4 x i16>* %A) nounwind {
+;CHECK: vpaddlu16:
+;CHECK: vpaddl.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vpaddlu.v2i32.v4i16(<4 x i16> %tmp1)
+ ret <2 x i32> %tmp2
+}
+
+define <1 x i64> @vpaddlu32(<2 x i32>* %A) nounwind {
+;CHECK: vpaddlu32:
+;CHECK: vpaddl.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = call <1 x i64> @llvm.arm.neon.vpaddlu.v1i64.v2i32(<2 x i32> %tmp1)
+ ret <1 x i64> %tmp2
+}
+
+define <8 x i16> @vpaddlQs8(<16 x i8>* %A) nounwind {
+;CHECK: vpaddlQs8:
+;CHECK: vpaddl.s8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = call <8 x i16> @llvm.arm.neon.vpaddls.v8i16.v16i8(<16 x i8> %tmp1)
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vpaddlQs16(<8 x i16>* %A) nounwind {
+;CHECK: vpaddlQs16:
+;CHECK: vpaddl.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <4 x i32> @llvm.arm.neon.vpaddls.v4i32.v8i16(<8 x i16> %tmp1)
+ ret <4 x i32> %tmp2
+}
+
+define <2 x i64> @vpaddlQs32(<4 x i32>* %A) nounwind {
+;CHECK: vpaddlQs32:
+;CHECK: vpaddl.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <2 x i64> @llvm.arm.neon.vpaddls.v2i64.v4i32(<4 x i32> %tmp1)
+ ret <2 x i64> %tmp2
+}
+
+define <8 x i16> @vpaddlQu8(<16 x i8>* %A) nounwind {
+;CHECK: vpaddlQu8:
+;CHECK: vpaddl.u8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = call <8 x i16> @llvm.arm.neon.vpaddlu.v8i16.v16i8(<16 x i8> %tmp1)
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vpaddlQu16(<8 x i16>* %A) nounwind {
+;CHECK: vpaddlQu16:
+;CHECK: vpaddl.u16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <4 x i32> @llvm.arm.neon.vpaddlu.v4i32.v8i16(<8 x i16> %tmp1)
+ ret <4 x i32> %tmp2
+}
+
+define <2 x i64> @vpaddlQu32(<4 x i32>* %A) nounwind {
+;CHECK: vpaddlQu32:
+;CHECK: vpaddl.u32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <2 x i64> @llvm.arm.neon.vpaddlu.v2i64.v4i32(<4 x i32> %tmp1)
+ ret <2 x i64> %tmp2
+}
+
+declare <4 x i16> @llvm.arm.neon.vpaddls.v4i16.v8i8(<8 x i8>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vpaddls.v2i32.v4i16(<4 x i16>) nounwind readnone
+declare <1 x i64> @llvm.arm.neon.vpaddls.v1i64.v2i32(<2 x i32>) nounwind readnone
+
+declare <4 x i16> @llvm.arm.neon.vpaddlu.v4i16.v8i8(<8 x i8>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vpaddlu.v2i32.v4i16(<4 x i16>) nounwind readnone
+declare <1 x i64> @llvm.arm.neon.vpaddlu.v1i64.v2i32(<2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vpaddls.v8i16.v16i8(<16 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vpaddls.v4i32.v8i16(<8 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vpaddls.v2i64.v4i32(<4 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vpaddlu.v8i16.v16i8(<16 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vpaddlu.v4i32.v8i16(<8 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vpaddlu.v2i64.v4i32(<4 x i32>) nounwind readnone
Removed: llvm/trunk/test/CodeGen/ARM/vpaddl.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vpaddl.ll?rev=83666&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vpaddl.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vpaddl.ll (removed)
@@ -1,113 +0,0 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
-
-define <4 x i16> @vpaddls8(<8 x i8>* %A) nounwind {
-;CHECK: vpaddls8:
-;CHECK: vpaddl.s8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = call <4 x i16> @llvm.arm.neon.vpaddls.v4i16.v8i8(<8 x i8> %tmp1)
- ret <4 x i16> %tmp2
-}
-
-define <2 x i32> @vpaddls16(<4 x i16>* %A) nounwind {
-;CHECK: vpaddls16:
-;CHECK: vpaddl.s16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = call <2 x i32> @llvm.arm.neon.vpaddls.v2i32.v4i16(<4 x i16> %tmp1)
- ret <2 x i32> %tmp2
-}
-
-define <1 x i64> @vpaddls32(<2 x i32>* %A) nounwind {
-;CHECK: vpaddls32:
-;CHECK: vpaddl.s32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = call <1 x i64> @llvm.arm.neon.vpaddls.v1i64.v2i32(<2 x i32> %tmp1)
- ret <1 x i64> %tmp2
-}
-
-define <4 x i16> @vpaddlu8(<8 x i8>* %A) nounwind {
-;CHECK: vpaddlu8:
-;CHECK: vpaddl.u8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = call <4 x i16> @llvm.arm.neon.vpaddlu.v4i16.v8i8(<8 x i8> %tmp1)
- ret <4 x i16> %tmp2
-}
-
-define <2 x i32> @vpaddlu16(<4 x i16>* %A) nounwind {
-;CHECK: vpaddlu16:
-;CHECK: vpaddl.u16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = call <2 x i32> @llvm.arm.neon.vpaddlu.v2i32.v4i16(<4 x i16> %tmp1)
- ret <2 x i32> %tmp2
-}
-
-define <1 x i64> @vpaddlu32(<2 x i32>* %A) nounwind {
-;CHECK: vpaddlu32:
-;CHECK: vpaddl.u32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = call <1 x i64> @llvm.arm.neon.vpaddlu.v1i64.v2i32(<2 x i32> %tmp1)
- ret <1 x i64> %tmp2
-}
-
-define <8 x i16> @vpaddlQs8(<16 x i8>* %A) nounwind {
-;CHECK: vpaddlQs8:
-;CHECK: vpaddl.s8
- %tmp1 = load <16 x i8>* %A
- %tmp2 = call <8 x i16> @llvm.arm.neon.vpaddls.v8i16.v16i8(<16 x i8> %tmp1)
- ret <8 x i16> %tmp2
-}
-
-define <4 x i32> @vpaddlQs16(<8 x i16>* %A) nounwind {
-;CHECK: vpaddlQs16:
-;CHECK: vpaddl.s16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = call <4 x i32> @llvm.arm.neon.vpaddls.v4i32.v8i16(<8 x i16> %tmp1)
- ret <4 x i32> %tmp2
-}
-
-define <2 x i64> @vpaddlQs32(<4 x i32>* %A) nounwind {
-;CHECK: vpaddlQs32:
-;CHECK: vpaddl.s32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = call <2 x i64> @llvm.arm.neon.vpaddls.v2i64.v4i32(<4 x i32> %tmp1)
- ret <2 x i64> %tmp2
-}
-
-define <8 x i16> @vpaddlQu8(<16 x i8>* %A) nounwind {
-;CHECK: vpaddlQu8:
-;CHECK: vpaddl.u8
- %tmp1 = load <16 x i8>* %A
- %tmp2 = call <8 x i16> @llvm.arm.neon.vpaddlu.v8i16.v16i8(<16 x i8> %tmp1)
- ret <8 x i16> %tmp2
-}
-
-define <4 x i32> @vpaddlQu16(<8 x i16>* %A) nounwind {
-;CHECK: vpaddlQu16:
-;CHECK: vpaddl.u16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = call <4 x i32> @llvm.arm.neon.vpaddlu.v4i32.v8i16(<8 x i16> %tmp1)
- ret <4 x i32> %tmp2
-}
-
-define <2 x i64> @vpaddlQu32(<4 x i32>* %A) nounwind {
-;CHECK: vpaddlQu32:
-;CHECK: vpaddl.u32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = call <2 x i64> @llvm.arm.neon.vpaddlu.v2i64.v4i32(<4 x i32> %tmp1)
- ret <2 x i64> %tmp2
-}
-
-declare <4 x i16> @llvm.arm.neon.vpaddls.v4i16.v8i8(<8 x i8>) nounwind readnone
-declare <2 x i32> @llvm.arm.neon.vpaddls.v2i32.v4i16(<4 x i16>) nounwind readnone
-declare <1 x i64> @llvm.arm.neon.vpaddls.v1i64.v2i32(<2 x i32>) nounwind readnone
-
-declare <4 x i16> @llvm.arm.neon.vpaddlu.v4i16.v8i8(<8 x i8>) nounwind readnone
-declare <2 x i32> @llvm.arm.neon.vpaddlu.v2i32.v4i16(<4 x i16>) nounwind readnone
-declare <1 x i64> @llvm.arm.neon.vpaddlu.v1i64.v2i32(<2 x i32>) nounwind readnone
-
-declare <8 x i16> @llvm.arm.neon.vpaddls.v8i16.v16i8(<16 x i8>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vpaddls.v4i32.v8i16(<8 x i16>) nounwind readnone
-declare <2 x i64> @llvm.arm.neon.vpaddls.v2i64.v4i32(<4 x i32>) nounwind readnone
-
-declare <8 x i16> @llvm.arm.neon.vpaddlu.v8i16.v16i8(<16 x i8>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vpaddlu.v4i32.v8i16(<8 x i16>) nounwind readnone
-declare <2 x i64> @llvm.arm.neon.vpaddlu.v2i64.v4i32(<4 x i32>) nounwind readnone
Removed: llvm/trunk/test/CodeGen/ARM/vpmax.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vpmax.ll?rev=83666&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vpmax.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vpmax.ll (removed)
@@ -1,74 +0,0 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
-
-define <8 x i8> @vpmaxs8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vpmaxs8:
-;CHECK: vpmax.s8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = call <8 x i8> @llvm.arm.neon.vpmaxs.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
- ret <8 x i8> %tmp3
-}
-
-define <4 x i16> @vpmaxs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vpmaxs16:
-;CHECK: vpmax.s16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i16> @llvm.arm.neon.vpmaxs.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
- ret <4 x i16> %tmp3
-}
-
-define <2 x i32> @vpmaxs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vpmaxs32:
-;CHECK: vpmax.s32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i32> @llvm.arm.neon.vpmaxs.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
- ret <2 x i32> %tmp3
-}
-
-define <8 x i8> @vpmaxu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vpmaxu8:
-;CHECK: vpmax.u8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = call <8 x i8> @llvm.arm.neon.vpmaxu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
- ret <8 x i8> %tmp3
-}
-
-define <4 x i16> @vpmaxu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vpmaxu16:
-;CHECK: vpmax.u16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i16> @llvm.arm.neon.vpmaxu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
- ret <4 x i16> %tmp3
-}
-
-define <2 x i32> @vpmaxu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vpmaxu32:
-;CHECK: vpmax.u32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i32> @llvm.arm.neon.vpmaxu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
- ret <2 x i32> %tmp3
-}
-
-define <2 x float> @vpmaxf32(<2 x float>* %A, <2 x float>* %B) nounwind {
-;CHECK: vpmaxf32:
-;CHECK: vpmax.f32
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
- %tmp3 = call <2 x float> @llvm.arm.neon.vpmaxs.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
- ret <2 x float> %tmp3
-}
-
-declare <8 x i8> @llvm.arm.neon.vpmaxs.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i16> @llvm.arm.neon.vpmaxs.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm.neon.vpmaxs.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-
-declare <8 x i8> @llvm.arm.neon.vpmaxu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i16> @llvm.arm.neon.vpmaxu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm.neon.vpmaxu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-
-declare <2 x float> @llvm.arm.neon.vpmaxs.v2f32(<2 x float>, <2 x float>) nounwind readnone
Removed: llvm/trunk/test/CodeGen/ARM/vpmin.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vpmin.ll?rev=83666&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vpmin.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vpmin.ll (removed)
@@ -1,74 +0,0 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
-
-define <8 x i8> @vpmins8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vpmins8:
-;CHECK: vpmin.s8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = call <8 x i8> @llvm.arm.neon.vpmins.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
- ret <8 x i8> %tmp3
-}
-
-define <4 x i16> @vpmins16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vpmins16:
-;CHECK: vpmin.s16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i16> @llvm.arm.neon.vpmins.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
- ret <4 x i16> %tmp3
-}
-
-define <2 x i32> @vpmins32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vpmins32:
-;CHECK: vpmin.s32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i32> @llvm.arm.neon.vpmins.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
- ret <2 x i32> %tmp3
-}
-
-define <8 x i8> @vpminu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vpminu8:
-;CHECK: vpmin.u8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = call <8 x i8> @llvm.arm.neon.vpminu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
- ret <8 x i8> %tmp3
-}
-
-define <4 x i16> @vpminu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vpminu16:
-;CHECK: vpmin.u16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i16> @llvm.arm.neon.vpminu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
- ret <4 x i16> %tmp3
-}
-
-define <2 x i32> @vpminu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vpminu32:
-;CHECK: vpmin.u32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i32> @llvm.arm.neon.vpminu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
- ret <2 x i32> %tmp3
-}
-
-define <2 x float> @vpminf32(<2 x float>* %A, <2 x float>* %B) nounwind {
-;CHECK: vpminf32:
-;CHECK: vpmin.f32
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
- %tmp3 = call <2 x float> @llvm.arm.neon.vpmins.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
- ret <2 x float> %tmp3
-}
-
-declare <8 x i8> @llvm.arm.neon.vpmins.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i16> @llvm.arm.neon.vpmins.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm.neon.vpmins.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-
-declare <8 x i8> @llvm.arm.neon.vpminu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i16> @llvm.arm.neon.vpminu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm.neon.vpminu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-
-declare <2 x float> @llvm.arm.neon.vpmins.v2f32(<2 x float>, <2 x float>) nounwind readnone
Added: llvm/trunk/test/CodeGen/ARM/vpminmax.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vpminmax.ll?rev=83667&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vpminmax.ll (added)
+++ llvm/trunk/test/CodeGen/ARM/vpminmax.ll Fri Oct 9 15:20:54 2009
@@ -0,0 +1,147 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <8 x i8> @vpmins8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vpmins8:
+;CHECK: vpmin.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vpmins.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vpmins16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vpmins16:
+;CHECK: vpmin.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vpmins.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vpmins32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vpmins32:
+;CHECK: vpmin.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vpmins.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <8 x i8> @vpminu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vpminu8:
+;CHECK: vpmin.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vpminu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vpminu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vpminu16:
+;CHECK: vpmin.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vpminu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vpminu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vpminu32:
+;CHECK: vpmin.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vpminu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <2 x float> @vpminf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vpminf32:
+;CHECK: vpmin.f32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = call <2 x float> @llvm.arm.neon.vpmins.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+ ret <2 x float> %tmp3
+}
+
+declare <8 x i8> @llvm.arm.neon.vpmins.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vpmins.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vpmins.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i8> @llvm.arm.neon.vpminu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vpminu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vpminu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <2 x float> @llvm.arm.neon.vpmins.v2f32(<2 x float>, <2 x float>) nounwind readnone
+
+define <8 x i8> @vpmaxs8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vpmaxs8:
+;CHECK: vpmax.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vpmaxs.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vpmaxs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vpmaxs16:
+;CHECK: vpmax.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vpmaxs.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vpmaxs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vpmaxs32:
+;CHECK: vpmax.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vpmaxs.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <8 x i8> @vpmaxu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vpmaxu8:
+;CHECK: vpmax.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vpmaxu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vpmaxu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vpmaxu16:
+;CHECK: vpmax.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vpmaxu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vpmaxu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vpmaxu32:
+;CHECK: vpmax.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vpmaxu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <2 x float> @vpmaxf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vpmaxf32:
+;CHECK: vpmax.f32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = call <2 x float> @llvm.arm.neon.vpmaxs.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+ ret <2 x float> %tmp3
+}
+
+declare <8 x i8> @llvm.arm.neon.vpmaxs.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vpmaxs.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vpmaxs.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i8> @llvm.arm.neon.vpmaxu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vpmaxu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vpmaxu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <2 x float> @llvm.arm.neon.vpmaxs.v2f32(<2 x float>, <2 x float>) nounwind readnone
Removed: llvm/trunk/test/CodeGen/ARM/vqRdmulh_lane.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vqRdmulh_lane.ll?rev=83666&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vqRdmulh_lane.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vqRdmulh_lane.ll (removed)
@@ -1,47 +0,0 @@
-; RUN: llc -mattr=+neon < %s | FileCheck %s
-target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32"
-target triple = "thumbv7-elf"
-
-define arm_aapcs_vfpcc <8 x i16> @test_vqRdmulhQ_lanes16(<8 x i16> %arg0_int16x8_t, <4 x i16> %arg1_int16x4_t) nounwind readnone {
-entry:
-; CHECK: test_vqRdmulhQ_lanes16
-; CHECK: vqrdmulh.s16 q0, q0, d2[1]
- %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> ; <<8 x i16>> [#uses=1]
- %1 = tail call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %arg0_int16x8_t, <8 x i16> %0) ; <<8 x i16>> [#uses=1]
- ret <8 x i16> %1
-}
-
-declare <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
-
-define arm_aapcs_vfpcc <4 x i32> @test_vqRdmulhQ_lanes32(<4 x i32> %arg0_int32x4_t, <2 x i32> %arg1_int32x2_t) nounwind readnone {
-entry:
-; CHECK: test_vqRdmulhQ_lanes32
-; CHECK: vqrdmulh.s32 q0, q0, d2[1]
- %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i32>> [#uses=1]
- %1 = tail call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i32> %0) ; <<4 x i32>> [#uses=1]
- ret <4 x i32> %1
-}
-
-declare <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
-
-define arm_aapcs_vfpcc <4 x i16> @test_vqRdmulh_lanes16(<4 x i16> %arg0_int16x4_t, <4 x i16> %arg1_int16x4_t) nounwind readnone {
-entry:
-; CHECK: test_vqRdmulh_lanes16
-; CHECK: vqrdmulh.s16 d0, d0, d1[1]
- %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
- %1 = tail call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %arg0_int16x4_t, <4 x i16> %0) ; <<4 x i16>> [#uses=1]
- ret <4 x i16> %1
-}
-
-declare <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
-
-define arm_aapcs_vfpcc <2 x i32> @test_vqRdmulh_lanes32(<2 x i32> %arg0_int32x2_t, <2 x i32> %arg1_int32x2_t) nounwind readnone {
-entry:
-; CHECK: test_vqRdmulh_lanes32
-; CHECK: vqrdmulh.s32 d0, d0, d1[1]
- %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
- %1 = tail call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %arg0_int32x2_t, <2 x i32> %0) ; <<2 x i32>> [#uses=1]
- ret <2 x i32> %1
-}
-
-declare <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
Removed: llvm/trunk/test/CodeGen/ARM/vqabs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vqabs.ll?rev=83666&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vqabs.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vqabs.ll (removed)
@@ -1,57 +0,0 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
-
-define <8 x i8> @vqabss8(<8 x i8>* %A) nounwind {
-;CHECK: vqabss8:
-;CHECK: vqabs.s8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = call <8 x i8> @llvm.arm.neon.vqabs.v8i8(<8 x i8> %tmp1)
- ret <8 x i8> %tmp2
-}
-
-define <4 x i16> @vqabss16(<4 x i16>* %A) nounwind {
-;CHECK: vqabss16:
-;CHECK: vqabs.s16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = call <4 x i16> @llvm.arm.neon.vqabs.v4i16(<4 x i16> %tmp1)
- ret <4 x i16> %tmp2
-}
-
-define <2 x i32> @vqabss32(<2 x i32>* %A) nounwind {
-;CHECK: vqabss32:
-;CHECK: vqabs.s32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = call <2 x i32> @llvm.arm.neon.vqabs.v2i32(<2 x i32> %tmp1)
- ret <2 x i32> %tmp2
-}
-
-define <16 x i8> @vqabsQs8(<16 x i8>* %A) nounwind {
-;CHECK: vqabsQs8:
-;CHECK: vqabs.s8
- %tmp1 = load <16 x i8>* %A
- %tmp2 = call <16 x i8> @llvm.arm.neon.vqabs.v16i8(<16 x i8> %tmp1)
- ret <16 x i8> %tmp2
-}
-
-define <8 x i16> @vqabsQs16(<8 x i16>* %A) nounwind {
-;CHECK: vqabsQs16:
-;CHECK: vqabs.s16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = call <8 x i16> @llvm.arm.neon.vqabs.v8i16(<8 x i16> %tmp1)
- ret <8 x i16> %tmp2
-}
-
-define <4 x i32> @vqabsQs32(<4 x i32>* %A) nounwind {
-;CHECK: vqabsQs32:
-;CHECK: vqabs.s32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = call <4 x i32> @llvm.arm.neon.vqabs.v4i32(<4 x i32> %tmp1)
- ret <4 x i32> %tmp2
-}
-
-declare <8 x i8> @llvm.arm.neon.vqabs.v8i8(<8 x i8>) nounwind readnone
-declare <4 x i16> @llvm.arm.neon.vqabs.v4i16(<4 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm.neon.vqabs.v2i32(<2 x i32>) nounwind readnone
-
-declare <16 x i8> @llvm.arm.neon.vqabs.v16i8(<16 x i8>) nounwind readnone
-declare <8 x i16> @llvm.arm.neon.vqabs.v8i16(<8 x i16>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vqabs.v4i32(<4 x i32>) nounwind readnone
Removed: llvm/trunk/test/CodeGen/ARM/vqdmlal.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vqdmlal.ll?rev=83666&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vqdmlal.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vqdmlal.ll (removed)
@@ -1,24 +0,0 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
-
-define <4 x i32> @vqdmlals16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
-;CHECK: vqdmlals16:
-;CHECK: vqdmlal.s16
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = load <4 x i16>* %C
- %tmp4 = call <4 x i32> @llvm.arm.neon.vqdmlal.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
- ret <4 x i32> %tmp4
-}
-
-define <2 x i64> @vqdmlals32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
-;CHECK: vqdmlals32:
-;CHECK: vqdmlal.s32
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = load <2 x i32>* %C
- %tmp4 = call <2 x i64> @llvm.arm.neon.vqdmlal.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
- ret <2 x i64> %tmp4
-}
-
-declare <4 x i32> @llvm.arm.neon.vqdmlal.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i64> @llvm.arm.neon.vqdmlal.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
Removed: llvm/trunk/test/CodeGen/ARM/vqdmlal_lanes.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vqdmlal_lanes.ll?rev=83666&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vqdmlal_lanes.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vqdmlal_lanes.ll (removed)
@@ -1,25 +0,0 @@
-; RUN: llc -mattr=+neon < %s | FileCheck %s
-target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32"
-target triple = "thumbv7-elf"
-
-define arm_aapcs_vfpcc <4 x i32> @test_vqdmlal_lanes16(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %arg2_int16x4_t) nounwind readnone {
-entry:
-; CHECK: test_vqdmlal_lanes16
-; CHECK: vqdmlal.s16 q0, d2, d3[1]
- %0 = shufflevector <4 x i16> %arg2_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
- %1 = tail call <4 x i32> @llvm.arm.neon.vqdmlal.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
- ret <4 x i32> %1
-}
-
-declare <4 x i32> @llvm.arm.neon.vqdmlal.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
-
-define arm_aapcs_vfpcc <2 x i64> @test_vqdmlal_lanes32(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %arg2_int32x2_t) nounwind readnone {
-entry:
-; CHECK: test_vqdmlal_lanes32
-; CHECK: vqdmlal.s32 q0, d2, d3[1]
- %0 = shufflevector <2 x i32> %arg2_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
- %1 = tail call <2 x i64> @llvm.arm.neon.vqdmlal.v2i64(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
- ret <2 x i64> %1
-}
-
-declare <2 x i64> @llvm.arm.neon.vqdmlal.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
Removed: llvm/trunk/test/CodeGen/ARM/vqdmlsl.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vqdmlsl.ll?rev=83666&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vqdmlsl.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vqdmlsl.ll (removed)
@@ -1,24 +0,0 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
-
-define <4 x i32> @vqdmlsls16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
-;CHECK: vqdmlsls16:
-;CHECK: vqdmlsl.s16
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = load <4 x i16>* %C
- %tmp4 = call <4 x i32> @llvm.arm.neon.vqdmlsl.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
- ret <4 x i32> %tmp4
-}
-
-define <2 x i64> @vqdmlsls32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
-;CHECK: vqdmlsls32:
-;CHECK: vqdmlsl.s32
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = load <2 x i32>* %C
- %tmp4 = call <2 x i64> @llvm.arm.neon.vqdmlsl.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
- ret <2 x i64> %tmp4
-}
-
-declare <4 x i32> @llvm.arm.neon.vqdmlsl.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i64> @llvm.arm.neon.vqdmlsl.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
Removed: llvm/trunk/test/CodeGen/ARM/vqdmlsl_lanes.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vqdmlsl_lanes.ll?rev=83666&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vqdmlsl_lanes.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vqdmlsl_lanes.ll (removed)
@@ -1,25 +0,0 @@
-; RUN: llc -mattr=+neon < %s | FileCheck %s
-target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32"
-target triple = "thumbv7-elf"
-
-define arm_aapcs_vfpcc <4 x i32> @test_vqdmlsl_lanes16(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %arg2_int16x4_t) nounwind readnone {
-entry:
-; CHECK: test_vqdmlsl_lanes16
-; CHECK: vqdmlsl.s16 q0, d2, d3[1]
- %0 = shufflevector <4 x i16> %arg2_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
- %1 = tail call <4 x i32> @llvm.arm.neon.vqdmlsl.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
- ret <4 x i32> %1
-}
-
-declare <4 x i32> @llvm.arm.neon.vqdmlsl.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
-
-define arm_aapcs_vfpcc <2 x i64> @test_vqdmlsl_lanes32(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %arg2_int32x2_t) nounwind readnone {
-entry:
-; CHECK: test_vqdmlsl_lanes32
-; CHECK: vqdmlsl.s32 q0, d2, d3[1]
- %0 = shufflevector <2 x i32> %arg2_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
- %1 = tail call <2 x i64> @llvm.arm.neon.vqdmlsl.v2i64(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
- ret <2 x i64> %1
-}
-
-declare <2 x i64> @llvm.arm.neon.vqdmlsl.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
Added: llvm/trunk/test/CodeGen/ARM/vqdmul.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vqdmul.ll?rev=83667&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vqdmul.ll (added)
+++ llvm/trunk/test/CodeGen/ARM/vqdmul.ll Fri Oct 9 15:20:54 2009
@@ -0,0 +1,281 @@
+; RUN: llc -mattr=+neon < %s | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32"
+target triple = "thumbv7-elf"
+
+define <4 x i16> @vqdmulhs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vqdmulhs16:
+;CHECK: vqdmulh.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vqdmulhs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vqdmulhs32:
+;CHECK: vqdmulh.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <8 x i16> @vqdmulhQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vqdmulhQs16:
+;CHECK: vqdmulh.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vqdmulhQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vqdmulhQs32:
+;CHECK: vqdmulh.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vqdmulhQ_lanes16(<8 x i16> %arg0_int16x8_t, <4 x i16> %arg1_int16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vqdmulhQ_lanes16
+; CHECK: vqdmulh.s16 q0, q0, d2[1]
+ %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> ; <<8 x i16>> [#uses=1]
+ %1 = tail call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> %arg0_int16x8_t, <8 x i16> %0) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %1
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vqdmulhQ_lanes32(<4 x i32> %arg0_int32x4_t, <2 x i32> %arg1_int32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vqdmulhQ_lanes32
+; CHECK: vqdmulh.s32 q0, q0, d2[1]
+ %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i32>> [#uses=1]
+ %1 = tail call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i32> %0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %1
+}
+
+define arm_aapcs_vfpcc <4 x i16> @test_vqdmulh_lanes16(<4 x i16> %arg0_int16x4_t, <4 x i16> %arg1_int16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vqdmulh_lanes16
+; CHECK: vqdmulh.s16 d0, d0, d1[1]
+ %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
+ %1 = tail call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> %arg0_int16x4_t, <4 x i16> %0) ; <<4 x i16>> [#uses=1]
+ ret <4 x i16> %1
+}
+
+define arm_aapcs_vfpcc <2 x i32> @test_vqdmulh_lanes32(<2 x i32> %arg0_int32x2_t, <2 x i32> %arg1_int32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vqdmulh_lanes32
+; CHECK: vqdmulh.s32 d0, d0, d1[1]
+ %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
+ %1 = tail call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> %arg0_int32x2_t, <2 x i32> %0) ; <<2 x i32>> [#uses=1]
+ ret <2 x i32> %1
+}
+
+declare <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <4 x i16> @vqrdmulhs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vqrdmulhs16:
+;CHECK: vqrdmulh.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vqrdmulhs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vqrdmulhs32:
+;CHECK: vqrdmulh.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <8 x i16> @vqrdmulhQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vqrdmulhQs16:
+;CHECK: vqrdmulh.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vqrdmulhQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vqrdmulhQs32:
+;CHECK: vqrdmulh.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vqRdmulhQ_lanes16(<8 x i16> %arg0_int16x8_t, <4 x i16> %arg1_int16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vqRdmulhQ_lanes16
+; CHECK: vqrdmulh.s16 q0, q0, d2[1]
+ %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> ; <<8 x i16>> [#uses=1]
+ %1 = tail call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %arg0_int16x8_t, <8 x i16> %0) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %1
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vqRdmulhQ_lanes32(<4 x i32> %arg0_int32x4_t, <2 x i32> %arg1_int32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vqRdmulhQ_lanes32
+; CHECK: vqrdmulh.s32 q0, q0, d2[1]
+ %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i32>> [#uses=1]
+ %1 = tail call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i32> %0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %1
+}
+
+define arm_aapcs_vfpcc <4 x i16> @test_vqRdmulh_lanes16(<4 x i16> %arg0_int16x4_t, <4 x i16> %arg1_int16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vqRdmulh_lanes16
+; CHECK: vqrdmulh.s16 d0, d0, d1[1]
+ %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
+ %1 = tail call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %arg0_int16x4_t, <4 x i16> %0) ; <<4 x i16>> [#uses=1]
+ ret <4 x i16> %1
+}
+
+define arm_aapcs_vfpcc <2 x i32> @test_vqRdmulh_lanes32(<2 x i32> %arg0_int32x2_t, <2 x i32> %arg1_int32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vqRdmulh_lanes32
+; CHECK: vqrdmulh.s32 d0, d0, d1[1]
+ %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
+ %1 = tail call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %arg0_int32x2_t, <2 x i32> %0) ; <<2 x i32>> [#uses=1]
+ ret <2 x i32> %1
+}
+
+declare <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <4 x i32> @vqdmulls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vqdmulls16:
+;CHECK: vqdmull.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vqdmulls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vqdmulls32:
+;CHECK: vqdmull.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vqdmull_lanes16(<4 x i16> %arg0_int16x4_t, <4 x i16> %arg1_int16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vqdmull_lanes16
+; CHECK: vqdmull.s16 q0, d0, d1[1]
+ %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
+ %1 = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %arg0_int16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %1
+}
+
+define arm_aapcs_vfpcc <2 x i64> @test_vqdmull_lanes32(<2 x i32> %arg0_int32x2_t, <2 x i32> %arg1_int32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vqdmull_lanes32
+; CHECK: vqdmull.s32 q0, d0, d1[1]
+ %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
+ %1 = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %arg0_int32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %1
+}
+
+declare <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
+
+define <4 x i32> @vqdmlals16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+;CHECK: vqdmlals16:
+;CHECK: vqdmlal.s16
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = load <4 x i16>* %C
+ %tmp4 = call <4 x i32> @llvm.arm.neon.vqdmlal.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @vqdmlals32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+;CHECK: vqdmlals32:
+;CHECK: vqdmlal.s32
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = load <2 x i32>* %C
+ %tmp4 = call <2 x i64> @llvm.arm.neon.vqdmlal.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
+ ret <2 x i64> %tmp4
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vqdmlal_lanes16(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %arg2_int16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vqdmlal_lanes16
+; CHECK: vqdmlal.s16 q0, d2, d3[1]
+ %0 = shufflevector <4 x i16> %arg2_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
+ %1 = tail call <4 x i32> @llvm.arm.neon.vqdmlal.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %1
+}
+
+define arm_aapcs_vfpcc <2 x i64> @test_vqdmlal_lanes32(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %arg2_int32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vqdmlal_lanes32
+; CHECK: vqdmlal.s32 q0, d2, d3[1]
+ %0 = shufflevector <2 x i32> %arg2_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
+ %1 = tail call <2 x i64> @llvm.arm.neon.vqdmlal.v2i64(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %1
+}
+
+declare <4 x i32> @llvm.arm.neon.vqdmlal.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vqdmlal.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
+
+define <4 x i32> @vqdmlsls16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+;CHECK: vqdmlsls16:
+;CHECK: vqdmlsl.s16
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = load <4 x i16>* %C
+ %tmp4 = call <4 x i32> @llvm.arm.neon.vqdmlsl.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @vqdmlsls32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+;CHECK: vqdmlsls32:
+;CHECK: vqdmlsl.s32
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = load <2 x i32>* %C
+ %tmp4 = call <2 x i64> @llvm.arm.neon.vqdmlsl.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
+ ret <2 x i64> %tmp4
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vqdmlsl_lanes16(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %arg2_int16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vqdmlsl_lanes16
+; CHECK: vqdmlsl.s16 q0, d2, d3[1]
+ %0 = shufflevector <4 x i16> %arg2_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
+ %1 = tail call <4 x i32> @llvm.arm.neon.vqdmlsl.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %1
+}
+
+define arm_aapcs_vfpcc <2 x i64> @test_vqdmlsl_lanes32(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %arg2_int32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vqdmlsl_lanes32
+; CHECK: vqdmlsl.s32 q0, d2, d3[1]
+ %0 = shufflevector <2 x i32> %arg2_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
+ %1 = tail call <2 x i64> @llvm.arm.neon.vqdmlsl.v2i64(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %1
+}
+
+declare <4 x i32> @llvm.arm.neon.vqdmlsl.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vqdmlsl.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
Removed: llvm/trunk/test/CodeGen/ARM/vqdmulh.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vqdmulh.ll?rev=83666&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vqdmulh.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vqdmulh.ll (removed)
@@ -1,85 +0,0 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
-
-define <4 x i16> @vqdmulhs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vqdmulhs16:
-;CHECK: vqdmulh.s16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
- ret <4 x i16> %tmp3
-}
-
-define <2 x i32> @vqdmulhs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vqdmulhs32:
-;CHECK: vqdmulh.s32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
- ret <2 x i32> %tmp3
-}
-
-define <8 x i16> @vqdmulhQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: vqdmulhQs16:
-;CHECK: vqdmulh.s16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
- ret <8 x i16> %tmp3
-}
-
-define <4 x i32> @vqdmulhQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: vqdmulhQs32:
-;CHECK: vqdmulh.s32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %tmp3 = call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
- ret <4 x i32> %tmp3
-}
-
-declare <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-
-declare <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
-
-define <4 x i16> @vqrdmulhs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vqrdmulhs16:
-;CHECK: vqrdmulh.s16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
- ret <4 x i16> %tmp3
-}
-
-define <2 x i32> @vqrdmulhs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vqrdmulhs32:
-;CHECK: vqrdmulh.s32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
- ret <2 x i32> %tmp3
-}
-
-define <8 x i16> @vqrdmulhQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: vqrdmulhQs16:
-;CHECK: vqrdmulh.s16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
- ret <8 x i16> %tmp3
-}
-
-define <4 x i32> @vqrdmulhQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: vqrdmulhQs32:
-;CHECK: vqrdmulh.s32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %tmp3 = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
- ret <4 x i32> %tmp3
-}
-
-declare <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-
-declare <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
Removed: llvm/trunk/test/CodeGen/ARM/vqdmulh_lane.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vqdmulh_lane.ll?rev=83666&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vqdmulh_lane.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vqdmulh_lane.ll (removed)
@@ -1,47 +0,0 @@
-; RUN: llc -mattr=+neon < %s | FileCheck %s
-target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32"
-target triple = "thumbv7-elf"
-
-define arm_aapcs_vfpcc <8 x i16> @test_vqdmulhQ_lanes16(<8 x i16> %arg0_int16x8_t, <4 x i16> %arg1_int16x4_t) nounwind readnone {
-entry:
-; CHECK: test_vqdmulhQ_lanes16
-; CHECK: vqdmulh.s16 q0, q0, d2[1]
- %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> ; <<8 x i16>> [#uses=1]
- %1 = tail call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> %arg0_int16x8_t, <8 x i16> %0) ; <<8 x i16>> [#uses=1]
- ret <8 x i16> %1
-}
-
-declare <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
-
-define arm_aapcs_vfpcc <4 x i32> @test_vqdmulhQ_lanes32(<4 x i32> %arg0_int32x4_t, <2 x i32> %arg1_int32x2_t) nounwind readnone {
-entry:
-; CHECK: test_vqdmulhQ_lanes32
-; CHECK: vqdmulh.s32 q0, q0, d2[1]
- %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i32>> [#uses=1]
- %1 = tail call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i32> %0) ; <<4 x i32>> [#uses=1]
- ret <4 x i32> %1
-}
-
-declare <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
-
-define arm_aapcs_vfpcc <4 x i16> @test_vqdmulh_lanes16(<4 x i16> %arg0_int16x4_t, <4 x i16> %arg1_int16x4_t) nounwind readnone {
-entry:
-; CHECK: test_vqdmulh_lanes16
-; CHECK: vqdmulh.s16 d0, d0, d1[1]
- %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
- %1 = tail call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> %arg0_int16x4_t, <4 x i16> %0) ; <<4 x i16>> [#uses=1]
- ret <4 x i16> %1
-}
-
-declare <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
-
-define arm_aapcs_vfpcc <2 x i32> @test_vqdmulh_lanes32(<2 x i32> %arg0_int32x2_t, <2 x i32> %arg1_int32x2_t) nounwind readnone {
-entry:
-; CHECK: test_vqdmulh_lanes32
-; CHECK: vqdmulh.s32 d0, d0, d1[1]
- %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
- %1 = tail call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> %arg0_int32x2_t, <2 x i32> %0) ; <<2 x i32>> [#uses=1]
- ret <2 x i32> %1
-}
-
-declare <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
Removed: llvm/trunk/test/CodeGen/ARM/vqdmull.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vqdmull.ll?rev=83666&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vqdmull.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vqdmull.ll (removed)
@@ -1,22 +0,0 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
-
-define <4 x i32> @vqdmulls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vqdmulls16:
-;CHECK: vqdmull.s16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
- ret <4 x i32> %tmp3
-}
-
-define <2 x i64> @vqdmulls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vqdmulls32:
-;CHECK: vqdmull.s32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
- ret <2 x i64> %tmp3
-}
-
-declare <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
Removed: llvm/trunk/test/CodeGen/ARM/vqdmull_lane.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vqdmull_lane.ll?rev=83666&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vqdmull_lane.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vqdmull_lane.ll (removed)
@@ -1,25 +0,0 @@
-; RUN: llc -mattr=+neon < %s | FileCheck %s
-target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32"
-target triple = "thumbv7-elf"
-
-define arm_aapcs_vfpcc <4 x i32> @test_vqdmull_lanes16(<4 x i16> %arg0_int16x4_t, <4 x i16> %arg1_int16x4_t) nounwind readnone {
-entry:
-; CHECK: test_vqdmull_lanes16
-; CHECK: vqdmull.s16 q0, d0, d1[1]
- %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
- %1 = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %arg0_int16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
- ret <4 x i32> %1
-}
-
-declare <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
-
-define arm_aapcs_vfpcc <2 x i64> @test_vqdmull_lanes32(<2 x i32> %arg0_int32x2_t, <2 x i32> %arg1_int32x2_t) nounwind readnone {
-entry:
-; CHECK: test_vqdmull_lanes32
-; CHECK: vqdmull.s32 q0, d0, d1[1]
- %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
- %1 = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %arg0_int32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
- ret <2 x i64> %1
-}
-
-declare <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
Removed: llvm/trunk/test/CodeGen/ARM/vqmovn.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vqmovn.ll?rev=83666&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vqmovn.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vqmovn.ll (removed)
@@ -1,85 +0,0 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
-
-define <8 x i8> @vqmovns16(<8 x i16>* %A) nounwind {
-;CHECK: vqmovns16:
-;CHECK: vqmovn.s16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = call <8 x i8> @llvm.arm.neon.vqmovns.v8i8(<8 x i16> %tmp1)
- ret <8 x i8> %tmp2
-}
-
-define <4 x i16> @vqmovns32(<4 x i32>* %A) nounwind {
-;CHECK: vqmovns32:
-;CHECK: vqmovn.s32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = call <4 x i16> @llvm.arm.neon.vqmovns.v4i16(<4 x i32> %tmp1)
- ret <4 x i16> %tmp2
-}
-
-define <2 x i32> @vqmovns64(<2 x i64>* %A) nounwind {
-;CHECK: vqmovns64:
-;CHECK: vqmovn.s64
- %tmp1 = load <2 x i64>* %A
- %tmp2 = call <2 x i32> @llvm.arm.neon.vqmovns.v2i32(<2 x i64> %tmp1)
- ret <2 x i32> %tmp2
-}
-
-define <8 x i8> @vqmovnu16(<8 x i16>* %A) nounwind {
-;CHECK: vqmovnu16:
-;CHECK: vqmovn.u16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = call <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16> %tmp1)
- ret <8 x i8> %tmp2
-}
-
-define <4 x i16> @vqmovnu32(<4 x i32>* %A) nounwind {
-;CHECK: vqmovnu32:
-;CHECK: vqmovn.u32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = call <4 x i16> @llvm.arm.neon.vqmovnu.v4i16(<4 x i32> %tmp1)
- ret <4 x i16> %tmp2
-}
-
-define <2 x i32> @vqmovnu64(<2 x i64>* %A) nounwind {
-;CHECK: vqmovnu64:
-;CHECK: vqmovn.u64
- %tmp1 = load <2 x i64>* %A
- %tmp2 = call <2 x i32> @llvm.arm.neon.vqmovnu.v2i32(<2 x i64> %tmp1)
- ret <2 x i32> %tmp2
-}
-
-define <8 x i8> @vqmovuns16(<8 x i16>* %A) nounwind {
-;CHECK: vqmovuns16:
-;CHECK: vqmovun.s16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = call <8 x i8> @llvm.arm.neon.vqmovnsu.v8i8(<8 x i16> %tmp1)
- ret <8 x i8> %tmp2
-}
-
-define <4 x i16> @vqmovuns32(<4 x i32>* %A) nounwind {
-;CHECK: vqmovuns32:
-;CHECK: vqmovun.s32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = call <4 x i16> @llvm.arm.neon.vqmovnsu.v4i16(<4 x i32> %tmp1)
- ret <4 x i16> %tmp2
-}
-
-define <2 x i32> @vqmovuns64(<2 x i64>* %A) nounwind {
-;CHECK: vqmovuns64:
-;CHECK: vqmovun.s64
- %tmp1 = load <2 x i64>* %A
- %tmp2 = call <2 x i32> @llvm.arm.neon.vqmovnsu.v2i32(<2 x i64> %tmp1)
- ret <2 x i32> %tmp2
-}
-
-declare <8 x i8> @llvm.arm.neon.vqmovns.v8i8(<8 x i16>) nounwind readnone
-declare <4 x i16> @llvm.arm.neon.vqmovns.v4i16(<4 x i32>) nounwind readnone
-declare <2 x i32> @llvm.arm.neon.vqmovns.v2i32(<2 x i64>) nounwind readnone
-
-declare <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16>) nounwind readnone
-declare <4 x i16> @llvm.arm.neon.vqmovnu.v4i16(<4 x i32>) nounwind readnone
-declare <2 x i32> @llvm.arm.neon.vqmovnu.v2i32(<2 x i64>) nounwind readnone
-
-declare <8 x i8> @llvm.arm.neon.vqmovnsu.v8i8(<8 x i16>) nounwind readnone
-declare <4 x i16> @llvm.arm.neon.vqmovnsu.v4i16(<4 x i32>) nounwind readnone
-declare <2 x i32> @llvm.arm.neon.vqmovnsu.v2i32(<2 x i64>) nounwind readnone
Removed: llvm/trunk/test/CodeGen/ARM/vqneg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vqneg.ll?rev=83666&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vqneg.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vqneg.ll (removed)
@@ -1,57 +0,0 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
-
-define <8 x i8> @vqnegs8(<8 x i8>* %A) nounwind {
-;CHECK: vqnegs8:
-;CHECK: vqneg.s8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = call <8 x i8> @llvm.arm.neon.vqneg.v8i8(<8 x i8> %tmp1)
- ret <8 x i8> %tmp2
-}
-
-define <4 x i16> @vqnegs16(<4 x i16>* %A) nounwind {
-;CHECK: vqnegs16:
-;CHECK: vqneg.s16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = call <4 x i16> @llvm.arm.neon.vqneg.v4i16(<4 x i16> %tmp1)
- ret <4 x i16> %tmp2
-}
-
-define <2 x i32> @vqnegs32(<2 x i32>* %A) nounwind {
-;CHECK: vqnegs32:
-;CHECK: vqneg.s32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = call <2 x i32> @llvm.arm.neon.vqneg.v2i32(<2 x i32> %tmp1)
- ret <2 x i32> %tmp2
-}
-
-define <16 x i8> @vqnegQs8(<16 x i8>* %A) nounwind {
-;CHECK: vqnegQs8:
-;CHECK: vqneg.s8
- %tmp1 = load <16 x i8>* %A
- %tmp2 = call <16 x i8> @llvm.arm.neon.vqneg.v16i8(<16 x i8> %tmp1)
- ret <16 x i8> %tmp2
-}
-
-define <8 x i16> @vqnegQs16(<8 x i16>* %A) nounwind {
-;CHECK: vqnegQs16:
-;CHECK: vqneg.s16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = call <8 x i16> @llvm.arm.neon.vqneg.v8i16(<8 x i16> %tmp1)
- ret <8 x i16> %tmp2
-}
-
-define <4 x i32> @vqnegQs32(<4 x i32>* %A) nounwind {
-;CHECK: vqnegQs32:
-;CHECK: vqneg.s32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = call <4 x i32> @llvm.arm.neon.vqneg.v4i32(<4 x i32> %tmp1)
- ret <4 x i32> %tmp2
-}
-
-declare <8 x i8> @llvm.arm.neon.vqneg.v8i8(<8 x i8>) nounwind readnone
-declare <4 x i16> @llvm.arm.neon.vqneg.v4i16(<4 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm.neon.vqneg.v2i32(<2 x i32>) nounwind readnone
-
-declare <16 x i8> @llvm.arm.neon.vqneg.v16i8(<16 x i8>) nounwind readnone
-declare <8 x i16> @llvm.arm.neon.vqneg.v8i16(<8 x i16>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vqneg.v4i32(<4 x i32>) nounwind readnone
Removed: llvm/trunk/test/CodeGen/ARM/vqrshl.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vqrshl.ll?rev=83666&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vqrshl.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vqrshl.ll (removed)
@@ -1,165 +0,0 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
-
-define <8 x i8> @vqrshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vqrshls8:
-;CHECK: vqrshl.s8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = call <8 x i8> @llvm.arm.neon.vqrshifts.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
- ret <8 x i8> %tmp3
-}
-
-define <4 x i16> @vqrshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vqrshls16:
-;CHECK: vqrshl.s16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i16> @llvm.arm.neon.vqrshifts.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
- ret <4 x i16> %tmp3
-}
-
-define <2 x i32> @vqrshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vqrshls32:
-;CHECK: vqrshl.s32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i32> @llvm.arm.neon.vqrshifts.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
- ret <2 x i32> %tmp3
-}
-
-define <1 x i64> @vqrshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
-;CHECK: vqrshls64:
-;CHECK: vqrshl.s64
- %tmp1 = load <1 x i64>* %A
- %tmp2 = load <1 x i64>* %B
- %tmp3 = call <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
- ret <1 x i64> %tmp3
-}
-
-define <8 x i8> @vqrshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vqrshlu8:
-;CHECK: vqrshl.u8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = call <8 x i8> @llvm.arm.neon.vqrshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
- ret <8 x i8> %tmp3
-}
-
-define <4 x i16> @vqrshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vqrshlu16:
-;CHECK: vqrshl.u16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i16> @llvm.arm.neon.vqrshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
- ret <4 x i16> %tmp3
-}
-
-define <2 x i32> @vqrshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vqrshlu32:
-;CHECK: vqrshl.u32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i32> @llvm.arm.neon.vqrshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
- ret <2 x i32> %tmp3
-}
-
-define <1 x i64> @vqrshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
-;CHECK: vqrshlu64:
-;CHECK: vqrshl.u64
- %tmp1 = load <1 x i64>* %A
- %tmp2 = load <1 x i64>* %B
- %tmp3 = call <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
- ret <1 x i64> %tmp3
-}
-
-define <16 x i8> @vqrshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK: vqrshlQs8:
-;CHECK: vqrshl.s8
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
- %tmp3 = call <16 x i8> @llvm.arm.neon.vqrshifts.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
- ret <16 x i8> %tmp3
-}
-
-define <8 x i16> @vqrshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: vqrshlQs16:
-;CHECK: vqrshl.s16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = call <8 x i16> @llvm.arm.neon.vqrshifts.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
- ret <8 x i16> %tmp3
-}
-
-define <4 x i32> @vqrshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: vqrshlQs32:
-;CHECK: vqrshl.s32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %tmp3 = call <4 x i32> @llvm.arm.neon.vqrshifts.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
- ret <4 x i32> %tmp3
-}
-
-define <2 x i64> @vqrshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK: vqrshlQs64:
-;CHECK: vqrshl.s64
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
- %tmp3 = call <2 x i64> @llvm.arm.neon.vqrshifts.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
- ret <2 x i64> %tmp3
-}
-
-define <16 x i8> @vqrshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK: vqrshlQu8:
-;CHECK: vqrshl.u8
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
- %tmp3 = call <16 x i8> @llvm.arm.neon.vqrshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
- ret <16 x i8> %tmp3
-}
-
-define <8 x i16> @vqrshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: vqrshlQu16:
-;CHECK: vqrshl.u16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = call <8 x i16> @llvm.arm.neon.vqrshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
- ret <8 x i16> %tmp3
-}
-
-define <4 x i32> @vqrshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: vqrshlQu32:
-;CHECK: vqrshl.u32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %tmp3 = call <4 x i32> @llvm.arm.neon.vqrshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
- ret <4 x i32> %tmp3
-}
-
-define <2 x i64> @vqrshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK: vqrshlQu64:
-;CHECK: vqrshl.u64
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
- %tmp3 = call <2 x i64> @llvm.arm.neon.vqrshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
- ret <2 x i64> %tmp3
-}
-
-declare <8 x i8> @llvm.arm.neon.vqrshifts.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i16> @llvm.arm.neon.vqrshifts.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm.neon.vqrshifts.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-declare <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
-
-declare <8 x i8> @llvm.arm.neon.vqrshiftu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i16> @llvm.arm.neon.vqrshiftu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm.neon.vqrshiftu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-declare <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
-
-declare <16 x i8> @llvm.arm.neon.vqrshifts.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
-declare <8 x i16> @llvm.arm.neon.vqrshifts.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vqrshifts.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
-declare <2 x i64> @llvm.arm.neon.vqrshifts.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
-
-declare <16 x i8> @llvm.arm.neon.vqrshiftu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
-declare <8 x i16> @llvm.arm.neon.vqrshiftu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vqrshiftu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
-declare <2 x i64> @llvm.arm.neon.vqrshiftu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
Removed: llvm/trunk/test/CodeGen/ARM/vqrshrn.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vqrshrn.ll?rev=83666&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vqrshrn.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vqrshrn.ll (removed)
@@ -1,85 +0,0 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
-
-define <8 x i8> @vqrshrns8(<8 x i16>* %A) nounwind {
-;CHECK: vqrshrns8:
-;CHECK: vqrshrn.s16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = call <8 x i8> @llvm.arm.neon.vqrshiftns.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
- ret <8 x i8> %tmp2
-}
-
-define <4 x i16> @vqrshrns16(<4 x i32>* %A) nounwind {
-;CHECK: vqrshrns16:
-;CHECK: vqrshrn.s32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = call <4 x i16> @llvm.arm.neon.vqrshiftns.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
- ret <4 x i16> %tmp2
-}
-
-define <2 x i32> @vqrshrns32(<2 x i64>* %A) nounwind {
-;CHECK: vqrshrns32:
-;CHECK: vqrshrn.s64
- %tmp1 = load <2 x i64>* %A
- %tmp2 = call <2 x i32> @llvm.arm.neon.vqrshiftns.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
- ret <2 x i32> %tmp2
-}
-
-define <8 x i8> @vqrshrnu8(<8 x i16>* %A) nounwind {
-;CHECK: vqrshrnu8:
-;CHECK: vqrshrn.u16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = call <8 x i8> @llvm.arm.neon.vqrshiftnu.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
- ret <8 x i8> %tmp2
-}
-
-define <4 x i16> @vqrshrnu16(<4 x i32>* %A) nounwind {
-;CHECK: vqrshrnu16:
-;CHECK: vqrshrn.u32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = call <4 x i16> @llvm.arm.neon.vqrshiftnu.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
- ret <4 x i16> %tmp2
-}
-
-define <2 x i32> @vqrshrnu32(<2 x i64>* %A) nounwind {
-;CHECK: vqrshrnu32:
-;CHECK: vqrshrn.u64
- %tmp1 = load <2 x i64>* %A
- %tmp2 = call <2 x i32> @llvm.arm.neon.vqrshiftnu.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
- ret <2 x i32> %tmp2
-}
-
-define <8 x i8> @vqrshruns8(<8 x i16>* %A) nounwind {
-;CHECK: vqrshruns8:
-;CHECK: vqrshrun.s16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = call <8 x i8> @llvm.arm.neon.vqrshiftnsu.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
- ret <8 x i8> %tmp2
-}
-
-define <4 x i16> @vqrshruns16(<4 x i32>* %A) nounwind {
-;CHECK: vqrshruns16:
-;CHECK: vqrshrun.s32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = call <4 x i16> @llvm.arm.neon.vqrshiftnsu.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
- ret <4 x i16> %tmp2
-}
-
-define <2 x i32> @vqrshruns32(<2 x i64>* %A) nounwind {
-;CHECK: vqrshruns32:
-;CHECK: vqrshrun.s64
- %tmp1 = load <2 x i64>* %A
- %tmp2 = call <2 x i32> @llvm.arm.neon.vqrshiftnsu.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
- ret <2 x i32> %tmp2
-}
-
-declare <8 x i8> @llvm.arm.neon.vqrshiftns.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
-declare <4 x i16> @llvm.arm.neon.vqrshiftns.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
-declare <2 x i32> @llvm.arm.neon.vqrshiftns.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
-
-declare <8 x i8> @llvm.arm.neon.vqrshiftnu.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
-declare <4 x i16> @llvm.arm.neon.vqrshiftnu.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
-declare <2 x i32> @llvm.arm.neon.vqrshiftnu.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
-
-declare <8 x i8> @llvm.arm.neon.vqrshiftnsu.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
-declare <4 x i16> @llvm.arm.neon.vqrshiftnsu.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
-declare <2 x i32> @llvm.arm.neon.vqrshiftnsu.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
Modified: llvm/trunk/test/CodeGen/ARM/vqshl.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vqshl.ll?rev=83667&r1=83666&r2=83667&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vqshl.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vqshl.ll Fri Oct 9 15:20:54 2009
@@ -365,3 +365,167 @@
declare <8 x i16> @llvm.arm.neon.vqshiftsu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vqshiftsu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.arm.neon.vqshiftsu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i8> @vqrshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vqrshls8:
+;CHECK: vqrshl.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vqrshifts.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vqrshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vqrshls16:
+;CHECK: vqrshl.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vqrshifts.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vqrshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vqrshls32:
+;CHECK: vqrshl.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vqrshifts.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <1 x i64> @vqrshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vqrshls64:
+;CHECK: vqrshl.s64
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = load <1 x i64>* %B
+ %tmp3 = call <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
+ ret <1 x i64> %tmp3
+}
+
+define <8 x i8> @vqrshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vqrshlu8:
+;CHECK: vqrshl.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vqrshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vqrshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vqrshlu16:
+;CHECK: vqrshl.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vqrshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vqrshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vqrshlu32:
+;CHECK: vqrshl.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vqrshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <1 x i64> @vqrshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vqrshlu64:
+;CHECK: vqrshl.u64
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = load <1 x i64>* %B
+ %tmp3 = call <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
+ ret <1 x i64> %tmp3
+}
+
+define <16 x i8> @vqrshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vqrshlQs8:
+;CHECK: vqrshl.s8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.arm.neon.vqrshifts.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vqrshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vqrshlQs16:
+;CHECK: vqrshl.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vqrshifts.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vqrshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vqrshlQs32:
+;CHECK: vqrshl.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vqrshifts.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vqrshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vqrshlQs64:
+;CHECK: vqrshl.s64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vqrshifts.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+define <16 x i8> @vqrshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vqrshlQu8:
+;CHECK: vqrshl.u8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.arm.neon.vqrshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vqrshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vqrshlQu16:
+;CHECK: vqrshl.u16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vqrshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vqrshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vqrshlQu32:
+;CHECK: vqrshl.u32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vqrshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vqrshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vqrshlQu64:
+;CHECK: vqrshl.u64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vqrshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+declare <8 x i8> @llvm.arm.neon.vqrshifts.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vqrshifts.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vqrshifts.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
+
+declare <8 x i8> @llvm.arm.neon.vqrshiftu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vqrshiftu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vqrshiftu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vqrshifts.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vqrshifts.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vqrshifts.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vqrshifts.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vqrshiftu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vqrshiftu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vqrshiftu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vqrshiftu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
Modified: llvm/trunk/test/CodeGen/ARM/vqshrn.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vqshrn.ll?rev=83667&r1=83666&r2=83667&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vqshrn.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vqshrn.ll Fri Oct 9 15:20:54 2009
@@ -83,3 +83,87 @@
declare <8 x i8> @llvm.arm.neon.vqshiftnsu.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vqshiftnsu.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vqshiftnsu.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i8> @vqrshrns8(<8 x i16>* %A) nounwind {
+;CHECK: vqrshrns8:
+;CHECK: vqrshrn.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vqrshiftns.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vqrshrns16(<4 x i32>* %A) nounwind {
+;CHECK: vqrshrns16:
+;CHECK: vqrshrn.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vqrshiftns.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vqrshrns32(<2 x i64>* %A) nounwind {
+;CHECK: vqrshrns32:
+;CHECK: vqrshrn.s64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vqrshiftns.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
+ ret <2 x i32> %tmp2
+}
+
+define <8 x i8> @vqrshrnu8(<8 x i16>* %A) nounwind {
+;CHECK: vqrshrnu8:
+;CHECK: vqrshrn.u16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vqrshiftnu.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vqrshrnu16(<4 x i32>* %A) nounwind {
+;CHECK: vqrshrnu16:
+;CHECK: vqrshrn.u32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vqrshiftnu.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vqrshrnu32(<2 x i64>* %A) nounwind {
+;CHECK: vqrshrnu32:
+;CHECK: vqrshrn.u64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vqrshiftnu.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
+ ret <2 x i32> %tmp2
+}
+
+define <8 x i8> @vqrshruns8(<8 x i16>* %A) nounwind {
+;CHECK: vqrshruns8:
+;CHECK: vqrshrun.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vqrshiftnsu.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vqrshruns16(<4 x i32>* %A) nounwind {
+;CHECK: vqrshruns16:
+;CHECK: vqrshrun.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vqrshiftnsu.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vqrshruns32(<2 x i64>* %A) nounwind {
+;CHECK: vqrshruns32:
+;CHECK: vqrshrun.s64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vqrshiftnsu.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
+ ret <2 x i32> %tmp2
+}
+
+declare <8 x i8> @llvm.arm.neon.vqrshiftns.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vqrshiftns.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vqrshiftns.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
+
+declare <8 x i8> @llvm.arm.neon.vqrshiftnu.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vqrshiftnu.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vqrshiftnu.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
+
+declare <8 x i8> @llvm.arm.neon.vqrshiftnsu.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vqrshiftnsu.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vqrshiftnsu.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
Removed: llvm/trunk/test/CodeGen/ARM/vraddhn.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vraddhn.ll?rev=83666&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vraddhn.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vraddhn.ll (removed)
@@ -1,32 +0,0 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
-
-define <8 x i8> @vraddhni16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: vraddhni16:
-;CHECK: vraddhn.i16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = call <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2)
- ret <8 x i8> %tmp3
-}
-
-define <4 x i16> @vraddhni32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: vraddhni32:
-;CHECK: vraddhn.i32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %tmp3 = call <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2)
- ret <4 x i16> %tmp3
-}
-
-define <2 x i32> @vraddhni64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK: vraddhni64:
-;CHECK: vraddhn.i64
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
- %tmp3 = call <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2)
- ret <2 x i32> %tmp3
-}
-
-declare <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
-declare <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
-declare <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
Added: llvm/trunk/test/CodeGen/ARM/vrec.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vrec.ll?rev=83667&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vrec.ll (added)
+++ llvm/trunk/test/CodeGen/ARM/vrec.ll Fri Oct 9 15:20:54 2009
@@ -0,0 +1,119 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <2 x i32> @vrecpei32(<2 x i32>* %A) nounwind {
+;CHECK: vrecpei32:
+;CHECK: vrecpe.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vrecpe.v2i32(<2 x i32> %tmp1)
+ ret <2 x i32> %tmp2
+}
+
+define <4 x i32> @vrecpeQi32(<4 x i32>* %A) nounwind {
+;CHECK: vrecpeQi32:
+;CHECK: vrecpe.u32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i32> @llvm.arm.neon.vrecpe.v4i32(<4 x i32> %tmp1)
+ ret <4 x i32> %tmp2
+}
+
+define <2 x float> @vrecpef32(<2 x float>* %A) nounwind {
+;CHECK: vrecpef32:
+;CHECK: vrecpe.f32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = call <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float> %tmp1)
+ ret <2 x float> %tmp2
+}
+
+define <4 x float> @vrecpeQf32(<4 x float>* %A) nounwind {
+;CHECK: vrecpeQf32:
+;CHECK: vrecpe.f32
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %tmp1)
+ ret <4 x float> %tmp2
+}
+
+declare <2 x i32> @llvm.arm.neon.vrecpe.v2i32(<2 x i32>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vrecpe.v4i32(<4 x i32>) nounwind readnone
+
+declare <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float>) nounwind readnone
+declare <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float>) nounwind readnone
+
+define <2 x float> @vrecpsf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vrecpsf32:
+;CHECK: vrecps.f32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = call <2 x float> @llvm.arm.neon.vrecps.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+ ret <2 x float> %tmp3
+}
+
+define <4 x float> @vrecpsQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vrecpsQf32:
+;CHECK: vrecps.f32
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = load <4 x float>* %B
+ %tmp3 = call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
+ ret <4 x float> %tmp3
+}
+
+declare <2 x float> @llvm.arm.neon.vrecps.v2f32(<2 x float>, <2 x float>) nounwind readnone
+declare <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float>, <4 x float>) nounwind readnone
+
+define <2 x i32> @vrsqrtei32(<2 x i32>* %A) nounwind {
+;CHECK: vrsqrtei32:
+;CHECK: vrsqrte.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vrsqrte.v2i32(<2 x i32> %tmp1)
+ ret <2 x i32> %tmp2
+}
+
+define <4 x i32> @vrsqrteQi32(<4 x i32>* %A) nounwind {
+;CHECK: vrsqrteQi32:
+;CHECK: vrsqrte.u32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i32> @llvm.arm.neon.vrsqrte.v4i32(<4 x i32> %tmp1)
+ ret <4 x i32> %tmp2
+}
+
+define <2 x float> @vrsqrtef32(<2 x float>* %A) nounwind {
+;CHECK: vrsqrtef32:
+;CHECK: vrsqrte.f32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = call <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float> %tmp1)
+ ret <2 x float> %tmp2
+}
+
+define <4 x float> @vrsqrteQf32(<4 x float>* %A) nounwind {
+;CHECK: vrsqrteQf32:
+;CHECK: vrsqrte.f32
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %tmp1)
+ ret <4 x float> %tmp2
+}
+
+declare <2 x i32> @llvm.arm.neon.vrsqrte.v2i32(<2 x i32>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vrsqrte.v4i32(<4 x i32>) nounwind readnone
+
+declare <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float>) nounwind readnone
+declare <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float>) nounwind readnone
+
+define <2 x float> @vrsqrtsf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vrsqrtsf32:
+;CHECK: vrsqrts.f32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = call <2 x float> @llvm.arm.neon.vrsqrts.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+ ret <2 x float> %tmp3
+}
+
+define <4 x float> @vrsqrtsQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vrsqrtsQf32:
+;CHECK: vrsqrts.f32
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = load <4 x float>* %B
+ %tmp3 = call <4 x float> @llvm.arm.neon.vrsqrts.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
+ ret <4 x float> %tmp3
+}
+
+declare <2 x float> @llvm.arm.neon.vrsqrts.v2f32(<2 x float>, <2 x float>) nounwind readnone
+declare <4 x float> @llvm.arm.neon.vrsqrts.v4f32(<4 x float>, <4 x float>) nounwind readnone
Removed: llvm/trunk/test/CodeGen/ARM/vrecpe.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vrecpe.ll?rev=83666&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vrecpe.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vrecpe.ll (removed)
@@ -1,39 +0,0 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
-
-define <2 x i32> @vrecpei32(<2 x i32>* %A) nounwind {
-;CHECK: vrecpei32:
-;CHECK: vrecpe.u32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = call <2 x i32> @llvm.arm.neon.vrecpe.v2i32(<2 x i32> %tmp1)
- ret <2 x i32> %tmp2
-}
-
-define <4 x i32> @vrecpeQi32(<4 x i32>* %A) nounwind {
-;CHECK: vrecpeQi32:
-;CHECK: vrecpe.u32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = call <4 x i32> @llvm.arm.neon.vrecpe.v4i32(<4 x i32> %tmp1)
- ret <4 x i32> %tmp2
-}
-
-define <2 x float> @vrecpef32(<2 x float>* %A) nounwind {
-;CHECK: vrecpef32:
-;CHECK: vrecpe.f32
- %tmp1 = load <2 x float>* %A
- %tmp2 = call <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float> %tmp1)
- ret <2 x float> %tmp2
-}
-
-define <4 x float> @vrecpeQf32(<4 x float>* %A) nounwind {
-;CHECK: vrecpeQf32:
-;CHECK: vrecpe.f32
- %tmp1 = load <4 x float>* %A
- %tmp2 = call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %tmp1)
- ret <4 x float> %tmp2
-}
-
-declare <2 x i32> @llvm.arm.neon.vrecpe.v2i32(<2 x i32>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vrecpe.v4i32(<4 x i32>) nounwind readnone
-
-declare <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float>) nounwind readnone
-declare <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float>) nounwind readnone
Removed: llvm/trunk/test/CodeGen/ARM/vrecps.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vrecps.ll?rev=83666&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vrecps.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vrecps.ll (removed)
@@ -1,22 +0,0 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
-
-define <2 x float> @vrecpsf32(<2 x float>* %A, <2 x float>* %B) nounwind {
-;CHECK: vrecpsf32:
-;CHECK: vrecps.f32
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
- %tmp3 = call <2 x float> @llvm.arm.neon.vrecps.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
- ret <2 x float> %tmp3
-}
-
-define <4 x float> @vrecpsQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
-;CHECK: vrecpsQf32:
-;CHECK: vrecps.f32
- %tmp1 = load <4 x float>* %A
- %tmp2 = load <4 x float>* %B
- %tmp3 = call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
- ret <4 x float> %tmp3
-}
-
-declare <2 x float> @llvm.arm.neon.vrecps.v2f32(<2 x float>, <2 x float>) nounwind readnone
-declare <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float>, <4 x float>) nounwind readnone
Removed: llvm/trunk/test/CodeGen/ARM/vrhadd.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vrhadd.ll?rev=83666&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vrhadd.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vrhadd.ll (removed)
@@ -1,125 +0,0 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
-
-define <8 x i8> @vrhadds8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vrhadds8:
-;CHECK: vrhadd.s8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = call <8 x i8> @llvm.arm.neon.vrhadds.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
- ret <8 x i8> %tmp3
-}
-
-define <4 x i16> @vrhadds16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vrhadds16:
-;CHECK: vrhadd.s16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i16> @llvm.arm.neon.vrhadds.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
- ret <4 x i16> %tmp3
-}
-
-define <2 x i32> @vrhadds32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vrhadds32:
-;CHECK: vrhadd.s32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i32> @llvm.arm.neon.vrhadds.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
- ret <2 x i32> %tmp3
-}
-
-define <8 x i8> @vrhaddu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vrhaddu8:
-;CHECK: vrhadd.u8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = call <8 x i8> @llvm.arm.neon.vrhaddu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
- ret <8 x i8> %tmp3
-}
-
-define <4 x i16> @vrhaddu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vrhaddu16:
-;CHECK: vrhadd.u16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i16> @llvm.arm.neon.vrhaddu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
- ret <4 x i16> %tmp3
-}
-
-define <2 x i32> @vrhaddu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vrhaddu32:
-;CHECK: vrhadd.u32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i32> @llvm.arm.neon.vrhaddu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
- ret <2 x i32> %tmp3
-}
-
-define <16 x i8> @vrhaddQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK: vrhaddQs8:
-;CHECK: vrhadd.s8
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
- %tmp3 = call <16 x i8> @llvm.arm.neon.vrhadds.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
- ret <16 x i8> %tmp3
-}
-
-define <8 x i16> @vrhaddQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: vrhaddQs16:
-;CHECK: vrhadd.s16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = call <8 x i16> @llvm.arm.neon.vrhadds.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
- ret <8 x i16> %tmp3
-}
-
-define <4 x i32> @vrhaddQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: vrhaddQs32:
-;CHECK: vrhadd.s32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %tmp3 = call <4 x i32> @llvm.arm.neon.vrhadds.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
- ret <4 x i32> %tmp3
-}
-
-define <16 x i8> @vrhaddQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK: vrhaddQu8:
-;CHECK: vrhadd.u8
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
- %tmp3 = call <16 x i8> @llvm.arm.neon.vrhaddu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
- ret <16 x i8> %tmp3
-}
-
-define <8 x i16> @vrhaddQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: vrhaddQu16:
-;CHECK: vrhadd.u16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = call <8 x i16> @llvm.arm.neon.vrhaddu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
- ret <8 x i16> %tmp3
-}
-
-define <4 x i32> @vrhaddQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: vrhaddQu32:
-;CHECK: vrhadd.u32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %tmp3 = call <4 x i32> @llvm.arm.neon.vrhaddu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
- ret <4 x i32> %tmp3
-}
-
-declare <8 x i8> @llvm.arm.neon.vrhadds.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i16> @llvm.arm.neon.vrhadds.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm.neon.vrhadds.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-
-declare <8 x i8> @llvm.arm.neon.vrhaddu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i16> @llvm.arm.neon.vrhaddu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm.neon.vrhaddu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-
-declare <16 x i8> @llvm.arm.neon.vrhadds.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
-declare <8 x i16> @llvm.arm.neon.vrhadds.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vrhadds.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
-
-declare <16 x i8> @llvm.arm.neon.vrhaddu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
-declare <8 x i16> @llvm.arm.neon.vrhaddu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vrhaddu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
Removed: llvm/trunk/test/CodeGen/ARM/vrshl.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vrshl.ll?rev=83666&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vrshl.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vrshl.ll (removed)
@@ -1,293 +0,0 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
-
-define <8 x i8> @vrshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vrshls8:
-;CHECK: vrshl.s8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
- ret <8 x i8> %tmp3
-}
-
-define <4 x i16> @vrshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vrshls16:
-;CHECK: vrshl.s16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
- ret <4 x i16> %tmp3
-}
-
-define <2 x i32> @vrshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vrshls32:
-;CHECK: vrshl.s32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
- ret <2 x i32> %tmp3
-}
-
-define <1 x i64> @vrshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
-;CHECK: vrshls64:
-;CHECK: vrshl.s64
- %tmp1 = load <1 x i64>* %A
- %tmp2 = load <1 x i64>* %B
- %tmp3 = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
- ret <1 x i64> %tmp3
-}
-
-define <8 x i8> @vrshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vrshlu8:
-;CHECK: vrshl.u8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
- ret <8 x i8> %tmp3
-}
-
-define <4 x i16> @vrshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vrshlu16:
-;CHECK: vrshl.u16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
- ret <4 x i16> %tmp3
-}
-
-define <2 x i32> @vrshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vrshlu32:
-;CHECK: vrshl.u32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
- ret <2 x i32> %tmp3
-}
-
-define <1 x i64> @vrshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
-;CHECK: vrshlu64:
-;CHECK: vrshl.u64
- %tmp1 = load <1 x i64>* %A
- %tmp2 = load <1 x i64>* %B
- %tmp3 = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
- ret <1 x i64> %tmp3
-}
-
-define <16 x i8> @vrshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK: vrshlQs8:
-;CHECK: vrshl.s8
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
- %tmp3 = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
- ret <16 x i8> %tmp3
-}
-
-define <8 x i16> @vrshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: vrshlQs16:
-;CHECK: vrshl.s16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
- ret <8 x i16> %tmp3
-}
-
-define <4 x i32> @vrshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: vrshlQs32:
-;CHECK: vrshl.s32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %tmp3 = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
- ret <4 x i32> %tmp3
-}
-
-define <2 x i64> @vrshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK: vrshlQs64:
-;CHECK: vrshl.s64
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
- %tmp3 = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
- ret <2 x i64> %tmp3
-}
-
-define <16 x i8> @vrshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK: vrshlQu8:
-;CHECK: vrshl.u8
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
- %tmp3 = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
- ret <16 x i8> %tmp3
-}
-
-define <8 x i16> @vrshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: vrshlQu16:
-;CHECK: vrshl.u16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
- ret <8 x i16> %tmp3
-}
-
-define <4 x i32> @vrshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: vrshlQu32:
-;CHECK: vrshl.u32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %tmp3 = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
- ret <4 x i32> %tmp3
-}
-
-define <2 x i64> @vrshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK: vrshlQu64:
-;CHECK: vrshl.u64
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
- %tmp3 = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
- ret <2 x i64> %tmp3
-}
-
-define <8 x i8> @vrshrs8(<8 x i8>* %A) nounwind {
-;CHECK: vrshrs8:
-;CHECK: vrshr.s8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
- ret <8 x i8> %tmp2
-}
-
-define <4 x i16> @vrshrs16(<4 x i16>* %A) nounwind {
-;CHECK: vrshrs16:
-;CHECK: vrshr.s16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >)
- ret <4 x i16> %tmp2
-}
-
-define <2 x i32> @vrshrs32(<2 x i32>* %A) nounwind {
-;CHECK: vrshrs32:
-;CHECK: vrshr.s32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 -32, i32 -32 >)
- ret <2 x i32> %tmp2
-}
-
-define <1 x i64> @vrshrs64(<1 x i64>* %A) nounwind {
-;CHECK: vrshrs64:
-;CHECK: vrshr.s64
- %tmp1 = load <1 x i64>* %A
- %tmp2 = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 -64 >)
- ret <1 x i64> %tmp2
-}
-
-define <8 x i8> @vrshru8(<8 x i8>* %A) nounwind {
-;CHECK: vrshru8:
-;CHECK: vrshr.u8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
- ret <8 x i8> %tmp2
-}
-
-define <4 x i16> @vrshru16(<4 x i16>* %A) nounwind {
-;CHECK: vrshru16:
-;CHECK: vrshr.u16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >)
- ret <4 x i16> %tmp2
-}
-
-define <2 x i32> @vrshru32(<2 x i32>* %A) nounwind {
-;CHECK: vrshru32:
-;CHECK: vrshr.u32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 -32, i32 -32 >)
- ret <2 x i32> %tmp2
-}
-
-define <1 x i64> @vrshru64(<1 x i64>* %A) nounwind {
-;CHECK: vrshru64:
-;CHECK: vrshr.u64
- %tmp1 = load <1 x i64>* %A
- %tmp2 = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 -64 >)
- ret <1 x i64> %tmp2
-}
-
-define <16 x i8> @vrshrQs8(<16 x i8>* %A) nounwind {
-;CHECK: vrshrQs8:
-;CHECK: vrshr.s8
- %tmp1 = load <16 x i8>* %A
- %tmp2 = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
- ret <16 x i8> %tmp2
-}
-
-define <8 x i16> @vrshrQs16(<8 x i16>* %A) nounwind {
-;CHECK: vrshrQs16:
-;CHECK: vrshr.s16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >)
- ret <8 x i16> %tmp2
-}
-
-define <4 x i32> @vrshrQs32(<4 x i32>* %A) nounwind {
-;CHECK: vrshrQs32:
-;CHECK: vrshr.s32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >)
- ret <4 x i32> %tmp2
-}
-
-define <2 x i64> @vrshrQs64(<2 x i64>* %A) nounwind {
-;CHECK: vrshrQs64:
-;CHECK: vrshr.s64
- %tmp1 = load <2 x i64>* %A
- %tmp2 = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 -64, i64 -64 >)
- ret <2 x i64> %tmp2
-}
-
-define <16 x i8> @vrshrQu8(<16 x i8>* %A) nounwind {
-;CHECK: vrshrQu8:
-;CHECK: vrshr.u8
- %tmp1 = load <16 x i8>* %A
- %tmp2 = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
- ret <16 x i8> %tmp2
-}
-
-define <8 x i16> @vrshrQu16(<8 x i16>* %A) nounwind {
-;CHECK: vrshrQu16:
-;CHECK: vrshr.u16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >)
- ret <8 x i16> %tmp2
-}
-
-define <4 x i32> @vrshrQu32(<4 x i32>* %A) nounwind {
-;CHECK: vrshrQu32:
-;CHECK: vrshr.u32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >)
- ret <4 x i32> %tmp2
-}
-
-define <2 x i64> @vrshrQu64(<2 x i64>* %A) nounwind {
-;CHECK: vrshrQu64:
-;CHECK: vrshr.u64
- %tmp1 = load <2 x i64>* %A
- %tmp2 = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 -64, i64 -64 >)
- ret <2 x i64> %tmp2
-}
-
-declare <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-declare <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
-
-declare <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-declare <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
-
-declare <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
-declare <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
-declare <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
-
-declare <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
-declare <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
-declare <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
Removed: llvm/trunk/test/CodeGen/ARM/vrshrn.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vrshrn.ll?rev=83666&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vrshrn.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vrshrn.ll (removed)
@@ -1,29 +0,0 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
-
-define <8 x i8> @vrshrns8(<8 x i16>* %A) nounwind {
-;CHECK: vrshrns8:
-;CHECK: vrshrn.i16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = call <8 x i8> @llvm.arm.neon.vrshiftn.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
- ret <8 x i8> %tmp2
-}
-
-define <4 x i16> @vrshrns16(<4 x i32>* %A) nounwind {
-;CHECK: vrshrns16:
-;CHECK: vrshrn.i32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = call <4 x i16> @llvm.arm.neon.vrshiftn.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
- ret <4 x i16> %tmp2
-}
-
-define <2 x i32> @vrshrns32(<2 x i64>* %A) nounwind {
-;CHECK: vrshrns32:
-;CHECK: vrshrn.i64
- %tmp1 = load <2 x i64>* %A
- %tmp2 = call <2 x i32> @llvm.arm.neon.vrshiftn.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
- ret <2 x i32> %tmp2
-}
-
-declare <8 x i8> @llvm.arm.neon.vrshiftn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
-declare <4 x i16> @llvm.arm.neon.vrshiftn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
-declare <2 x i32> @llvm.arm.neon.vrshiftn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
Removed: llvm/trunk/test/CodeGen/ARM/vrsqrte.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vrsqrte.ll?rev=83666&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vrsqrte.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vrsqrte.ll (removed)
@@ -1,39 +0,0 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
-
-define <2 x i32> @vrsqrtei32(<2 x i32>* %A) nounwind {
-;CHECK: vrsqrtei32:
-;CHECK: vrsqrte.u32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = call <2 x i32> @llvm.arm.neon.vrsqrte.v2i32(<2 x i32> %tmp1)
- ret <2 x i32> %tmp2
-}
-
-define <4 x i32> @vrsqrteQi32(<4 x i32>* %A) nounwind {
-;CHECK: vrsqrteQi32:
-;CHECK: vrsqrte.u32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = call <4 x i32> @llvm.arm.neon.vrsqrte.v4i32(<4 x i32> %tmp1)
- ret <4 x i32> %tmp2
-}
-
-define <2 x float> @vrsqrtef32(<2 x float>* %A) nounwind {
-;CHECK: vrsqrtef32:
-;CHECK: vrsqrte.f32
- %tmp1 = load <2 x float>* %A
- %tmp2 = call <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float> %tmp1)
- ret <2 x float> %tmp2
-}
-
-define <4 x float> @vrsqrteQf32(<4 x float>* %A) nounwind {
-;CHECK: vrsqrteQf32:
-;CHECK: vrsqrte.f32
- %tmp1 = load <4 x float>* %A
- %tmp2 = call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %tmp1)
- ret <4 x float> %tmp2
-}
-
-declare <2 x i32> @llvm.arm.neon.vrsqrte.v2i32(<2 x i32>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vrsqrte.v4i32(<4 x i32>) nounwind readnone
-
-declare <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float>) nounwind readnone
-declare <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float>) nounwind readnone
Removed: llvm/trunk/test/CodeGen/ARM/vrsqrts.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vrsqrts.ll?rev=83666&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vrsqrts.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vrsqrts.ll (removed)
@@ -1,22 +0,0 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
-
-define <2 x float> @vrsqrtsf32(<2 x float>* %A, <2 x float>* %B) nounwind {
-;CHECK: vrsqrtsf32:
-;CHECK: vrsqrts.f32
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
- %tmp3 = call <2 x float> @llvm.arm.neon.vrsqrts.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
- ret <2 x float> %tmp3
-}
-
-define <4 x float> @vrsqrtsQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
-;CHECK: vrsqrtsQf32:
-;CHECK: vrsqrts.f32
- %tmp1 = load <4 x float>* %A
- %tmp2 = load <4 x float>* %B
- %tmp3 = call <4 x float> @llvm.arm.neon.vrsqrts.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
- ret <4 x float> %tmp3
-}
-
-declare <2 x float> @llvm.arm.neon.vrsqrts.v2f32(<2 x float>, <2 x float>) nounwind readnone
-declare <4 x float> @llvm.arm.neon.vrsqrts.v4f32(<4 x float>, <4 x float>) nounwind readnone
Removed: llvm/trunk/test/CodeGen/ARM/vrsubhn.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vrsubhn.ll?rev=83666&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vrsubhn.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vrsubhn.ll (removed)
@@ -1,32 +0,0 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
-
-define <8 x i8> @vrsubhni16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: vrsubhni16:
-;CHECK: vrsubhn.i16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = call <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2)
- ret <8 x i8> %tmp3
-}
-
-define <4 x i16> @vrsubhni32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: vrsubhni32:
-;CHECK: vrsubhn.i32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %tmp3 = call <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2)
- ret <4 x i16> %tmp3
-}
-
-define <2 x i32> @vrsubhni64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK: vrsubhni64:
-;CHECK: vrsubhn.i64
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
- %tmp3 = call <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2)
- ret <2 x i32> %tmp3
-}
-
-declare <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
-declare <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
-declare <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
Removed: llvm/trunk/test/CodeGen/ARM/vset_lane.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vset_lane.ll?rev=83666&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vset_lane.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vset_lane.ll (removed)
@@ -1,58 +0,0 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
-
-define <8 x i8> @vset_lane8(<8 x i8>* %A, i8 %B) nounwind {
-;CHECK: vset_lane8:
-;CHECK: vmov.8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = insertelement <8 x i8> %tmp1, i8 %B, i32 1
- ret <8 x i8> %tmp2
-}
-
-define <4 x i16> @vset_lane16(<4 x i16>* %A, i16 %B) nounwind {
-;CHECK: vset_lane16:
-;CHECK: vmov.16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = insertelement <4 x i16> %tmp1, i16 %B, i32 1
- ret <4 x i16> %tmp2
-}
-
-define <2 x i32> @vset_lane32(<2 x i32>* %A, i32 %B) nounwind {
-;CHECK: vset_lane32:
-;CHECK: vmov.32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = insertelement <2 x i32> %tmp1, i32 %B, i32 1
- ret <2 x i32> %tmp2
-}
-
-define <16 x i8> @vsetQ_lane8(<16 x i8>* %A, i8 %B) nounwind {
-;CHECK: vsetQ_lane8:
-;CHECK: vmov.8
- %tmp1 = load <16 x i8>* %A
- %tmp2 = insertelement <16 x i8> %tmp1, i8 %B, i32 1
- ret <16 x i8> %tmp2
-}
-
-define <8 x i16> @vsetQ_lane16(<8 x i16>* %A, i16 %B) nounwind {
-;CHECK: vsetQ_lane16:
-;CHECK: vmov.16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = insertelement <8 x i16> %tmp1, i16 %B, i32 1
- ret <8 x i16> %tmp2
-}
-
-define <4 x i32> @vsetQ_lane32(<4 x i32>* %A, i32 %B) nounwind {
-;CHECK: vsetQ_lane32:
-;CHECK: vmov.32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = insertelement <4 x i32> %tmp1, i32 %B, i32 1
- ret <4 x i32> %tmp2
-}
-
-define arm_aapcs_vfpcc <2 x float> @test_vset_lanef32(float %arg0_float32_t, <2 x float> %arg1_float32x2_t) nounwind {
-;CHECK: test_vset_lanef32:
-;CHECK: fcpys
-;CHECK: fcpys
-entry:
- %0 = insertelement <2 x float> %arg1_float32x2_t, float %arg0_float32_t, i32 1 ; <<2 x float>> [#uses=1]
- ret <2 x float> %0
-}
Modified: llvm/trunk/test/CodeGen/ARM/vshift.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vshift.ll?rev=83667&r1=83666&r2=83667&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vshift.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vshift.ll Fri Oct 9 15:20:54 2009
@@ -280,6 +280,13 @@
ret <2 x i64> %tmp2
}
+; Example that requires splitting and expanding a vector shift.
+define <2 x i64> @update(<2 x i64> %val) nounwind readnone {
+entry:
+ %shr = lshr <2 x i64> %val, < i64 2, i64 2 > ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %shr
+}
+
define <8 x i8> @vashrs8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: vashrs8:
;CHECK: vneg.s8
Removed: llvm/trunk/test/CodeGen/ARM/vshift_split.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vshift_split.ll?rev=83666&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vshift_split.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vshift_split.ll (removed)
@@ -1,8 +0,0 @@
-; RUN: llc < %s -march=arm -mattr=-neon
-
-; Example that requires splitting and expanding a vector shift.
-define <2 x i64> @update(<2 x i64> %val) nounwind readnone {
-entry:
- %shr = lshr <2 x i64> %val, < i64 2, i64 2 > ; <<2 x i64>> [#uses=1]
- ret <2 x i64> %shr
-}
Modified: llvm/trunk/test/CodeGen/ARM/vshl.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vshl.ll?rev=83667&r1=83666&r2=83667&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vshl.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vshl.ll Fri Oct 9 15:20:54 2009
@@ -360,3 +360,295 @@
declare <8 x i16> @llvm.arm.neon.vshiftu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vshiftu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.arm.neon.vshiftu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i8> @vrshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vrshls8:
+;CHECK: vrshl.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vrshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vrshls16:
+;CHECK: vrshl.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vrshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vrshls32:
+;CHECK: vrshl.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <1 x i64> @vrshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vrshls64:
+;CHECK: vrshl.s64
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = load <1 x i64>* %B
+ %tmp3 = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
+ ret <1 x i64> %tmp3
+}
+
+define <8 x i8> @vrshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vrshlu8:
+;CHECK: vrshl.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vrshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vrshlu16:
+;CHECK: vrshl.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vrshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vrshlu32:
+;CHECK: vrshl.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <1 x i64> @vrshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vrshlu64:
+;CHECK: vrshl.u64
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = load <1 x i64>* %B
+ %tmp3 = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
+ ret <1 x i64> %tmp3
+}
+
+define <16 x i8> @vrshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vrshlQs8:
+;CHECK: vrshl.s8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vrshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vrshlQs16:
+;CHECK: vrshl.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vrshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vrshlQs32:
+;CHECK: vrshl.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vrshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vrshlQs64:
+;CHECK: vrshl.s64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+define <16 x i8> @vrshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vrshlQu8:
+;CHECK: vrshl.u8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vrshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vrshlQu16:
+;CHECK: vrshl.u16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vrshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vrshlQu32:
+;CHECK: vrshl.u32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vrshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vrshlQu64:
+;CHECK: vrshl.u64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+define <8 x i8> @vrshrs8(<8 x i8>* %A) nounwind {
+;CHECK: vrshrs8:
+;CHECK: vrshr.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vrshrs16(<4 x i16>* %A) nounwind {
+;CHECK: vrshrs16:
+;CHECK: vrshr.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vrshrs32(<2 x i32>* %A) nounwind {
+;CHECK: vrshrs32:
+;CHECK: vrshr.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 -32, i32 -32 >)
+ ret <2 x i32> %tmp2
+}
+
+define <1 x i64> @vrshrs64(<1 x i64>* %A) nounwind {
+;CHECK: vrshrs64:
+;CHECK: vrshr.s64
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 -64 >)
+ ret <1 x i64> %tmp2
+}
+
+define <8 x i8> @vrshru8(<8 x i8>* %A) nounwind {
+;CHECK: vrshru8:
+;CHECK: vrshr.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vrshru16(<4 x i16>* %A) nounwind {
+;CHECK: vrshru16:
+;CHECK: vrshr.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vrshru32(<2 x i32>* %A) nounwind {
+;CHECK: vrshru32:
+;CHECK: vrshr.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 -32, i32 -32 >)
+ ret <2 x i32> %tmp2
+}
+
+define <1 x i64> @vrshru64(<1 x i64>* %A) nounwind {
+;CHECK: vrshru64:
+;CHECK: vrshr.u64
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 -64 >)
+ ret <1 x i64> %tmp2
+}
+
+define <16 x i8> @vrshrQs8(<16 x i8>* %A) nounwind {
+;CHECK: vrshrQs8:
+;CHECK: vrshr.s8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
+ ret <16 x i8> %tmp2
+}
+
+define <8 x i16> @vrshrQs16(<8 x i16>* %A) nounwind {
+;CHECK: vrshrQs16:
+;CHECK: vrshr.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >)
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vrshrQs32(<4 x i32>* %A) nounwind {
+;CHECK: vrshrQs32:
+;CHECK: vrshr.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >)
+ ret <4 x i32> %tmp2
+}
+
+define <2 x i64> @vrshrQs64(<2 x i64>* %A) nounwind {
+;CHECK: vrshrQs64:
+;CHECK: vrshr.s64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 -64, i64 -64 >)
+ ret <2 x i64> %tmp2
+}
+
+define <16 x i8> @vrshrQu8(<16 x i8>* %A) nounwind {
+;CHECK: vrshrQu8:
+;CHECK: vrshr.u8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
+ ret <16 x i8> %tmp2
+}
+
+define <8 x i16> @vrshrQu16(<8 x i16>* %A) nounwind {
+;CHECK: vrshrQu16:
+;CHECK: vrshr.u16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >)
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vrshrQu32(<4 x i32>* %A) nounwind {
+;CHECK: vrshrQu32:
+;CHECK: vrshr.u32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >)
+ ret <4 x i32> %tmp2
+}
+
+define <2 x i64> @vrshrQu64(<2 x i64>* %A) nounwind {
+;CHECK: vrshrQu64:
+;CHECK: vrshr.u64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 -64, i64 -64 >)
+ ret <2 x i64> %tmp2
+}
+
+declare <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
+
+declare <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
Modified: llvm/trunk/test/CodeGen/ARM/vshrn.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vshrn.ll?rev=83667&r1=83666&r2=83667&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vshrn.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vshrn.ll Fri Oct 9 15:20:54 2009
@@ -27,3 +27,31 @@
declare <8 x i8> @llvm.arm.neon.vshiftn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vshiftn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vshiftn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i8> @vrshrns8(<8 x i16>* %A) nounwind {
+;CHECK: vrshrns8:
+;CHECK: vrshrn.i16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vrshiftn.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vrshrns16(<4 x i32>* %A) nounwind {
+;CHECK: vrshrns16:
+;CHECK: vrshrn.i32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vrshiftn.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vrshrns32(<2 x i64>* %A) nounwind {
+;CHECK: vrshrns32:
+;CHECK: vrshrn.i64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vrshiftn.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
+ ret <2 x i32> %tmp2
+}
+
+declare <8 x i8> @llvm.arm.neon.vrshiftn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vrshiftn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vrshiftn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
Modified: llvm/trunk/test/CodeGen/ARM/vsub.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vsub.ll?rev=83667&r1=83666&r2=83667&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vsub.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vsub.ll Fri Oct 9 15:20:54 2009
@@ -89,3 +89,189 @@
%tmp3 = sub <4 x float> %tmp1, %tmp2
ret <4 x float> %tmp3
}
+
+define <8 x i8> @vsubhni16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vsubhni16:
+;CHECK: vsubhn.i16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vsubhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vsubhni32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vsubhni32:
+;CHECK: vsubhn.i32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vsubhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vsubhni64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vsubhni64:
+;CHECK: vsubhn.i64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vsubhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+declare <8 x i8> @llvm.arm.neon.vsubhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vsubhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vsubhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i8> @vrsubhni16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vrsubhni16:
+;CHECK: vrsubhn.i16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vrsubhni32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vrsubhni32:
+;CHECK: vrsubhn.i32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vrsubhni64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vrsubhni64:
+;CHECK: vrsubhn.i64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+declare <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @vsubls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vsubls8:
+;CHECK: vsubl.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vsubls.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vsubls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vsubls16:
+;CHECK: vsubl.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vsubls.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vsubls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vsubls32:
+;CHECK: vsubl.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vsubls.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+define <8 x i16> @vsublu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vsublu8:
+;CHECK: vsubl.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vsublu.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vsublu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vsublu16:
+;CHECK: vsubl.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vsublu.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vsublu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vsublu32:
+;CHECK: vsubl.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vsublu.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+declare <8 x i16> @llvm.arm.neon.vsubls.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vsubls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vsubls.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vsublu.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vsublu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vsublu.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
+
+define <8 x i16> @vsubws8(<8 x i16>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vsubws8:
+;CHECK: vsubw.s8
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vsubws.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vsubws16(<4 x i32>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vsubws16:
+;CHECK: vsubw.s16
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vsubws.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vsubws32(<2 x i64>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vsubws32:
+;CHECK: vsubw.s32
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vsubws.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+define <8 x i16> @vsubwu8(<8 x i16>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vsubwu8:
+;CHECK: vsubw.u8
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vsubwu.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vsubwu16(<4 x i32>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vsubwu16:
+;CHECK: vsubw.u16
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vsubwu.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vsubwu32(<2 x i64>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vsubwu32:
+;CHECK: vsubw.u32
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vsubwu.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+declare <8 x i16> @llvm.arm.neon.vsubws.v8i16(<8 x i16>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vsubws.v4i32(<4 x i32>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vsubws.v2i64(<2 x i64>, <2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vsubwu.v8i16(<8 x i16>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vsubwu.v4i32(<4 x i32>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vsubwu.v2i64(<2 x i64>, <2 x i32>) nounwind readnone
Removed: llvm/trunk/test/CodeGen/ARM/vsubhn.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vsubhn.ll?rev=83666&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vsubhn.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vsubhn.ll (removed)
@@ -1,32 +0,0 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
-
-define <8 x i8> @vsubhni16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: vsubhni16:
-;CHECK: vsubhn.i16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = call <8 x i8> @llvm.arm.neon.vsubhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2)
- ret <8 x i8> %tmp3
-}
-
-define <4 x i16> @vsubhni32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: vsubhni32:
-;CHECK: vsubhn.i32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %tmp3 = call <4 x i16> @llvm.arm.neon.vsubhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2)
- ret <4 x i16> %tmp3
-}
-
-define <2 x i32> @vsubhni64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK: vsubhni64:
-;CHECK: vsubhn.i64
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
- %tmp3 = call <2 x i32> @llvm.arm.neon.vsubhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2)
- ret <2 x i32> %tmp3
-}
-
-declare <8 x i8> @llvm.arm.neon.vsubhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
-declare <4 x i16> @llvm.arm.neon.vsubhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
-declare <2 x i32> @llvm.arm.neon.vsubhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
Removed: llvm/trunk/test/CodeGen/ARM/vsubl.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vsubl.ll?rev=83666&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vsubl.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vsubl.ll (removed)
@@ -1,63 +0,0 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
-
-define <8 x i16> @vsubls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vsubls8:
-;CHECK: vsubl.s8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = call <8 x i16> @llvm.arm.neon.vsubls.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
- ret <8 x i16> %tmp3
-}
-
-define <4 x i32> @vsubls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vsubls16:
-;CHECK: vsubl.s16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i32> @llvm.arm.neon.vsubls.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
- ret <4 x i32> %tmp3
-}
-
-define <2 x i64> @vsubls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vsubls32:
-;CHECK: vsubl.s32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i64> @llvm.arm.neon.vsubls.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
- ret <2 x i64> %tmp3
-}
-
-define <8 x i16> @vsublu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vsublu8:
-;CHECK: vsubl.u8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = call <8 x i16> @llvm.arm.neon.vsublu.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
- ret <8 x i16> %tmp3
-}
-
-define <4 x i32> @vsublu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vsublu16:
-;CHECK: vsubl.u16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i32> @llvm.arm.neon.vsublu.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
- ret <4 x i32> %tmp3
-}
-
-define <2 x i64> @vsublu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vsublu32:
-;CHECK: vsubl.u32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i64> @llvm.arm.neon.vsublu.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
- ret <2 x i64> %tmp3
-}
-
-declare <8 x i16> @llvm.arm.neon.vsubls.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vsubls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i64> @llvm.arm.neon.vsubls.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
-
-declare <8 x i16> @llvm.arm.neon.vsublu.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vsublu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i64> @llvm.arm.neon.vsublu.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
Removed: llvm/trunk/test/CodeGen/ARM/vsubw.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vsubw.ll?rev=83666&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vsubw.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vsubw.ll (removed)
@@ -1,63 +0,0 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
-
-define <8 x i16> @vsubws8(<8 x i16>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vsubws8:
-;CHECK: vsubw.s8
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = call <8 x i16> @llvm.arm.neon.vsubws.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2)
- ret <8 x i16> %tmp3
-}
-
-define <4 x i32> @vsubws16(<4 x i32>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vsubws16:
-;CHECK: vsubw.s16
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i32> @llvm.arm.neon.vsubws.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2)
- ret <4 x i32> %tmp3
-}
-
-define <2 x i64> @vsubws32(<2 x i64>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vsubws32:
-;CHECK: vsubw.s32
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i64> @llvm.arm.neon.vsubws.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2)
- ret <2 x i64> %tmp3
-}
-
-define <8 x i16> @vsubwu8(<8 x i16>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vsubwu8:
-;CHECK: vsubw.u8
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = call <8 x i16> @llvm.arm.neon.vsubwu.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2)
- ret <8 x i16> %tmp3
-}
-
-define <4 x i32> @vsubwu16(<4 x i32>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vsubwu16:
-;CHECK: vsubw.u16
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i32> @llvm.arm.neon.vsubwu.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2)
- ret <4 x i32> %tmp3
-}
-
-define <2 x i64> @vsubwu32(<2 x i64>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vsubwu32:
-;CHECK: vsubw.u32
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i64> @llvm.arm.neon.vsubwu.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2)
- ret <2 x i64> %tmp3
-}
-
-declare <8 x i16> @llvm.arm.neon.vsubws.v8i16(<8 x i16>, <8 x i8>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vsubws.v4i32(<4 x i32>, <4 x i16>) nounwind readnone
-declare <2 x i64> @llvm.arm.neon.vsubws.v2i64(<2 x i64>, <2 x i32>) nounwind readnone
-
-declare <8 x i16> @llvm.arm.neon.vsubwu.v8i16(<8 x i16>, <8 x i8>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vsubwu.v4i32(<4 x i32>, <4 x i16>) nounwind readnone
-declare <2 x i64> @llvm.arm.neon.vsubwu.v2i64(<2 x i64>, <2 x i32>) nounwind readnone
Removed: llvm/trunk/test/CodeGen/ARM/vtst.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vtst.ll?rev=83666&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vtst.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vtst.ll (removed)
@@ -1,67 +0,0 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
-
-define <8 x i8> @vtsti8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK: vtsti8:
-;CHECK: vtst.i8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = and <8 x i8> %tmp1, %tmp2
- %tmp4 = icmp ne <8 x i8> %tmp3, zeroinitializer
- %tmp5 = sext <8 x i1> %tmp4 to <8 x i8>
- ret <8 x i8> %tmp5
-}
-
-define <4 x i16> @vtsti16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK: vtsti16:
-;CHECK: vtst.i16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = and <4 x i16> %tmp1, %tmp2
- %tmp4 = icmp ne <4 x i16> %tmp3, zeroinitializer
- %tmp5 = sext <4 x i1> %tmp4 to <4 x i16>
- ret <4 x i16> %tmp5
-}
-
-define <2 x i32> @vtsti32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK: vtsti32:
-;CHECK: vtst.i32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = and <2 x i32> %tmp1, %tmp2
- %tmp4 = icmp ne <2 x i32> %tmp3, zeroinitializer
- %tmp5 = sext <2 x i1> %tmp4 to <2 x i32>
- ret <2 x i32> %tmp5
-}
-
-define <16 x i8> @vtstQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK: vtstQi8:
-;CHECK: vtst.i8
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
- %tmp3 = and <16 x i8> %tmp1, %tmp2
- %tmp4 = icmp ne <16 x i8> %tmp3, zeroinitializer
- %tmp5 = sext <16 x i1> %tmp4 to <16 x i8>
- ret <16 x i8> %tmp5
-}
-
-define <8 x i16> @vtstQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK: vtstQi16:
-;CHECK: vtst.i16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = and <8 x i16> %tmp1, %tmp2
- %tmp4 = icmp ne <8 x i16> %tmp3, zeroinitializer
- %tmp5 = sext <8 x i1> %tmp4 to <8 x i16>
- ret <8 x i16> %tmp5
-}
-
-define <4 x i32> @vtstQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK: vtstQi32:
-;CHECK: vtst.i32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %tmp3 = and <4 x i32> %tmp1, %tmp2
- %tmp4 = icmp ne <4 x i32> %tmp3, zeroinitializer
- %tmp5 = sext <4 x i1> %tmp4 to <4 x i32>
- ret <4 x i32> %tmp5
-}
More information about the llvm-commits
mailing list