[llvm-commits] [llvm] r117047 - in /llvm/trunk: lib/Target/ARM/ARMInstrNEON.td test/MC/ARM/neon-fp-encoding.ll
Bob Wilson
bob.wilson at apple.com
Thu Oct 21 12:23:24 PDT 2010
Can you try to find some way to factor this so that we don't have so many operand encoding lines? If every one of the NEON instruction classes has its own copy of these encodings, the file is going to be huge and even less manageable than it is now.
On Oct 21, 2010, at 11:55 AM, Owen Anderson wrote:
> Author: resistor
> Date: Thu Oct 21 13:55:04 2010
> New Revision: 117047
>
> URL: http://llvm.org/viewvc/llvm-project?rev=117047&view=rev
> Log:
> Add correct NEON encodings for vhadd and vrhadd.
>
> Modified:
> llvm/trunk/lib/Target/ARM/ARMInstrNEON.td
> llvm/trunk/test/MC/ARM/neon-fp-encoding.ll
>
> Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrNEON.td?rev=117047&r1=117046&r2=117047&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/ARM/ARMInstrNEON.td (original)
> +++ llvm/trunk/lib/Target/ARM/ARMInstrNEON.td Thu Oct 21 13:55:04 2010
> @@ -1289,10 +1289,22 @@
> Format f, InstrItinClass itin, string OpcodeStr, string Dt,
> ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable>
> : N3V<op24, op23, op21_20, op11_8, 0, op4,
> - (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), f, itin,
> - OpcodeStr, Dt, "$dst, $src1, $src2", "",
> - [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src1), (OpTy DPR:$src2))))]> {
> + (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), f, itin,
> + OpcodeStr, Dt, "$Dd, $Dn, $Dm", "",
> + [(set DPR:$Dd, (ResTy (IntOp (OpTy DPR:$Dn), (OpTy DPR:$Dm))))]> {
> let isCommutable = Commutable;
> +
> + // Instruction operands.
> + bits<5> Dd;
> + bits<5> Dn;
> + bits<5> Dm;
> +
> + let Inst{15-12} = Dd{3-0};
> + let Inst{22} = Dd{4};
> + let Inst{19-16} = Dn{3-0};
> + let Inst{7} = Dn{4};
> + let Inst{3-0} = Dm{3-0};
> + let Inst{5} = Dm{4};
> }
> class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
> string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp>
> @@ -1320,10 +1332,25 @@
> Format f, InstrItinClass itin, string OpcodeStr, string Dt,
> ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable>
> : N3V<op24, op23, op21_20, op11_8, 1, op4,
> - (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), f, itin,
> - OpcodeStr, Dt, "$dst, $src1, $src2", "",
> - [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src1), (OpTy QPR:$src2))))]> {
> + (outs QPR:$Qd), (ins QPR:$Qn, QPR:$Qm), f, itin,
> + OpcodeStr, Dt, "$Qd, $Qn, $Qm", "",
> + [(set QPR:$Qd, (ResTy (IntOp (OpTy QPR:$Qn), (OpTy QPR:$Qm))))]> {
> let isCommutable = Commutable;
> +
> + // Instruction operands.
> + bits<4> Qd;
> + bits<4> Qn;
> + bits<4> Qm;
> +
> + let Inst{15-13} = Qd{2-0};
> + let Inst{22} = Qd{3};
> + let Inst{12} = 0;
> + let Inst{19-17} = Qn{2-0};
> + let Inst{7} = Qn{3};
> + let Inst{16} = 0;
> + let Inst{3-1} = Qm{2-0};
> + let Inst{5} = Qm{3};
> + let Inst{0} = 0;
> }
> class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
> string OpcodeStr, string Dt,
>
> Modified: llvm/trunk/test/MC/ARM/neon-fp-encoding.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/ARM/neon-fp-encoding.ll?rev=117047&r1=117046&r2=117047&view=diff
> ==============================================================================
> --- llvm/trunk/test/MC/ARM/neon-fp-encoding.ll (original)
> +++ llvm/trunk/test/MC/ARM/neon-fp-encoding.ll Thu Oct 21 13:55:04 2010
> @@ -179,3 +179,251 @@
> %tmp4 = add <2 x i64> %tmp1, %tmp3
> ret <2 x i64> %tmp4
> }
> +
> +declare <8 x i8> @llvm.arm.neon.vhadds.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
> +declare <4 x i16> @llvm.arm.neon.vhadds.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
> +declare <2 x i32> @llvm.arm.neon.vhadds.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
> +
> +; CHECK: vhadds_8xi8
> +define <8 x i8> @vhadds_8xi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
> + %tmp1 = load <8 x i8>* %A
> + %tmp2 = load <8 x i8>* %B
> +; CHECK: vhadd.s8 d16, d16, d17 @ encoding: [0xa1,0x00,0x40,0xf2]
> + %tmp3 = call <8 x i8> @llvm.arm.neon.vhadds.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
> + ret <8 x i8> %tmp3
> +}
> +
> +; CHECK: vhadds_4xi16
> +define <4 x i16> @vhadds_4xi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
> + %tmp1 = load <4 x i16>* %A
> + %tmp2 = load <4 x i16>* %B
> +; CHECK: vhadd.s16 d16, d16, d17 @ encoding: [0xa1,0x00,0x50,0xf2]
> + %tmp3 = call <4 x i16> @llvm.arm.neon.vhadds.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
> + ret <4 x i16> %tmp3
> +}
> +
> +; CHECK: vhadds_2xi32
> +define <2 x i32> @vhadds_2xi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
> + %tmp1 = load <2 x i32>* %A
> + %tmp2 = load <2 x i32>* %B
> +; CHECK: vhadd.s32 d16, d16, d17 @ encoding: [0xa1,0x00,0x60,0xf2]
> + %tmp3 = call <2 x i32> @llvm.arm.neon.vhadds.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
> + ret <2 x i32> %tmp3
> +}
> +
> +declare <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
> +declare <4 x i16> @llvm.arm.neon.vhaddu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
> +declare <2 x i32> @llvm.arm.neon.vhaddu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
> +
> +; CHECK: vhaddu_8xi8
> +define <8 x i8> @vhaddu_8xi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
> + %tmp1 = load <8 x i8>* %A
> + %tmp2 = load <8 x i8>* %B
> +; CHECK: vhadd.u8 d16, d16, d17 @ encoding: [0xa1,0x00,0x40,0xf3]
> + %tmp3 = call <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
> + ret <8 x i8> %tmp3
> +}
> +
> +; CHECK: vhaddu_4xi16
> +define <4 x i16> @vhaddu_4xi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
> + %tmp1 = load <4 x i16>* %A
> + %tmp2 = load <4 x i16>* %B
> +; CHECK: vhadd.u16 d16, d16, d17 @ encoding: [0xa1,0x00,0x50,0xf3]
> + %tmp3 = call <4 x i16> @llvm.arm.neon.vhaddu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
> + ret <4 x i16> %tmp3
> +}
> +
> +; CHECK: vhaddu_2xi32
> +define <2 x i32> @vhaddu_2xi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
> + %tmp1 = load <2 x i32>* %A
> + %tmp2 = load <2 x i32>* %B
> +; CHECK: vhadd.u32 d16, d16, d17 @ encoding: [0xa1,0x00,0x60,0xf3]
> + %tmp3 = call <2 x i32> @llvm.arm.neon.vhaddu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
> + ret <2 x i32> %tmp3
> +}
> +
> +declare <16 x i8> @llvm.arm.neon.vhadds.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
> +declare <8 x i16> @llvm.arm.neon.vhadds.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
> +declare <4 x i32> @llvm.arm.neon.vhadds.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
> +
> +; CHECK: vhadds_16xi8
> +define <16 x i8> @vhadds_16xi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
> + %tmp1 = load <16 x i8>* %A
> + %tmp2 = load <16 x i8>* %B
> +; CHECK: vhadd.s8 q8, q8, q9 @ encoding: [0xe2,0x00,0x40,0xf2]
> + %tmp3 = call <16 x i8> @llvm.arm.neon.vhadds.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
> + ret <16 x i8> %tmp3
> +}
> +
> +; CHECK: vhadds_8xi16
> +define <8 x i16> @vhadds_8xi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
> + %tmp1 = load <8 x i16>* %A
> + %tmp2 = load <8 x i16>* %B
> +; CHECK: vhadd.s16 q8, q8, q9 @ encoding: [0xe2,0x00,0x50,0xf2]
> + %tmp3 = call <8 x i16> @llvm.arm.neon.vhadds.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
> + ret <8 x i16> %tmp3
> +}
> +
> +; CHECK: vhadds_4xi32
> +define <4 x i32> @vhadds_4xi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
> + %tmp1 = load <4 x i32>* %A
> + %tmp2 = load <4 x i32>* %B
> +; CHECK: vhadd.s32 q8, q8, q9 @ encoding: [0xe2,0x00,0x60,0xf2]
> + %tmp3 = call <4 x i32> @llvm.arm.neon.vhadds.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
> + ret <4 x i32> %tmp3
> +}
> +
> +declare <16 x i8> @llvm.arm.neon.vhaddu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
> +declare <8 x i16> @llvm.arm.neon.vhaddu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
> +declare <4 x i32> @llvm.arm.neon.vhaddu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
> +
> +; CHECK: vhaddu_16xi8
> +define <16 x i8> @vhaddu_16xi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
> + %tmp1 = load <16 x i8>* %A
> + %tmp2 = load <16 x i8>* %B
> +; CHECK: vhadd.u8 q8, q8, q9 @ encoding: [0xe2,0x00,0x40,0xf3]
> + %tmp3 = call <16 x i8> @llvm.arm.neon.vhaddu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
> + ret <16 x i8> %tmp3
> +}
> +
> +; CHECK: vhaddu_8xi16
> +define <8 x i16> @vhaddu_8xi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
> + %tmp1 = load <8 x i16>* %A
> + %tmp2 = load <8 x i16>* %B
> +; CHECK: vhadd.u16 q8, q8, q9 @ encoding: [0xe2,0x00,0x50,0xf3]
> + %tmp3 = call <8 x i16> @llvm.arm.neon.vhaddu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
> + ret <8 x i16> %tmp3
> +}
> +
> +; CHECK: vhaddu_4xi32
> +define <4 x i32> @vhaddu_4xi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
> + %tmp1 = load <4 x i32>* %A
> + %tmp2 = load <4 x i32>* %B
> +; CHECK: vhadd.u32 q8, q8, q9 @ encoding: [0xe2,0x00,0x60,0xf3]
> + %tmp3 = call <4 x i32> @llvm.arm.neon.vhaddu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
> + ret <4 x i32> %tmp3
> +}
> +
> +declare <8 x i8> @llvm.arm.neon.vrhadds.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
> +declare <4 x i16> @llvm.arm.neon.vrhadds.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
> +declare <2 x i32> @llvm.arm.neon.vrhadds.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
> +
> +; CHECK: vrhadds_8xi8
> +define <8 x i8> @vrhadds_8xi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
> + %tmp1 = load <8 x i8>* %A
> + %tmp2 = load <8 x i8>* %B
> +; CHECK: vrhadd.s8 d16, d16, d17 @ encoding: [0xa1,0x01,0x40,0xf2]
> + %tmp3 = call <8 x i8> @llvm.arm.neon.vrhadds.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
> + ret <8 x i8> %tmp3
> +}
> +
> +; CHECK: vrhadds_4xi16
> +define <4 x i16> @vrhadds_4xi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
> + %tmp1 = load <4 x i16>* %A
> + %tmp2 = load <4 x i16>* %B
> +; CHECK: vrhadd.s16 d16, d16, d17 @ encoding: [0xa1,0x01,0x50,0xf2]
> + %tmp3 = call <4 x i16> @llvm.arm.neon.vrhadds.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
> + ret <4 x i16> %tmp3
> +}
> +
> +; CHECK: vrhadds_2xi32
> +define <2 x i32> @vrhadds_2xi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
> + %tmp1 = load <2 x i32>* %A
> + %tmp2 = load <2 x i32>* %B
> +; CHECK: vrhadd.s32 d16, d16, d17 @ encoding: [0xa1,0x01,0x60,0xf2]
> + %tmp3 = call <2 x i32> @llvm.arm.neon.vrhadds.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
> + ret <2 x i32> %tmp3
> +}
> +
> +declare <8 x i8> @llvm.arm.neon.vrhaddu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
> +declare <4 x i16> @llvm.arm.neon.vrhaddu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
> +declare <2 x i32> @llvm.arm.neon.vrhaddu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
> +
> +; CHECK: vrhaddu_8xi8
> +define <8 x i8> @vrhaddu_8xi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
> + %tmp1 = load <8 x i8>* %A
> + %tmp2 = load <8 x i8>* %B
> +; CHECK: vrhadd.u8 d16, d16, d17 @ encoding: [0xa1,0x01,0x40,0xf3]
> + %tmp3 = call <8 x i8> @llvm.arm.neon.vrhaddu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
> + ret <8 x i8> %tmp3
> +}
> +
> +; CHECK: vrhaddu_4xi16
> +define <4 x i16> @vrhaddu_4xi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
> + %tmp1 = load <4 x i16>* %A
> + %tmp2 = load <4 x i16>* %B
> +; CHECK: vrhadd.u16 d16, d16, d17 @ encoding: [0xa1,0x01,0x50,0xf3]
> + %tmp3 = call <4 x i16> @llvm.arm.neon.vrhaddu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
> + ret <4 x i16> %tmp3
> +}
> +
> +; CHECK: vrhaddu_2xi32
> +define <2 x i32> @vrhaddu_2xi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
> + %tmp1 = load <2 x i32>* %A
> + %tmp2 = load <2 x i32>* %B
> +; CHECK: vrhadd.u32 d16, d16, d17 @ encoding: [0xa1,0x01,0x60,0xf3]
> + %tmp3 = call <2 x i32> @llvm.arm.neon.vrhaddu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
> + ret <2 x i32> %tmp3
> +}
> +
> +declare <16 x i8> @llvm.arm.neon.vrhadds.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
> +declare <8 x i16> @llvm.arm.neon.vrhadds.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
> +declare <4 x i32> @llvm.arm.neon.vrhadds.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
> +
> +; CHECK: vrhadds_16xi8
> +define <16 x i8> @vrhadds_16xi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
> + %tmp1 = load <16 x i8>* %A
> + %tmp2 = load <16 x i8>* %B
> +; CHECK: vrhadd.s8 q8, q8, q9 @ encoding: [0xe2,0x01,0x40,0xf2]
> + %tmp3 = call <16 x i8> @llvm.arm.neon.vrhadds.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
> + ret <16 x i8> %tmp3
> +}
> +
> +; CHECK: vrhadds_8xi16
> +define <8 x i16> @vrhadds_8xi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
> + %tmp1 = load <8 x i16>* %A
> + %tmp2 = load <8 x i16>* %B
> +; CHECK: vrhadd.s16 q8, q8, q9 @ encoding: [0xe2,0x01,0x50,0xf2]
> + %tmp3 = call <8 x i16> @llvm.arm.neon.vrhadds.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
> + ret <8 x i16> %tmp3
> +}
> +
> +; CHECK: vrhadds_4xi32
> +define <4 x i32> @vrhadds_4xi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
> + %tmp1 = load <4 x i32>* %A
> + %tmp2 = load <4 x i32>* %B
> +; CHECK: vrhadd.s32 q8, q8, q9 @ encoding: [0xe2,0x01,0x60,0xf2]
> + %tmp3 = call <4 x i32> @llvm.arm.neon.vrhadds.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
> + ret <4 x i32> %tmp3
> +}
> +
> +declare <16 x i8> @llvm.arm.neon.vrhaddu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
> +declare <8 x i16> @llvm.arm.neon.vrhaddu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
> +declare <4 x i32> @llvm.arm.neon.vrhaddu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
> +
> +; CHECK: vrhaddu_16xi8
> +define <16 x i8> @vrhaddu_16xi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
> + %tmp1 = load <16 x i8>* %A
> + %tmp2 = load <16 x i8>* %B
> +; CHECK: vrhadd.u8 q8, q8, q9 @ encoding: [0xe2,0x01,0x40,0xf3]
> + %tmp3 = call <16 x i8> @llvm.arm.neon.vrhaddu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
> + ret <16 x i8> %tmp3
> +}
> +
> +; CHECK: vrhaddu_8xi16
> +define <8 x i16> @vrhaddu_8xi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
> + %tmp1 = load <8 x i16>* %A
> + %tmp2 = load <8 x i16>* %B
> +; CHECK: vrhadd.u16 q8, q8, q9 @ encoding: [0xe2,0x01,0x50,0xf3]
> + %tmp3 = call <8 x i16> @llvm.arm.neon.vrhaddu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
> + ret <8 x i16> %tmp3
> +}
> +
> +; CHECK: vrhaddu_4xi32
> +define <4 x i32> @vrhaddu_4xi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
> + %tmp1 = load <4 x i32>* %A
> + %tmp2 = load <4 x i32>* %B
> +; CHECK: vrhadd.u32 q8, q8, q9 @ encoding: [0xe2,0x01,0x60,0xf3]
> + %tmp3 = call <4 x i32> @llvm.arm.neon.vrhaddu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
> + ret <4 x i32> %tmp3
> +}
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
More information about the llvm-commits
mailing list