[llvm-commits] [llvm] r118453 - in /llvm/trunk: lib/Target/ARM/ARMISelLowering.cpp lib/Target/ARM/ARMISelLowering.h lib/Target/ARM/ARMInstrNEON.td test/CodeGen/ARM/vceq.ll test/CodeGen/ARM/vcge.ll test/CodeGen/ARM/vcgt.ll test/MC/ARM/neon-cmp-encoding.s
Owen Anderson
resistor at mac.com
Mon Nov 8 15:21:22 PST 2010
Author: resistor
Date: Mon Nov 8 17:21:22 2010
New Revision: 118453
URL: http://llvm.org/viewvc/llvm-project?rev=118453&view=rev
Log:
Add support for ARM's specialized vector-compare-against-zero instructions.
Modified:
llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
llvm/trunk/lib/Target/ARM/ARMISelLowering.h
llvm/trunk/lib/Target/ARM/ARMInstrNEON.td
llvm/trunk/test/CodeGen/ARM/vceq.ll
llvm/trunk/test/CodeGen/ARM/vcge.ll
llvm/trunk/test/CodeGen/ARM/vcgt.ll
llvm/trunk/test/MC/ARM/neon-cmp-encoding.s
Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=118453&r1=118452&r2=118453&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Mon Nov 8 17:21:22 2010
@@ -3074,7 +3074,38 @@
if (Swap)
std::swap(Op0, Op1);
- SDValue Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
+ // If one of the operands is a constant vector zero, attempt to fold the
+ // comparison to a specialized compare-against-zero form.
+ SDValue SingleOp;
+ if (ISD::isBuildVectorAllZeros(Op1.getNode()))
+ SingleOp = Op0;
+ else if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
+ if (Opc == ARMISD::VCGE)
+ Opc = ARMISD::VCLEZ;
+ else if (Opc == ARMISD::VCGT)
+ Opc = ARMISD::VCLTZ;
+ SingleOp = Op1;
+ }
+
+ SDValue Result;
+ if (SingleOp.getNode()) {
+ switch (Opc) {
+ case ARMISD::VCEQ:
+ Result = DAG.getNode(ARMISD::VCEQZ, dl, VT, SingleOp); break;
+ case ARMISD::VCGE:
+ Result = DAG.getNode(ARMISD::VCGEZ, dl, VT, SingleOp); break;
+ case ARMISD::VCLEZ:
+ Result = DAG.getNode(ARMISD::VCLEZ, dl, VT, SingleOp); break;
+ case ARMISD::VCGT:
+ Result = DAG.getNode(ARMISD::VCGTZ, dl, VT, SingleOp); break;
+ case ARMISD::VCLTZ:
+ Result = DAG.getNode(ARMISD::VCLTZ, dl, VT, SingleOp); break;
+ default:
+ Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
+ }
+ } else {
+ Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
+ }
if (Invert)
Result = DAG.getNOT(dl, Result, VT);
Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.h?rev=118453&r1=118452&r2=118453&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.h (original)
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.h Mon Nov 8 17:21:22 2010
@@ -87,9 +87,14 @@
PRELOAD, // Preload
VCEQ, // Vector compare equal.
+ VCEQZ, // Vector compare equal to zero.
VCGE, // Vector compare greater than or equal.
+ VCGEZ, // Vector compare greater than or equal to zero.
+ VCLEZ, // Vector compare less than or equal to zero.
VCGEU, // Vector compare unsigned greater than or equal.
VCGT, // Vector compare greater than.
+ VCGTZ, // Vector compare greater than zero.
+ VCLTZ, // Vector compare less than zero.
VCGTU, // Vector compare unsigned greater than.
VTST, // Vector test bits.
Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrNEON.td?rev=118453&r1=118452&r2=118453&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMInstrNEON.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMInstrNEON.td Mon Nov 8 17:21:22 2010
@@ -16,11 +16,17 @@
//===----------------------------------------------------------------------===//
def SDTARMVCMP : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>;
+def SDTARMVCMPZ : SDTypeProfile<1, 1, []>;
def NEONvceq : SDNode<"ARMISD::VCEQ", SDTARMVCMP>;
+def NEONvceqz : SDNode<"ARMISD::VCEQZ", SDTARMVCMPZ>;
def NEONvcge : SDNode<"ARMISD::VCGE", SDTARMVCMP>;
+def NEONvcgez : SDNode<"ARMISD::VCGEZ", SDTARMVCMPZ>;
+def NEONvclez : SDNode<"ARMISD::VCLEZ", SDTARMVCMPZ>;
def NEONvcgeu : SDNode<"ARMISD::VCGEU", SDTARMVCMP>;
def NEONvcgt : SDNode<"ARMISD::VCGT", SDTARMVCMP>;
+def NEONvcgtz : SDNode<"ARMISD::VCGTZ", SDTARMVCMPZ>;
+def NEONvcltz : SDNode<"ARMISD::VCLTZ", SDTARMVCMPZ>;
def NEONvcgtu : SDNode<"ARMISD::VCGTU", SDTARMVCMP>;
def NEONvtst : SDNode<"ARMISD::VTST", SDTARMVCMP>;
@@ -2150,36 +2156,44 @@
// First with only element sizes of 8, 16 and 32 bits:
multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
bits<5> op11_7, bit op4, string opc, string Dt,
- string asm> {
+ string asm, SDNode OpNode> {
// 64-bit vector types.
def v8i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 0, op4,
(outs DPR:$dst), (ins DPR:$src), NoItinerary,
- opc, !strconcat(Dt, "8"), asm, "", []>;
+ opc, !strconcat(Dt, "8"), asm, "",
+ [(set DPR:$dst, (v8i8 (OpNode (v8i8 DPR:$src))))]>;
def v4i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
(outs DPR:$dst), (ins DPR:$src), NoItinerary,
- opc, !strconcat(Dt, "16"), asm, "", []>;
+ opc, !strconcat(Dt, "16"), asm, "",
+ [(set DPR:$dst, (v4i16 (OpNode (v4i16 DPR:$src))))]>;
def v2i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
(outs DPR:$dst), (ins DPR:$src), NoItinerary,
- opc, !strconcat(Dt, "32"), asm, "", []>;
+ opc, !strconcat(Dt, "32"), asm, "",
+ [(set DPR:$dst, (v2i32 (OpNode (v2i32 DPR:$src))))]>;
def v2f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
(outs DPR:$dst), (ins DPR:$src), NoItinerary,
- opc, "f32", asm, "", []> {
+ opc, "f32", asm, "",
+ [(set DPR:$dst, (v2f32 (OpNode (v2f32 DPR:$src))))]> {
let Inst{10} = 1; // overwrite F = 1
}
// 128-bit vector types.
def v16i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 1, op4,
(outs QPR:$dst), (ins QPR:$src), NoItinerary,
- opc, !strconcat(Dt, "8"), asm, "", []>;
+ opc, !strconcat(Dt, "8"), asm, "",
+ [(set QPR:$dst, (v16i8 (OpNode (v16i8 QPR:$src))))]>;
def v8i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
(outs QPR:$dst), (ins QPR:$src), NoItinerary,
- opc, !strconcat(Dt, "16"), asm, "", []>;
+ opc, !strconcat(Dt, "16"), asm, "",
+ [(set QPR:$dst, (v8i16 (OpNode (v8i16 QPR:$src))))]>;
def v4i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
(outs QPR:$dst), (ins QPR:$src), NoItinerary,
- opc, !strconcat(Dt, "32"), asm, "", []>;
+ opc, !strconcat(Dt, "32"), asm, "",
+ [(set QPR:$dst, (v4i32 (OpNode (v4i32 QPR:$src))))]>;
def v4f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
(outs QPR:$dst), (ins QPR:$src), NoItinerary,
- opc, "f32", asm, "", []> {
+ opc, "f32", asm, "",
+ [(set QPR:$dst, (v4f32 (OpNode (v4f32 QPR:$src))))]> {
let Inst{10} = 1; // overwrite F = 1
}
}
@@ -3220,9 +3234,9 @@
NEONvceq, 1>;
def VCEQfq : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32,
NEONvceq, 1>;
-// For disassembly only.
+
defm VCEQz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i",
- "$dst, $src, #0">;
+ "$dst, $src, #0", NEONvceqz>;
// VCGE : Vector Compare Greater Than or Equal
defm VCGEs : N3V_QHS<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
@@ -3233,14 +3247,11 @@
NEONvcge, 0>;
def VCGEfq : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32,
NEONvcge, 0>;
-// For disassembly only.
-// FIXME: This instruction's encoding MAY NOT BE correct.
+
defm VCGEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s",
- "$dst, $src, #0">;
-// For disassembly only.
-// FIXME: This instruction's encoding MAY NOT BE correct.
+ "$dst, $src, #0", NEONvcgez>;
defm VCLEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s",
- "$dst, $src, #0">;
+ "$dst, $src, #0", NEONvclez>;
// VCGT : Vector Compare Greater Than
defm VCGTs : N3V_QHS<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
@@ -3251,14 +3262,11 @@
NEONvcgt, 0>;
def VCGTfq : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32,
NEONvcgt, 0>;
-// For disassembly only.
-// FIXME: This instruction's encoding MAY NOT BE correct.
+
defm VCGTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s",
- "$dst, $src, #0">;
-// For disassembly only.
-// FIXME: This instruction's encoding MAY NOT BE correct.
+ "$dst, $src, #0", NEONvcgtz>;
defm VCLTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s",
- "$dst, $src, #0">;
+ "$dst, $src, #0", NEONvcltz>;
// VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE)
def VACGEd : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
Modified: llvm/trunk/test/CodeGen/ARM/vceq.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vceq.ll?rev=118453&r1=118452&r2=118453&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vceq.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vceq.ll Mon Nov 8 17:21:22 2010
@@ -79,3 +79,14 @@
%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
}
+
+define <8 x i8> @vceqi8Z(<8 x i8>* %A) nounwind {
+;CHECK: vceqi8Z:
+;CHECK-NOT: vmov
+;CHECK-NOT: vmvn
+;CHECK: vceq.i8
+ %tmp1 = load <8 x i8>* %A
+ %tmp3 = icmp eq <8 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
+ ret <8 x i8> %tmp4
+}
Modified: llvm/trunk/test/CodeGen/ARM/vcge.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vcge.ll?rev=118453&r1=118452&r2=118453&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vcge.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vcge.ll Mon Nov 8 17:21:22 2010
@@ -160,3 +160,25 @@
declare <2 x i32> @llvm.arm.neon.vacged(<2 x float>, <2 x float>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vacgeq(<4 x float>, <4 x float>) nounwind readnone
+
+define <8 x i8> @vcgei8Z(<8 x i8>* %A) nounwind {
+;CHECK: vcgei8Z:
+;CHECK-NOT: vmov
+;CHECK-NOT: vmvn
+;CHECK: vcge.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp3 = icmp sge <8 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
+ ret <8 x i8> %tmp4
+}
+
+define <8 x i8> @vclei8Z(<8 x i8>* %A) nounwind {
+;CHECK: vclei8Z:
+;CHECK-NOT: vmov
+;CHECK-NOT: vmvn
+;CHECK: vcle.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp3 = icmp sle <8 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
+ ret <8 x i8> %tmp4
+}
Modified: llvm/trunk/test/CodeGen/ARM/vcgt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vcgt.ll?rev=118453&r1=118452&r2=118453&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vcgt.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vcgt.ll Mon Nov 8 17:21:22 2010
@@ -173,3 +173,25 @@
declare <2 x i32> @llvm.arm.neon.vacgtd(<2 x float>, <2 x float>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vacgtq(<4 x float>, <4 x float>) nounwind readnone
+
+define <8 x i8> @vcgti8Z(<8 x i8>* %A) nounwind {
+;CHECK: vcgti8Z:
+;CHECK-NOT: vmov
+;CHECK-NOT: vmvn
+;CHECK: vcgt.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp3 = icmp sgt <8 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
+ ret <8 x i8> %tmp4
+}
+
+define <8 x i8> @vclti8Z(<8 x i8>* %A) nounwind {
+;CHECK: vclti8Z:
+;CHECK-NOT: vmov
+;CHECK-NOT: vmvn
+;CHECK: vclt.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp3 = icmp slt <8 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
+ ret <8 x i8> %tmp4
+}
Modified: llvm/trunk/test/MC/ARM/neon-cmp-encoding.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/ARM/neon-cmp-encoding.s?rev=118453&r1=118452&r2=118453&view=diff
==============================================================================
--- llvm/trunk/test/MC/ARM/neon-cmp-encoding.s (original)
+++ llvm/trunk/test/MC/ARM/neon-cmp-encoding.s Mon Nov 8 17:21:22 2010
@@ -102,3 +102,14 @@
vtst.16 q8, q8, q9
@ CHECK: vtst.32 q8, q8, q9 @ encoding: [0xf2,0x08,0x60,0xf2]
vtst.32 q8, q8, q9
+
+@ CHECK: vceq.i8 d16, d16, #0 @ encoding: [0x20,0x01,0xf1,0xf3]
+ vceq.i8 d16, d16, #0
+@ CHECK: vcge.s8 d16, d16, #0 @ encoding: [0xa0,0x00,0xf1,0xf3]
+ vcge.s8 d16, d16, #0
+@ CHECK: vcle.s8 d16, d16, #0 @ encoding: [0xa0,0x01,0xf1,0xf3]
+ vcle.s8 d16, d16, #0
+@ CHECK: vcgt.s8 d16, d16, #0 @ encoding: [0x20,0x00,0xf1,0xf3]
+ vcgt.s8 d16, d16, #0
+@ CHECK: vclt.s8 d16, d16, #0 @ encoding: [0x20,0x02,0xf1,0xf3]
+ vclt.s8 d16, d16, #0
More information about the llvm-commits mailing list