[llvm] r350553 - [ARM] ComputeKnownBits to handle extract vectors
Diogo N. Sampaio via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 7 11:01:47 PST 2019
Author: dnsampaio
Date: Mon Jan 7 11:01:47 2019
New Revision: 350553
URL: http://llvm.org/viewvc/llvm-project?rev=350553&view=rev
Log:
[ARM] ComputeKnownBits to handle extract vectors
This patch teaches ARM's computeKnownBitsForTargetNode about the
sign/zero extension performed by the VGETLANEs/VGETLANEu lane-extract nodes.
Differential revision: https://reviews.llvm.org/D56098
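
For context, here is a minimal, self-contained sketch of the known-bits
bookkeeping the patch performs. The ToyKnownBits struct below is an
illustration only, not LLVM's real llvm::KnownBits API: a bit is "known"
when it is set in exactly one of the two masks, and an extension has to
decide what is known about the freshly created high bits.

#include <cassert>
#include <cstdint>

// Toy stand-in for llvm::KnownBits (illustration only): a bit is known
// to be 0 if set in Zero, known to be 1 if set in One.
struct ToyKnownBits {
  uint32_t Zero = 0;
  uint32_t One = 0;
};

// Zero-extension (the VGETLANEu case): every bit above the source
// width becomes known zero. Assumes SrcBits < 32.
ToyKnownBits zeroExtend(ToyKnownBits K, unsigned SrcBits) {
  K.Zero |= ~0u << SrcBits;
  return K;
}

// Sign-extension (the VGETLANEs case): the new high bits are copies of
// the source sign bit, so they are known only if that bit is known.
ToyKnownBits signExtend(ToyKnownBits K, unsigned SrcBits) {
  uint32_t SignBit = 1u << (SrcBits - 1);
  uint32_t HighBits = ~0u << SrcBits;
  if (K.Zero & SignBit)
    K.Zero |= HighBits;      // sign bit known 0 -> high bits known 0
  else if (K.One & SignBit)
    K.One |= HighBits;       // sign bit known 1 -> high bits known 1
  return K;                  // sign bit unknown -> high bits unknown
}

int main() {
  // A 16-bit lane that was masked with 3: bits [15:2] are known zero,
  // like the KnownUpperZero test added below.
  ToyKnownBits Lane;
  Lane.Zero = 0xfffc;

  // Zero-extending the extract makes bits [31:2] known zero, and since
  // the sign bit is among the known-zero bits, a sign-extending extract
  // yields exactly the same knowledge.
  assert(zeroExtend(Lane, 16).Zero == 0xfffffffcu);
  assert(signExtend(Lane, 16).Zero == 0xfffffffcu);
}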
Modified:
llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
llvm/trunk/test/CodeGen/ARM/dagcombine-anyexttozeroext.ll
Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=350553&r1=350552&r2=350553&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Mon Jan 7 11:01:47 2019
@@ -13633,6 +13633,33 @@ void ARMTargetLowering::computeKnownBits
Known.One &= Mask;
return;
}
+ case ARMISD::VGETLANEs:
+ case ARMISD::VGETLANEu: {
+ const SDValue &SrcSV = Op.getOperand(0);
+ EVT VecVT = SrcSV.getValueType();
+ assert(VecVT.isVector() && "VGETLANE expected a vector type");
+ const unsigned NumSrcElts = VecVT.getVectorNumElements();
+ ConstantSDNode *Pos = cast<ConstantSDNode>(Op.getOperand(1).getNode());
+ assert(Pos->getAPIntValue().ult(NumSrcElts) &&
+ "VGETLANE index out of bounds");
+ unsigned Idx = Pos->getZExtValue();
+ APInt DemandedElt = APInt::getOneBitSet(NumSrcElts, Idx);
+ Known = DAG.computeKnownBits(SrcSV, DemandedElt, Depth + 1);
+
+ EVT VT = Op.getValueType();
+ const unsigned DstSz = VT.getScalarSizeInBits();
+ const unsigned SrcSz = VecVT.getVectorElementType().getSizeInBits();
+ assert(SrcSz == Known.getBitWidth());
+ assert(DstSz > SrcSz);
+ if (Op.getOpcode() == ARMISD::VGETLANEs)
+ Known = Known.sext(DstSz);
+ else {
+ Known = Known.zext(DstSz);
+ Known.Zero.setBitsFrom(SrcSz);
+ }
+ assert(DstSz == Known.getBitWidth());
+ break;
+ }
}
}
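
The subtlety in the hunk above is the demanded-elements mask: by
building a one-hot APInt with APInt::getOneBitSet, the query asks only
about the extracted lane, so unknown bits in the other lanes cannot
dilute the answer. A rough plain-C++ illustration of that filtering
(hypothetical helper, not the real SelectionDAG API):

#include <array>
#include <cassert>
#include <cstdint>

// Hypothetical per-lane knowledge for a <4 x i16> vector: each entry
// is a mask of bits known to be zero in that lane.
using LaneKnownZero = std::array<uint16_t, 4>;

// A bit is known zero for the query only if it is known zero in every
// demanded lane; DemandedElts is a bitmask over the four lanes.
uint16_t knownZero(const LaneKnownZero &Lanes, unsigned DemandedElts) {
  uint16_t Common = 0xffff;
  for (unsigned I = 0; I < Lanes.size(); ++I)
    if (DemandedElts & (1u << I))
      Common &= Lanes[I];
  return Common;
}

int main() {
  // Lane 0 was masked with 3 (bits [15:2] known zero); the other
  // lanes are completely unknown.
  LaneKnownZero Lanes = {0xfffc, 0x0000, 0x0000, 0x0000};

  // Demanding every lane discards what we know about lane 0...
  assert(knownZero(Lanes, 0xF) == 0x0000);
  // ...while the one-hot mask the patch builds keeps it.
  assert(knownZero(Lanes, 0x1) == 0xfffc);
}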
Modified: llvm/trunk/test/CodeGen/ARM/dagcombine-anyexttozeroext.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/dagcombine-anyexttozeroext.ll?rev=350553&r1=350552&r2=350553&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/dagcombine-anyexttozeroext.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/dagcombine-anyexttozeroext.ll Mon Jan 7 11:01:47 2019
@@ -1,50 +1,52 @@
; RUN: llc -mtriple armv7 %s -o - | FileCheck %s
-; CHECK-LABEL: f:
define float @f(<4 x i16>* nocapture %in) {
- ; CHECK: vld1
- ; CHECK: vmovl.u16
- ; CHECK-NOT: vand
+; CHECK-LABEL: f:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vld1.16 {d16}, [r0:64]
+; CHECK-NEXT: vmovl.u16 q8, d16
+; CHECK-NEXT: vcvt.f32.u32 q0, q8
+; CHECK-NEXT: vadd.f32 s4, s0, s1
+; CHECK-NEXT: vadd.f32 s0, s4, s2
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
%1 = load <4 x i16>, <4 x i16>* %in
- ; CHECK: vcvt.f32.u32
%2 = uitofp <4 x i16> %1 to <4 x float>
%3 = extractelement <4 x float> %2, i32 0
%4 = extractelement <4 x float> %2, i32 1
%5 = extractelement <4 x float> %2, i32 2
- ; CHECK: vadd.f32
%6 = fadd float %3, %4
%7 = fadd float %6, %5
ret float %7
}
-; CHECK-LABEL: g:
define float @g(<4 x i16>* nocapture %in) {
- ; CHECK: vldr
+; CHECK-LABEL: g:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr d16, [r0]
+; CHECK-NEXT: vmov.u16 r0, d16[0]
+; CHECK-NEXT: vmov s0, r0
+; CHECK-NEXT: vcvt.f32.u32 s0, s0
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
%1 = load <4 x i16>, <4 x i16>* %in
-
- ; For now we're generating a vmov.16 and a uxth instruction.
- ; The uxth is redundant, and we should be able to extend without
- ; having to generate cross-domain copies. Once we can do this
- ; we should modify the checks below.
-
- ; CHECK: uxth
%2 = extractelement <4 x i16> %1, i32 0
- ; CHECK: vcvt.f32.u32
%3 = uitofp i16 %2 to float
ret float %3
}
; Make sure we generate zext from <4 x i8> to <4 x i32>.
-
-; CHECK-LABEL: h:
-; CHECK: vld1.32
-; CHECK: vmovl.u8 q8, d16
-; CHECK: vmovl.u16 q8, d16
-; CHECK: vmov r0, r1, d16
-; CHECK: vmov r2, r3, d17
define <4 x i32> @h(<4 x i8> *%in) {
+; CHECK-LABEL: h:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vld1.32 {d16[0]}, [r0:32]
+; CHECK-NEXT: vmovl.u8 q8, d16
+; CHECK-NEXT: vmovl.u16 q8, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: bx lr
%1 = load <4 x i8>, <4 x i8>* %in, align 4
%2 = extractelement <4 x i8> %1, i32 0
%3 = zext i8 %2 to i32
@@ -60,3 +62,79 @@ define <4 x i32> @h(<4 x i8> *%in) {
%13 = insertelement <4 x i32> %10, i32 %12, i32 3
ret <4 x i32> %13
}
+
+define float @i(<4 x i16>* nocapture %in) {
+ ; FIXME: The vmov.u16 + sxth pair could be folded into a single vmov.s16
+; CHECK-LABEL: i:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr d16, [r0]
+; CHECK-NEXT: vmov.u16 r0, d16[0]
+; CHECK-NEXT: sxth r0, r0
+; CHECK-NEXT: vmov s0, r0
+; CHECK-NEXT: vcvt.f32.s32 s0, s0
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
+ %1 = load <4 x i16>, <4 x i16>* %in
+ %2 = extractelement <4 x i16> %1, i32 0
+ %3 = sitofp i16 %2 to float
+ ret float %3
+}
+
+define float @j(<8 x i8>* nocapture %in) {
+; CHECK-LABEL: j:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr d16, [r0]
+; CHECK-NEXT: vmov.u8 r0, d16[7]
+; CHECK-NEXT: vmov s0, r0
+; CHECK-NEXT: vcvt.f32.u32 s0, s0
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
+ %1 = load <8 x i8>, <8 x i8>* %in
+ %2 = extractelement <8 x i8> %1, i32 7
+ %3 = uitofp i8 %2 to float
+ ret float %3
+}
+
+define float @k(<8 x i8>* nocapture %in) {
+; FIXME: The vmov.u8 + sxtb pair could be folded into a single vmov.s8
+; CHECK-LABEL: k:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vldr d16, [r0]
+; CHECK-NEXT: vmov.u8 r0, d16[7]
+; CHECK-NEXT: sxtb r0, r0
+; CHECK-NEXT: vmov s0, r0
+; CHECK-NEXT: vcvt.f32.s32 s0, s0
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
+ %1 = load <8 x i8>, <8 x i8>* %in
+ %2 = extractelement <8 x i8> %1, i32 7
+ %3 = sitofp i8 %2 to float
+ ret float %3
+}
+
+define float @KnownUpperZero(<4 x i16> %v) {
+; FIXME: the uxtb instructions are not required
+; CHECK-LABEL: KnownUpperZero:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov.i16 d16, #0x3
+; CHECK-NEXT: vmov d17, r0, r1
+; CHECK-NEXT: vand d16, d17, d16
+; CHECK-NEXT: vmov.u16 r0, d16[0]
+; CHECK-NEXT: vmov.u16 r1, d16[3]
+; CHECK-NEXT: uxtb r0, r0
+; CHECK-NEXT: vmov s0, r0
+; CHECK-NEXT: uxtb r0, r1
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vcvt.f32.s32 s0, s0
+; CHECK-NEXT: vcvt.f32.s32 s2, s2
+; CHECK-NEXT: vadd.f32 s0, s2, s0
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
+ %1 = and <4 x i16> %v, <i16 3,i16 3,i16 3,i16 3>
+ %2 = extractelement <4 x i16> %1, i32 3
+ %3 = extractelement <4 x i16> %1, i32 0
+ %sinf1 = sitofp i16 %2 to float
+ %sinf2 = sitofp i16 %3 to float
+ %sum = fadd float %sinf1, %sinf2
+ ret float %sum
+}
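
As the FIXME in KnownUpperZero notes, the remaining uxtb is provably
dead: once a lane has been masked with 3, its sign bit is known zero,
so sign- and zero-extension agree on every possible value. A quick
exhaustive check of that arithmetic fact:

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t V = 0; V <= 0xffff; ++V) {
    uint16_t Masked = static_cast<uint16_t>(V) & 3; // bits [15:2] now 0
    uint32_t ZExt = Masked;                         // zero-extend to i32
    int32_t SExt = static_cast<int16_t>(Masked);    // sign-extend to i32
    // With the sign bit known zero, the two extensions are identical,
    // so the uxtb the backend still emits is redundant.
    assert(static_cast<uint32_t>(SExt) == ZExt);
  }
  return 0;
}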