[llvm] r333760 - [Hexagon] Select HVX code for vector CTPOP, CTLZ, and CTTZ
Krzysztof Parzyszek via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 1 07:52:58 PDT 2018
Author: kparzysz
Date: Fri Jun 1 07:52:58 2018
New Revision: 333760
URL: http://llvm.org/viewvc/llvm-project?rev=333760&view=rev
Log:
[Hexagon] Select HVX code for vector CTPOP, CTLZ, and CTTZ
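Summary of the approach (editorial note, not part of the original log): on single HVX vectors, CTPOP and CTLZ are marked Legal and selected through new patterns in HexagonPatternsHVX.td; CTTZ is marked Custom and lowered through CTLZ using the identity cttz(x) = bitwidth(x) - ctlz(~x & (x-1)) (Hacker's Delight), as implemented in LowerHvxCttz below; on HVX vector pairs the operations are marked Custom so they can be handled per single-vector half. A minimal scalar C++ sketch of that identity, for illustration only (the helpers ctlz8 and cttz8_via_ctlz are hypothetical and not part of the patch):

  #include <cstdint>

  // Count leading zeros of an 8-bit value; returns 8 for X == 0.
  static unsigned ctlz8(uint8_t X) {
    unsigned N = 0;
    for (int B = 7; B >= 0 && !((X >> B) & 1); --B)
      ++N;
    return N;
  }

  // cttz(x) = bitwidth(x) - ctlz(~x & (x - 1)).
  // ~x & (x - 1) keeps exactly the trailing-zero bits of x set, e.g.
  // x = 0b00010100 -> ~x & (x - 1) = 0b00000011, ctlz = 6, 8 - 6 = 2 = cttz(x).
  static unsigned cttz8_via_ctlz(uint8_t X) {
    return 8 - ctlz8(uint8_t(~X & (X - 1)));
  }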
Added:
llvm/trunk/test/CodeGen/Hexagon/autohvx/bitcount-128b.ll
llvm/trunk/test/CodeGen/Hexagon/autohvx/bitcount-64b.ll
Modified:
llvm/trunk/lib/Target/Hexagon/HexagonISelLowering.h
llvm/trunk/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
llvm/trunk/lib/Target/Hexagon/HexagonPatternsHVX.td
Modified: llvm/trunk/lib/Target/Hexagon/HexagonISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Hexagon/HexagonISelLowering.h?rev=333760&r1=333759&r2=333760&view=diff
==============================================================================
--- llvm/trunk/lib/Target/Hexagon/HexagonISelLowering.h (original)
+++ llvm/trunk/lib/Target/Hexagon/HexagonISelLowering.h Fri Jun 1 07:52:58 2018
@@ -425,6 +425,7 @@ namespace HexagonISD {
SDValue LowerHvxAnyExt(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerHvxSignExt(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerHvxZeroExt(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerHvxCttz(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerHvxMul(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerHvxMulh(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerHvxSetCC(SDValue Op, SelectionDAG &DAG) const;
Modified: llvm/trunk/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp?rev=333760&r1=333759&r2=333760&view=diff
==============================================================================
--- llvm/trunk/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp (original)
+++ llvm/trunk/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp Fri Jun 1 07:52:58 2018
@@ -69,21 +69,25 @@ HexagonTargetLowering::initializeHVXLowe
setOperationAction(ISD::VECTOR_SHUFFLE, ByteV, Legal);
setOperationAction(ISD::VECTOR_SHUFFLE, ByteW, Legal);
- setOperationAction(ISD::AND, ByteV, Legal);
- setOperationAction(ISD::OR, ByteV, Legal);
- setOperationAction(ISD::XOR, ByteV, Legal);
for (MVT T : LegalV) {
setIndexedLoadAction(ISD::POST_INC, T, Legal);
setIndexedStoreAction(ISD::POST_INC, T, Legal);
- setOperationAction(ISD::ADD, T, Legal);
- setOperationAction(ISD::SUB, T, Legal);
+ setOperationAction(ISD::AND, T, Legal);
+ setOperationAction(ISD::OR, T, Legal);
+ setOperationAction(ISD::XOR, T, Legal);
+ setOperationAction(ISD::ADD, T, Legal);
+ setOperationAction(ISD::SUB, T, Legal);
+ setOperationAction(ISD::CTPOP, T, Legal);
+ setOperationAction(ISD::CTLZ, T, Legal);
if (T != ByteV) {
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, T, Legal);
setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, T, Legal);
+ setOperationAction(ISD::BSWAP, T, Legal);
}
+ setOperationAction(ISD::CTTZ, T, Custom);
setOperationAction(ISD::LOAD, T, Custom);
setOperationAction(ISD::MUL, T, Custom);
setOperationAction(ISD::MULHS, T, Custom);
@@ -104,6 +108,9 @@ HexagonTargetLowering::initializeHVXLowe
setOperationAction(ISD::SRA, T, Custom);
setOperationAction(ISD::SHL, T, Custom);
setOperationAction(ISD::SRL, T, Custom);
+
+ // Promote all shuffles to operate on vectors of bytes.
+ setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteV);
}
setCondCodeAction(ISD::SETNE, T, Expand);
@@ -115,16 +122,6 @@ HexagonTargetLowering::initializeHVXLowe
setCondCodeAction(ISD::SETULT, T, Expand);
}
- for (MVT T : LegalV) {
- if (T == ByteV)
- continue;
- // Promote all shuffles to operate on vectors of bytes.
- setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteV);
- setPromoteTo(ISD::AND, T, ByteV);
- setPromoteTo(ISD::OR, T, ByteV);
- setPromoteTo(ISD::XOR, T, ByteV);
- }
-
for (MVT T : LegalW) {
// Custom-lower BUILD_VECTOR for vector pairs. The standard (target-
// independent) handling of it would convert it to a load, which is
@@ -145,6 +142,9 @@ HexagonTargetLowering::initializeHVXLowe
setOperationAction(ISD::LOAD, T, Custom);
setOperationAction(ISD::STORE, T, Custom);
+ setOperationAction(ISD::CTLZ, T, Custom);
+ setOperationAction(ISD::CTTZ, T, Custom);
+ setOperationAction(ISD::CTPOP, T, Custom);
setOperationAction(ISD::ADD, T, Legal);
setOperationAction(ISD::SUB, T, Legal);
@@ -1158,6 +1158,40 @@ HexagonTargetLowering::LowerHvxZeroExt(S
}
SDValue
+HexagonTargetLowering::LowerHvxCttz(SDValue Op, SelectionDAG &DAG) const {
+ // Lower vector CTTZ into a computation using CTLZ (Hacker's Delight):
+ // cttz(x) = bitwidth(x) - ctlz(~x & (x-1))
+ const SDLoc &dl(Op);
+ MVT ResTy = ty(Op);
+ SDValue InpV = Op.getOperand(0);
+ assert(ResTy == ty(InpV));
+
+ // Calculate the vectors of 1 and bitwidth(x).
+ MVT ElemTy = ty(InpV).getVectorElementType();
+ unsigned ElemWidth = ElemTy.getSizeInBits();
+ uint32_t Splat1 = 0, SplatW = 0;
+ assert(isPowerOf2_32(ElemWidth) && ElemWidth <= 32);
+ for (unsigned i = 0; i != 32/ElemWidth; ++i) {
+ Splat1 = (Splat1 << ElemWidth) | 1;
+ SplatW = (SplatW << ElemWidth) | ElemWidth;
+ }
+ SDValue Vec1 = DAG.getNode(HexagonISD::VSPLATW, dl, ResTy,
+ DAG.getConstant(Splat1, dl, MVT::i32));
+ SDValue VecW = DAG.getNode(HexagonISD::VSPLATW, dl, ResTy,
+ DAG.getConstant(SplatW, dl, MVT::i32));
+ SDValue VecN1 = DAG.getNode(HexagonISD::VSPLATW, dl, ResTy,
+ DAG.getConstant(-1, dl, MVT::i32));
+ // Do not use DAG.getNOT, because that would create BUILD_VECTOR with
+ // a BITCAST. Here we can skip the BITCAST (so we don't have to handle
+ // it separately in custom combine or selection).
+ SDValue A = DAG.getNode(ISD::AND, dl, ResTy,
+ {DAG.getNode(ISD::XOR, dl, ResTy, {InpV, VecN1}),
+ DAG.getNode(ISD::SUB, dl, ResTy, {InpV, Vec1})});
+ return DAG.getNode(ISD::SUB, dl, ResTy,
+ {VecW, DAG.getNode(ISD::CTLZ, dl, ResTy, A)});
+}
+
+SDValue
HexagonTargetLowering::LowerHvxMul(SDValue Op, SelectionDAG &DAG) const {
MVT ResTy = ty(Op);
assert(ResTy.isVector() && isHvxSingleTy(ResTy));
@@ -1422,6 +1456,8 @@ HexagonTargetLowering::LowerHvxOperation
case ISD::LOAD:
case ISD::STORE:
return SplitHvxMemOp(Op, DAG);
+ case ISD::CTLZ:
+ case ISD::CTTZ:
case ISD::MUL:
case ISD::MULHS:
case ISD::MULHU:
@@ -1451,6 +1487,7 @@ HexagonTargetLowering::LowerHvxOperation
case ISD::ANY_EXTEND: return LowerHvxAnyExt(Op, DAG);
case ISD::SIGN_EXTEND: return LowerHvxSignExt(Op, DAG);
case ISD::ZERO_EXTEND: return LowerHvxZeroExt(Op, DAG);
+ case ISD::CTTZ: return LowerHvxCttz(Op, DAG);
case ISD::SRA:
case ISD::SHL:
case ISD::SRL: return LowerHvxShift(Op, DAG);
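An illustrative aside on the constants built by LowerHvxCttz above: the Splat1/SplatW loop packs one copy of 1 and one copy of the element bit-width into every ElemWidth-sized slice of a 32-bit immediate, which is then broadcast with VSPLATW. A stand-alone sketch of that computation follows (the helper makeSplats is hypothetical, not part of the patch); the expected values line up with the ##16843009/##134744072, ##65537/##1048592, and #1/#32 immediates checked in the tests added below:

  #include <cassert>
  #include <cstdint>

  // Mirrors the Splat1/SplatW loop in LowerHvxCttz: replicate the field into
  // each ElemWidth-sized slice of a 32-bit word.
  static void makeSplats(unsigned ElemWidth, uint32_t &Splat1, uint32_t &SplatW) {
    Splat1 = SplatW = 0;
    for (unsigned i = 0; i != 32 / ElemWidth; ++i) {
      Splat1 = (Splat1 << ElemWidth) | 1;
      SplatW = (SplatW << ElemWidth) | ElemWidth;
    }
  }

  int main() {
    uint32_t S1, SW;
    makeSplats(8, S1, SW);
    assert(S1 == 0x01010101 && SW == 0x08080808);   // ##16843009, ##134744072
    makeSplats(16, S1, SW);
    assert(S1 == 0x00010001 && SW == 0x00100010);   // ##65537, ##1048592
    makeSplats(32, S1, SW);
    assert(S1 == 1 && SW == 32);                    // #1, #32
    return 0;
  }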
Modified: llvm/trunk/lib/Target/Hexagon/HexagonPatternsHVX.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Hexagon/HexagonPatternsHVX.td?rev=333760&r1=333759&r2=333760&view=diff
==============================================================================
--- llvm/trunk/lib/Target/Hexagon/HexagonPatternsHVX.td (original)
+++ llvm/trunk/lib/Target/Hexagon/HexagonPatternsHVX.td Fri Jun 1 07:52:58 2018
@@ -250,7 +250,19 @@ let Predicates = [UseHVX] in {
def: Pat<(VecPI32 (HexagonVSPLATW I32:$Rs)), (Rep (Vsplatrw $Rs))>;
}
+class Vneg1<ValueType VecTy>
+ : PatFrag<(ops), (VecTy (HexagonVSPLATW (i32 -1)))>;
+
+class Vnot<ValueType VecTy>
+ : PatFrag<(ops node:$Vs), (xor $Vs, Vneg1<VecTy>)>;
+
let Predicates = [UseHVX] in {
+ let AddedComplexity = 200 in {
+ def: Pat<(Vnot<VecI8> HVI8:$Vs), (V6_vnot HvxVR:$Vs)>;
+ def: Pat<(Vnot<VecI16> HVI16:$Vs), (V6_vnot HvxVR:$Vs)>;
+ def: Pat<(Vnot<VecI32> HVI32:$Vs), (V6_vnot HvxVR:$Vs)>;
+ }
+
def: OpR_RR_pat<V6_vaddb, Add, VecI8, HVI8>;
def: OpR_RR_pat<V6_vaddh, Add, VecI16, HVI16>;
def: OpR_RR_pat<V6_vaddw, Add, VecI32, HVI32>;
@@ -378,6 +390,21 @@ let Predicates = [UseHVX] in {
(V6_vdelta HvxVR:$Vs, (V6_lvsplatw (A2_tfrsi 0x01010101)))>;
def: Pat<(VecI32 (bswap HVI32:$Vs)),
(V6_vdelta HvxVR:$Vs, (V6_lvsplatw (A2_tfrsi 0x03030303)))>;
+
+ def: Pat<(VecI8 (ctpop HVI8:$Vs)),
+ (V6_vpackeb (V6_vpopcounth (HiVec (V6_vunpackub HvxVR:$Vs))),
+ (V6_vpopcounth (LoVec (V6_vunpackub HvxVR:$Vs))))>;
+ def: Pat<(VecI16 (ctpop HVI16:$Vs)), (V6_vpopcounth HvxVR:$Vs)>;
+ def: Pat<(VecI32 (ctpop HVI32:$Vs)),
+ (V6_vaddw (LoVec (V6_vzh (V6_vpopcounth HvxVR:$Vs))),
+ (HiVec (V6_vzh (V6_vpopcounth HvxVR:$Vs))))>;
+
+ def: Pat<(VecI8 (ctlz HVI8:$Vs)),
+ (V6_vsubb (V6_vpackeb (V6_vcl0h (HiVec (V6_vunpackub HvxVR:$Vs))),
+ (V6_vcl0h (LoVec (V6_vunpackub HvxVR:$Vs)))),
+ (V6_lvsplatw (A2_tfrsi 0x08080808)))>;
+ def: Pat<(VecI16 (ctlz HVI16:$Vs)), (V6_vcl0h HvxVR:$Vs)>;
+ def: Pat<(VecI32 (ctlz HVI32:$Vs)), (V6_vcl0w HvxVR:$Vs)>;
}
class HvxSel_pat<InstHexagon MI, PatFrag RegPred>
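A brief note on the new ctpop patterns above: byte and word element types are reduced to the halfword instruction V6_vpopcounth. For 32-bit elements the pattern counts bits in each 16-bit half, zero-extends the halfword counts with V6_vzh, and adds the two halves with V6_vaddw. A scalar model of that composition, for illustration only (ctpop32_via_halfwords and popcnt16 are hypothetical helpers, not LLVM or HVX APIs):

  #include <cstdint>

  // Per-lane effect of V6_vpopcounth on one 16-bit element.
  static unsigned popcnt16(uint16_t H) {
    unsigned C = 0;
    for (; H; H >>= 1)
      C += H & 1;
    return C;
  }

  // Count each 16-bit half separately, then add the two counts (V6_vaddw).
  static unsigned ctpop32_via_halfwords(uint32_t X) {
    return popcnt16(uint16_t(X)) + popcnt16(uint16_t(X >> 16));
  }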
Added: llvm/trunk/test/CodeGen/Hexagon/autohvx/bitcount-128b.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Hexagon/autohvx/bitcount-128b.ll?rev=333760&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/Hexagon/autohvx/bitcount-128b.ll (added)
+++ llvm/trunk/test/CodeGen/Hexagon/autohvx/bitcount-128b.ll Fri Jun 1 07:52:58 2018
@@ -0,0 +1,124 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+
+; CHECK-LABEL: f0
+; CHECK: v[[V00:[0-9]+]]:[[V01:[0-9]+]].uh = vunpack(v0.ub)
+; CHECK-DAG: v[[V02:[0-9]+]].h = vpopcount(v[[V00]].h)
+; CHECK-DAG: v[[V03:[0-9]+]].h = vpopcount(v[[V01]].h)
+; CHECK: v0.b = vpacke(v[[V02]].h,v[[V03]].h)
+define <128 x i8> @f0(<128 x i8> %a0) #0 {
+ %t0 = call <128 x i8> @llvm.ctpop.v128i8(<128 x i8> %a0)
+ ret <128 x i8> %t0
+}
+
+; CHECK-LABEL: f1
+; CHECK: v0.h = vpopcount(v0.h)
+define <64 x i16> @f1(<64 x i16> %a0) #0 {
+ %t0 = call <64 x i16> @llvm.ctpop.v64i16(<64 x i16> %a0)
+ ret <64 x i16> %t0
+}
+
+; CHECK-LABEL: f2
+; CHECK: v[[V20:[0-9]+]].h = vpopcount(v0.h)
+; CHECK: v[[V21:[0-9]+]]:[[V22:[0-9]+]].uw = vzxt(v[[V20]].uh)
+; CHECK: v0.w = vadd(v[[V22]].w,v[[V21]].w)
+define <32 x i32> @f2(<32 x i32> %a0) #0 {
+ %t0 = call <32 x i32> @llvm.ctpop.v32i32(<32 x i32> %a0)
+ ret <32 x i32> %t0
+}
+
+; CHECK-LABEL: f3
+; CHECK-DAG: r[[R30:[0-9]+]] = ##134744072
+; CHECK-DAG: v[[V31:[0-9]+]]:[[V32:[0-9]+]].uh = vunpack(v0.ub)
+; CHECK: v[[V33:[0-9]+]] = vsplat(r[[R30]])
+; CHECK-DAG: v[[V34:[0-9]+]].uh = vcl0(v[[V31]].uh)
+; CHECK-DAG: v[[V35:[0-9]+]].uh = vcl0(v[[V32]].uh)
+; CHECK: v[[V36:[0-9]+]].b = vpacke(v[[V34]].h,v[[V35]].h)
+; CHECK: v0.b = vsub(v[[V36]].b,v[[V33]].b)
+define <128 x i8> @f3(<128 x i8> %a0) #0 {
+ %t0 = call <128 x i8> @llvm.ctlz.v128i8(<128 x i8> %a0)
+ ret <128 x i8> %t0
+}
+
+; CHECK-LABEL: f4
+; CHECK: v0.uh = vcl0(v0.uh)
+define <64 x i16> @f4(<64 x i16> %a0) #0 {
+ %t0 = call <64 x i16> @llvm.ctlz.v64i16(<64 x i16> %a0)
+ ret <64 x i16> %t0
+}
+
+; CHECK-LABEL: f5
+; CHECK: v0.uw = vcl0(v0.uw)
+define <32 x i32> @f5(<32 x i32> %a0) #0 {
+ %t0 = call <32 x i32> @llvm.ctlz.v32i32(<32 x i32> %a0)
+ ret <32 x i32> %t0
+}
+
+; CHECK-LABEL: f6
+; r = 0x01010101
+; CHECK-DAG: r[[R60:[0-9]+]] = ##16843009
+; CHECK-DAG: v[[V61:[0-9]+]] = vnot(v0)
+; r = 0x08080808
+; CHECK-DAG: r[[R62:[0-9]+]] = ##134744072
+; CHECK: v[[V63:[0-9]+]] = vsplat(r[[R60]])
+; CHECK-DAG: v[[V64:[0-9]+]] = vsplat(r[[R62]])
+; CHECK: v[[V65:[0-9]+]].b = vsub(v0.b,v[[V63]].b)
+; CHECK: v[[V66:[0-9]+]] = vand(v[[V61]],v[[V65]])
+; Ctlz:
+; CHECK: v[[V67:[0-9]+]]:[[V68:[0-9]+]].uh = vunpack(v[[V66]].ub)
+; CHECK: v[[V69:[0-9]+]].uh = vcl0(v[[V68]].uh)
+; CHECK: v[[V6A:[0-9]+]].uh = vcl0(v[[V67]].uh)
+; CHECK: v[[V6B:[0-9]+]].b = vpacke(v[[V6A]].h,v[[V69]].h)
+; CHECK: v[[V6C:[0-9]+]].b = vsub(v[[V6B]].b,v[[V64]].b)
+; CHECK: v0.b = vsub(v[[V64]].b,v[[V6C]].b)
+define <128 x i8> @f6(<128 x i8> %a0) #0 {
+ %t0 = call <128 x i8> @llvm.cttz.v128i8(<128 x i8> %a0)
+ ret <128 x i8> %t0
+}
+
+; CHECK-LABEL: f7
+; r = 0x00010001
+; CHECK-DAG: r[[R70:[0-9]+]] = ##65537
+; CHECK-DAG: v[[V71:[0-9]+]] = vnot(v0)
+; r = 0x00100010 // halfword bitwidths
+; CHECK-DAG: r[[R72:[0-9]+]] = ##1048592
+; CHECK: v[[V73:[0-9]+]] = vsplat(r[[R70]])
+; CHECK: v[[V74:[0-9]+]] = vsplat(r[[R72]])
+; CHECK: v[[V75:[0-9]+]].h = vsub(v0.h,v[[V73]].h)
+; CHECK: v[[V76:[0-9]+]] = vand(v[[V71]],v[[V75]])
+; Ctlz:
+; CHECK: v[[V77:[0-9]+]].uh = vcl0(v[[V76]].uh)
+; CHECK: v0.h = vsub(v[[V74]].h,v[[V77]].h)
+define <64 x i16> @f7(<64 x i16> %a0) #0 {
+ %t0 = call <64 x i16> @llvm.cttz.v64i16(<64 x i16> %a0)
+ ret <64 x i16> %t0
+}
+
+; CHECK-LABEL: f8
+; CHECK-DAG: r[[R80:[0-9]+]] = #1
+; CHECK-DAG: v[[V81:[0-9]+]] = vnot(v0)
+; CHECK-DAG: r[[R82:[0-9]+]] = #32
+; CHECK: v[[V83:[0-9]+]] = vsplat(r[[R80]])
+; CHECK: v[[V84:[0-9]+]] = vsplat(r[[R82]])
+; CHECK: v[[V85:[0-9]+]].w = vsub(v0.w,v[[V83]].w)
+; CHECK: v[[V86:[0-9]+]] = vand(v[[V81]],v[[V85]])
+; Ctlz:
+; CHECK: v[[V87:[0-9]+]].uw = vcl0(v[[V86]].uw)
+; CHECK: v0.w = vsub(v[[V84]].w,v[[V87]].w)
+define <32 x i32> @f8(<32 x i32> %a0) #0 {
+ %t0 = call <32 x i32> @llvm.cttz.v32i32(<32 x i32> %a0)
+ ret <32 x i32> %t0
+}
+
+declare <128 x i8> @llvm.ctpop.v128i8(<128 x i8>) #0
+declare <64 x i16> @llvm.ctpop.v64i16(<64 x i16>) #0
+declare <32 x i32> @llvm.ctpop.v32i32(<32 x i32>) #0
+
+declare <128 x i8> @llvm.ctlz.v128i8(<128 x i8>) #0
+declare <64 x i16> @llvm.ctlz.v64i16(<64 x i16>) #0
+declare <32 x i32> @llvm.ctlz.v32i32(<32 x i32>) #0
+
+declare <128 x i8> @llvm.cttz.v128i8(<128 x i8>) #0
+declare <64 x i16> @llvm.cttz.v64i16(<64 x i16>) #0
+declare <32 x i32> @llvm.cttz.v32i32(<32 x i32>) #0
+
+attributes #0 = { readnone nounwind "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-length128b,-packets" }
Added: llvm/trunk/test/CodeGen/Hexagon/autohvx/bitcount-64b.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Hexagon/autohvx/bitcount-64b.ll?rev=333760&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/Hexagon/autohvx/bitcount-64b.ll (added)
+++ llvm/trunk/test/CodeGen/Hexagon/autohvx/bitcount-64b.ll Fri Jun 1 07:52:58 2018
@@ -0,0 +1,125 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+
+; CHECK-LABEL: f0
+; CHECK: v[[V00:[0-9]+]]:[[V01:[0-9]+]].uh = vunpack(v0.ub)
+; CHECK-DAG: v[[V02:[0-9]+]].h = vpopcount(v[[V00]].h)
+; CHECK-DAG: v[[V03:[0-9]+]].h = vpopcount(v[[V01]].h)
+; CHECK: v0.b = vpacke(v[[V02]].h,v[[V03]].h)
+define <64 x i8> @f0(<64 x i8> %a0) #0 {
+ %t0 = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %a0)
+ ret <64 x i8> %t0
+}
+
+; CHECK-LABEL: f1
+; CHECK: v0.h = vpopcount(v0.h)
+define <32 x i16> @f1(<32 x i16> %a0) #0 {
+ %t0 = call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %a0)
+ ret <32 x i16> %t0
+}
+
+; CHECK-LABEL: f2
+; CHECK: v[[V20:[0-9]+]].h = vpopcount(v0.h)
+; CHECK: v[[V21:[0-9]+]]:[[V22:[0-9]+]].uw = vzxt(v[[V20]].uh)
+; CHECK: v0.w = vadd(v[[V22]].w,v[[V21]].w)
+define <16 x i32> @f2(<16 x i32> %a0) #0 {
+ %t0 = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %a0)
+ ret <16 x i32> %t0
+}
+
+; CHECK-LABEL: f3
+; CHECK-DAG: r[[R30:[0-9]+]] = ##134744072
+; CHECK-DAG: v[[V31:[0-9]+]]:[[V32:[0-9]+]].uh = vunpack(v0.ub)
+; CHECK: v[[V33:[0-9]+]] = vsplat(r[[R30]])
+; CHECK-DAG: v[[V34:[0-9]+]].uh = vcl0(v[[V31]].uh)
+; CHECK-DAG: v[[V35:[0-9]+]].uh = vcl0(v[[V32]].uh)
+; CHECK: v[[V36:[0-9]+]].b = vpacke(v[[V34]].h,v[[V35]].h)
+; CHECK: v0.b = vsub(v[[V36]].b,v[[V33]].b)
+define <64 x i8> @f3(<64 x i8> %a0) #0 {
+ %t0 = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a0)
+ ret <64 x i8> %t0
+}
+
+; CHECK-LABEL: f4
+; CHECK: v0.uh = vcl0(v0.uh)
+define <32 x i16> @f4(<32 x i16> %a0) #0 {
+ %t0 = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a0)
+ ret <32 x i16> %t0
+}
+
+; CHECK-LABEL: f5
+; CHECK: v0.uw = vcl0(v0.uw)
+define <16 x i32> @f5(<16 x i32> %a0) #0 {
+ %t0 = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a0)
+ ret <16 x i32> %t0
+}
+
+; CHECK-LABEL: f6
+; r = 0x01010101
+; CHECK-DAG: r[[R60:[0-9]+]] = ##16843009
+; CHECK-DAG: v[[V61:[0-9]+]] = vnot(v0)
+; r = 0x08080808
+; CHECK-DAG: r[[R62:[0-9]+]] = ##134744072
+; CHECK: v[[V63:[0-9]+]] = vsplat(r[[R60]])
+; CHECK-DAG: v[[V64:[0-9]+]] = vsplat(r[[R62]])
+; CHECK: v[[V65:[0-9]+]].b = vsub(v0.b,v[[V63]].b)
+; CHECK: v[[V66:[0-9]+]] = vand(v[[V61]],v[[V65]])
+; Ctlz:
+; CHECK: v[[V67:[0-9]+]]:[[V68:[0-9]+]].uh = vunpack(v[[V66]].ub)
+; CHECK: v[[V69:[0-9]+]].uh = vcl0(v[[V68]].uh)
+; CHECK: v[[V6A:[0-9]+]].uh = vcl0(v[[V67]].uh)
+; CHECK: v[[V6B:[0-9]+]].b = vpacke(v[[V6A]].h,v[[V69]].h)
+; CHECK: v[[V6C:[0-9]+]].b = vsub(v[[V6B]].b,v[[V64]].b)
+; CHECK: v0.b = vsub(v[[V64]].b,v[[V6C]].b)
+define <64 x i8> @f6(<64 x i8> %a0) #0 {
+ %t0 = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a0)
+ ret <64 x i8> %t0
+}
+
+; CHECK-LABEL: f7
+; r = 0x00010001
+; CHECK-DAG: r[[R70:[0-9]+]] = ##65537
+; CHECK-DAG: v[[V71:[0-9]+]] = vnot(v0)
+; r = 0x00100010 // halfword bitwidths
+; CHECK-DAG: r[[R72:[0-9]+]] = ##1048592
+; CHECK: v[[V73:[0-9]+]] = vsplat(r[[R70]])
+; CHECK: v[[V74:[0-9]+]] = vsplat(r[[R72]])
+; CHECK: v[[V75:[0-9]+]].h = vsub(v0.h,v[[V73]].h)
+; CHECK: v[[V76:[0-9]+]] = vand(v[[V71]],v[[V75]])
+; Ctlz:
+; CHECK: v[[V77:[0-9]+]].uh = vcl0(v[[V76]].uh)
+; CHECK: v0.h = vsub(v[[V74]].h,v[[V77]].h)
+define <32 x i16> @f7(<32 x i16> %a0) #0 {
+ %t0 = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a0)
+ ret <32 x i16> %t0
+}
+
+; CHECK-LABEL: f8
+; CHECK-DAG: r[[R80:[0-9]+]] = #1
+; CHECK-DAG: v[[V81:[0-9]+]] = vnot(v0)
+; CHECK-DAG: r[[R82:[0-9]+]] = #32
+; CHECK: v[[V83:[0-9]+]] = vsplat(r[[R80]])
+; CHECK: v[[V84:[0-9]+]] = vsplat(r[[R82]])
+; CHECK: v[[V85:[0-9]+]].w = vsub(v0.w,v[[V83]].w)
+; CHECK: v[[V86:[0-9]+]] = vand(v[[V81]],v[[V85]])
+; Ctlz:
+; CHECK: v[[V87:[0-9]+]].uw = vcl0(v[[V86]].uw)
+; CHECK: v0.w = vsub(v[[V84]].w,v[[V87]].w)
+define <16 x i32> @f8(<16 x i32> %a0) #0 {
+ %t0 = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a0)
+ ret <16 x i32> %t0
+}
+
+
+declare <64 x i8> @llvm.ctpop.v64i8(<64 x i8>) #0
+declare <32 x i16> @llvm.ctpop.v32i16(<32 x i16>) #0
+declare <16 x i32> @llvm.ctpop.v16i32(<16 x i32>) #0
+
+declare <64 x i8> @llvm.ctlz.v64i8(<64 x i8>) #0
+declare <32 x i16> @llvm.ctlz.v32i16(<32 x i16>) #0
+declare <16 x i32> @llvm.ctlz.v16i32(<16 x i32>) #0
+
+declare <64 x i8> @llvm.cttz.v64i8(<64 x i8>) #0
+declare <32 x i16> @llvm.cttz.v32i16(<32 x i16>) #0
+declare <16 x i32> @llvm.cttz.v16i32(<16 x i32>) #0
+
+attributes #0 = { readnone nounwind "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-length64b,-packets" }