[llvm] r323229 - [Hexagon] Implement basic vector operations on vectors vNi1

Tue Jan 23 09:54:00 PST 2018

Author: kparzysz
Date: Tue Jan 23 09:53:59 2018
New Revision: 323229

URL: http://llvm.org/viewvc/llvm-project?rev=323229&view=rev
Log:
[Hexagon] Implement basic vector operations on vectors vNi1

In addition to that, make sure that there are no boolean vector types that
are associated with multiple register classes. Specifically, remove v32i1
and v64i1 from integer register classes. These types will correspond to
results of vector comparisons, and as such should belong to the vector
predicate class. Having them in scalar registers as well makes legalization
ambiguous.

Added:
    llvm/trunk/test/CodeGen/Hexagon/autohvx/isel-concat-vectors-bool.ll
    llvm/trunk/test/CodeGen/Hexagon/vect/vect-bool-basic-compile.ll
    llvm/trunk/test/CodeGen/Hexagon/vect/vect-bool-isel-crash.ll
    llvm/trunk/test/CodeGen/Hexagon/vect/zext-v4i1.ll
Modified:
    llvm/trunk/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
    llvm/trunk/lib/Target/Hexagon/HexagonISelDAGToDAG.h
    llvm/trunk/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
    llvm/trunk/lib/Target/Hexagon/HexagonISelLowering.cpp
    llvm/trunk/lib/Target/Hexagon/HexagonISelLowering.h
    llvm/trunk/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
    llvm/trunk/lib/Target/Hexagon/HexagonPatterns.td
    llvm/trunk/lib/Target/Hexagon/HexagonRegisterInfo.td

Modified: llvm/trunk/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp?rev=323229&r1=323228&r2=323229&view=diff
==============================================================================

--- llvm/trunk/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp Tue Jan 23 09:53:59 2018
@@ -64,51 +64,6 @@ FunctionPass *createHexagonISelDag(Hexag
 }
 }
 
-// Intrinsics that return a a predicate.
-static bool doesIntrinsicReturnPredicate(unsigned ID) {
-  switch (ID) {
-    default:
-      return false;
-    case Intrinsic::hexagon_C2_cmpeq:
-    case Intrinsic::hexagon_C2_cmpgt:
-    case Intrinsic::hexagon_C2_cmpgtu:
-    case Intrinsic::hexagon_C2_cmpgtup:
-    case Intrinsic::hexagon_C2_cmpgtp:
-    case Intrinsic::hexagon_C2_cmpeqp:
-    case Intrinsic::hexagon_C2_bitsset:
-    case Intrinsic::hexagon_C2_bitsclr:
-    case Intrinsic::hexagon_C2_cmpeqi:
-    case Intrinsic::hexagon_C2_cmpgti:
-    case Intrinsic::hexagon_C2_cmpgtui:
-    case Intrinsic::hexagon_C2_cmpgei:
-    case Intrinsic::hexagon_C2_cmpgeui:
-    case Intrinsic::hexagon_C2_cmplt:
-    case Intrinsic::hexagon_C2_cmpltu:
-    case Intrinsic::hexagon_C2_bitsclri:
-    case Intrinsic::hexagon_C2_and:
-    case Intrinsic::hexagon_C2_or:
-    case Intrinsic::hexagon_C2_xor:
-    case Intrinsic::hexagon_C2_andn:
-    case Intrinsic::hexagon_C2_not:
-    case Intrinsic::hexagon_C2_orn:
-    case Intrinsic::hexagon_C2_pxfer_map:
-    case Intrinsic::hexagon_C2_any8:
-    case Intrinsic::hexagon_C2_all8:
-    case Intrinsic::hexagon_A2_vcmpbeq:
-    case Intrinsic::hexagon_A2_vcmpbgtu:
-    case Intrinsic::hexagon_A2_vcmpheq:
-    case Intrinsic::hexagon_A2_vcmphgt:
-    case Intrinsic::hexagon_A2_vcmphgtu:
-    case Intrinsic::hexagon_A2_vcmpweq:
-    case Intrinsic::hexagon_A2_vcmpwgt:
-    case Intrinsic::hexagon_A2_vcmpwgtu:
-    case Intrinsic::hexagon_C2_tfrrp:
-    case Intrinsic::hexagon_S2_tstbit_i:
-    case Intrinsic::hexagon_S2_tstbit_r:
-      return true;
-  }
-}
-
 void HexagonDAGToDAGISel::SelectIndexedLoad(LoadSDNode *LD, const SDLoc &dl) {
   SDValue Chain = LD->getChain();
   SDValue Base = LD->getBasePtr();
@@ -553,85 +508,6 @@ void HexagonDAGToDAGISel::SelectSHL(SDNo
   return Default();
 }
 
-
-//
-// If there is an zero_extend followed an intrinsic in DAG (this means - the
-// result of the intrinsic is predicate); convert the zero_extend to
-// transfer instruction.
-//
-// Zero extend -> transfer is lowered here. Otherwise, zero_extend will be
-// converted into a MUX as predicate registers defined as 1 bit in the
-// compiler. Architecture defines them as 8-bit registers.
-// We want to preserve all the lower 8-bits and, not just 1 LSB bit.
-//
-void HexagonDAGToDAGISel::SelectZeroExtend(SDNode *N) {
-  SDLoc dl(N);
-
-  SDValue Op0 = N->getOperand(0);
-  EVT OpVT = Op0.getValueType();
-  unsigned OpBW = OpVT.getSizeInBits();
-
-  // Special handling for zero-extending a vector of booleans.
-  if (OpVT.isVector() && OpVT.getVectorElementType() == MVT::i1 && OpBW <= 64) {
-    SDNode *Mask = CurDAG->getMachineNode(Hexagon::C2_mask, dl, MVT::i64, Op0);
-    unsigned NE = OpVT.getVectorNumElements();
-    EVT ExVT = N->getValueType(0);
-    unsigned ES = ExVT.getScalarSizeInBits();
-    uint64_t MV = 0, Bit = 1;
-    for (unsigned i = 0; i < NE; ++i) {
-      MV |= Bit;
-      Bit <<= ES;
-    }
-    SDValue Ones = CurDAG->getTargetConstant(MV, dl, MVT::i64);
-    SDNode *OnesReg = CurDAG->getMachineNode(Hexagon::CONST64, dl,
-                                             MVT::i64, Ones);
-    if (ExVT.getSizeInBits() == 32) {
-      SDNode *And = CurDAG->getMachineNode(Hexagon::A2_andp, dl, MVT::i64,
-                                           SDValue(Mask,0), SDValue(OnesReg,0));
-      SDValue SubR = CurDAG->getTargetConstant(Hexagon::isub_lo, dl, MVT::i32);
-      ReplaceNode(N, CurDAG->getMachineNode(Hexagon::EXTRACT_SUBREG, dl, ExVT,
-                                            SDValue(And, 0), SubR));
-      return;
-    }
-    ReplaceNode(N,
-                CurDAG->getMachineNode(Hexagon::A2_andp, dl, ExVT,
-                                       SDValue(Mask, 0), SDValue(OnesReg, 0)));
-    return;
-  }
-
-  SDNode *Int = N->getOperand(0).getNode();
-  if ((Int->getOpcode() == ISD::INTRINSIC_WO_CHAIN)) {
-    unsigned ID = cast<ConstantSDNode>(Int->getOperand(0))->getZExtValue();
-    if (doesIntrinsicReturnPredicate(ID)) {
-      // Now we need to differentiate target data types.
-      if (N->getValueType(0) == MVT::i64) {
-        // Convert the zero_extend to Rs = Pd followed by A2_combinew(0,Rs).
-        SDValue TargetConst0 = CurDAG->getTargetConstant(0, dl, MVT::i32);
-        SDNode *Result_1 = CurDAG->getMachineNode(Hexagon::C2_tfrpr, dl,
-                                                  MVT::i32, SDValue(Int, 0));
-        SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl,
-                                                  MVT::i32, TargetConst0);
-        SDNode *Result_3 = CurDAG->getMachineNode(Hexagon::A2_combinew, dl,
-                                                  MVT::i64, MVT::Other,
-                                                  SDValue(Result_2, 0),
-                                                  SDValue(Result_1, 0));
-        ReplaceNode(N, Result_3);
-        return;
-      }
-      if (N->getValueType(0) == MVT::i32) {
-        // Convert the zero_extend to Rs = Pd
-        SDNode* RsPd = CurDAG->getMachineNode(Hexagon::C2_tfrpr, dl,
-                                              MVT::i32, SDValue(Int, 0));
-        ReplaceNode(N, RsPd);
-        return;
-      }
-      llvm_unreachable("Unexpected value type");
-    }
-  }
-  SelectCode(N);
-}
-
-
 //
 // Handling intrinsics for circular load and bitreverse load.
 //
@@ -781,6 +657,56 @@ void HexagonDAGToDAGISel::SelectBitcast(
   CurDAG->RemoveDeadNode(N);
 }
 
+// Handle these nodes here to avoid having to write patterns for all
+// combinations of input/output types. In all cases, the resulting
+// instruction is the same.
+void HexagonDAGToDAGISel::SelectTypecast(SDNode *N) {
+  SDValue Op = N->getOperand(0);
+  MVT OpTy = Op.getValueType().getSimpleVT();
+  SDNode *T = CurDAG->MorphNodeTo(N, N->getOpcode(),
+                                  CurDAG->getVTList(OpTy), {Op});
+  ReplaceNode(T, Op.getNode());
+  CurDAG->RemoveDeadNode(T);
+}
+
+void HexagonDAGToDAGISel::SelectP2D(SDNode *N) {
+  MVT ResTy = N->getValueType(0).getSimpleVT();
+  SDNode *T = CurDAG->getMachineNode(Hexagon::C2_mask, SDLoc(N), ResTy,
+                                     N->getOperand(0));
+  ReplaceNode(N, T);
+}
+
+void HexagonDAGToDAGISel::SelectD2P(SDNode *N) {
+  const SDLoc &dl(N);
+  MVT ResTy = N->getValueType(0).getSimpleVT();
+  SDValue Zero = CurDAG->getTargetConstant(0, dl, MVT::i32);
+  SDNode *T = CurDAG->getMachineNode(Hexagon::A4_vcmpbgtui, dl, ResTy,
+                                     N->getOperand(0), Zero);
+  ReplaceNode(N, T);
+}
+
+void HexagonDAGToDAGISel::SelectV2Q(SDNode *N) {
+  const SDLoc &dl(N);
+  MVT ResTy = N->getValueType(0).getSimpleVT();
+
+  SDValue C = CurDAG->getTargetConstant(-1, dl, MVT::i32);
+  SDNode *R = CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32, C);
+  SDNode *T = CurDAG->getMachineNode(Hexagon::V6_vandvrt, dl, ResTy,
+                                     N->getOperand(0), SDValue(R,0));
+  ReplaceNode(N, T);
+}
+
+void HexagonDAGToDAGISel::SelectQ2V(SDNode *N) {
+  const SDLoc &dl(N);
+  MVT ResTy = N->getValueType(0).getSimpleVT();
+
+  SDValue C = CurDAG->getTargetConstant(-1, dl, MVT::i32);
+  SDNode *R = CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32, C);
+  SDNode *T = CurDAG->getMachineNode(Hexagon::V6_vandqrt, dl, ResTy,
+                                     N->getOperand(0), SDValue(R,0));
+  ReplaceNode(N, T);
+}
+
 void HexagonDAGToDAGISel::Select(SDNode *N) {
   if (N->isMachineOpcode())
     return N->setNodeId(-1);  // Already selected.
@@ -793,9 +719,13 @@ void HexagonDAGToDAGISel::Select(SDNode
   case ISD::SHL:                  return SelectSHL(N);
   case ISD::LOAD:                 return SelectLoad(N);
   case ISD::STORE:                return SelectStore(N);
-  case ISD::ZERO_EXTEND:          return SelectZeroExtend(N);
   case ISD::INTRINSIC_W_CHAIN:    return SelectIntrinsicWChain(N);
   case ISD::INTRINSIC_WO_CHAIN:   return SelectIntrinsicWOChain(N);
+  case HexagonISD::TYPECAST:      return SelectTypecast(N);
+  case HexagonISD::P2D:           return SelectP2D(N);
+  case HexagonISD::D2P:           return SelectD2P(N);
+  case HexagonISD::Q2V:           return SelectQ2V(N);
+  case HexagonISD::V2Q:           return SelectV2Q(N);
   }
 
   if (HST->useHVXOps()) {

Modified: llvm/trunk/lib/Target/Hexagon/HexagonISelDAGToDAG.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Hexagon/HexagonISelDAGToDAG.h?rev=323229&r1=323228&r2=323229&view=diff
==============================================================================
--- llvm/trunk/lib/Target/Hexagon/HexagonISelDAGToDAG.h (original)
+++ llvm/trunk/lib/Target/Hexagon/HexagonISelDAGToDAG.h Tue Jan 23 09:53:59 2018
@@ -104,6 +104,11 @@ public:
   void SelectV65Gather(SDNode *N);
   void SelectV65GatherPred(SDNode *N);
   void SelectHVXDualOutput(SDNode *N);
+  void SelectTypecast(SDNode *N);
+  void SelectP2D(SDNode *N);
+  void SelectD2P(SDNode *N);
+  void SelectQ2V(SDNode *N);
+  void SelectV2Q(SDNode *N);
 
   // Include the declarations autogenerated from the selection patterns.
   #define GET_DAGISEL_DECL

Modified: llvm/trunk/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp?rev=323229&r1=323228&r2=323229&view=diff
==============================================================================
--- llvm/trunk/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp (original)
+++ llvm/trunk/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp Tue Jan 23 09:53:59 2018
@@ -1963,8 +1963,8 @@ void HvxSelector::selectRor(SDNode *N) {
   SDNode *NewN = nullptr;
 
   if (auto *CN = dyn_cast<ConstantSDNode>(RotV.getNode())) {
-    unsigned S = CN->getZExtValue();
-    if (S % HST.getVectorLength() == 0) {
+    unsigned S = CN->getZExtValue() % HST.getVectorLength();
+    if (S == 0) {
       NewN = VecV.getNode();
     } else if (isUInt<3>(S)) {
       SDValue C = DAG.getTargetConstant(S, dl, MVT::i32);

Modified: llvm/trunk/lib/Target/Hexagon/HexagonISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Hexagon/HexagonISelLowering.cpp?rev=323229&r1=323228&r2=323229&view=diff
==============================================================================
--- llvm/trunk/lib/Target/Hexagon/HexagonISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/Hexagon/HexagonISelLowering.cpp Tue Jan 23 09:53:59 2018
@@ -1972,16 +1972,16 @@ HexagonTargetLowering::HexagonTargetLowe
 
   // Extending loads from (native) vectors of i8 into (native) vectors of i16
   // are legal.
-  setLoadExtAction(ISD::EXTLOAD, MVT::v2i16, MVT::v2i8, Legal);
+  setLoadExtAction(ISD::EXTLOAD,  MVT::v2i16, MVT::v2i8, Legal);
   setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i16, MVT::v2i8, Legal);
   setLoadExtAction(ISD::SEXTLOAD, MVT::v2i16, MVT::v2i8, Legal);
-  setLoadExtAction(ISD::EXTLOAD, MVT::v4i16, MVT::v4i8, Legal);
+  setLoadExtAction(ISD::EXTLOAD,  MVT::v4i16, MVT::v4i8, Legal);
   setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, MVT::v4i8, Legal);
   setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, MVT::v4i8, Legal);
 
   // Types natively supported:
-  for (MVT NativeVT : {MVT::v32i1, MVT::v64i1, MVT::v4i8, MVT::v8i8, MVT::v2i16,
-                       MVT::v4i16, MVT::v1i32, MVT::v2i32, MVT::v1i64}) {
+  for (MVT NativeVT : {MVT::v8i1, MVT::v4i1, MVT::v2i1, MVT::v4i8,
+                       MVT::v8i8, MVT::v2i16, MVT::v4i16, MVT::v2i32}) {
     setOperationAction(ISD::BUILD_VECTOR,       NativeVT, Custom);
     setOperationAction(ISD::EXTRACT_VECTOR_ELT, NativeVT, Custom);
     setOperationAction(ISD::INSERT_VECTOR_ELT,  NativeVT, Custom);
@@ -1997,6 +1997,8 @@ HexagonTargetLowering::HexagonTargetLowe
     setOperationAction(ISD::XOR, NativeVT, Legal);
   }
 
+  // Custom-lower bitcasts from i8 to v8i1.
+  setOperationAction(ISD::BITCAST,        MVT::i8,    Custom);
   setOperationAction(ISD::SETCC,          MVT::v2i16, Custom);
   setOperationAction(ISD::VSELECT,        MVT::v2i16, Custom);
   setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i8,  Custom);
@@ -2103,11 +2105,24 @@ HexagonTargetLowering::HexagonTargetLowe
       setOperationAction(ISD::INSERT_VECTOR_ELT,  T, Custom);
       setOperationAction(ISD::EXTRACT_SUBVECTOR,  T, Custom);
       setOperationAction(ISD::EXTRACT_VECTOR_ELT, T, Custom);
+      setOperationAction(ISD::ANY_EXTEND,         T, Custom);
+      setOperationAction(ISD::SIGN_EXTEND,        T, Custom);
+      setOperationAction(ISD::ZERO_EXTEND,        T, Custom);
       if (T != ByteV)
         setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG, T, Custom);
     }
 
     for (MVT T : LegalV) {
+      MVT BoolV = MVT::getVectorVT(MVT::i1, T.getVectorNumElements());
+      setOperationAction(ISD::BUILD_VECTOR,       BoolV, Custom);
+      setOperationAction(ISD::CONCAT_VECTORS,     BoolV, Custom);
+      setOperationAction(ISD::INSERT_SUBVECTOR,   BoolV, Custom);
+      setOperationAction(ISD::INSERT_VECTOR_ELT,  BoolV, Custom);
+      setOperationAction(ISD::EXTRACT_SUBVECTOR,  BoolV, Custom);
+      setOperationAction(ISD::EXTRACT_VECTOR_ELT, BoolV, Custom);
+    }
+
+    for (MVT T : LegalV) {
       if (T == ByteV)
         continue;
       // Promote all shuffles and concats to operate on vectors of bytes.
@@ -2260,6 +2275,7 @@ const char* HexagonTargetLowering::getTa
   case HexagonISD::CP:            return "HexagonISD::CP";
   case HexagonISD::DCFETCH:       return "HexagonISD::DCFETCH";
   case HexagonISD::EH_RETURN:     return "HexagonISD::EH_RETURN";
+  case HexagonISD::TSTBIT:        return "HexagonISD::TSTBIT";
   case HexagonISD::EXTRACTU:      return "HexagonISD::EXTRACTU";
   case HexagonISD::INSERT:        return "HexagonISD::INSERT";
   case HexagonISD::JT:            return "HexagonISD::JT";
@@ -2277,6 +2293,11 @@ const char* HexagonTargetLowering::getTa
   case HexagonISD::VROR:          return "HexagonISD::VROR";
   case HexagonISD::READCYCLE:     return "HexagonISD::READCYCLE";
   case HexagonISD::VZERO:         return "HexagonISD::VZERO";
+  case HexagonISD::D2P:           return "HexagonISD::D2P";
+  case HexagonISD::P2D:           return "HexagonISD::P2D";
+  case HexagonISD::V2Q:           return "HexagonISD::V2Q";
+  case HexagonISD::Q2V:           return "HexagonISD::Q2V";
+  case HexagonISD::TYPECAST:      return "HexagonISD::TYPECAST";
   case HexagonISD::OP_END:        break;
   }
   return nullptr;
@@ -2573,6 +2594,52 @@ HexagonTargetLowering::LowerVECTOR_SHIFT
   return DAG.getNode(ISD::BITCAST, dl, VT, Result);
 }
 
+SDValue
+HexagonTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
+  MVT ResTy = ty(Op);
+  SDValue InpV = Op.getOperand(0);
+  MVT InpTy = ty(InpV);
+  assert(ResTy.getSizeInBits() == InpTy.getSizeInBits());
+  const SDLoc &dl(Op);
+
+  // Handle conversion from i8 to v8i1.
+  if (ResTy == MVT::v8i1) {
+    SDValue Sc = DAG.getBitcast(tyScalar(InpTy), InpV);
+    SDValue Ext = DAG.getZExtOrTrunc(Sc, dl, MVT::i32);
+    return getNode(Hexagon::C2_tfrrp, dl, ResTy, Ext, DAG);
+  }
+
+  return SDValue();
+}
+
+// Any-, sign-, and zero-extends of boolean vectors to integer types are
+// all the same.
+
+SDValue
+HexagonTargetLowering::LowerANY_EXTEND(SDValue Op, SelectionDAG &DAG) const {
+  return LowerSIGN_EXTEND(Op, DAG);
+}
+
+SDValue
+HexagonTargetLowering::LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) const {
+  MVT ResTy = ty(Op);
+  SDValue InpV = Op.getOperand(0);
+  MVT ElemTy = ty(InpV).getVectorElementType();
+  if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy))
+    return extendHvxVectorPred(InpV, SDLoc(Op), ty(Op), false, DAG);
+  return Op;
+}
+
+SDValue
+HexagonTargetLowering::LowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const {
+  MVT ResTy = ty(Op);
+  SDValue InpV = Op.getOperand(0);
+  MVT ElemTy = ty(InpV).getVectorElementType();
+  if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy))
+    return extendHvxVectorPred(InpV, SDLoc(Op), ty(Op), true, DAG);
+  return Op;
+}
+
 bool
 HexagonTargetLowering::getBuildVectorConstInts(ArrayRef<SDValue> Values,
       MVT VecTy, SelectionDAG &DAG,
@@ -2757,8 +2824,54 @@ HexagonTargetLowering::extractVector(SDV
   unsigned VecWidth = VecTy.getSizeInBits();
   unsigned ValWidth = ValTy.getSizeInBits();
   unsigned ElemWidth = VecTy.getVectorElementType().getSizeInBits();
-  assert(VecWidth == 32 || VecWidth == 64);
   assert((VecWidth % ElemWidth) == 0);
+  auto *IdxN = dyn_cast<ConstantSDNode>(IdxV);
+
+  // Special case for v{8,4,2}i1 (the only boolean vectors legal in Hexagon
+  // without any coprocessors).
+  if (ElemWidth == 1) {
+    assert(VecWidth == VecTy.getVectorNumElements() && "Sanity failure");
+    assert(VecWidth == 8 || VecWidth == 4 || VecWidth == 2);
+    // Check if this is an extract of the lowest bit.
+    if (IdxN) {
+      // Extracting the lowest bit is a no-op, but it changes the type,
+      // so it must be kept as an operation to avoid errors related to
+      // type mismatches.
+      if (IdxN->isNullValue() && ValTy.getSizeInBits() == 1)
+        return DAG.getNode(HexagonISD::TYPECAST, dl, MVT::i1, VecV);
+    }
+
+    // If the value extracted is a single bit, use tstbit.
+    if (ValWidth == 1) {
+      SDValue A0 = getNode(Hexagon::C2_tfrpr, dl, MVT::i32, {VecV}, DAG);
+      return DAG.getNode(HexagonISD::TSTBIT, dl, MVT::i1, A0, IdxV);
+    }
+
+    // Each bool vector (v2i1, v4i1, v8i1) always occupies 8 bits in
+    // a predicate register. The elements of the vector are repeated
+    // in the register (if necessary) so that the total number is 8.
+    // The extracted subvector will need to be expanded in such a way.
+    unsigned Scale = VecWidth / ValWidth;
+
+    // Generate (p2d VecV) >> 8*Idx to move the interesting bytes to
+    // position 0.
+    assert(ty(IdxV) == MVT::i32);
+    SDValue S0 = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
+                             DAG.getConstant(8, dl, MVT::i32));
+    SDValue T0 = DAG.getNode(HexagonISD::P2D, dl, MVT::i64, VecV);
+    SDValue T1 = DAG.getNode(ISD::SRL, dl, MVT::i64, T0, S0);
+    while (Scale > 1) {
+      // The longest possible subvector is at most 32 bits, so it is always
+      // contained in the low subregister.
+      T1 = DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, T1);
+      T1 = expandPredicate(T1, dl, DAG);
+      Scale /= 2;
+    }
+
+    return DAG.getNode(HexagonISD::D2P, dl, ResTy, T1);
+  }
+
+  assert(VecWidth == 32 || VecWidth == 64);
 
   // Cast everything to scalar integer types.
   MVT ScalarTy = tyScalar(VecTy);
@@ -2767,8 +2880,8 @@ HexagonTargetLowering::extractVector(SDV
   SDValue WidthV = DAG.getConstant(ValWidth, dl, MVT::i32);
   SDValue ExtV;
 
-  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(IdxV)) {
-    unsigned Off = C->getZExtValue() * ElemWidth;
+  if (IdxN) {
+    unsigned Off = IdxN->getZExtValue() * ElemWidth;
     if (VecWidth == 64 && ValWidth == 32) {
       assert(Off == 0 || Off == 32);
       unsigned SubIdx = Off == 0 ? Hexagon::isub_lo : Hexagon::isub_hi;
@@ -2802,6 +2915,33 @@ HexagonTargetLowering::insertVector(SDVa
                                     const SDLoc &dl, MVT ValTy,
                                     SelectionDAG &DAG) const {
   MVT VecTy = ty(VecV);
+  if (VecTy.getVectorElementType() == MVT::i1) {
+    MVT ValTy = ty(ValV);
+    assert(ValTy.getVectorElementType() == MVT::i1);
+    SDValue ValR = DAG.getNode(HexagonISD::P2D, dl, MVT::i64, ValV);
+    unsigned VecLen = VecTy.getVectorNumElements();
+    unsigned Scale = VecLen / ValTy.getVectorNumElements();
+    assert(Scale > 1);
+
+    for (unsigned R = Scale; R > 1; R /= 2) {
+      ValR = contractPredicate(ValR, dl, DAG);
+      ValR = DAG.getNode(HexagonISD::COMBINE, dl, MVT::i64,
+                         DAG.getUNDEF(MVT::i32), ValR);
+    }
+    // The longest possible subvector is at most 32 bits, so it is always
+    // contained in the low subregister.
+    ValR = DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, ValR);
+
+    unsigned ValBytes = 64 / Scale;
+    SDValue Width = DAG.getConstant(ValBytes*8, dl, MVT::i32);
+    SDValue Idx = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
+                              DAG.getConstant(8, dl, MVT::i32));
+    SDValue VecR = DAG.getNode(HexagonISD::P2D, dl, MVT::i64, VecV);
+    SDValue Ins = DAG.getNode(HexagonISD::INSERT, dl, MVT::i32,
+                              {VecR, ValR, Width, Idx});
+    return DAG.getNode(HexagonISD::D2P, dl, VecTy, Ins);
+  }
+
   unsigned VecWidth = VecTy.getSizeInBits();
   unsigned ValWidth = ValTy.getSizeInBits();
   assert(VecWidth == 32 || VecWidth == 64);
@@ -2837,6 +2977,24 @@ HexagonTargetLowering::insertVector(SDVa
 }
 
 SDValue
+HexagonTargetLowering::expandPredicate(SDValue Vec32, const SDLoc &dl,
+                                       SelectionDAG &DAG) const {
+  assert(ty(Vec32).getSizeInBits() == 32);
+  if (isUndef(Vec32))
+    return DAG.getUNDEF(MVT::i64);
+  return getNode(Hexagon::S2_vsxtbh, dl, MVT::i64, {Vec32}, DAG);
+}
+
+SDValue
+HexagonTargetLowering::contractPredicate(SDValue Vec64, const SDLoc &dl,
+                                         SelectionDAG &DAG) const {
+  assert(ty(Vec64).getSizeInBits() == 64);
+  if (isUndef(Vec64))
+    return DAG.getUNDEF(MVT::i32);
+  return getNode(Hexagon::S2_vtrunehb, dl, MVT::i32, {Vec64}, DAG);
+}
+
+SDValue
 HexagonTargetLowering::getZero(const SDLoc &dl, MVT Ty, SelectionDAG &DAG)
       const {
   if (Ty.isVector()) {
@@ -2857,19 +3015,38 @@ HexagonTargetLowering::getZero(const SDL
 SDValue
 HexagonTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
   MVT VecTy = ty(Op);
-  unsigned BW = VecTy.getSizeInBits();
-
   if (Subtarget.useHVXOps() && Subtarget.isHVXVectorType(VecTy, true))
     return LowerHvxBuildVector(Op, DAG);
 
-  if (BW == 32 || BW == 64) {
-    const SDLoc &dl(Op);
-    SmallVector<SDValue,8> Ops;
-    for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i)
-      Ops.push_back(Op.getOperand(i));
-    if (BW == 32)
-      return buildVector32(Ops, dl, VecTy, DAG);
+  unsigned BW = VecTy.getSizeInBits();
+  const SDLoc &dl(Op);
+  SmallVector<SDValue,8> Ops;
+  for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i)
+    Ops.push_back(Op.getOperand(i));
+
+  if (BW == 32)
+    return buildVector32(Ops, dl, VecTy, DAG);
+  if (BW == 64)
     return buildVector64(Ops, dl, VecTy, DAG);
+
+  if (VecTy == MVT::v8i1 || VecTy == MVT::v4i1 || VecTy == MVT::v2i1) {
+    // For each i1 element in the resulting predicate register, put 1
+    // shifted by the index of the element into a general-purpose register,
+    // then or them together and transfer it back into a predicate register.
+    SDValue Rs[8];
+    SDValue Z = getZero(dl, MVT::i32, DAG);
+    // Always produce 8 bits, repeat inputs if necessary.
+    unsigned Rep = 8 / VecTy.getVectorNumElements();
+    for (unsigned i = 0; i != 8; ++i) {
+      SDValue S = DAG.getConstant(1 << i, dl, MVT::i32);
+      Rs[i] = DAG.getSelect(dl, MVT::i32, Ops[i/Rep], S, Z);
+    }
+    for (ArrayRef<SDValue> A(Rs); A.size() != 1; A = A.drop_back(A.size()/2)) {
+      for (unsigned i = 0, e = A.size()/2; i != e; ++i)
+        Rs[i] = DAG.getNode(ISD::OR, dl, MVT::i32, Rs[2*i], Rs[2*i+1]);
+    }
+    // Move the value directly to a predicate register.
+    return getNode(Hexagon::C2_tfrrp, dl, VecTy, {Rs[0]}, DAG);
   }
 
   return SDValue();
@@ -2879,14 +3056,68 @@ SDValue
 HexagonTargetLowering::LowerCONCAT_VECTORS(SDValue Op,
                                            SelectionDAG &DAG) const {
   MVT VecTy = ty(Op);
-  assert(!Subtarget.useHVXOps() || !Subtarget.isHVXVectorType(VecTy));
+  const SDLoc &dl(Op);
 
   if (VecTy.getSizeInBits() == 64) {
     assert(Op.getNumOperands() == 2);
-    return DAG.getNode(HexagonISD::COMBINE, SDLoc(Op), VecTy, Op.getOperand(1),
+    return DAG.getNode(HexagonISD::COMBINE, dl, VecTy, Op.getOperand(1),
                        Op.getOperand(0));
   }
 
+  MVT ElemTy = VecTy.getVectorElementType();
+  if (ElemTy == MVT::i1) {
+    if (Subtarget.useHVXOps() && Subtarget.isHVXVectorType(VecTy, true))
+      return LowerHvxConcatVectors(Op, DAG);
+
+    assert(VecTy == MVT::v2i1 || VecTy == MVT::v4i1 || VecTy == MVT::v8i1);
+    MVT OpTy = ty(Op.getOperand(0));
+    // Scale is how many times the operands need to be contracted to match
+    // the representation in the target register.
+    unsigned Scale = VecTy.getVectorNumElements() / OpTy.getVectorNumElements();
+    assert(Scale == Op.getNumOperands() && Scale > 1);
+
+    // First, convert all bool vectors to integers, then generate pairwise
+    // inserts to form values of doubled length. Up until there are only
+    // two values left to concatenate, all of these values will fit in a
+    // 32-bit integer, so keep them as i32 to use 32-bit inserts.
+    SmallVector<SDValue,4> Words[2];
+    unsigned IdxW = 0;
+
+    for (SDValue P : Op.getNode()->op_values()) {
+      SDValue W = DAG.getNode(HexagonISD::P2D, dl, MVT::i64, P);
+      for (unsigned R = Scale; R > 1; R /= 2) {
+        W = contractPredicate(W, dl, DAG);
+        W = DAG.getNode(HexagonISD::COMBINE, dl, MVT::i64,
+                        DAG.getUNDEF(MVT::i32), W);
+      }
+      W = DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, W);
+      Words[IdxW].push_back(W);
+    }
+
+    while (Scale > 2) {
+      SDValue WidthV = DAG.getConstant(64 / Scale, dl, MVT::i32);
+      Words[IdxW ^ 1].clear();
+
+      for (unsigned i = 0, e = Words[IdxW].size(); i != e; i += 2) {
+        SDValue W0 = Words[IdxW][i], W1 = Words[IdxW][i+1];
+        // Insert W1 into W0 right next to the significant bits of W0.
+        SDValue T = DAG.getNode(HexagonISD::INSERT, dl, MVT::i32,
+                                {W0, W1, WidthV, WidthV});
+        Words[IdxW ^ 1].push_back(T);
+      }
+      IdxW ^= 1;
+      Scale /= 2;
+    }
+
+    // Another sanity check. At this point there should only be two words
+    // left, and Scale should be 2.
+    assert(Scale == 2 && Words[IdxW].size() == 2);
+
+    SDValue WW = DAG.getNode(HexagonISD::COMBINE, dl, MVT::i64,
+                             Words[IdxW][1], Words[IdxW][0]);
+    return DAG.getNode(HexagonISD::D2P, dl, VecTy, WW);
+  }
+
   return SDValue();
 }
 
@@ -2895,7 +3126,7 @@ HexagonTargetLowering::LowerEXTRACT_VECT
                                                SelectionDAG &DAG) const {
   SDValue Vec = Op.getOperand(0);
   MVT VecTy = ty(Vec);
-  if (Subtarget.useHVXOps() && Subtarget.isHVXVectorType(VecTy))
+  if (Subtarget.useHVXOps() && Subtarget.isHVXVectorType(VecTy, true))
     return LowerHvxExtractElement(Op, DAG);
 
   MVT ElemTy = ty(Vec).getVectorElementType();
@@ -2907,7 +3138,7 @@ HexagonTargetLowering::LowerEXTRACT_SUBV
                                               SelectionDAG &DAG) const {
   SDValue Vec = Op.getOperand(0);
   MVT VecTy = ty(Vec);
-  if (Subtarget.useHVXOps() && Subtarget.isHVXVectorType(VecTy))
+  if (Subtarget.useHVXOps() && Subtarget.isHVXVectorType(VecTy, true))
     return LowerHvxExtractSubvector(Op, DAG);
 
   return extractVector(Vec, Op.getOperand(1), SDLoc(Op), ty(Op), ty(Op), DAG);
@@ -2917,7 +3148,7 @@ SDValue
 HexagonTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
                                               SelectionDAG &DAG) const {
   MVT VecTy = ty(Op);
-  if (Subtarget.useHVXOps() && Subtarget.isHVXVectorType(VecTy))
+  if (Subtarget.useHVXOps() && Subtarget.isHVXVectorType(VecTy, true))
     return LowerHvxInsertElement(Op, DAG);
 
   return insertVector(Op.getOperand(0), Op.getOperand(1), Op.getOperand(2),
@@ -2927,7 +3158,7 @@ HexagonTargetLowering::LowerINSERT_VECTO
 SDValue
 HexagonTargetLowering::LowerINSERT_SUBVECTOR(SDValue Op,
                                              SelectionDAG &DAG) const {
-  if (Subtarget.useHVXOps() && Subtarget.isHVXVectorType(ty(Op)))
+  if (Subtarget.useHVXOps() && Subtarget.isHVXVectorType(ty(Op), true))
     return LowerHvxInsertSubvector(Op, DAG);
 
   SDValue ValV = Op.getOperand(1);
@@ -2993,6 +3224,10 @@ HexagonTargetLowering::LowerOperation(SD
     case ISD::EXTRACT_VECTOR_ELT:   return LowerEXTRACT_VECTOR_ELT(Op, DAG);
     case ISD::BUILD_VECTOR:         return LowerBUILD_VECTOR(Op, DAG);
     case ISD::VECTOR_SHUFFLE:       return LowerVECTOR_SHUFFLE(Op, DAG);
+    case ISD::ANY_EXTEND:           return LowerANY_EXTEND(Op, DAG);
+    case ISD::SIGN_EXTEND:          return LowerSIGN_EXTEND(Op, DAG);
+    case ISD::ZERO_EXTEND:          return LowerZERO_EXTEND(Op, DAG);
+    case ISD::BITCAST:              return LowerBITCAST(Op, DAG);
     case ISD::SRA:
     case ISD::SHL:
     case ISD::SRL:                  return LowerVECTOR_SHIFT(Op, DAG);
@@ -3029,6 +3264,27 @@ HexagonTargetLowering::LowerOperation(SD
   return SDValue();
 }
 
+void
+HexagonTargetLowering::ReplaceNodeResults(SDNode *N,
+                                          SmallVectorImpl<SDValue> &Results,
+                                          SelectionDAG &DAG) const {
+  const SDLoc &dl(N);
+  switch (N->getOpcode()) {
+    case ISD::SRL:
+    case ISD::SRA:
+    case ISD::SHL:
+      return;
+    case ISD::BITCAST:
+      // Handle a bitcast from v8i1 to i8.
+      if (N->getValueType(0) == MVT::i8) {
+        SDValue P = getNode(Hexagon::C2_tfrpr, dl, MVT::i32,
+                            N->getOperand(0), DAG);
+        Results.push_back(P);
+      }
+      break;
+  }
+}
+
 /// Returns relocation base for the given PIC jumptable.
 SDValue
 HexagonTargetLowering::getPICJumpTableRelocBase(SDValue Table,

Modified: llvm/trunk/lib/Target/Hexagon/HexagonISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Hexagon/HexagonISelLowering.h?rev=323229&r1=323228&r2=323229&view=diff
==============================================================================
--- llvm/trunk/lib/Target/Hexagon/HexagonISelLowering.h (original)
+++ llvm/trunk/lib/Target/Hexagon/HexagonISelLowering.h Tue Jan 23 09:53:59 2018
@@ -56,6 +56,7 @@ namespace HexagonISD {
       VASR,
       VLSR,
 
+      TSTBIT,
       INSERT,
       EXTRACTU,
       VCOMBINE,
@@ -68,8 +69,16 @@ namespace HexagonISD {
       EH_RETURN,
       DCFETCH,
       READCYCLE,
+      D2P,         // Convert 8-byte value to 8-bit predicate register. [*]
+      P2D,         // Convert 8-bit predicate register to 8-byte value. [*]
+      V2Q,         // Convert HVX vector to a vector predicate reg. [*]
+      Q2V,         // Convert vector predicate to an HVX vector. [*]
+                   // [*] The equivalence is defined as "Q <=> (V != 0)",
+                   //     where the != operation compares bytes.
+                   // Note: V != 0 is implemented as V >u 0.
       VZERO,
-
+      TYPECAST,    // No-op that's used to convert between different legal
+                   // types in a register.
       OP_END
     };
 
@@ -125,6 +134,9 @@ namespace HexagonISD {
         const override;
 
     SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
+    void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
+                            SelectionDAG &DAG) const override;
+
     const char *getTargetNodeName(unsigned Opcode) const override;
 
     SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
@@ -135,6 +147,10 @@ namespace HexagonISD {
     SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerVECTOR_SHIFT(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerANY_EXTEND(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const;
 
     SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const;
@@ -293,6 +309,11 @@ namespace HexagonISD {
                           MVT ValTy, MVT ResTy, SelectionDAG &DAG) const;
     SDValue insertVector(SDValue VecV, SDValue ValV, SDValue IdxV,
                          const SDLoc &dl, MVT ValTy, SelectionDAG &DAG) const;
+    SDValue expandPredicate(SDValue Vec32, const SDLoc &dl,
+                            SelectionDAG &DAG) const;
+    SDValue contractPredicate(SDValue Vec64, const SDLoc &dl,
+                              SelectionDAG &DAG) const;
+
     bool isUndef(SDValue Op) const {
       if (Op.isMachineOpcode())
         return Op.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF;
@@ -326,7 +347,8 @@ namespace HexagonISD {
     MVT tyVector(MVT Ty, MVT ElemTy) const {
       if (Ty.isVector() && Ty.getVectorElementType() == ElemTy)
         return Ty;
-      unsigned TyWidth = Ty.getSizeInBits(), ElemWidth = ElemTy.getSizeInBits();
+      unsigned TyWidth = Ty.getSizeInBits();
+      unsigned ElemWidth = ElemTy.getSizeInBits();
       assert((TyWidth % ElemWidth) == 0);
       return MVT::getVectorVT(ElemTy, TyWidth/ElemWidth);
     }
@@ -347,14 +369,34 @@ namespace HexagonISD {
     SDValue getByteShuffle(const SDLoc &dl, SDValue Op0, SDValue Op1,
                            ArrayRef<int> Mask, SelectionDAG &DAG) const;
 
-    MVT getVecBoolVT() const;
-
-    SDValue buildHvxVectorSingle(ArrayRef<SDValue> Values, const SDLoc &dl,
-                                 MVT VecTy, SelectionDAG &DAG) const;
+    SDValue buildHvxVectorReg(ArrayRef<SDValue> Values, const SDLoc &dl,
+                              MVT VecTy, SelectionDAG &DAG) const;
     SDValue buildHvxVectorPred(ArrayRef<SDValue> Values, const SDLoc &dl,
                                MVT VecTy, SelectionDAG &DAG) const;
+    SDValue createHvxPrefixPred(SDValue PredV, const SDLoc &dl,
+                                unsigned BitBytes, bool ZeroFill,
+                                SelectionDAG &DAG) const;
+    SDValue extractHvxElementReg(SDValue VecV, SDValue IdxV, const SDLoc &dl,
+                                 MVT ResTy, SelectionDAG &DAG) const;
+    SDValue extractHvxElementPred(SDValue VecV, SDValue IdxV, const SDLoc &dl,
+                                  MVT ResTy, SelectionDAG &DAG) const;
+    SDValue insertHvxElementReg(SDValue VecV, SDValue IdxV, SDValue ValV,
+                                const SDLoc &dl, SelectionDAG &DAG) const;
+    SDValue insertHvxElementPred(SDValue VecV, SDValue IdxV, SDValue ValV,
+                                 const SDLoc &dl, SelectionDAG &DAG) const;
+    SDValue extractHvxSubvectorReg(SDValue VecV, SDValue IdxV, const SDLoc &dl,
+                                   MVT ResTy, SelectionDAG &DAG) const;
+    SDValue extractHvxSubvectorPred(SDValue VecV, SDValue IdxV, const SDLoc &dl,
+                                    MVT ResTy, SelectionDAG &DAG) const;
+    SDValue insertHvxSubvectorReg(SDValue VecV, SDValue SubV, SDValue IdxV,
+                                  const SDLoc &dl, SelectionDAG &DAG) const;
+    SDValue insertHvxSubvectorPred(SDValue VecV, SDValue SubV, SDValue IdxV,
+                                   const SDLoc &dl, SelectionDAG &DAG) const;
+    SDValue extendHvxVectorPred(SDValue VecV, const SDLoc &dl, MVT ResTy,
+                                bool ZeroExt, SelectionDAG &DAG) const;
 
     SDValue LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerHvxConcatVectors(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerHvxExtractElement(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerHvxInsertElement(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerHvxExtractSubvector(SDValue Op, SelectionDAG &DAG) const;

Modified: llvm/trunk/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp?rev=323229&r1=323228&r2=323229&view=diff
==============================================================================
--- llvm/trunk/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp (original)
+++ llvm/trunk/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp Tue Jan 23 09:53:59 2018
@@ -141,21 +141,17 @@ HexagonTargetLowering::getByteShuffle(co
                               opCastElem(Op1, MVT::i8, DAG), ByteMask);
 }
 
-MVT
-HexagonTargetLowering::getVecBoolVT() const {
-  return MVT::getVectorVT(MVT::i1, 8*Subtarget.getVectorLength());
-}
-
 SDValue
-HexagonTargetLowering::buildHvxVectorSingle(ArrayRef<SDValue> Values,
-                                            const SDLoc &dl, MVT VecTy,
-                                            SelectionDAG &DAG) const {
+HexagonTargetLowering::buildHvxVectorReg(ArrayRef<SDValue> Values,
+                                         const SDLoc &dl, MVT VecTy,
+                                         SelectionDAG &DAG) const {
   unsigned VecLen = Values.size();
   MachineFunction &MF = DAG.getMachineFunction();
   MVT ElemTy = VecTy.getVectorElementType();
   unsigned ElemWidth = ElemTy.getSizeInBits();
   unsigned HwLen = Subtarget.getVectorLength();
 
+  // TODO: Recognize constant splats.
   SmallVector<ConstantInt*, 128> Consts(VecLen);
   bool AllConst = getBuildVectorConstInts(Values, VecTy, DAG, Consts);
   if (AllConst) {
@@ -187,12 +183,28 @@ HexagonTargetLowering::buildHvxVectorSin
     Words.assign(Values.begin(), Values.end());
   }
 
+  unsigned NumWords = Words.size();
+  bool IsUndef = true, IsSplat = true;
+  SDValue SplatV;
+  for (unsigned i = 0; i != NumWords && IsSplat; ++i) {
+    if (isUndef(Words[i]))
+      continue;
+    IsUndef = false;
+    if (!SplatV.getNode())
+      SplatV = Words[i];
+    else if (SplatV != Words[i])
+      IsSplat = false;
+  }
+  if (IsSplat) {
+    assert(SplatV.getNode());
+    return DAG.getNode(HexagonISD::VSPLAT, dl, VecTy, SplatV);
+  }
+
   // Construct two halves in parallel, then or them together.
   assert(4*Words.size() == Subtarget.getVectorLength());
   SDValue HalfV0 = getNode(Hexagon::V6_vd0, dl, VecTy, {}, DAG);
   SDValue HalfV1 = getNode(Hexagon::V6_vd0, dl, VecTy, {}, DAG);
   SDValue S = DAG.getConstant(4, dl, MVT::i32);
-  unsigned NumWords = Words.size();
   for (unsigned i = 0; i != NumWords/2; ++i) {
     SDValue N = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy,
                             {HalfV0, Words[i]});
@@ -209,6 +221,95 @@ HexagonTargetLowering::buildHvxVectorSin
 }
 
 SDValue
+HexagonTargetLowering::createHvxPrefixPred(SDValue PredV, const SDLoc &dl,
+      unsigned BitBytes, bool ZeroFill, SelectionDAG &DAG) const {
+  MVT PredTy = ty(PredV);
+  unsigned HwLen = Subtarget.getVectorLength();
+  MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
+
+  if (Subtarget.isHVXVectorType(PredTy, true)) {
+    // Move the vector predicate SubV to a vector register, and scale it
+    // down to match the representation (bytes per type element) that VecV
+    // uses. The scaling down will pick every 2nd or 4th (every Scale-th
+    // in general) element and put them at the front of the resulting
+    // vector. This subvector will then be inserted into the Q2V of VecV.
+    // To avoid having an operation that generates an illegal type (short
+    // vector), generate a full size vector.
+    //
+    SDValue T = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, PredV);
+    SmallVector<int,128> Mask(HwLen);
+    // Scale = BitBytes(PredV) / Given BitBytes.
+    unsigned Scale = HwLen / (PredTy.getVectorNumElements() * BitBytes);
+    unsigned BlockLen = PredTy.getVectorNumElements() * BitBytes;
+
+    for (unsigned i = 0; i != HwLen; ++i) {
+      unsigned Num = i % Scale;
+      unsigned Off = i / Scale;
+      Mask[BlockLen*Num + Off] = i;
+    }
+    SDValue S = DAG.getVectorShuffle(ByteTy, dl, T, DAG.getUNDEF(ByteTy), Mask);
+    if (!ZeroFill)
+      return S;
+    // Fill the bytes beyond BlockLen with 0s.
+    MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
+    SDValue Q = getNode(Hexagon::V6_pred_scalar2, dl, BoolTy,
+                        {DAG.getConstant(BlockLen, dl, MVT::i32)}, DAG);
+    SDValue M = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, Q);
+    return DAG.getNode(ISD::AND, dl, ByteTy, S, M);
+  }
+
+  // Make sure that this is a valid scalar predicate.
+  assert(PredTy == MVT::v2i1 || PredTy == MVT::v4i1 || PredTy == MVT::v8i1);
+
+  unsigned Bytes = 8 / PredTy.getVectorNumElements();
+  SmallVector<SDValue,4> Words[2];
+  unsigned IdxW = 0;
+
+  auto Lo32 = [&DAG, &dl] (SDValue P) {
+    return DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, P);
+  };
+  auto Hi32 = [&DAG, &dl] (SDValue P) {
+    return DAG.getTargetExtractSubreg(Hexagon::isub_hi, dl, MVT::i32, P);
+  };
+
+  SDValue W0 = isUndef(PredV)
+                  ? DAG.getUNDEF(MVT::i64)
+                  : DAG.getNode(HexagonISD::P2D, dl, MVT::i64, PredV);
+  Words[IdxW].push_back(Hi32(W0));
+  Words[IdxW].push_back(Lo32(W0));
+
+  while (Bytes < BitBytes) {
+    IdxW ^= 1;
+    Words[IdxW].clear();
+
+    if (Bytes < 4) {
+      for (const SDValue &W : Words[IdxW ^ 1]) {
+        SDValue T = expandPredicate(W, dl, DAG);
+        Words[IdxW].push_back(Hi32(T));
+        Words[IdxW].push_back(Lo32(T));
+      }
+    } else {
+      for (const SDValue &W : Words[IdxW ^ 1]) {
+        Words[IdxW].push_back(W);
+        Words[IdxW].push_back(W);
+      }
+    }
+    Bytes *= 2;
+  }
+
+  assert(Bytes == BitBytes);
+
+  SDValue Vec = ZeroFill ? getZero(dl, ByteTy, DAG) : DAG.getUNDEF(ByteTy);
+  SDValue S4 = DAG.getConstant(HwLen-4, dl, MVT::i32);
+  for (const SDValue &W : Words[IdxW]) {
+    Vec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, Vec, S4);
+    Vec = DAG.getNode(HexagonISD::VINSERTW0, dl, ByteTy, Vec, W);
+  }
+
+  return Vec;
+}
+
+SDValue
 HexagonTargetLowering::buildHvxVectorPred(ArrayRef<SDValue> Values,
                                           const SDLoc &dl, MVT VecTy,
                                           SelectionDAG &DAG) const {
@@ -254,52 +355,19 @@ HexagonTargetLowering::buildHvxVectorPre
   }
 
   MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
-  SDValue ByteVec = buildHvxVectorSingle(Bytes, dl, ByteTy, DAG);
-  SDValue Cmp = DAG.getSetCC(dl, VecTy, ByteVec, getZero(dl, ByteTy, DAG),
-                             ISD::SETUGT);
-  return Cmp;
-}
-
-SDValue
-HexagonTargetLowering::LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG)
-      const {
-  const SDLoc &dl(Op);
-  MVT VecTy = ty(Op);
-
-  unsigned Size = Op.getNumOperands();
-  SmallVector<SDValue,128> Ops;
-  for (unsigned i = 0; i != Size; ++i)
-    Ops.push_back(Op.getOperand(i));
-
-  if (VecTy.getVectorElementType() == MVT::i1)
-    return buildHvxVectorPred(Ops, dl, VecTy, DAG);
-
-  if (VecTy.getSizeInBits() == 16*Subtarget.getVectorLength()) {
-    ArrayRef<SDValue> A(Ops);
-    MVT SingleTy = typeSplit(VecTy).first;
-    SDValue V0 = buildHvxVectorSingle(A.take_front(Size/2), dl, SingleTy, DAG);
-    SDValue V1 = buildHvxVectorSingle(A.drop_front(Size/2), dl, SingleTy, DAG);
-    return DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, V0, V1);
-  }
-
-  return buildHvxVectorSingle(Ops, dl, VecTy, DAG);
+  SDValue ByteVec = buildHvxVectorReg(Bytes, dl, ByteTy, DAG);
+  return DAG.getNode(HexagonISD::V2Q, dl, VecTy, ByteVec);
 }
 
 SDValue
-HexagonTargetLowering::LowerHvxExtractElement(SDValue Op, SelectionDAG &DAG)
-      const {
-  // Change the type of the extracted element to i32.
-  SDValue VecV = Op.getOperand(0);
+HexagonTargetLowering::extractHvxElementReg(SDValue VecV, SDValue IdxV,
+      const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
   MVT ElemTy = ty(VecV).getVectorElementType();
+
   unsigned ElemWidth = ElemTy.getSizeInBits();
   assert(ElemWidth >= 8 && ElemWidth <= 32);
   (void)ElemWidth;
 
-  const SDLoc &dl(Op);
-  SDValue IdxV = Op.getOperand(1);
-  if (ty(IdxV) != MVT::i32)
-    IdxV = DAG.getBitcast(MVT::i32, IdxV);
-
   SDValue ByteIdx = convertToByteIndex(IdxV, ElemTy, DAG);
   SDValue ExWord = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32,
                                {VecV, ByteIdx});
@@ -316,13 +384,29 @@ HexagonTargetLowering::LowerHvxExtractEl
 }
 
 SDValue
-HexagonTargetLowering::LowerHvxInsertElement(SDValue Op, SelectionDAG &DAG)
-      const {
-  const SDLoc &dl(Op);
-  SDValue VecV = Op.getOperand(0);
-  SDValue ValV = Op.getOperand(1);
-  SDValue IdxV = Op.getOperand(2);
+HexagonTargetLowering::extractHvxElementPred(SDValue VecV, SDValue IdxV,
+      const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
+  // Implement other return types if necessary.
+  assert(ResTy == MVT::i1);
+
+  unsigned HwLen = Subtarget.getVectorLength();
+  MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
+  SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
+
+  unsigned Scale = HwLen / ty(VecV).getVectorNumElements();
+  SDValue ScV = DAG.getConstant(Scale, dl, MVT::i32);
+  IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, ScV);
+
+  SDValue ExtB = extractHvxElementReg(ByteVec, IdxV, dl, MVT::i32, DAG);
+  SDValue Zero = DAG.getTargetConstant(0, dl, MVT::i32);
+  return getNode(Hexagon::C2_cmpgtui, dl, MVT::i1, {ExtB, Zero}, DAG);
+}
+
+SDValue
+HexagonTargetLowering::insertHvxElementReg(SDValue VecV, SDValue IdxV,
+      SDValue ValV, const SDLoc &dl, SelectionDAG &DAG) const {
   MVT ElemTy = ty(VecV).getVectorElementType();
+
   unsigned ElemWidth = ElemTy.getSizeInBits();
   assert(ElemWidth >= 8 && ElemWidth <= 32);
   (void)ElemWidth;
@@ -336,7 +420,7 @@ HexagonTargetLowering::LowerHvxInsertEle
     SDValue RotV = DAG.getNode(HexagonISD::VROR, dl, VecTy, {VecV, MaskV});
     SDValue InsV = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy, {RotV, ValV});
     SDValue SubV = DAG.getNode(ISD::SUB, dl, MVT::i32,
-                               {DAG.getConstant(HwLen/4, dl, MVT::i32), MaskV});
+                               {DAG.getConstant(HwLen, dl, MVT::i32), MaskV});
     SDValue TorV = DAG.getNode(HexagonISD::VROR, dl, VecTy, {InsV, SubV});
     return TorV;
   };
@@ -349,9 +433,8 @@ HexagonTargetLowering::LowerHvxInsertEle
   // 1. Extract the existing word from the target vector.
   SDValue WordIdx = DAG.getNode(ISD::SRL, dl, MVT::i32,
                                 {ByteIdx, DAG.getConstant(2, dl, MVT::i32)});
-  SDValue Ex0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
-                            {opCastElem(VecV, MVT::i32, DAG), WordIdx});
-  SDValue Ext = LowerHvxExtractElement(Ex0, DAG);
+  SDValue Ext = extractHvxElementReg(opCastElem(VecV, MVT::i32, DAG), WordIdx,
+                                     dl, MVT::i32, DAG);
 
   // 2. Treating the extracted word as a 32-bit vector, insert the given
   //    value into it.
@@ -365,48 +448,401 @@ HexagonTargetLowering::LowerHvxInsertEle
 }
 
 SDValue
+HexagonTargetLowering::insertHvxElementPred(SDValue VecV, SDValue IdxV,
+      SDValue ValV, const SDLoc &dl, SelectionDAG &DAG) const {
+  unsigned HwLen = Subtarget.getVectorLength();
+  MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
+  SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
+
+  unsigned Scale = HwLen / ty(VecV).getVectorNumElements();
+  SDValue ScV = DAG.getConstant(Scale, dl, MVT::i32);
+  IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, ScV);
+  ValV = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, ValV);
+
+  SDValue InsV = insertHvxElementReg(ByteVec, IdxV, ValV, dl, DAG);
+  return DAG.getNode(HexagonISD::V2Q, dl, ty(VecV), InsV);
+}
+
+SDValue
+HexagonTargetLowering::extractHvxSubvectorReg(SDValue VecV, SDValue IdxV,
+      const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
+  MVT VecTy = ty(VecV);
+  unsigned HwLen = Subtarget.getVectorLength();
+  unsigned Idx = cast<ConstantSDNode>(IdxV.getNode())->getZExtValue();
+  MVT ElemTy = VecTy.getVectorElementType();
+  unsigned ElemWidth = ElemTy.getSizeInBits();
+
+  // If the source vector is a vector pair, get the single vector containing
+  // the subvector of interest. The subvector will never overlap two single
+  // vectors.
+  if (VecTy.getSizeInBits() == 16*HwLen) {
+    unsigned SubIdx;
+    if (Idx * ElemWidth >= 8*HwLen) {
+      SubIdx = Hexagon::vsub_hi;
+      Idx -= VecTy.getVectorNumElements() / 2;
+    } else {
+      SubIdx = Hexagon::vsub_lo;
+    }
+    VecTy = typeSplit(VecTy).first;
+    VecV = DAG.getTargetExtractSubreg(SubIdx, dl, VecTy, VecV);
+    if (VecTy == ResTy)
+      return VecV;
+  }
+
+  // The only meaningful subvectors of a single HVX vector are those that
+  // fit in a scalar register.
+  assert(ResTy.getSizeInBits() == 32 || ResTy.getSizeInBits() == 64);
+
+  MVT WordTy = tyVector(VecTy, MVT::i32);
+  SDValue WordVec = DAG.getBitcast(WordTy, VecV);
+  unsigned WordIdx = (Idx*ElemWidth) / 32;
+
+  SDValue W0Idx = DAG.getConstant(WordIdx, dl, MVT::i32);
+  SDValue W0 = extractHvxElementReg(WordVec, W0Idx, dl, MVT::i32, DAG);
+  if (ResTy.getSizeInBits() == 32)
+    return DAG.getBitcast(ResTy, W0);
+
+  SDValue W1Idx = DAG.getConstant(WordIdx+1, dl, MVT::i32);
+  SDValue W1 = extractHvxElementReg(WordVec, W1Idx, dl, MVT::i32, DAG);
+  SDValue WW = DAG.getNode(HexagonISD::COMBINE, dl, MVT::i64, {W1, W0});
+  return DAG.getBitcast(ResTy, WW);
+}
+
+SDValue
+HexagonTargetLowering::extractHvxSubvectorPred(SDValue VecV, SDValue IdxV,
+      const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
+  MVT VecTy = ty(VecV);
+  unsigned HwLen = Subtarget.getVectorLength();
+  MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
+  SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
+  // IdxV is required to be a constant.
+  unsigned Idx = cast<ConstantSDNode>(IdxV.getNode())->getZExtValue();
+  // Extract the i1 subvector of type ResTy that starts at element Idx of VecV.
+  unsigned ResLen = ResTy.getVectorNumElements();
+  unsigned BitBytes = HwLen / VecTy.getVectorNumElements();
+  unsigned Offset = Idx * BitBytes;
+  SDValue Undef = DAG.getUNDEF(ByteTy);
+  SmallVector<int,128> Mask;
+
+  if (Subtarget.isHVXVectorType(ResTy, true)) {
+    // Converting between two vector predicates. Since the result is shorter
+    // than the source, it will correspond to a vector predicate with the
+    // relevant bits replicated. The replication count is the ratio of the
+    // source and target vector lengths.
+    unsigned Rep = VecTy.getVectorNumElements() / ResLen;
+    assert(isPowerOf2_32(Rep) && HwLen % Rep == 0);
+    for (unsigned i = 0; i != HwLen/Rep; ++i) {
+      for (unsigned j = 0; j != Rep; ++j)
+        Mask.push_back(i + Offset);
+    }
+    SDValue ShuffV = DAG.getVectorShuffle(ByteTy, dl, ByteVec, Undef, Mask);
+    return DAG.getNode(HexagonISD::V2Q, dl, ResTy, ShuffV);
+  }
+
+  // Converting between a vector predicate and a scalar predicate. In the
+  // vector predicate, a group of BitBytes bits will correspond to a single
+  // i1 element of the source vector type. Those bits will all have the same
+  // value. The same will be true for ByteVec, where each byte corresponds
+  // to a bit in the vector predicate.
+  // The algorithm is to traverse the ByteVec, going over the i1 values from
+  // the source vector, and generate the corresponding representation in an
+  // 8-byte vector. To avoid repeated extracts from ByteVec, shuffle the
+  // elements so that the interesting 8 bytes will be in the low end of the
+  // vector.
+  unsigned Rep = 8 / ResLen;
+  // Make sure the output fills the entire vector register: each pass below
+  // adds ResLen*Rep = 8 mask entries, so HwLen/8 passes give HwLen entries.
+  for (unsigned r = 0; r != HwLen/8; ++r) {
+    // This will generate the indexes of the 8 interesting bytes.
+    for (unsigned i = 0; i != ResLen; ++i) {
+      for (unsigned j = 0; j != Rep; ++j)
+        Mask.push_back(Offset + i*BitBytes);
+    }
+  }
+
+  SDValue Zero = getZero(dl, MVT::i32, DAG);
+  SDValue ShuffV = DAG.getVectorShuffle(ByteTy, dl, ByteVec, Undef, Mask);
+  // Combine the two low words from ShuffV into a v8i8, and byte-compare
+  // them against 0.
+  SDValue W0 = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32, {ShuffV, Zero});
+  SDValue W1 = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32,
+                           {ShuffV, DAG.getConstant(4, dl, MVT::i32)});
+  SDValue Vec64 = DAG.getNode(HexagonISD::COMBINE, dl, MVT::v8i8, {W1, W0});
+  return getNode(Hexagon::A4_vcmpbgtui, dl, ResTy,
+                 {Vec64, DAG.getTargetConstant(0, dl, MVT::i32)}, DAG);
+}
+
+SDValue
+HexagonTargetLowering::insertHvxSubvectorReg(SDValue VecV, SDValue SubV,
+      SDValue IdxV, const SDLoc &dl, SelectionDAG &DAG) const {
+  MVT VecTy = ty(VecV);
+  MVT SubTy = ty(SubV);
+  unsigned HwLen = Subtarget.getVectorLength();
+  MVT ElemTy = VecTy.getVectorElementType();
+  unsigned ElemWidth = ElemTy.getSizeInBits();
+  // Insert SubV into VecV at element index IdxV; VecV may be a vector pair.
+  bool IsPair = VecTy.getSizeInBits() == 16*HwLen;
+  MVT SingleTy = MVT::getVectorVT(ElemTy, (8*HwLen)/ElemWidth);
+  // The two single vectors that VecV consists of, if it's a pair.
+  SDValue V0, V1;
+  SDValue SingleV = VecV;
+  SDValue PickHi;
+
+  if (IsPair) {
+    V0 = DAG.getTargetExtractSubreg(Hexagon::vsub_lo, dl, SingleTy, VecV);
+    V1 = DAG.getTargetExtractSubreg(Hexagon::vsub_hi, dl, SingleTy, VecV);
+
+    SDValue HalfV = DAG.getConstant(SingleTy.getVectorNumElements(),
+                                    dl, MVT::i32);
+    PickHi = DAG.getSetCC(dl, MVT::i1, IdxV, HalfV, ISD::SETUGE);
+    if (SubTy.getSizeInBits() == 8*HwLen) {
+      if (const auto *CN = dyn_cast<const ConstantSDNode>(IdxV.getNode())) {
+        unsigned Idx = CN->getZExtValue();
+        assert(Idx == 0 || Idx == VecTy.getVectorNumElements()/2);
+        unsigned SubIdx = (Idx == 0) ? Hexagon::vsub_lo : Hexagon::vsub_hi;
+        return DAG.getTargetInsertSubreg(SubIdx, dl, VecTy, VecV, SubV);
+      }
+      // If IdxV is not a constant, generate the two variants: with the
+      // SubV as the high and as the low subregister, and select the right
+      // pair based on the IdxV.
+      SDValue InLo = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {SubV, V1});
+      SDValue InHi = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {V0, SubV});
+      return DAG.getNode(ISD::SELECT, dl, VecTy, PickHi, InHi, InLo);
+    }
+    // The subvector being inserted must be entirely contained in one of
+    // the vectors V0 or V1. Set SingleV to the correct one, and update
+    // IdxV to be the index relative to the beginning of that vector.
+    SDValue S = DAG.getNode(ISD::SUB, dl, MVT::i32, IdxV, HalfV);
+    IdxV = DAG.getNode(ISD::SELECT, dl, MVT::i32, PickHi, S, IdxV);
+    SingleV = DAG.getNode(ISD::SELECT, dl, SingleTy, PickHi, V1, V0);
+  }
+
+  // The only meaningful subvectors of a single HVX vector are those that
+  // fit in a scalar register.
+  assert(SubTy.getSizeInBits() == 32 || SubTy.getSizeInBits() == 64);
+  // Convert IdxV to be index in bytes.
+  auto *IdxN = dyn_cast<ConstantSDNode>(IdxV.getNode());
+  if (!IdxN || !IdxN->isNullValue()) {
+    IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
+                       DAG.getConstant(ElemWidth/8, dl, MVT::i32));
+    SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV, IdxV);
+  }
+  // When inserting a single word, the rotation back to the original position
+  // would be by HwLen-Idx, but if two words are inserted, it will need to be
+  // by (HwLen-4)-Idx.
+  unsigned RolBase = HwLen;
+  if (SubTy.getSizeInBits() == 32) {
+    SDValue V = DAG.getBitcast(MVT::i32, SubV);
+    SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, SingleV, V);
+  } else {
+    SDValue V = DAG.getBitcast(MVT::i64, SubV);
+    SDValue R0 = DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, V);
+    SDValue R1 = DAG.getTargetExtractSubreg(Hexagon::isub_hi, dl, MVT::i32, V);
+    SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, SingleV, R0);
+    SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV,
+                          DAG.getConstant(4, dl, MVT::i32));
+    SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, SingleV, R1);
+    RolBase = HwLen-4;
+  }
+  // If the vector wasn't ror'ed (one word at constant index 0), skip this.
+  if (RolBase != HwLen || !IdxN || !IdxN->isNullValue()) {
+    SDValue RolV = DAG.getNode(ISD::SUB, dl, MVT::i32,
+                               DAG.getConstant(RolBase, dl, MVT::i32), IdxV);
+    SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV, RolV);
+  }
+
+  if (IsPair) {
+    SDValue InLo = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {SingleV, V1});
+    SDValue InHi = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {V0, SingleV});
+    return DAG.getNode(ISD::SELECT, dl, VecTy, PickHi, InHi, InLo);
+  }
+  return SingleV;
+}
+
+SDValue
+HexagonTargetLowering::insertHvxSubvectorPred(SDValue VecV, SDValue SubV,
+      SDValue IdxV, const SDLoc &dl, SelectionDAG &DAG) const {
+  MVT VecTy = ty(VecV);
+  MVT SubTy = ty(SubV);
+  assert(Subtarget.isHVXVectorType(VecTy, true));
+  // VecV is an HVX vector predicate. SubV may be either an HVX vector
+  // predicate as well, or it can be a scalar predicate.
+
+  unsigned VecLen = VecTy.getVectorNumElements();
+  unsigned HwLen = Subtarget.getVectorLength();
+  assert(HwLen % VecLen == 0 && "Unexpected vector type");
+
+  unsigned Scale = VecLen / SubTy.getVectorNumElements();
+  unsigned BitBytes = HwLen / VecLen;
+  unsigned BlockLen = HwLen / Scale;
+
+  MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
+  SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
+  SDValue ByteSub = createHvxPrefixPred(SubV, dl, BitBytes, false, DAG);
+  SDValue ByteIdx;
+
+  auto *IdxN = dyn_cast<ConstantSDNode>(IdxV.getNode());
+  if (!IdxN || !IdxN->isNullValue()) {
+    ByteIdx = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
+                          DAG.getConstant(BitBytes, dl, MVT::i32));
+    ByteVec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, ByteVec, ByteIdx);
+  }
+
+  // ByteVec is the target vector VecV rotated in such a way that the
+  // subvector should be inserted at index 0. Generate a predicate mask
+  // and use vmux to do the insertion.
+  MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
+  SDValue Q = getNode(Hexagon::V6_pred_scalar2, dl, BoolTy,
+                      {DAG.getConstant(BlockLen, dl, MVT::i32)}, DAG);
+  ByteVec = getNode(Hexagon::V6_vmux, dl, ByteTy, {Q, ByteSub, ByteVec}, DAG);
+  // Rotate ByteVec back, and convert to a vector predicate.
+  if (!IdxN || !IdxN->isNullValue()) {
+    SDValue HwLenV = DAG.getConstant(HwLen, dl, MVT::i32);
+    SDValue ByteXdi = DAG.getNode(ISD::SUB, dl, MVT::i32, HwLenV, ByteIdx);
+    ByteVec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, ByteVec, ByteXdi);
+  }
+  return DAG.getNode(HexagonISD::V2Q, dl, VecTy, ByteVec);
+}
+
+SDValue
+HexagonTargetLowering::extendHvxVectorPred(SDValue VecV, const SDLoc &dl,
+      MVT ResTy, bool ZeroExt, SelectionDAG &DAG) const {
+  // Sign- and any-extending of a vector predicate to a vector register is
+  // equivalent to Q2V. For zero-extensions, generate a vmux between 0 and
+  // a vector of 1s (where the 1s are of type matching the vector type).
+  assert(Subtarget.isHVXVectorType(ResTy));
+  if (!ZeroExt)
+    return DAG.getNode(HexagonISD::Q2V, dl, ResTy, VecV);
+
+  assert(ty(VecV).getVectorNumElements() == ResTy.getVectorNumElements());
+  SDValue True = DAG.getNode(HexagonISD::VSPLAT, dl, ResTy,
+                             DAG.getConstant(1, dl, MVT::i32));
+  SDValue False = getZero(dl, ResTy, DAG);
+  return DAG.getSelect(dl, ResTy, VecV, True, False);
+}
+
+SDValue
+HexagonTargetLowering::LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG)
+      const {
+  const SDLoc &dl(Op);
+  MVT VecTy = ty(Op);
+
+  unsigned Size = Op.getNumOperands();
+  SmallVector<SDValue,128> Ops;
+  for (unsigned i = 0; i != Size; ++i)
+    Ops.push_back(Op.getOperand(i));
+
+  if (VecTy.getVectorElementType() == MVT::i1)
+    return buildHvxVectorPred(Ops, dl, VecTy, DAG);
+
+  if (VecTy.getSizeInBits() == 16*Subtarget.getVectorLength()) {
+    ArrayRef<SDValue> A(Ops);
+    MVT SingleTy = typeSplit(VecTy).first;
+    SDValue V0 = buildHvxVectorReg(A.take_front(Size/2), dl, SingleTy, DAG);
+    SDValue V1 = buildHvxVectorReg(A.drop_front(Size/2), dl, SingleTy, DAG);
+    return DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, V0, V1);
+  }
+
+  return buildHvxVectorReg(Ops, dl, VecTy, DAG);
+}
+
+SDValue
+HexagonTargetLowering::LowerHvxConcatVectors(SDValue Op, SelectionDAG &DAG)
+      const {
+  // This should only be called for vectors of i1. The "scalar" vector
+  // concatenation does not need special lowering (assuming that only
+  // two vectors are concatenated at a time).
+  MVT VecTy = ty(Op);
+  assert(VecTy.getVectorElementType() == MVT::i1);
+
+  const SDLoc &dl(Op);
+  unsigned HwLen = Subtarget.getVectorLength();
+  unsigned NumOp = Op.getNumOperands();
+  assert(isPowerOf2_32(NumOp) && HwLen % NumOp == 0);
+
+  // Count how many bytes (in a vector register) each bit in VecTy
+  // corresponds to.
+  unsigned BitBytes = HwLen / VecTy.getVectorNumElements();
+
+  SmallVector<SDValue,8> Prefixes;
+  for (SDValue V : Op.getNode()->op_values()) {
+    SDValue P = createHvxPrefixPred(V, dl, BitBytes, true, DAG);
+    Prefixes.push_back(P);
+  }
+
+  unsigned InpLen = ty(Op.getOperand(0)).getVectorNumElements();
+  MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
+  SDValue S = DAG.getConstant(InpLen*BitBytes, dl, MVT::i32);
+  SDValue Res = getZero(dl, ByteTy, DAG);
+  for (unsigned i = 0, e = Prefixes.size(); i != e; ++i) {
+    Res = DAG.getNode(HexagonISD::VROR, dl, ByteTy, Res, S);
+    Res = DAG.getNode(ISD::OR, dl, ByteTy, Res, Prefixes[e-i-1]);
+  }
+  return DAG.getNode(HexagonISD::V2Q, dl, VecTy, Res);
+}
+
+SDValue
+HexagonTargetLowering::LowerHvxExtractElement(SDValue Op, SelectionDAG &DAG)
+      const {
+  // Change the type of the extracted element to i32.
+  SDValue VecV = Op.getOperand(0);
+  MVT ElemTy = ty(VecV).getVectorElementType();
+  const SDLoc &dl(Op);
+  SDValue IdxV = Op.getOperand(1);
+  if (ElemTy == MVT::i1)
+    return extractHvxElementPred(VecV, IdxV, dl, ty(Op), DAG);
+
+  return extractHvxElementReg(VecV, IdxV, dl, ty(Op), DAG);
+}
+
+SDValue
+HexagonTargetLowering::LowerHvxInsertElement(SDValue Op, SelectionDAG &DAG)
+      const {
+  const SDLoc &dl(Op);
+  SDValue VecV = Op.getOperand(0);
+  SDValue ValV = Op.getOperand(1);
+  SDValue IdxV = Op.getOperand(2);
+  MVT ElemTy = ty(VecV).getVectorElementType();
+  if (ElemTy == MVT::i1)
+    return insertHvxElementPred(VecV, IdxV, ValV, dl, DAG);
+
+  return insertHvxElementReg(VecV, IdxV, ValV, dl, DAG);
+}
+
+SDValue
 HexagonTargetLowering::LowerHvxExtractSubvector(SDValue Op, SelectionDAG &DAG)
       const {
   SDValue SrcV = Op.getOperand(0);
   MVT SrcTy = ty(SrcV);
-  unsigned SrcElems = SrcTy.getVectorNumElements();
+  MVT DstTy = ty(Op);
   SDValue IdxV = Op.getOperand(1);
   unsigned Idx = cast<ConstantSDNode>(IdxV.getNode())->getZExtValue();
-  MVT DstTy = ty(Op);
-  assert(Idx == 0 || DstTy.getVectorNumElements() % Idx == 0);
+  assert(Idx % DstTy.getVectorNumElements() == 0);
+  (void)Idx;
   const SDLoc &dl(Op);
-  if (Idx == 0)
-    return DAG.getTargetExtractSubreg(Hexagon::vsub_lo, dl, DstTy, SrcV);
-  if (Idx == SrcElems/2)
-    return DAG.getTargetExtractSubreg(Hexagon::vsub_hi, dl, DstTy, SrcV);
-  return SDValue();
+
+  MVT ElemTy = SrcTy.getVectorElementType();
+  if (ElemTy == MVT::i1)
+    return extractHvxSubvectorPred(SrcV, IdxV, dl, DstTy, DAG);
+
+  return extractHvxSubvectorReg(SrcV, IdxV, dl, DstTy, DAG);
 }
 
 SDValue
 HexagonTargetLowering::LowerHvxInsertSubvector(SDValue Op, SelectionDAG &DAG)
       const {
-  // Idx may be variable.
+  // Idx does not need to be a constant.
+  SDValue VecV = Op.getOperand(0);
+  SDValue ValV = Op.getOperand(1);
   SDValue IdxV = Op.getOperand(2);
-  auto *IdxN = dyn_cast<ConstantSDNode>(IdxV.getNode());
-  if (!IdxN)
-    return SDValue();
-  unsigned Idx = IdxN->getZExtValue();
-
-  SDValue DstV = Op.getOperand(0);
-  SDValue SrcV = Op.getOperand(1);
-  MVT DstTy = ty(DstV);
-  MVT SrcTy = ty(SrcV);
-  unsigned DstElems = DstTy.getVectorNumElements();
-  unsigned SrcElems = SrcTy.getVectorNumElements();
-  if (2*SrcElems != DstElems)
-    return SDValue();
 
   const SDLoc &dl(Op);
-  if (Idx == 0)
-    return DAG.getTargetInsertSubreg(Hexagon::vsub_lo, dl, DstTy, DstV, SrcV);
-  if (Idx == SrcElems)
-    return DAG.getTargetInsertSubreg(Hexagon::vsub_hi, dl, DstTy, DstV, SrcV);
-  return SDValue();
+  MVT VecTy = ty(VecV);
+  MVT ElemTy = VecTy.getVectorElementType();
+  if (ElemTy == MVT::i1)
+    return insertHvxSubvectorPred(VecV, ValV, IdxV, dl, DAG);
+
+  return insertHvxSubvectorReg(VecV, ValV, IdxV, dl, DAG);
 }
 
 SDValue

Modified: llvm/trunk/lib/Target/Hexagon/HexagonPatterns.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Hexagon/HexagonPatterns.td?rev=323229&r1=323228&r2=323229&view=diff
==============================================================================
--- llvm/trunk/lib/Target/Hexagon/HexagonPatterns.td (original)
+++ llvm/trunk/lib/Target/Hexagon/HexagonPatterns.td Tue Jan 23 09:53:59 2018
@@ -214,7 +214,7 @@ def NegImm32: SDNodeXForm<imm, [{
 
 // Helpers for type promotions/contractions.
 def I1toI32:  OutPatFrag<(ops node:$Rs), (C2_muxii (i1 $Rs), 1, 0)>;
-def I32toI1:  OutPatFrag<(ops node:$Rs), (i1 (C2_tfrrp (i32 $Rs)))>;
+def I32toI1:  OutPatFrag<(ops node:$Rs), (i1 (C2_cmpgtui (i32 $Rs), (i32 0)))>;
 def ToZext64: OutPatFrag<(ops node:$Rs), (i64 (A4_combineir 0, (i32 $Rs)))>;
 def ToSext64: OutPatFrag<(ops node:$Rs), (i64 (A2_sxtw (i32 $Rs)))>;
 
@@ -422,9 +422,14 @@ def: Pat<(i64 (sext I1:$Pu)),
          (Combinew (C2_muxii PredRegs:$Pu, -1, 0),
                    (C2_muxii PredRegs:$Pu, -1, 0))>;
 
-def: Pat<(i32 (sext I1:$Pu)), (C2_muxii I1:$Pu, -1, 0)>;
-def: Pat<(i32 (zext I1:$Pu)), (C2_muxii I1:$Pu, 1, 0)>;
-def: Pat<(i64 (zext I1:$Pu)), (ToZext64 (C2_muxii I1:$Pu, 1, 0))>;
+def: Pat<(i32   (sext I1:$Pu)),   (C2_muxii I1:$Pu, -1, 0)>;
+def: Pat<(i32   (zext I1:$Pu)),   (C2_muxii I1:$Pu, 1, 0)>;
+def: Pat<(i64   (zext I1:$Pu)),   (ToZext64 (C2_muxii I1:$Pu, 1, 0))>;
+def: Pat<(v2i16 (sext V2I1:$Pu)), (S2_vtrunehb (C2_mask V2I1:$Pu))>;
+def: Pat<(v2i32 (sext V2I1:$Pu)), (C2_mask V2I1:$Pu)>;
+def: Pat<(v4i8  (sext V4I1:$Pu)), (S2_vtrunehb (C2_mask V4I1:$Pu))>;
+def: Pat<(v4i16 (sext V4I1:$Pu)), (C2_mask V4I1:$Pu)>;
+def: Pat<(v8i8  (sext V8I1:$Pu)), (C2_mask V8I1:$Pu)>;
 
 def: Pat<(i64 (sext I32:$Rs)), (A2_sxtw I32:$Rs)>;
 def: Pat<(Zext64 I32:$Rs),     (ToZext64 $Rs)>;
@@ -441,6 +446,12 @@ let AddedComplexity = 20 in {
 def: Pat<(i32 (anyext I1:$Pu)), (C2_muxii I1:$Pu, 1, 0)>;
 def: Pat<(i64 (anyext I1:$Pu)), (ToZext64 (C2_muxii I1:$Pu, 1, 0))>;
 
+def: Pat<(v8i8  (zext   V8I1:$Pu)),  (C2_mask V8I1:$Pu)>;
+def: Pat<(v4i16 (zext   V4I1:$Pu)),  (C2_mask V4I1:$Pu)>;
+def: Pat<(v2i32 (zext   V2I1:$Pu)),  (C2_mask V2I1:$Pu)>;
+def: Pat<(v4i8  (zext   V4I1:$Pu)),  (LoReg (C2_mask V4I1:$Pu))>;
+def: Pat<(v2i16 (zext   V2I1:$Pu)),  (LoReg (C2_mask V2I1:$Pu))>;
+
 def: Pat<(v4i16 (zext   V4I8:$Rs)),  (S2_vzxtbh V4I8:$Rs)>;
 def: Pat<(v2i32 (zext   V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>;
 def: Pat<(v4i16 (anyext V4I8:$Rs)),  (S2_vzxtbh V4I8:$Rs)>;
@@ -475,25 +486,40 @@ def: Pat<(v2i16 (trunc V2I32:$Rs)),
 //
 
 def: Pat<(not I1:$Ps),      (C2_not I1:$Ps)>;
+def: Pat<(not V8I1:$Ps),    (C2_not V8I1:$Ps)>;
 def: Pat<(add I1:$Ps, -1),  (C2_not I1:$Ps)>;
 
-def: OpR_RR_pat<C2_and,   And,       i1, I1>;
-def: OpR_RR_pat<C2_or,    Or,        i1, I1>;
-def: OpR_RR_pat<C2_xor,   Xor,       i1, I1>;
-def: OpR_RR_pat<C2_andn,  Not2<And>, i1, I1>;
-def: OpR_RR_pat<C2_orn,   Not2<Or>,  i1, I1>;
+multiclass BoolOpR_RR_pat<InstHexagon MI, PatFrag Op> {
+  def: OpR_RR_pat<MI, Op,   i1,   I1>;
+  def: OpR_RR_pat<MI, Op, v2i1, V2I1>;
+  def: OpR_RR_pat<MI, Op, v4i1, V4I1>;
+  def: OpR_RR_pat<MI, Op, v8i1, V8I1>;
+}
+
+multiclass BoolAccRRR_pat<InstHexagon MI, PatFrag AccOp, PatFrag Op> {
+  def: AccRRR_pat<MI, AccOp, Op,   I1,   I1>;
+  def: AccRRR_pat<MI, AccOp, Op, V2I1, V2I1>;
+  def: AccRRR_pat<MI, AccOp, Op, V4I1, V4I1>;
+  def: AccRRR_pat<MI, AccOp, Op, V8I1, V8I1>;
+}
+
+defm: BoolOpR_RR_pat<C2_and,   And>;
+defm: BoolOpR_RR_pat<C2_or,    Or>;
+defm: BoolOpR_RR_pat<C2_xor,   Xor>;
+defm: BoolOpR_RR_pat<C2_andn,  Not2<And>>;
+defm: BoolOpR_RR_pat<C2_orn,   Not2<Or>>;
 
 // op(Ps, op(Pt, Pu))
-def: AccRRR_pat<C4_and_and,   And, Su<And>,       I1, I1>;
-def: AccRRR_pat<C4_and_or,    And, Su<Or>,        I1, I1>;
-def: AccRRR_pat<C4_or_and,    Or,  Su<And>,       I1, I1>;
-def: AccRRR_pat<C4_or_or,     Or,  Su<Or>,        I1, I1>;
+defm: BoolAccRRR_pat<C4_and_and,   And, Su<And>>;
+defm: BoolAccRRR_pat<C4_and_or,    And, Su<Or>>;
+defm: BoolAccRRR_pat<C4_or_and,    Or,  Su<And>>;
+defm: BoolAccRRR_pat<C4_or_or,     Or,  Su<Or>>;
 
 // op(Ps, op(Pt, ~Pu))
-def: AccRRR_pat<C4_and_andn,  And, Su<Not2<And>>, I1, I1>;
-def: AccRRR_pat<C4_and_orn,   And, Su<Not2<Or>>,  I1, I1>;
-def: AccRRR_pat<C4_or_andn,   Or,  Su<Not2<And>>, I1, I1>;
-def: AccRRR_pat<C4_or_orn,    Or,  Su<Not2<Or>>,  I1, I1>;
+defm: BoolAccRRR_pat<C4_and_andn,  And, Su<Not2<And>>>;
+defm: BoolAccRRR_pat<C4_and_orn,   And, Su<Not2<Or>>>;
+defm: BoolAccRRR_pat<C4_or_andn,   Or,  Su<Not2<And>>>;
+defm: BoolAccRRR_pat<C4_or_orn,    Or,  Su<Not2<Or>>>;
 
 
 // --(5) Compare ---------------------------------------------------------
@@ -938,6 +964,7 @@ def: Pat<(v8i8 (HexagonVSPLAT I32:$Rs)),
 def: Pat<(v8i8 (HexagonVSPLAT I32:$Rs)),
          (Combinew (S2_vsplatrb I32:$Rs), (S2_vsplatrb I32:$Rs))>;
 
+
 // --(8) Shift/permute ---------------------------------------------------
 //
 
@@ -1568,7 +1595,6 @@ def: Pat<(i32 (ctpop I32:$Rs)),   (S5_po
 def: Pat<(bitreverse I32:$Rs),    (S2_brev I32:$Rs)>;
 def: Pat<(bitreverse I64:$Rss),   (S2_brevp I64:$Rss)>;
 
-
 let AddedComplexity = 20 in { // Complexity greater than and/or/xor
   def: Pat<(and I32:$Rs, IsNPow2_32:$V),
            (S2_clrbit_i IntRegs:$Rs, (LogN2_32 $V))>;
@@ -1631,6 +1657,15 @@ let AddedComplexity = 10 in   // Complex
 def: Pat<(i1 (seteq (and I32:$Rs, I32:$Rt), IntRegs:$Rt)),
          (C2_bitsset IntRegs:$Rs, IntRegs:$Rt)>;
 
+def SDTTestBit:
+  SDTypeProfile<1, 2, [SDTCisVT<0, i1>, SDTCisVT<1, i32>, SDTCisVT<2, i32>]>;
+def HexagonTSTBIT: SDNode<"HexagonISD::TSTBIT", SDTTestBit>;
+
+def: Pat<(HexagonTSTBIT I32:$Rs, u5_0ImmPred:$u5),
+         (S2_tstbit_i I32:$Rs, imm:$u5)>;
+def: Pat<(HexagonTSTBIT I32:$Rs, I32:$Rt),
+         (S2_tstbit_r I32:$Rs, I32:$Rt)>;
+
 let AddedComplexity = 20 in {   // Complexity greater than cmp reg-imm.
   def: Pat<(i1 (seteq (and (shl 1, u5_0ImmPred:$u5), I32:$Rs), 0)),
            (S4_ntstbit_i I32:$Rs, imm:$u5)>;
@@ -3005,6 +3040,13 @@ let Predicates = [UseHVX] in {
   def: Pat<(HexagonVINSERTW0 HVI32:$Vu, I32:$Rt),
            (V6_vinsertwr HvxVR:$Vu, I32:$Rt)>;
 
+  def: Pat<(VecI8 (HexagonVSPLAT I32:$Rs)),
+           (V6_lvsplatw (S2_vsplatrb I32:$Rs))>;
+  def: Pat<(VecI16 (HexagonVSPLAT I32:$Rs)),
+           (V6_lvsplatw (A2_combine_ll I32:$Rs, I32:$Rs))>;
+  def: Pat<(VecI32 (HexagonVSPLAT I32:$Rs)),
+           (V6_lvsplatw I32:$Rs)>;
+
   def: Pat<(add HVI8:$Vs,  HVI8:$Vt),   (V6_vaddb HvxVR:$Vs, HvxVR:$Vt)>;
   def: Pat<(add HVI16:$Vs, HVI16:$Vt),  (V6_vaddh HvxVR:$Vs, HvxVR:$Vt)>;
   def: Pat<(add HVI32:$Vs, HVI32:$Vt),  (V6_vaddw HvxVR:$Vs, HvxVR:$Vt)>;

Modified: llvm/trunk/lib/Target/Hexagon/HexagonRegisterInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Hexagon/HexagonRegisterInfo.td?rev=323229&r1=323228&r2=323229&view=diff
==============================================================================
--- llvm/trunk/lib/Target/Hexagon/HexagonRegisterInfo.td (original)
+++ llvm/trunk/lib/Target/Hexagon/HexagonRegisterInfo.td Tue Jan 23 09:53:59 2018
@@ -242,7 +242,7 @@ def VecQ32
 // FIXME: the register order should be defined in terms of the preferred
 // allocation order...
 //
-def IntRegs : RegisterClass<"Hexagon", [i32, f32, v32i1, v4i8, v2i16], 32,
+def IntRegs : RegisterClass<"Hexagon", [i32, f32, v4i8, v2i16], 32,
   (add (sequence "R%u", 0, 9), (sequence "R%u", 12, 28),
        R10, R11, R29, R30, R31)>;
 
@@ -254,8 +254,7 @@ def GeneralSubRegs : RegisterClass<"Hexa
 def IntRegsLow8 : RegisterClass<"Hexagon", [i32], 32,
   (add R7, R6, R5, R4, R3, R2, R1, R0)> ;
 
-def DoubleRegs : RegisterClass<"Hexagon",
-  [i64, f64, v64i1, v8i8, v4i16, v2i32], 64,
+def DoubleRegs : RegisterClass<"Hexagon", [i64, f64, v8i8, v4i16, v2i32], 64,
   (add (sequence "D%u", 0, 4), (sequence "D%u", 6, 13), D5, D14, D15)>;
 
 def GeneralDoubleLow8Regs : RegisterClass<"Hexagon", [i64], 64,

Added: llvm/trunk/test/CodeGen/Hexagon/autohvx/isel-concat-vectors-bool.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Hexagon/autohvx/isel-concat-vectors-bool.ll?rev=323229&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/Hexagon/autohvx/isel-concat-vectors-bool.ll (added)
+++ llvm/trunk/test/CodeGen/Hexagon/autohvx/isel-concat-vectors-bool.ll Tue Jan 23 09:53:59 2018
@@ -0,0 +1,20 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+
+; Check for successful compilation.
+; CHECK: sfcmp
+
+target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048"
+target triple = "hexagon"
+
+define void @fred() #0 {
+b0:
+  %v1 = fcmp olt <8 x float> undef, zeroinitializer
+  %v2 = load <8 x float>, <8 x float>* null, align 8
+  %v3 = fcmp olt <8 x float> %v2, undef
+  %v4 = and <8 x i1> %v1, %v3
+  %v5 = zext <8 x i1> %v4 to <8 x i32>
+  store <8 x i32> %v5, <8 x i32>* undef, align 8
+  unreachable
+}
+
+attributes #0 = { noinline norecurse nounwind "target-cpu"="hexagonv60" "target-features"="+hvx-length64b,+hvxv60" }

Added: llvm/trunk/test/CodeGen/Hexagon/vect/vect-bool-basic-compile.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Hexagon/vect/vect-bool-basic-compile.ll?rev=323229&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/Hexagon/vect/vect-bool-basic-compile.ll (added)
+++ llvm/trunk/test/CodeGen/Hexagon/vect/vect-bool-basic-compile.ll Tue Jan 23 09:53:59 2018
@@ -0,0 +1,65 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+
+; Check that we can compile these functions. Don't check anything else for now.
+; CHECK-LABEL: test_0:
+; CHECK: tstbit
+; CHECK-LABEL: test_1:
+; CHECK: tstbit
+; CHECK-LABEL: test_2:
+; CHECK: tstbit
+
+define i32 @test_0(i32 %a0, i32 %a1) #0 {
+  %t0 = trunc i32 %a0 to i1
+  %t1 = trunc i32 %a1 to i1
+
+  %t2 = insertelement <2 x i1> undef, i1 %t0, i32 0
+  %t3 = insertelement <2 x i1> %t2, i1 %t1, i32 1
+
+  %t4 = shufflevector <2 x i1> %t3, <2 x i1> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %t5 = bitcast <8 x i1> %t4 to i8
+  %t6 = zext i8 %t5 to i32
+  ret i32 %t6
+}
+
+define i32 @test_1(i32 %a0, i32 %a1, i32 %a2, i32 %a3) #0 {
+  %t0 = trunc i32 %a0 to i1
+  %t1 = trunc i32 %a1 to i1
+  %t2 = trunc i32 %a2 to i1
+  %t3 = trunc i32 %a3 to i1
+
+  %t4 = insertelement <4 x i1> undef, i1 %t0, i32 0
+  %t5 = insertelement <4 x i1> %t4, i1 %t1, i32 1
+  %t6 = insertelement <4 x i1> %t5, i1 %t2, i32 2
+  %t7 = insertelement <4 x i1> %t6, i1 %t3, i32 3
+
+  %t8 = shufflevector <4 x i1> %t7, <4 x i1> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+  %t9 = bitcast <8 x i1> %t8 to i8
+  %ta = zext i8 %t9 to i32
+  ret i32 %ta
+}
+
+define i32 @test_2(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7) #0 {
+  %t0 = trunc i32 %a0 to i1
+  %t1 = trunc i32 %a1 to i1
+  %t2 = trunc i32 %a2 to i1
+  %t3 = trunc i32 %a3 to i1
+  %t4 = trunc i32 %a4 to i1
+  %t5 = trunc i32 %a5 to i1
+  %t6 = trunc i32 %a6 to i1
+  %t7 = trunc i32 %a7 to i1
+
+  %t8 = insertelement <8 x i1> undef, i1 %t0, i32 0
+  %t9 = insertelement <8 x i1> %t8, i1 %t1, i32 1
+  %ta = insertelement <8 x i1> %t9, i1 %t2, i32 2
+  %tb = insertelement <8 x i1> %ta, i1 %t3, i32 3
+  %tc = insertelement <8 x i1> %tb, i1 %t4, i32 4
+  %td = insertelement <8 x i1> %tc, i1 %t5, i32 5
+  %te = insertelement <8 x i1> %td, i1 %t6, i32 6
+  %tf = insertelement <8 x i1> %te, i1 %t7, i32 7
+
+  %tg = bitcast <8 x i1> %tf to i8
+  %th = zext i8 %tg to i32
+  ret i32 %th
+}
+
+attributes #0 = { nounwind readnone }

Added: llvm/trunk/test/CodeGen/Hexagon/vect/vect-bool-isel-crash.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Hexagon/vect/vect-bool-isel-crash.ll?rev=323229&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/Hexagon/vect/vect-bool-isel-crash.ll (added)
+++ llvm/trunk/test/CodeGen/Hexagon/vect/vect-bool-isel-crash.ll Tue Jan 23 09:53:59 2018
@@ -0,0 +1,31 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+
+; Check for a successful compilation.
+; CHECK: jumpr r31
+
+target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048"
+target triple = "hexagon"
+
+define void @fred() #0 {
+b0:
+  %v1 = icmp sgt <8 x i32> undef, undef
+  %v2 = extractelement <8 x i1> %v1, i32 4
+  %v3 = select i1 %v2, i32 0, i32 undef
+  %v4 = add nsw i32 %v3, 0
+  %v5 = add nsw i32 0, %v4
+  %v6 = extractelement <8 x i1> %v1, i32 6
+  %v7 = select i1 %v6, i32 0, i32 undef
+  %v8 = add nsw i32 %v7, %v5
+  %v9 = add nsw i32 0, %v8
+  %v10 = add nsw i32 0, %v9
+  %v11 = load i32, i32* undef, align 4
+  %v12 = mul nsw i32 %v11, %v10
+  %v13 = add nsw i32 %v12, 16384
+  %v14 = ashr i32 %v13, 15
+  %v15 = select i1 undef, i32 %v14, i32 255
+  %v16 = trunc i32 %v15 to i8
+  store i8 %v16, i8* undef, align 1
+  ret void
+}
+
+attributes #0 = { norecurse nounwind }

Added: llvm/trunk/test/CodeGen/Hexagon/vect/zext-v4i1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Hexagon/vect/zext-v4i1.ll?rev=323229&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/Hexagon/vect/zext-v4i1.ll (added)
+++ llvm/trunk/test/CodeGen/Hexagon/vect/zext-v4i1.ll Tue Jan 23 09:53:59 2018
@@ -0,0 +1,39 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+
+; Check that this compiles successfully.
+; CHECK: vcmph.eq
+
+target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048"
+target triple = "hexagon"
+
+define void @fred() #0 {
+b0:
+  switch i32 undef, label %b14 [
+    i32 5, label %b2
+    i32 3, label %b1
+  ]
+
+b1:                                               ; preds = %b0
+  br label %b14
+
+b2:                                               ; preds = %b0
+  %v3 = icmp eq <8 x i16> undef, zeroinitializer
+  %v4 = zext <8 x i1> %v3 to <8 x i16>
+  %v5 = add <8 x i16> zeroinitializer, %v4
+  %v6 = add <8 x i16> %v5, zeroinitializer
+  %v7 = add <8 x i16> %v6, zeroinitializer
+  %v8 = extractelement <8 x i16> %v7, i32 0
+  %v9 = add i16 %v8, 0
+  %v10 = add i16 %v9, 0
+  %v11 = add i16 %v10, 0
+  %v12 = icmp eq i16 %v11, 11
+  br i1 %v12, label %b14, label %b13
+
+b13:                                              ; preds = %b2
+  unreachable
+
+b14:                                              ; preds = %b2, %b1, %b0
+  ret void
+}
+
+attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx-length64b,+hvxv60" }